diff --git a/.azure-pipelines/scripts/ut/env_setup.sh b/.azure-pipelines/scripts/ut/env_setup.sh
index 0ea8e47029b..19f01b8a63c 100644
--- a/.azure-pipelines/scripts/ut/env_setup.sh
+++ b/.azure-pipelines/scripts/ut/env_setup.sh
@@ -92,7 +92,7 @@ pip install horovod
 pip install transformers
 
 if [[ $(echo "${test_case}" | grep -c "others") != 0 ]];then
-    pip install tf_slim xgboost accelerate==0.21.0
+    pip install tf_slim xgboost accelerate==0.21.0 peft
 elif [[ $(echo "${test_case}" | grep -c "nas") != 0 ]]; then
     pip install dynast==1.6.0rc1
 elif [[ $(echo "${test_case}" | grep -c "tf pruning") != 0 ]]; then
diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py
index 6548eb36b87..b6c4f944da6 100644
--- a/neural_compressor/adaptor/pytorch.py
+++ b/neural_compressor/adaptor/pytorch.py
@@ -1833,7 +1833,7 @@ def _apply_pre_optimization(self, model, tune_cfg, recover=False):
                 absorb_layer = op_name
                 absorbed_layer = info["absorbed_layer"]
                 input_minmax = info["input_minmax"]
-                weight_max = info["weight_max"]
+                weight_max = info["weight_max"].clamp(min=1e-5)
                 abs_input_max = torch.max(torch.abs(input_minmax[0]), torch.abs(input_minmax[1]))
                 input_power = torch.pow(abs_input_max, alpha)
                 weight_power = torch.pow(weight_max, 1 - alpha)
@@ -1858,11 +1858,12 @@ def qdq_quantize(self, model, tune_cfg):
         """
         q_model = model._model
         from .torch_utils.model_wrapper import QDQLinear, SQLinearWrapper
-        from .torch_utils.util import fetch_module, set_module
+        from .torch_utils.smooth_quant import get_module, set_module
 
         smoothquant_scale_info = {}
         fallback_op_name_list = []
         stats_result = {}
+        stats_result["Linear(failed when SQ)"] = {"INT8(QDQ)": 0, "BF16": 0, "FP32": 0}
         for (op_name, op_type), qconfig in tune_cfg["op"].items():
             if op_type == "Linear" and qconfig["weight"]["dtype"] != "int8":
                 fallback_op_name_list.append(op_name)
@@ -1876,13 +1877,16 @@ def qdq_quantize(self, model, tune_cfg):
                 alpha = info["alpha"]
                 absorbed_layer = info["absorbed_layer"]
                 input_minmax = info["input_minmax"]
-                weight_max = info["weight_max"]
+                weight_max = info["weight_max"].clamp(min=1e-5)
                 abs_input_max = torch.max(torch.abs(input_minmax[0]), torch.abs(input_minmax[1]))
                 input_power = torch.pow(abs_input_max, alpha)
                 weight_power = torch.pow(weight_max, 1 - alpha)
                 scale = torch.clip(input_power / weight_power, min=1e-5)
+                if torch.isnan(scale).any() or torch.isinf(scale).any():
+                    stats_result["Linear(failed when SQ)"]["FP32"] += 1
+                    continue  # for peft model,lora_B weights is 0.
                 for op_name in absorbed_layer:
-                    module = fetch_module(q_model, op_name)
+                    module = get_module(q_model, op_name)
                     new_module = SQLinearWrapper(module, 1.0 / scale, input_minmax, alpha)
                     set_module(q_model, op_name, new_module)
                     logger.debug(f"Current SmoothQuant alpha of {op_name} is {alpha}")
@@ -2858,7 +2862,7 @@ def _dump_model_op_stats(self, tune_cfg):
             output_data, header="Mixed Precision Statistics", field_names=["Op Type", "Total", "INT8", "BF16", "FP32"]
         ).print_stat()
 
-    def _cfg_to_qconfig(self, tune_cfg):
+    def _cfg_to_qconfig(self, tune_cfg, smooth_quant=False):
         """Convert tune configure to quantization config for each op.
 
         Args:
@@ -2949,7 +2953,7 @@ def _cfg_to_qconfig(self, tune_cfg):
         else:
             op_infos = copy.deepcopy(self.op_infos_from_cfgs)
             self.cfgs = torch_utils.util.check_cfg_and_qconfig(
-                tune_cfg["op"], self.cfgs, op_infos, self.output_tensor_id_op_name
+                tune_cfg["op"], self.cfgs, op_infos, self.output_tensor_id_op_name, smooth_quant
             )
 
             with open(self.ipex_config_path, "w") as write_f:
@@ -3112,7 +3116,7 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops):
                         smooth_quant_args = self.recipes.get("smooth_quant_args", {})
                         folding = smooth_quant_args.get("folding", False)
                         if not folding:
-                            if self.sq_minmax_init:
+                            if self.sq_minmax_init or self.version.release >= Version("2.2").release:
                                 from torch.ao.quantization.observer import MinMaxObserver
 
                                 static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
@@ -3268,19 +3272,20 @@ def qdq_quantize(self, model, q_model, tune_cfg, dataloader, q_func):
         if sq_max_info:
             smoothquant_scale_info = {}
             from .torch_utils.model_wrapper import SQLinearWrapper
-            from .torch_utils.util import fetch_module
+            from .torch_utils.smooth_quant import get_module
 
             for _, info in sq_max_info.items():
                 alpha = info["alpha"]
                 absorbed_layer = info["absorbed_layer"]
                 input_minmax = info["input_minmax"]
-                weight_max = info["weight_max"]
+                # for peft model,lora_B weights is 0.
+                weight_max = info["weight_max"].clamp(min=1e-5)
                 abs_input_max = torch.max(torch.abs(input_minmax[0]), torch.abs(input_minmax[1]))
                 input_power = torch.pow(abs_input_max, alpha)
                 weight_power = torch.pow(weight_max, 1 - alpha)
                 scale = torch.clip(input_power / weight_power, min=1e-5)
                 for op_name in absorbed_layer:
-                    module = copy.deepcopy(fetch_module(q_model._model, op_name))
+                    module = copy.deepcopy(get_module(q_model._model, op_name))
                     new_module = SQLinearWrapper(module, 1.0 / scale, input_minmax, alpha)
                     weight_scale = new_module._get_weight_scale()
                     smoothquant_scale_info[op_name] = {
@@ -3296,7 +3301,7 @@ def qdq_quantize(self, model, q_model, tune_cfg, dataloader, q_func):
         # Check save_qconf_summary part is a workaround for IPEX bug.
         # Sometimes the prepared model from get_op_capablitiy loss this attribute
         if not hasattr(model._model, "save_qconf_summary") or not hasattr(model._model, "load_qconf_summary"):
-            if self.sq_minmax_init:
+            if self.sq_minmax_init or self.version.release >= Version("2.2").release:
                 from torch.ao.quantization.observer import MinMaxObserver
 
                 static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
@@ -3313,10 +3318,14 @@ def qdq_quantize(self, model, q_model, tune_cfg, dataloader, q_func):
                     model._model, static_qconfig, example_inputs=self.example_inputs, inplace=inplace
                 )
 
-        # TODO: update_sq_scale is used to update observer, should fuse in _cfg_to_qconfig
+        # The IPEX SmoothQuant observer can only use save/load_qconf_summary once.
+        # The save_qconf_summary API will freeze the scale used in model and calibration won't work anymore.
+        # The load_qconf_summary will overwrite the scales used in model but only work in the first call.
+        # Here, we use INC collected scale for Linear and set normal observer instead of SQObserver \
+        # to make sure calibration works for other ops, like add, bmm.
         from .torch_utils.util import update_sq_scale
 
-        self._cfg_to_qconfig(tune_cfg)
+        self._cfg_to_qconfig(tune_cfg, smooth_quant=True)
         update_sq_scale(self.ipex_config_path, smoothquant_scale_info)
         model._model.load_qconf_summary(qconf_summary=self.ipex_config_path)
 
@@ -3337,10 +3346,6 @@ def qdq_quantize(self, model, q_model, tune_cfg, dataloader, q_func):
                 + "using scale info from SmoothQuant for Linear and "
                 + "one iter calibration for other ops."
             )
-            # update ipex_config.json with smoothquant_scale_info
-            model._model.save_qconf_summary(qconf_summary=self.ipex_config_path)
-            update_sq_scale(self.ipex_config_path, smoothquant_scale_info)
-            model._model.load_qconf_summary(qconf_summary=self.ipex_config_path)
 
         self._ipex_post_quant_process(model, q_model, dataloader, inplace=inplace)
 
diff --git a/neural_compressor/adaptor/pytorch_ipex.yaml b/neural_compressor/adaptor/pytorch_ipex.yaml
index c6777bfe7b5..bdeb96a181c 100644
--- a/neural_compressor/adaptor/pytorch_ipex.yaml
+++ b/neural_compressor/adaptor/pytorch_ipex.yaml
@@ -48,9 +48,9 @@
                         },
             'activation': {
                         'dtype': ['uint8'],
-                        'scheme': ['asym'],
+                        'scheme': ['asym', 'sym'],
                         'granularity': ['per_tensor'],
-                        'algorithm': ['minmax']
+                        'algorithm': ['minmax', 'kl']
                         }
                     },
           },
diff --git a/neural_compressor/adaptor/torch_utils/smooth_quant.py b/neural_compressor/adaptor/torch_utils/smooth_quant.py
index 7b59b3ce3e5..b91b95e8563 100644
--- a/neural_compressor/adaptor/torch_utils/smooth_quant.py
+++ b/neural_compressor/adaptor/torch_utils/smooth_quant.py
@@ -182,6 +182,12 @@ def get_module(model, key):
     for name in name_list:
         if hasattr(module, name):
             module = getattr(module, name)
+        elif hasattr(module, "sq_linear"):  # for peft models
+            module = getattr(module, "sq_linear")
+            module = getattr(module, name)
+        elif hasattr(module, "orig_layer"):  # for peft models and auto alpha
+            module = getattr(module, "orig_layer")
+            module = getattr(module, name)
         else:
             module = module
     return module
@@ -200,8 +206,19 @@ def set_module(model, key, new_module):
     for name in name_list[:-1]:
         if hasattr(module, name):
             module = getattr(module, name)
+        elif hasattr(module, ("sq_linear")):  # for peft models that Linears are contained in Linear
+            module = getattr(module, "sq_linear")
+            module = getattr(module, name)
+        elif hasattr(module, ("orig_layer")):  # for peft models and auto alpha
+            module = getattr(module, "orig_layer")
+            module = getattr(module, name)
         else:
             module = module
+
+    if hasattr(module, "sq_linear") and name_list[-1] != "sq_linear":  # for peft models
+        module = getattr(module, "sq_linear")
+    if hasattr(module, "orig_layer") and name_list[-1] != "orig_layer":  # for peft models and auto alpha
+        module = getattr(module, "orig_layer")
     setattr(module, name_list[-1], new_module)
 
 
@@ -222,7 +239,7 @@ def cal_scale(input_max, weights, alpha, scale_type="orig"):
 class WrapperLayer(torch.nn.Module):
     def __init__(self, layer, input_min, input_max, save_q_input=False):
         super(WrapperLayer, self).__init__()
-        self.orig_layer = layer
+        self.add_module("orig_layer", layer)  # set orig_layer in get/set_module
         self.quant = False
         self.q_input = None
         self.fp32_output = None
@@ -281,7 +298,7 @@ class TorchSmoothQuant:
     to recover the weights if needed
     """
 
-    def __init__(self, model, dataloader, example_inputs=None, q_func=None, traced_model=None):
+    def __init__(self, model, dataloader=None, example_inputs=None, q_func=None, traced_model=None):
         """
         :param model: Torch model :param dataloader: Calibration dataloader :param traced_model: A specific model
         shares the same architecture as the model and could be traced by torch.jit. If not supplied, we use model
@@ -372,7 +389,7 @@ def _calibrate(self, absorb_to_layer, calib_iter, percentile):
         ##hook all the module
         hook_modules = {}
         for n, module in self.model.named_modules():
-            if module.__class__.__name__.split(".")[-1] in self.op_types:
+            if isinstance(module, tuple(self.op_types)):
                 hook_modules[n] = module
 
         self._add_min_max_observer(hook_modules, percentile)
@@ -547,6 +564,8 @@ def _cal_scales(self, absorb_to_layer, input_maxes, alpha=0.5, tuning=False):
                 alpha_tmp = alpha
             elif isinstance(alpha, dict):
                 alpha_tmp = alpha[key]
+            else:
+                alpha_tmp = alpha
             if alpha_tmp < 0:
                 scale = torch.ones((1), device=self.device)
             else:
@@ -670,7 +689,7 @@ def _get_sq_layer_names(self):
     def _get_all_hook_module_names(self):
         module_names = []
         for n, module in self.model.named_modules():
-            if module.__class__.__name__.split(".")[-1] in self.op_types:
+            if isinstance(module, tuple(self.op_types)):
                 module_names.append(n)
         return module_names
 
@@ -680,18 +699,18 @@ def _qdq_model_wrapper_for_auto(self, save_q_input=False):
         module_names = self._get_all_hook_module_names()
         self.to_unwrap_module_names = module_names
         for name in module_names:
+            if name not in self.input_mins:  # skip module if it's not used in calibration
+                continue
             module = get_module(self.model, name)
-            set_module(
-                self.model,
-                name,
-                WrapperLayer(module, self.input_mins[name], self.input_maxes[name], save_q_input=save_q_input),
-            )
+            new_module = WrapperLayer(module, self.input_mins[name], self.input_maxes[name], save_q_input=save_q_input)
+            set_module(self.model, name, new_module)
 
     def _qdq_model_unwrapper_for_auto(self):
         module_names = self.to_unwrap_module_names
         for name in module_names:
             module = get_module(self.model, name)
-            # print(name, flush=True)
+            if not hasattr(module, "orig_layer"):  # skip module if it's not used in calibration
+                continue
             set_module(self.model, name, module.orig_layer)
 
     def _change_qdq_for_auto(self, enable=True):
@@ -699,6 +718,8 @@ def _change_qdq_for_auto(self, enable=True):
         for name in module_names:
             name = name.split(".orig_layer")[0]
             module = get_module(self.model, name)
+            if not hasattr(module, "orig_layer"):  # skip module if it's not used in calibration
+                continue
             if enable:
                 module.enable_quant()
             else:
@@ -921,7 +942,7 @@ def transform(
         alpha=0.5,
         folding=False,
         percentile=100,
-        op_types=["Linear", "Conv2d"],
+        op_types=[torch.nn.Linear, torch.nn.Conv2d],
         scales_per_op=False,
         calib_iter=100,
         auto_alpha_args={"alpha_min": 0.0, "alpha_max": 1.0, "alpha_step": 0.1, "shared_criterion": "mean"},
@@ -953,12 +974,13 @@ def transform(
         self.recover()
         need_calibration = self._check_need_calibration(alpha, percentile, op_types, scales_per_op, calib_iter)
         with torch.no_grad():
+            str_op_types = [i.__name__ for i in op_types]
             input_maxes_abs = self.input_maxes_abs
             if need_calibration:  ##avoid multiple calibaration during tuning if the only difference is alpha
                 if self.insert_mul:
-                    self.self_absorb_layers = self._get_all_layer_names()  # TODO: only support linear now.
+                    self.self_absorb_layers = self._get_all_layer_names(op_types)  # TODO: only support linear now.
                     # fetch modules with the same input
-                    group_modules = self._trace(op_types, skip_unsupported_layers=False)
+                    group_modules = self._trace(str_op_types, skip_unsupported_layers=False)
                     if group_modules is not None:
                         # use one input for qkv
                         for k, v in group_modules.items():
@@ -969,7 +991,7 @@ def transform(
                         logger.debug(f"self_absorb_layers:{self.self_absorb_layers}")
                 if self.allow_absorb:
                     self.absorb_to_layer, no_absorb_layers = self._trace(
-                        op_types
+                        str_op_types
                     )  ##TODO we need to insert mul layer for no_absorb_layers later
                     if self.absorb_to_layer is None and no_absorb_layers is None:
                         return self.model
@@ -1061,7 +1083,7 @@ def recover(self):
             self.weight_scale_info = {}  ##clear the data
             self.absorb_scales_info = {}
 
-    def _get_all_layer_names(self, op_types=["Linear"]):
+    def _get_all_layer_names(self, op_types=[torch.nn.Linear]):
         """Try the model to find the layers which can be smooth quantized.
 
         :param op_types: The op types to be smooth quantized
@@ -1069,20 +1091,10 @@ def _get_all_layer_names(self, op_types=["Linear"]):
         self_absorb_layer: A dict, absorb layer name (itself): layers to be smooth quantized
         """
         self_absorb_layer = {}
+        op_types = [torch.nn.Linear]  # TODO： only support SQLinearWrapper
         for name, module in self.model.named_modules():
-            for op_type in op_types:
-                if op_type == str(module.__class__.__name__):
-                    self_absorb_layer[name] = [name]
-        # remove duplicate Linear if Linear is wrapped by Linear
-        key_list = list(self_absorb_layer.keys())
-        key_list.sort()
-        duplicate_list = []
-        for i, k1 in enumerate(key_list):
-            for k2 in key_list[i + 1 :]:
-                if k1 in k2:
-                    duplicate_list.append(k1)
-        for i in duplicate_list:
-            self_absorb_layer.pop(i)
+            if isinstance(module, tuple(op_types)):
+                self_absorb_layer[name] = [name]
         return self_absorb_layer
 
     def _get_example_input(self):
@@ -1334,46 +1346,3 @@ def remove_unsupported_layers(self, model, absorb_to_layer, no_absorb_layers):
             if supported:
                 res[key] = absorb_to_layer[key]
         return res
-
-
-def update_sq_scale(ipex_config_path, smoothquant_scale_info):
-    """Update ipex_config.json with smoothquant scale info generated by our algorithm.
-
-    Args:
-        ipex_config_path (str): a path to temporary ipex_config.json file.
-        smoothquant_scale_info (dict): a dict contains smoothquant scale info.
-    """
-    with open(ipex_config_path, "r") as f:
-        ipex_config = json.load(f)
-        for module_name, v in ipex_config.items():
-            if "q_op_infos" in v and v["q_op_infos"]:
-                for op_num, v1 in v["q_op_infos"].items():
-                    # update alpha data instead of updating weight scale
-                    op_name = v1["fqn"]  # fqn always exists even it's empty.
-                    if op_name in smoothquant_scale_info:
-                        # observers were overridden by the fallback step, setting it back.
-                        v1["activation_observer"] = {
-                            "name": "SmoothQuantActivationObserver",
-                            "smooth_quant_enabled": False,
-                            "dtype": "torch.quint8",
-                            "qscheme": "torch.per_tensor_affine",
-                            "reduce_range": False,
-                            "quant_min": 0,
-                            "quant_max": 255,
-                            "alpha": smoothquant_scale_info[op_name]["alpha"],
-                        }
-                        v1["weight_observer"] = {
-                            "name": "SmoothQuantWeightObserver",
-                            "smooth_quant_enabled": False,
-                            "dtype": "torch.qint8",
-                            "qscheme": "torch.per_channel_symmetric",
-                            "reduce_range": False,
-                            "quant_min": -128,
-                            "quant_max": 127,
-                            "alpha": smoothquant_scale_info[op_name]["alpha"],  # only update alpha
-                        }
-        f.close()
-    # overwrite ipex_config_path
-    with open(ipex_config_path, "w") as f1:
-        json.dump(ipex_config, f1, indent=4)
-        f1.close()
diff --git a/neural_compressor/adaptor/torch_utils/util.py b/neural_compressor/adaptor/torch_utils/util.py
index 9b63e51f03b..907111b00f6 100644
--- a/neural_compressor/adaptor/torch_utils/util.py
+++ b/neural_compressor/adaptor/torch_utils/util.py
@@ -151,7 +151,7 @@ def append_attr(fx_model, model, fx_white_list=[]):
     return fx_model
 
 
-def generate_activation_observer(scheme, algorithm):  # pragma: no cover
+def generate_activation_observer(scheme, algorithm, smooth_quant=False, smooth_quant_enable=False):  # pragma: no cover
     """This is a helper method to generate an activation observer.
 
     Args:
@@ -179,6 +179,46 @@ def generate_activation_observer(scheme, algorithm):  # pragma: no cover
         "quant_min": 0,
         "quant_max": 255,
     }
+    smoothquant_kl_activation_observer = {
+        "name": "SmoothQuantActivationObserver",
+        "smooth_quant_enabled": smooth_quant_enable,
+        "dtype": "torch.quint8",
+        "qscheme": "torch.per_tensor_affine",
+        "reduce_range": False,
+        "quant_min": 0,
+        "quant_max": 255,
+        "alpha": 0.5,
+        "act_observer": kl_activation_observer,
+        "act_ic_observer": {
+            "name": "PerChannelMinMaxObserver",
+            "ch_axis": -1,
+            "dtype": "torch.quint8",
+            "qscheme": "torch.per_channel_affine",
+            "reduce_range": False,
+            "quant_min": 0,
+            "quant_max": 255,
+        },
+    }
+    smoothquant_minmax_activation_observer = {
+        "name": "SmoothQuantActivationObserver",
+        "smooth_quant_enabled": smooth_quant_enable,
+        "dtype": "torch.quint8",
+        "qscheme": "torch.per_tensor_affine",
+        "reduce_range": False,
+        "quant_min": 0,
+        "quant_max": 255,
+        "alpha": 0.5,
+        "act_observer": minmax_activation_observer,
+        "act_ic_observer": {
+            "name": "PerChannelMinMaxObserver",
+            "ch_axis": -1,
+            "dtype": "torch.quint8",
+            "qscheme": "torch.per_channel_affine",
+            "reduce_range": False,
+            "quant_min": 0,
+            "quant_max": 255,
+        },
+    }
     REDUCE_RANGE = False if CpuInfo().vnni else True
     if REDUCE_RANGE:
         minmax_activation_observer["reduce_range"] = REDUCE_RANGE
@@ -192,13 +232,21 @@ def generate_activation_observer(scheme, algorithm):  # pragma: no cover
         kl_activation_observer["dtype"] = "torch.qint8"
         kl_activation_observer["quant_min"] = -128
         kl_activation_observer["quant_max"] = 127
-    if algorithm == "kl":
-        return kl_activation_observer
-    if algorithm == "minmax":
-        return minmax_activation_observer
+    if smooth_quant and smooth_quant_enable:
+        if algorithm == "kl":
+            return smoothquant_kl_activation_observer
+        if algorithm == "minmax":
+            return smoothquant_minmax_activation_observer
+    else:
+        if algorithm == "kl":
+            return kl_activation_observer
+        if algorithm == "minmax":
+            return minmax_activation_observer
 
 
-def check_cfg_and_qconfig(tune_cfg, cfgs, op_infos_from_cfgs, output_tensor_ids_op_name):  # pragma: no cover
+def check_cfg_and_qconfig(
+    tune_cfg, cfgs, op_infos_from_cfgs, output_tensor_ids_op_name, smooth_quant=False
+):  # pragma: no cover
     """Check configs and quantization configs.
 
     Args:
@@ -228,11 +276,21 @@ def check_cfg_and_qconfig(tune_cfg, cfgs, op_infos_from_cfgs, output_tensor_ids_
                         inc_scheme = inc_op_cfg["activation"]["scheme"]
                         inc_algorithm = inc_op_cfg["activation"]["algorithm"]
                         ipex_op_cfg["input_tensor_infos"] = input_tensor_infos
-                        activation_observer = generate_activation_observer(inc_scheme, inc_algorithm)
-                        if inc_scheme == "sym":
-                            input_tensor_infos[index]["force_dtype"] = "torch.qint8"
-                        if inc_scheme == "asym":
-                            input_tensor_infos[index]["force_dtype"] = "torch.quint8"
+                        if (
+                            "op_type" in ipex_op_cfg
+                            and ipex_op_cfg["op_type"] == "<class 'torch.nn.modules.linear.Linear'>"
+                        ):
+                            smooth_quant_enable = True
+                        else:
+                            smooth_quant_enable = False
+                        activation_observer = generate_activation_observer(
+                            inc_scheme, inc_algorithm, smooth_quant, smooth_quant_enable
+                        )
+                        if not smooth_quant:
+                            if inc_scheme == "sym":
+                                input_tensor_infos[index]["force_dtype"] = "torch.qint8"
+                            if inc_scheme == "asym":
+                                input_tensor_infos[index]["force_dtype"] = "torch.quint8"
                         ipex_op_cfg["activation_observer"] = activation_observer
                     # int8 -> fp32
                     else:
@@ -397,7 +455,7 @@ def update_sq_scale(ipex_config_path, smoothquant_scale_info):
                 for op_num, v1 in v["q_op_infos"].items():
                     # update alpha data instead of updating weight scale
                     op_name = v1["fqn"]  # fqn always exists even it's empty.
-                    if op_name in smoothquant_scale_info:
+                    if op_name in smoothquant_scale_info and v1["op_type_is_module"]:
                         input_scale_for_mul = smoothquant_scale_info[op_name]["input_scale_for_mul"].tolist()
                         input_scale_after_mul = smoothquant_scale_info[op_name]["input_scale_after_mul"].tolist()
                         input_zero_point_after_mul = smoothquant_scale_info[op_name][
@@ -405,74 +463,19 @@ def update_sq_scale(ipex_config_path, smoothquant_scale_info):
                         ].tolist()
                         weight_scale_for_mul = (1 / smoothquant_scale_info[op_name]["input_scale_for_mul"]).tolist()
                         weight_scale_after_mul = smoothquant_scale_info[op_name]["weight_scale_after_mul"].tolist()
-                        v1["input_tensor_infos"][0]["smooth_quant_scaling_factor"] = input_scale_for_mul
                         v1["input_tensor_infos"][0]["scale"] = input_scale_after_mul
                         v1["input_tensor_infos"][0]["zero_point"] = input_zero_point_after_mul
+                        v1["input_tensor_infos"][0]["smooth_quant_scaling_factor"] = input_scale_for_mul
                         v1["weight_tensor_infos"][0]["smooth_quant_scaling_factor"] = weight_scale_for_mul
                         v1["weight_tensor_infos"][0]["scale"] = weight_scale_after_mul
                         # # observers were overridden by the fallback step, setting it back.
-                        v1["activation_observer"] = {
-                            "name": "SmoothQuantActivationObserver",
-                            "smooth_quant_enabled": True,
-                            "dtype": "torch.quint8",
-                            "qscheme": "torch.per_tensor_affine",
-                            "reduce_range": False,
-                            "quant_min": 0,
-                            "quant_max": 255,
-                            "alpha": smoothquant_scale_info[op_name]["alpha"],
-                            "act_observer": {
-                                "name": "HistogramObserver",
-                                "bins": 2048,
-                                "upsample_rate": 128,
-                                "dtype": "torch.quint8",
-                                "qscheme": "torch.per_tensor_affine",
-                                "reduce_range": False,
-                                "quant_min": 0,
-                                "quant_max": 255,
-                            },
-                            "act_ic_observer": {
-                                "name": "PerChannelMinMaxObserver",
-                                "ch_axis": -1,
-                                "dtype": "torch.quint8",
-                                "qscheme": "torch.per_channel_affine",
-                                "reduce_range": False,
-                                "quant_min": 0,
-                                "quant_max": 255,
-                            },
-                        }
-                        v1["weight_observer"] = {
-                            "name": "SmoothQuantWeightObserver",
-                            "smooth_quant_enabled": True,
-                            "dtype": "torch.qint8",
-                            "qscheme": "torch.per_channel_symmetric",
-                            "reduce_range": False,
-                            "quant_min": -128,
-                            "quant_max": 127,
-                            "alpha": smoothquant_scale_info[op_name]["alpha"],
-                            "wei_observer": {
-                                "name": "PerChannelMinMaxObserver",
-                                "ch_axis": 0,
-                                "dtype": "torch.qint8",
-                                "qscheme": "torch.per_channel_symmetric",
-                                "reduce_range": False,
-                                "quant_min": -128,
-                                "quant_max": 127,
-                            },
-                            "wei_ic_observer": {
-                                "name": "PerChannelMinMaxObserver",
-                                "ch_axis": 1,
-                                "dtype": "torch.qint8",
-                                "qscheme": "torch.per_channel_affine",
-                                "reduce_range": False,
-                                "quant_min": -128,
-                                "quant_max": 127,
-                            },
-                        }
         f.close()
     # overwrite ipex_config_path
     with open(ipex_config_path, "w") as f1:
         json.dump(ipex_config, f1, indent=4)
         f1.close()
+    print(ipex_config_path)
+    # exit(0)
 
 
 def auto_copy(module):  # pragma: no cover
diff --git a/test/algorithm/ipex_config_tmp.json b/test/algorithm/ipex_config_tmp.json
new file mode 100644
index 00000000000..12927ad8b0d
--- /dev/null
+++ b/test/algorithm/ipex_config_tmp.json
@@ -0,0 +1,14881 @@
+{
+    " ": {
+        "q_op_infos": {},
+        "nonq_op_infos": {},
+        "layer_output_infos": [
+            {
+                "id": 299,
+                "orig_dtype": "torch.float32",
+                "inf_dtype": "torch.float32"
+            }
+        ]
+    },
+    "base_model": {
+        "q_op_infos": {},
+        "nonq_op_infos": {
+            "0": {
+                "op_type": "<built-in method eq of type object at 0x7fd865476560>",
+                "fqn": "base_model",
+                "input_tensor_infos": [
+                    {
+                        "id": 0,
+                        "orig_dtype": "torch.int64",
+                        "inf_dtype": "torch.int64"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 293,
+                        "orig_dtype": "torch.bool",
+                        "inf_dtype": "torch.bool"
+                    }
+                ]
+            },
+            "1": {
+                "op_type": "<method 'long' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model",
+                "input_tensor_infos": [
+                    {
+                        "id": 293,
+                        "orig_dtype": "torch.bool",
+                        "inf_dtype": "torch.bool"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 294,
+                        "orig_dtype": "torch.int64",
+                        "inf_dtype": "torch.int64"
+                    }
+                ]
+            },
+            "2": {
+                "op_type": "<method 'argmax' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model",
+                "input_tensor_infos": [
+                    {
+                        "id": 294,
+                        "orig_dtype": "torch.int64",
+                        "inf_dtype": "torch.int64"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 295,
+                        "orig_dtype": "torch.int64",
+                        "inf_dtype": "torch.int64"
+                    }
+                ]
+            },
+            "3": {
+                "op_type": "<method 'sub' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model",
+                "input_tensor_infos": [
+                    {
+                        "id": 295,
+                        "orig_dtype": "torch.int64",
+                        "inf_dtype": "torch.int64"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 296,
+                        "orig_dtype": "torch.int64",
+                        "inf_dtype": "torch.int64"
+                    }
+                ]
+            },
+            "4": {
+                "op_type": "<method 'to' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model",
+                "input_tensor_infos": [
+                    {
+                        "id": 296,
+                        "orig_dtype": "torch.int64",
+                        "inf_dtype": "torch.int64"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 297,
+                        "orig_dtype": "torch.int64",
+                        "inf_dtype": "torch.int64"
+                    }
+                ]
+            },
+            "5": {
+                "op_type": "<slot wrapper '__getitem__' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model",
+                "input_tensor_infos": [
+                    {
+                        "id": 292,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {
+                        "id": 298,
+                        "orig_dtype": "torch.int64",
+                        "inf_dtype": "torch.int64"
+                    },
+                    {
+                        "id": 297,
+                        "orig_dtype": "torch.int64",
+                        "inf_dtype": "torch.int64"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 299,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            }
+        },
+        "layer_output_infos": [
+            {
+                "id": 299,
+                "orig_dtype": "torch.float32",
+                "inf_dtype": "torch.float32"
+            }
+        ]
+    },
+    "base_model:model": {
+        "q_op_infos": {},
+        "nonq_op_infos": {},
+        "layer_output_infos": []
+    },
+    "base_model:model:model": {
+        "q_op_infos": {},
+        "nonq_op_infos": {},
+        "layer_output_infos": [
+            {
+                "id": 291,
+                "orig_dtype": "torch.float32",
+                "inf_dtype": "torch.quint8"
+            }
+        ]
+    },
+    "base_model:model:model:decoder": {
+        "q_op_infos": {
+            "0": {
+                "op_type": "<method 'add' of 'torch._C._TensorBase' objects>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder",
+                "input_tensor_infos": [
+                    {
+                        "id": 2,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            1.1920928955078125e-07
+                        ],
+                        "zero_point": [
+                            0
+                        ]
+                    },
+                    {
+                        "id": 3,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            0.00036990849184803665
+                        ],
+                        "zero_point": [
+                            128
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 4,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            }
+        },
+        "nonq_op_infos": {
+            "0": {
+                "op_type": "<method 'size' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder",
+                "input_tensor_infos": [
+                    {
+                        "id": 0,
+                        "orig_dtype": "torch.int64",
+                        "inf_dtype": "torch.int64"
+                    }
+                ],
+                "output_tensor_infos": []
+            },
+            "1": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder",
+                "input_tensor_infos": [
+                    {
+                        "id": 0,
+                        "orig_dtype": "torch.int64",
+                        "inf_dtype": "torch.int64"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 1,
+                        "orig_dtype": "torch.int64",
+                        "inf_dtype": "torch.int64"
+                    }
+                ]
+            },
+            "2": {
+                "op_type": "<class 'torch.nn.modules.sparse.Embedding'>",
+                "fqn": "base_model.model.model.decoder.embed_tokens",
+                "input_tensor_infos": [
+                    {
+                        "id": 1,
+                        "orig_dtype": "torch.int64",
+                        "inf_dtype": "torch.int64"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 2,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "3": {
+                "op_type": "<class 'torch.nn.modules.normalization.LayerNorm'>",
+                "fqn": "base_model.model.model.decoder.final_layer_norm",
+                "input_tensor_infos": [
+                    {
+                        "id": 290,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 291,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8"
+                    }
+                ]
+            }
+        },
+        "layer_output_infos": [
+            {
+                "id": 291,
+                "orig_dtype": "torch.float32",
+                "inf_dtype": "torch.quint8"
+            }
+        ]
+    },
+    "base_model:model:model:decoder:embed_positions": {
+        "q_op_infos": {},
+        "nonq_op_infos": {},
+        "layer_output_infos": [
+            {
+                "id": 3,
+                "orig_dtype": "torch.float32",
+                "inf_dtype": "torch.float32"
+            }
+        ]
+    },
+    "base_model:model:model:decoder:layers": {
+        "q_op_infos": {},
+        "nonq_op_infos": {},
+        "layer_output_infos": []
+    },
+    "base_model:model:model:decoder:layers:0": {
+        "q_op_infos": {
+            "0": {
+                "op_type": "<method 'add' of 'torch._C._TensorBase' objects>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.0",
+                "input_tensor_infos": [
+                    {
+                        "id": 4,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            0.00036990849184803665
+                        ],
+                        "zero_point": [
+                            128
+                        ]
+                    },
+                    {
+                        "id": 53,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            8.216560672735795e-05
+                        ],
+                        "zero_point": [
+                            123
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 54,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "1": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.0.fc1",
+                "input_tensor_infos": [
+                    {
+                        "id": 56,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.002199406735599041
+                        ],
+                        "zero_point": [
+                            125
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.10962644964456558,
+                            0.11907318979501724,
+                            0.09595140069723129,
+                            0.13423262536525726,
+                            0.14291484653949738,
+                            0.13098719716072083,
+                            0.12959600985050201,
+                            0.13702590763568878,
+                            0.1146077886223793,
+                            0.14385050535202026,
+                            0.12177281826734543,
+                            0.1325545758008957,
+                            0.10560321807861328,
+                            0.14362241327762604,
+                            0.14758096635341644,
+                            0.1234637051820755
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.0018767404835671186,
+                            0.0021353804040700197,
+                            0.002240665489807725,
+                            0.0021580320317298174
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            9.121886253356934,
+                            8.39819622039795,
+                            10.421942710876465,
+                            7.449753761291504,
+                            6.99717378616333,
+                            7.634334087371826,
+                            7.716286659240723,
+                            7.2978901863098145,
+                            8.725410461425781,
+                            6.951661586761475,
+                            8.212013244628906,
+                            7.54406213760376,
+                            9.469408988952637,
+                            6.962701320648193,
+                            6.775941848754883,
+                            8.099546432495117
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 57,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "scale": [
+                            0.0014130824711173773
+                        ],
+                        "zero_point": [
+                            119
+                        ]
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "MinMaxObserver",
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "2": {
+                "op_type": "<class 'torch.nn.modules.activation.ReLU'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.0.activation_fn",
+                "input_tensor_infos": [
+                    {
+                        "id": 57,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            0.0014130824711173773
+                        ],
+                        "zero_point": [
+                            119
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 58,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.0004062627849634737
+                        ],
+                        "zero_point": [
+                            0
+                        ]
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "3": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.0.fc2",
+                "input_tensor_infos": [
+                    {
+                        "id": 58,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            0.0004062627849634737
+                        ],
+                        "zero_point": [
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.549189031124115,
+                            0.4980548024177551,
+                            0.539968729019165,
+                            0.5409433245658875
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.0008125152671709657,
+                            0.0004290587385185063,
+                            0.00045011902693659067,
+                            0.0003873987589031458,
+                            0.0004886860842816532,
+                            0.00013036445307079703,
+                            0.0004251394420862198,
+                            0.0006477311835624278,
+                            0.0001262984733330086,
+                            0.0002167609054595232,
+                            0.0003500462626107037,
+                            0.00043075167923234403,
+                            0.000206843062187545,
+                            0.0005493324715644121,
+                            0.0005107946344651282,
+                            0.0006017627310939133
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            1.820866584777832,
+                            2.0078110694885254,
+                            1.8519591093063354,
+                            1.848622441291809
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 59,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "4": {
+                "op_type": "<method 'add' of 'torch._C._TensorBase' objects>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.0",
+                "input_tensor_infos": [
+                    {
+                        "id": 55,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            0.0003670741862151772
+                        ],
+                        "zero_point": [
+                            126
+                        ]
+                    },
+                    {
+                        "id": 60,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            7.231033669086173e-05
+                        ],
+                        "zero_point": [
+                            107
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 61,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            }
+        },
+        "nonq_op_infos": {
+            "0": {
+                "op_type": "<class 'torch.nn.modules.normalization.LayerNorm'>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn_layer_norm",
+                "input_tensor_infos": [
+                    {
+                        "id": 4,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 5,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ]
+            },
+            "1": {
+                "op_type": "<function dropout at 0x7fd863b82670>",
+                "fqn": "base_model.model.model.decoder.layers.0",
+                "input_tensor_infos": [
+                    {
+                        "id": 52,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 53,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "2": {
+                "op_type": "<method 'size' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0",
+                "input_tensor_infos": [
+                    {
+                        "id": 54,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": []
+            },
+            "3": {
+                "op_type": "<method 'reshape' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0",
+                "input_tensor_infos": [
+                    {
+                        "id": 54,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 55,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "4": {
+                "op_type": "<class 'torch.nn.modules.normalization.LayerNorm'>",
+                "fqn": "base_model.model.model.decoder.layers.0.final_layer_norm",
+                "input_tensor_infos": [
+                    {
+                        "id": 55,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 56,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ]
+            },
+            "5": {
+                "op_type": "<function dropout at 0x7fd863b82670>",
+                "fqn": "base_model.model.model.decoder.layers.0",
+                "input_tensor_infos": [
+                    {
+                        "id": 59,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 60,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "6": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0",
+                "input_tensor_infos": [
+                    {
+                        "id": 61,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 62,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            }
+        },
+        "layer_output_infos": [
+            {
+                "id": 62,
+                "orig_dtype": "torch.float32",
+                "inf_dtype": "torch.float32"
+            }
+        ]
+    },
+    "base_model:model:model:decoder:layers:0:self_attn": {
+        "q_op_infos": {
+            "0": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn.k_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 9,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            0.0025989431887865067
+                        ],
+                        "zero_point": [
+                            125
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.13708241283893585,
+                            0.16369596123695374,
+                            0.12329072505235672,
+                            0.16909663379192352,
+                            0.1828479766845703,
+                            0.162990003824234,
+                            0.14222995936870575,
+                            0.1607711762189865,
+                            0.12252063304185867,
+                            0.14310497045516968,
+                            0.12626583874225616,
+                            0.18967536091804504,
+                            0.15503790974617004,
+                            0.1411832571029663,
+                            0.1677650362253189,
+                            0.12387727946043015
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.001300352392718196,
+                            0.001692790538072586,
+                            0.0017874225741252303,
+                            0.0020049407612532377,
+                            0.00210148305632174,
+                            0.0023782390635460615,
+                            0.002406417392194271,
+                            0.0021967601496726274,
+                            0.0015098107978701591,
+                            0.0026584486477077007,
+                            0.0022751193027943373,
+                            0.002421419369056821,
+                            0.0025393515825271606,
+                            0.0016408554511144757,
+                            0.0018360354006290436,
+                            0.0018435503588989377
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            7.294881820678711,
+                            6.108886241912842,
+                            8.110910415649414,
+                            5.913778305053711,
+                            5.469024181365967,
+                            6.135345935821533,
+                            7.030867576599121,
+                            6.220020294189453,
+                            8.161890983581543,
+                            6.987877368927002,
+                            7.919798851013184,
+                            5.2721662521362305,
+                            6.450035095214844,
+                            7.0829925537109375,
+                            5.960717678070068,
+                            8.072505950927734
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 16,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "1": {
+                "op_type": "<built-in method bmm of type object at 0x7fd865476560>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 35,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.0008092291536740959
+                        ],
+                        "zero_point": [
+                            117
+                        ]
+                    },
+                    {
+                        "id": 38,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.0016102794324979186
+                        ],
+                        "zero_point": [
+                            142
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 39,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "MinMaxObserver",
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "2": {
+                "op_type": "<method 'add' of 'torch._C._TensorBase' objects>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 40,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            0.0001863027282524854
+                        ],
+                        "zero_point": [
+                            119
+                        ]
+                    },
+                    {
+                        "id": 41,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            1.3344405750530544e+36
+                        ],
+                        "zero_point": [
+                            255
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 42,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "3": {
+                "op_type": "<built-in method bmm of type object at 0x7fd865476560>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 47,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.003919653594493866
+                        ],
+                        "zero_point": [
+                            0
+                        ]
+                    },
+                    {
+                        "id": 37,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.0015389698091894388
+                        ],
+                        "zero_point": [
+                            132
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 48,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "MinMaxObserver",
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "4": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn.out_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 51,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            0.0006978802848607302
+                        ],
+                        "zero_point": [
+                            119
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.4900006949901581,
+                            0.6840704083442688,
+                            0.7935751080513,
+                            0.6235007047653198,
+                            0.9865836501121521,
+                            0.5714253783226013,
+                            0.8067981004714966,
+                            0.7140294909477234,
+                            0.5045593976974487,
+                            0.5837582349777222,
+                            0.5605868101119995,
+                            0.5316323041915894,
+                            1.5545496940612793,
+                            0.7744913101196289,
+                            0.8083890676498413,
+                            0.619972288608551
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.0005827043205499649,
+                            0.00046368170296773314,
+                            0.0003405930183362216,
+                            0.0007429186953231692,
+                            0.00036982446908950806,
+                            0.00032228915370069444,
+                            0.00030829786555841565,
+                            0.0005591728840954602,
+                            0.0005513495416380465,
+                            0.0006021953886374831,
+                            0.0006528276135213673,
+                            0.0005915488582104445,
+                            0.0005453170160762966,
+                            0.000527423806488514,
+                            0.0005745518137700856,
+                            0.0005068411701358855
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            2.040813446044922,
+                            1.4618377685546875,
+                            1.260120153427124,
+                            1.6038473844528198,
+                            1.0135987997055054,
+                            1.7500097751617432,
+                            1.2394675016403198,
+                            1.400502324104309,
+                            1.9819272756576538,
+                            1.7130379676818848,
+                            1.7838449478149414,
+                            1.8809993267059326,
+                            0.643273115158081,
+                            1.2911701202392578,
+                            1.2370281219482422,
+                            1.6129753589630127
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 52,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            }
+        },
+        "nonq_op_infos": {
+            "0": {
+                "op_type": "<method 'size' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 5,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ],
+                "output_tensor_infos": []
+            },
+            "1": {
+                "op_type": "<method 'mul' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 14,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 15,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "2": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 16,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 17,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "3": {
+                "op_type": "<method 'transpose' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 17,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 18,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "4": {
+                "op_type": "<method 'contiguous' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 18,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 19,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "5": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 28,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 29,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "6": {
+                "op_type": "<method 'transpose' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 29,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 30,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "7": {
+                "op_type": "<method 'contiguous' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 30,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 31,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "8": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 15,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 32,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "9": {
+                "op_type": "<method 'transpose' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 32,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 33,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "10": {
+                "op_type": "<method 'contiguous' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 33,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 34,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "11": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 34,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 35,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ]
+            },
+            "12": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 19,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 36,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "13": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 31,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 37,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ]
+            },
+            "14": {
+                "op_type": "<method 'size' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 36,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": []
+            },
+            "15": {
+                "op_type": "<method 'transpose' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 36,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 38,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ]
+            },
+            "16": {
+                "op_type": "<method 'size' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 39,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": []
+            },
+            "17": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 39,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 40,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "18": {
+                "op_type": "<built-in method max of type object at 0x7fd865476560>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 42,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {
+                        "id": 43,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 44,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "19": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 44,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 45,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "20": {
+                "op_type": "<function softmax at 0x7fd863b841f0>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 45,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 46,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "21": {
+                "op_type": "<function dropout at 0x7fd863b82670>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 46,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 47,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ]
+            },
+            "22": {
+                "op_type": "<method 'size' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 48,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": []
+            },
+            "23": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 48,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 49,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "24": {
+                "op_type": "<method 'transpose' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 49,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 50,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "25": {
+                "op_type": "<method 'reshape' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 50,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 51,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8"
+                    }
+                ]
+            }
+        },
+        "layer_output_infos": [
+            {
+                "id": 52,
+                "orig_dtype": "torch.float32",
+                "inf_dtype": "torch.float32"
+            }
+        ]
+    },
+    "base_model:model:model:decoder:layers:0:self_attn:v_proj": {
+        "q_op_infos": {
+            "0": {
+                "op_type": "<built-in function linear>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn.v_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 9,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.017885278910398483
+                        ],
+                        "zero_point": [
+                            124
+                        ]
+                    },
+                    {
+                        "id": 20,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.000445978861534968,
+                            0.0003330075996927917,
+                            0.00025916812592186034,
+                            0.00038120243698358536,
+                            0.00024034043599385768,
+                            0.00027592855622060597,
+                            0.0003112396807409823,
+                            0.0003448774223215878,
+                            0.0004228033940307796,
+                            0.00035102569381706417,
+                            0.0003638050111476332,
+                            0.00040324265137314796,
+                            0.00014413250028155744,
+                            0.00036997394636273384,
+                            0.00037873946712352335,
+                            0.0003710437740664929
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 21,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "MinMaxObserver",
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "1": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn.v_proj.lora_A.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 23,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            0.005615917034447193
+                        ],
+                        "zero_point": [
+                            123
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.28679710626602173,
+                            0.3436911106109619,
+                            0.3373546898365021,
+                            0.38693225383758545,
+                            0.40539291501045227,
+                            0.3380838930606842,
+                            0.3377487063407898,
+                            0.35613951086997986,
+                            0.3338879346847534,
+                            0.3551289439201355,
+                            0.3355533480644226,
+                            0.3332350552082062,
+                            0.31778484582901,
+                            0.33874425292015076,
+                            0.3934788107872009,
+                            0.31613245606422424
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.005829033441841602,
+                            0.005098548252135515,
+                            0.005407379940152168,
+                            0.005123750306665897,
+                            0.005402757786214352,
+                            0.005312266293913126,
+                            0.0052151489071547985,
+                            0.005331622902303934
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            3.486785650253296,
+                            2.9095892906188965,
+                            2.9642391204833984,
+                            2.5844316482543945,
+                            2.466742753982544,
+                            2.95784592628479,
+                            2.9607810974121094,
+                            2.8078885078430176,
+                            2.9950170516967773,
+                            2.8158786296844482,
+                            2.980152130126953,
+                            3.000884771347046,
+                            3.1467833518981934,
+                            2.9520797729492188,
+                            2.541433095932007,
+                            3.1632308959960938
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 24,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            8.888718184607569e-06
+                        ],
+                        "zero_point": [
+                            102
+                        ]
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "2": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn.v_proj.lora_B.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 24,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            8.888718184607569e-06
+                        ],
+                        "zero_point": [
+                            102
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.0011025747517123818,
+                            0.000735479814466089,
+                            0.0008849164005368948,
+                            0.0009793724166229367,
+                            0.001015718444250524,
+                            0.001038661110214889,
+                            0.0011344861704856157,
+                            0.0011412216117605567
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            906.9679565429688,
+                            1359.6566162109375,
+                            1130.05029296875,
+                            1021.0620727539062,
+                            984.5247802734375,
+                            962.7780151367188,
+                            881.456298828125,
+                            876.2540283203125
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 25,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            }
+        },
+        "nonq_op_infos": {
+            "0": {
+                "op_type": "<method 'to' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn.v_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 9,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 22,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "1": {
+                "op_type": "<class 'torch.nn.modules.linear.Identity'>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn.v_proj.lora_dropout.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 22,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 23,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8"
+                    }
+                ]
+            },
+            "2": {
+                "op_type": "<method 'mul' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn.v_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 25,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 26,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "3": {
+                "op_type": "<method 'add_' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn.v_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 21,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {
+                        "id": 26,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 27,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "4": {
+                "op_type": "<method 'to' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn.v_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 27,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 28,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            }
+        },
+        "layer_output_infos": [
+            {
+                "id": 28,
+                "orig_dtype": "torch.float32",
+                "inf_dtype": "torch.float32"
+            }
+        ]
+    },
+    "base_model:model:model:decoder:layers:0:self_attn:q_proj": {
+        "q_op_infos": {
+            "0": {
+                "op_type": "<built-in function linear>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn.q_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 5,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.017885278910398483
+                        ],
+                        "zero_point": [
+                            124
+                        ]
+                    },
+                    {
+                        "id": 6,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.0003197852347511798,
+                            0.00030044614686630666,
+                            0.00020690658129751682,
+                            0.0002973057562485337,
+                            0.0004652647185139358,
+                            0.0003272708272561431,
+                            0.00024269577988889068,
+                            0.0005545270978473127,
+                            0.00040261104004457593,
+                            0.00031847142963670194,
+                            0.00036206343793310225,
+                            0.00035386800300329924,
+                            0.00032511813333258033,
+                            0.0002959877601824701,
+                            0.00024614552967250347,
+                            0.0003854252281598747
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 7,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "MinMaxObserver",
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "1": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn.q_proj.lora_A.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 9,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            0.005623312667012215
+                        ],
+                        "zero_point": [
+                            126
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.29099902510643005,
+                            0.3386078178882599,
+                            0.32760125398635864,
+                            0.34182459115982056,
+                            0.4200611710548401,
+                            0.3903130888938904,
+                            0.3466292917728424,
+                            0.35106194019317627,
+                            0.2913013994693756,
+                            0.3590526580810547,
+                            0.35574501752853394,
+                            0.36627793312072754,
+                            0.30216914415359497,
+                            0.33425426483154297,
+                            0.40649864077568054,
+                            0.3092302978038788
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.004347391426563263,
+                            0.0051337298937141895,
+                            0.005251043010503054,
+                            0.005544815678149462,
+                            0.005272769834846258,
+                            0.004840066656470299,
+                            0.005468212533742189,
+                            0.005701767280697823
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            3.4364378452301025,
+                            2.9532690048217773,
+                            3.0524914264678955,
+                            2.9254770278930664,
+                            2.380605697631836,
+                            2.5620458126068115,
+                            2.8849265575408936,
+                            2.8485000133514404,
+                            3.432870388031006,
+                            2.785106658935547,
+                            2.811002254486084,
+                            2.7301673889160156,
+                            3.3094048500061035,
+                            2.991734266281128,
+                            2.4600329399108887,
+                            3.2338356971740723
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 10,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            8.660711500851903e-06
+                        ],
+                        "zero_point": [
+                            122
+                        ]
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "2": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn.q_proj.lora_B.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 10,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            8.660711500851903e-06
+                        ],
+                        "zero_point": [
+                            122
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.001275573275052011,
+                            0.0010749376378953457,
+                            0.0008703423663973808,
+                            0.0012038754066452384,
+                            0.0011535761877894402,
+                            0.0009325446444563568,
+                            0.0009438325650990009,
+                            0.0009104721248149872
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            783.961181640625,
+                            930.2864990234375,
+                            1148.97314453125,
+                            830.6506958007812,
+                            866.8694458007812,
+                            1072.334716796875,
+                            1059.5098876953125,
+                            1098.331298828125
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 11,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            }
+        },
+        "nonq_op_infos": {
+            "0": {
+                "op_type": "<method 'to' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn.q_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 5,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 8,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "1": {
+                "op_type": "<class 'torch.nn.modules.linear.Identity'>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn.q_proj.lora_dropout.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 8,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 9,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8"
+                    }
+                ]
+            },
+            "2": {
+                "op_type": "<method 'mul' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn.q_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 11,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 12,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "3": {
+                "op_type": "<method 'add_' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn.q_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 7,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {
+                        "id": 12,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 13,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "4": {
+                "op_type": "<method 'to' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.0.self_attn.q_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 13,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 14,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            }
+        },
+        "layer_output_infos": [
+            {
+                "id": 14,
+                "orig_dtype": "torch.float32",
+                "inf_dtype": "torch.float32"
+            }
+        ]
+    },
+    "base_model:model:model:decoder:layers:1": {
+        "q_op_infos": {
+            "0": {
+                "op_type": "<method 'add' of 'torch._C._TensorBase' objects>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.1",
+                "input_tensor_infos": [
+                    {
+                        "id": 62,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            0.0003642447991296649
+                        ],
+                        "zero_point": [
+                            124
+                        ]
+                    },
+                    {
+                        "id": 110,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            0.00011278505553491414
+                        ],
+                        "zero_point": [
+                            131
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 111,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "1": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.1.fc1",
+                "input_tensor_infos": [
+                    {
+                        "id": 113,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.002465195721015334
+                        ],
+                        "zero_point": [
+                            116
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.06853945553302765,
+                            0.10416597872972488,
+                            0.12038654834032059,
+                            0.1323762983083725,
+                            0.12097253650426865,
+                            0.10555252432823181,
+                            0.10595230013132095,
+                            0.18730825185775757,
+                            0.14067092537879944,
+                            0.16711099445819855,
+                            0.11074528843164444,
+                            0.12620683014392853,
+                            0.11356709152460098,
+                            0.15941360592842102,
+                            0.14840248227119446,
+                            0.16470491886138916
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.0017666078638285398,
+                            0.002689927350729704,
+                            0.0025342977605760098,
+                            0.0018005971796810627
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            14.590136528015137,
+                            9.60006332397461,
+                            8.306575775146484,
+                            7.554222583770752,
+                            8.266339302062988,
+                            9.473956108093262,
+                            9.438209533691406,
+                            5.3387932777404785,
+                            7.108789443969727,
+                            5.984046459197998,
+                            9.029729843139648,
+                            7.923501491546631,
+                            8.805367469787598,
+                            6.2729902267456055,
+                            6.738431453704834,
+                            6.0714640617370605
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 114,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "scale": [
+                            0.0012313323095440865
+                        ],
+                        "zero_point": [
+                            133
+                        ]
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "MinMaxObserver",
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "2": {
+                "op_type": "<class 'torch.nn.modules.activation.ReLU'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.1.activation_fn",
+                "input_tensor_infos": [
+                    {
+                        "id": 114,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            0.0012313323095440865
+                        ],
+                        "zero_point": [
+                            133
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 115,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.00031851575477048755
+                        ],
+                        "zero_point": [
+                            0
+                        ]
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "3": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.1.fc2",
+                "input_tensor_infos": [
+                    {
+                        "id": 115,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            0.00031851575477048755
+                        ],
+                        "zero_point": [
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.565930187702179,
+                            0.5698147416114807,
+                            0.5411486029624939,
+                            0.5345359444618225
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.0006370212067849934,
+                            0.0002463286218699068,
+                            0.00031396516715176404,
+                            0.000519612047355622,
+                            0.0003065533528570086,
+                            0.0004140080709476024,
+                            0.00011627166531980038,
+                            0.00031617359491065145,
+                            0.0004251366190146655,
+                            0.0005555427633225918,
+                            0.00031467361259274185,
+                            0.00010011934500653297,
+                            0.0005672333645634353,
+                            0.00037621770752593875,
+                            0.00032224206370301545,
+                            0.00017085621948353946
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            1.7670023441314697,
+                            1.7549563646316528,
+                            1.8479212522506714,
+                            1.870781660079956
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 116,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "4": {
+                "op_type": "<method 'add' of 'torch._C._TensorBase' objects>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.1",
+                "input_tensor_infos": [
+                    {
+                        "id": 112,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            0.0003685714618768543
+                        ],
+                        "zero_point": [
+                            125
+                        ]
+                    },
+                    {
+                        "id": 117,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            6.203278462635353e-05
+                        ],
+                        "zero_point": [
+                            98
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 118,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            }
+        },
+        "nonq_op_infos": {
+            "0": {
+                "op_type": "<class 'torch.nn.modules.normalization.LayerNorm'>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn_layer_norm",
+                "input_tensor_infos": [
+                    {
+                        "id": 62,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 63,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ]
+            },
+            "1": {
+                "op_type": "<function dropout at 0x7fd863b82670>",
+                "fqn": "base_model.model.model.decoder.layers.1",
+                "input_tensor_infos": [
+                    {
+                        "id": 109,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 110,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "2": {
+                "op_type": "<method 'size' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1",
+                "input_tensor_infos": [
+                    {
+                        "id": 111,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": []
+            },
+            "3": {
+                "op_type": "<method 'reshape' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1",
+                "input_tensor_infos": [
+                    {
+                        "id": 111,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 112,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "4": {
+                "op_type": "<class 'torch.nn.modules.normalization.LayerNorm'>",
+                "fqn": "base_model.model.model.decoder.layers.1.final_layer_norm",
+                "input_tensor_infos": [
+                    {
+                        "id": 112,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 113,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ]
+            },
+            "5": {
+                "op_type": "<function dropout at 0x7fd863b82670>",
+                "fqn": "base_model.model.model.decoder.layers.1",
+                "input_tensor_infos": [
+                    {
+                        "id": 116,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 117,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "6": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1",
+                "input_tensor_infos": [
+                    {
+                        "id": 118,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 119,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            }
+        },
+        "layer_output_infos": [
+            {
+                "id": 119,
+                "orig_dtype": "torch.float32",
+                "inf_dtype": "torch.float32"
+            }
+        ]
+    },
+    "base_model:model:model:decoder:layers:1:self_attn": {
+        "q_op_infos": {
+            "0": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn.k_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 67,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            0.0026635867543518543
+                        ],
+                        "zero_point": [
+                            124
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.13751402497291565,
+                            0.16165295243263245,
+                            0.13631494343280792,
+                            0.1561332792043686,
+                            0.18651795387268066,
+                            0.1500948667526245,
+                            0.1976248025894165,
+                            0.18973785638809204,
+                            0.14113600552082062,
+                            0.1779075264930725,
+                            0.15286338329315186,
+                            0.1429177224636078,
+                            0.13644066452980042,
+                            0.14966687560081482,
+                            0.16786669194698334,
+                            0.14095285534858704
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.0027345705311745405,
+                            0.0014046697178855538,
+                            0.0017388592241331935,
+                            0.0025925124064087868,
+                            0.00205766293220222,
+                            0.0017343783983960748,
+                            0.00249765207991004,
+                            0.0024264284875243902,
+                            0.0024790402967482805,
+                            0.0011135643580928445,
+                            0.0009298875811509788,
+                            0.0015201057540252805,
+                            0.0019686089362949133,
+                            0.002474588342010975,
+                            0.001645392389036715,
+                            0.002263008849695325
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            7.2719855308532715,
+                            6.186091899871826,
+                            7.3359527587890625,
+                            6.404784202575684,
+                            5.361413955688477,
+                            6.6624531745910645,
+                            5.060093402862549,
+                            5.270429611206055,
+                            7.08536434173584,
+                            5.6208977699279785,
+                            6.5417890548706055,
+                            6.997032642364502,
+                            7.329193115234375,
+                            6.68150520324707,
+                            5.957108020782471,
+                            7.094571113586426
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 74,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "1": {
+                "op_type": "<built-in method bmm of type object at 0x7fd865476560>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 93,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.0008104040753096342
+                        ],
+                        "zero_point": [
+                            139
+                        ]
+                    },
+                    {
+                        "id": 96,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.0018258652416989207
+                        ],
+                        "zero_point": [
+                            140
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 97,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "MinMaxObserver",
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "2": {
+                "op_type": "<method 'add' of 'torch._C._TensorBase' objects>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 98,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            0.0001867298415163532
+                        ],
+                        "zero_point": [
+                            124
+                        ]
+                    },
+                    {
+                        "id": 41,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            1.3344405750530544e+36
+                        ],
+                        "zero_point": [
+                            255
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 99,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "3": {
+                "op_type": "<built-in method bmm of type object at 0x7fd865476560>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 104,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.003919653594493866
+                        ],
+                        "zero_point": [
+                            0
+                        ]
+                    },
+                    {
+                        "id": 95,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.0014409187715500593
+                        ],
+                        "zero_point": [
+                            141
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 105,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "MinMaxObserver",
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "4": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn.out_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 108,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            0.0007428489625453949
+                        ],
+                        "zero_point": [
+                            107
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.757666289806366,
+                            0.5746235251426697,
+                            0.8885927200317383,
+                            0.7807335257530212,
+                            1.1450066566467285,
+                            0.41837745904922485,
+                            0.880587637424469,
+                            0.6206071376800537,
+                            0.6669229865074158,
+                            0.9862504601478577,
+                            0.4604479670524597,
+                            1.3671139478683472,
+                            1.458922266960144,
+                            1.123700737953186,
+                            0.8432158827781677,
+                            0.9627863764762878
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.00038053718162700534,
+                            0.0003275577910244465,
+                            0.00034619285725057125,
+                            0.0004894750891253352,
+                            0.0003191542054992169,
+                            0.0008626226335763931,
+                            0.00034072514972649515,
+                            0.0003294590278528631,
+                            0.00027665012748911977,
+                            0.0004014151345472783,
+                            0.00030781672103330493,
+                            0.0004544431285466999,
+                            0.00062305957544595,
+                            0.00045046041486784816,
+                            0.00035027528065256774,
+                            0.0005793329910375178
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            1.3198422193527222,
+                            1.7402698993682861,
+                            1.1253750324249268,
+                            1.2808468341827393,
+                            0.8733574151992798,
+                            2.390186071395874,
+                            1.1356053352355957,
+                            1.6113252639770508,
+                            1.4994235038757324,
+                            1.0139412879943848,
+                            2.17179799079895,
+                            0.7314679026603699,
+                            0.6854374408721924,
+                            0.8899165987968445,
+                            1.1859359741210938,
+                            1.0386520624160767
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 109,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            }
+        },
+        "nonq_op_infos": {
+            "0": {
+                "op_type": "<method 'size' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 63,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ],
+                "output_tensor_infos": []
+            },
+            "1": {
+                "op_type": "<method 'mul' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 72,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 73,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "2": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 74,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 75,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "3": {
+                "op_type": "<method 'transpose' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 75,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 76,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "4": {
+                "op_type": "<method 'contiguous' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 76,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 77,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "5": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 86,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 87,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "6": {
+                "op_type": "<method 'transpose' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 87,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 88,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "7": {
+                "op_type": "<method 'contiguous' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 88,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 89,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "8": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 73,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 90,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "9": {
+                "op_type": "<method 'transpose' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 90,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 91,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "10": {
+                "op_type": "<method 'contiguous' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 91,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 92,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "11": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 92,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 93,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ]
+            },
+            "12": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 77,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 94,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "13": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 89,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 95,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ]
+            },
+            "14": {
+                "op_type": "<method 'size' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 94,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": []
+            },
+            "15": {
+                "op_type": "<method 'transpose' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 94,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 96,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ]
+            },
+            "16": {
+                "op_type": "<method 'size' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 97,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": []
+            },
+            "17": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 97,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 98,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "18": {
+                "op_type": "<built-in method max of type object at 0x7fd865476560>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 99,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {
+                        "id": 100,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 101,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "19": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 101,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 102,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "20": {
+                "op_type": "<function softmax at 0x7fd863b841f0>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 102,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 103,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "21": {
+                "op_type": "<function dropout at 0x7fd863b82670>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 103,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 104,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ]
+            },
+            "22": {
+                "op_type": "<method 'size' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 105,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": []
+            },
+            "23": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 105,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 106,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "24": {
+                "op_type": "<method 'transpose' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 106,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 107,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "25": {
+                "op_type": "<method 'reshape' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 107,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 108,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8"
+                    }
+                ]
+            }
+        },
+        "layer_output_infos": [
+            {
+                "id": 109,
+                "orig_dtype": "torch.float32",
+                "inf_dtype": "torch.float32"
+            }
+        ]
+    },
+    "base_model:model:model:decoder:layers:1:self_attn:v_proj": {
+        "q_op_infos": {
+            "0": {
+                "op_type": "<built-in function linear>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn.v_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 67,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.017186878249049187
+                        ],
+                        "zero_point": [
+                            135
+                        ]
+                    },
+                    {
+                        "id": 78,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.00031020285678096116,
+                            0.0004165052669122815,
+                            0.00037859598523937166,
+                            0.00035231083165854216,
+                            0.00037992530269548297,
+                            0.00043818255653604865,
+                            0.00021832078346051276,
+                            0.00030567412613891065,
+                            0.00029989739414304495,
+                            0.000358005752786994,
+                            0.0003350492916069925,
+                            0.00027488538762554526,
+                            0.00041011988651007414,
+                            0.00020044640405103564,
+                            0.0002536431129556149,
+                            0.00024024673621170223
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 79,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "MinMaxObserver",
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "1": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn.v_proj.lora_A.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 81,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            0.005631454288959503
+                        ],
+                        "zero_point": [
+                            129
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.31396907567977905,
+                            0.3419976234436035,
+                            0.32946449518203735,
+                            0.3809604346752167,
+                            0.4369880259037018,
+                            0.37603235244750977,
+                            0.38991811871528625,
+                            0.3634071350097656,
+                            0.3446651101112366,
+                            0.35563480854034424,
+                            0.3299185335636139,
+                            0.36610135436058044,
+                            0.33558520674705505,
+                            0.3683687448501587,
+                            0.39420658349990845,
+                            0.3463117778301239
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.005256906151771545,
+                            0.005702690687030554,
+                            0.00408707931637764,
+                            0.0055601755157113075,
+                            0.0051085250452160835,
+                            0.005457798019051552,
+                            0.005484873428940773,
+                            0.00546643789857626
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            3.1850271224975586,
+                            2.923996925354004,
+                            3.0352284908294678,
+                            2.6249446868896484,
+                            2.2883923053741455,
+                            2.6593456268310547,
+                            2.564640998840332,
+                            2.751734495162964,
+                            2.9013671875,
+                            2.811873197555542,
+                            3.0310513973236084,
+                            2.7314839363098145,
+                            2.9798693656921387,
+                            2.7146711349487305,
+                            2.536741018295288,
+                            2.8875715732574463
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 82,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            9.595456504030153e-06
+                        ],
+                        "zero_point": [
+                            122
+                        ]
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "2": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn.v_proj.lora_B.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 82,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            9.595456504030153e-06
+                        ],
+                        "zero_point": [
+                            122
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.0012220613425597548,
+                            0.0009459510329179466,
+                            0.0010422103805467486,
+                            0.0007835418218746781,
+                            0.0009632350993342698,
+                            0.0009702403913252056,
+                            0.0007845996296964586,
+                            0.0009382423013448715
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            818.28955078125,
+                            1057.13720703125,
+                            959.4991455078125,
+                            1276.2559814453125,
+                            1038.1680908203125,
+                            1030.67236328125,
+                            1274.535400390625,
+                            1065.82275390625
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 83,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            }
+        },
+        "nonq_op_infos": {
+            "0": {
+                "op_type": "<method 'to' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn.v_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 67,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 80,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "1": {
+                "op_type": "<class 'torch.nn.modules.linear.Identity'>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn.v_proj.lora_dropout.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 80,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 81,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8"
+                    }
+                ]
+            },
+            "2": {
+                "op_type": "<method 'mul' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn.v_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 83,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 84,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "3": {
+                "op_type": "<method 'add_' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn.v_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 79,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {
+                        "id": 84,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 85,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "4": {
+                "op_type": "<method 'to' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn.v_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 85,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 86,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            }
+        },
+        "layer_output_infos": [
+            {
+                "id": 86,
+                "orig_dtype": "torch.float32",
+                "inf_dtype": "torch.float32"
+            }
+        ]
+    },
+    "base_model:model:model:decoder:layers:1:self_attn:q_proj": {
+        "q_op_infos": {
+            "0": {
+                "op_type": "<built-in function linear>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn.q_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 63,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.017186878249049187
+                        ],
+                        "zero_point": [
+                            135
+                        ]
+                    },
+                    {
+                        "id": 64,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.00033155985875055194,
+                            0.00036077588447369635,
+                            0.00045065759331919253,
+                            0.00041911558946594596,
+                            0.0003609144187066704,
+                            0.00024745348491705954,
+                            0.000459514296380803,
+                            0.00030642913770861924,
+                            0.00029080515378154814,
+                            0.0002998409909196198,
+                            0.00018764213018584996,
+                            0.0003062607138417661,
+                            0.00033330157748423517,
+                            0.00037817415432073176,
+                            0.0002834920596797019,
+                            0.00019271073688287288
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 65,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "MinMaxObserver",
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "1": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn.q_proj.lora_A.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 67,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            0.00550608616322279
+                        ],
+                        "zero_point": [
+                            129
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.2970189154148102,
+                            0.3477560579776764,
+                            0.33492574095726013,
+                            0.3764011263847351,
+                            0.37040868401527405,
+                            0.38529539108276367,
+                            0.38419976830482483,
+                            0.3488285541534424,
+                            0.3366585075855255,
+                            0.33536985516548157,
+                            0.31619206070899963,
+                            0.3598238527774811,
+                            0.3590928614139557,
+                            0.29943913221359253,
+                            0.3960738480091095,
+                            0.31984657049179077
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.004763101227581501,
+                            0.00539481732994318,
+                            0.005577226169407368,
+                            0.005154943559318781,
+                            0.0042894682846963406,
+                            0.005435059778392315,
+                            0.005302493926137686,
+                            0.00512793380767107
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            3.366788864135742,
+                            2.8755788803100586,
+                            2.985736608505249,
+                            2.656740188598633,
+                            2.699720621109009,
+                            2.5954113006591797,
+                            2.6028125286102295,
+                            2.8667376041412354,
+                            2.9703688621520996,
+                            2.9817826747894287,
+                            3.1626346111297607,
+                            2.77913761138916,
+                            2.784795045852661,
+                            3.3395769596099854,
+                            2.5247817039489746,
+                            3.1264989376068115
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 68,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            9.067955943464767e-06
+                        ],
+                        "zero_point": [
+                            130
+                        ]
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "2": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn.q_proj.lora_B.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 68,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            9.067955943464767e-06
+                        ],
+                        "zero_point": [
+                            130
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.0009543295600451529,
+                            0.000984062673524022,
+                            0.001070799888111651,
+                            0.0009884184692054987,
+                            0.0010192039189860225,
+                            0.0009609365952201188,
+                            0.0008825542754493654,
+                            0.0008479927200824022
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            1047.8560791015625,
+                            1016.1954956054688,
+                            933.88134765625,
+                            1011.71728515625,
+                            981.1578979492188,
+                            1040.6513671875,
+                            1133.0748291015625,
+                            1179.25537109375
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 69,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            }
+        },
+        "nonq_op_infos": {
+            "0": {
+                "op_type": "<method 'to' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn.q_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 63,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 66,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "1": {
+                "op_type": "<class 'torch.nn.modules.linear.Identity'>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn.q_proj.lora_dropout.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 66,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 67,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8"
+                    }
+                ]
+            },
+            "2": {
+                "op_type": "<method 'mul' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn.q_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 69,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 70,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "3": {
+                "op_type": "<method 'add_' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn.q_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 65,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {
+                        "id": 70,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 71,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "4": {
+                "op_type": "<method 'to' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.1.self_attn.q_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 71,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 72,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            }
+        },
+        "layer_output_infos": [
+            {
+                "id": 72,
+                "orig_dtype": "torch.float32",
+                "inf_dtype": "torch.float32"
+            }
+        ]
+    },
+    "base_model:model:model:decoder:layers:2": {
+        "q_op_infos": {
+            "0": {
+                "op_type": "<method 'add' of 'torch._C._TensorBase' objects>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.2",
+                "input_tensor_infos": [
+                    {
+                        "id": 119,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            0.0003836948308162391
+                        ],
+                        "zero_point": [
+                            119
+                        ]
+                    },
+                    {
+                        "id": 167,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            0.00010558274516370147
+                        ],
+                        "zero_point": [
+                            150
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 168,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "1": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.2.fc1",
+                "input_tensor_infos": [
+                    {
+                        "id": 170,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.0025296418461948633
+                        ],
+                        "zero_point": [
+                            117
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.09582631289958954,
+                            0.1445355862379074,
+                            0.082494355738163,
+                            0.14328081905841827,
+                            0.11226029694080353,
+                            0.08192360401153564,
+                            0.11447090655565262,
+                            0.1828862428665161,
+                            0.1200239434838295,
+                            0.14438803493976593,
+                            0.0708846002817154,
+                            0.1267455518245697,
+                            0.12479699403047562,
+                            0.14254459738731384,
+                            0.12599679827690125,
+                            0.17459845542907715
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.002273815916851163,
+                            0.002167364116758108,
+                            0.0027308000717312098,
+                            0.002338568912819028
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            10.435546875,
+                            6.9187116622924805,
+                            12.122041702270508,
+                            6.979301452636719,
+                            8.907868385314941,
+                            12.206494331359863,
+                            8.735844612121582,
+                            5.467879772186279,
+                            8.331670761108398,
+                            6.925781726837158,
+                            14.107436180114746,
+                            7.8898234367370605,
+                            8.013012886047363,
+                            7.015348434448242,
+                            7.936709403991699,
+                            5.7274274826049805
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 171,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "scale": [
+                            0.0012088003568351269
+                        ],
+                        "zero_point": [
+                            144
+                        ]
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "MinMaxObserver",
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "2": {
+                "op_type": "<class 'torch.nn.modules.activation.ReLU'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.2.activation_fn",
+                "input_tensor_infos": [
+                    {
+                        "id": 171,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            0.0012088003568351269
+                        ],
+                        "zero_point": [
+                            144
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 172,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.00028806543559767306
+                        ],
+                        "zero_point": [
+                            0
+                        ]
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "3": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.2.fc2",
+                "input_tensor_infos": [
+                    {
+                        "id": 172,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            0.00028806543559767306
+                        ],
+                        "zero_point": [
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.6897866725921631,
+                            0.5229077339172363,
+                            0.5212884545326233,
+                            0.5749416947364807
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.0005493504577316344,
+                            0.0002844150294549763,
+                            0.0003237436758354306,
+                            0.0003664802643470466,
+                            0.00019999578944407403,
+                            0.0005761217325925827,
+                            0.0002001346874749288,
+                            0.0002735615416895598,
+                            0.0005581346922554076,
+                            0.0003144122601952404,
+                            0.0005320287891663611,
+                            0.00024963842588476837,
+                            0.0002453761699143797,
+                            0.00031408341601490974,
+                            0.0005700319889001548,
+                            0.00040268656448461115
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            1.4497236013412476,
+                            1.9123833179473877,
+                            1.9183236360549927,
+                            1.7393068075180054
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 173,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "4": {
+                "op_type": "<method 'add' of 'torch._C._TensorBase' objects>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.2",
+                "input_tensor_infos": [
+                    {
+                        "id": 169,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            0.00036479582195170224
+                        ],
+                        "zero_point": [
+                            129
+                        ]
+                    },
+                    {
+                        "id": 174,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            4.296861152397469e-05
+                        ],
+                        "zero_point": [
+                            132
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 175,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            }
+        },
+        "nonq_op_infos": {
+            "0": {
+                "op_type": "<class 'torch.nn.modules.normalization.LayerNorm'>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn_layer_norm",
+                "input_tensor_infos": [
+                    {
+                        "id": 119,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 120,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ]
+            },
+            "1": {
+                "op_type": "<function dropout at 0x7fd863b82670>",
+                "fqn": "base_model.model.model.decoder.layers.2",
+                "input_tensor_infos": [
+                    {
+                        "id": 166,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 167,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "2": {
+                "op_type": "<method 'size' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2",
+                "input_tensor_infos": [
+                    {
+                        "id": 168,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": []
+            },
+            "3": {
+                "op_type": "<method 'reshape' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2",
+                "input_tensor_infos": [
+                    {
+                        "id": 168,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 169,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "4": {
+                "op_type": "<class 'torch.nn.modules.normalization.LayerNorm'>",
+                "fqn": "base_model.model.model.decoder.layers.2.final_layer_norm",
+                "input_tensor_infos": [
+                    {
+                        "id": 169,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 170,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ]
+            },
+            "5": {
+                "op_type": "<function dropout at 0x7fd863b82670>",
+                "fqn": "base_model.model.model.decoder.layers.2",
+                "input_tensor_infos": [
+                    {
+                        "id": 173,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 174,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "6": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2",
+                "input_tensor_infos": [
+                    {
+                        "id": 175,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 176,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            }
+        },
+        "layer_output_infos": [
+            {
+                "id": 176,
+                "orig_dtype": "torch.float32",
+                "inf_dtype": "torch.float32"
+            }
+        ]
+    },
+    "base_model:model:model:decoder:layers:2:self_attn": {
+        "q_op_infos": {
+            "0": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn.k_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 124,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            0.002619683276861906
+                        ],
+                        "zero_point": [
+                            124
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.09771496802568436,
+                            0.12890088558197021,
+                            0.14113005995750427,
+                            0.14548565447330475,
+                            0.21194416284561157,
+                            0.16805870831012726,
+                            0.16498667001724243,
+                            0.19031284749507904,
+                            0.15200883150100708,
+                            0.13688746094703674,
+                            0.16504071652889252,
+                            0.15595300495624542,
+                            0.16503247618675232,
+                            0.15158624947071075,
+                            0.1464781016111374,
+                            0.11796882748603821
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.001982156652957201,
+                            0.0024204144719988108,
+                            0.0021105222404003143,
+                            0.0025380882434546947,
+                            0.0020559560507535934,
+                            0.0019766828045248985,
+                            0.0020437673665583134,
+                            0.002261366695165634,
+                            0.0019714212976396084,
+                            0.0024409634061157703,
+                            0.0017740943003445864,
+                            0.0015419897390529513,
+                            0.0021045394241809845,
+                            0.0018249802524223924,
+                            0.0027011858765035868,
+                            0.0017819146160036325
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            10.233846664428711,
+                            7.757898807525635,
+                            7.085662841796875,
+                            6.87352991104126,
+                            4.718224048614502,
+                            5.950301647186279,
+                            6.061095714569092,
+                            5.2545061111450195,
+                            6.5785651206970215,
+                            7.305271148681641,
+                            6.05911111831665,
+                            6.4121880531311035,
+                            6.059413433074951,
+                            6.596904754638672,
+                            6.826959133148193,
+                            8.476815223693848
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 131,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "1": {
+                "op_type": "<built-in method bmm of type object at 0x7fd865476560>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 150,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.0008205320918932557
+                        ],
+                        "zero_point": [
+                            131
+                        ]
+                    },
+                    {
+                        "id": 153,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.0017587682232260704
+                        ],
+                        "zero_point": [
+                            123
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 154,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "MinMaxObserver",
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "2": {
+                "op_type": "<method 'add' of 'torch._C._TensorBase' objects>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 155,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            0.00016366671479772776
+                        ],
+                        "zero_point": [
+                            134
+                        ]
+                    },
+                    {
+                        "id": 41,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            1.3344405750530544e+36
+                        ],
+                        "zero_point": [
+                            255
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 156,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "3": {
+                "op_type": "<built-in method bmm of type object at 0x7fd865476560>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 161,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.003919653594493866
+                        ],
+                        "zero_point": [
+                            0
+                        ]
+                    },
+                    {
+                        "id": 152,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.0014430396258831024
+                        ],
+                        "zero_point": [
+                            128
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 162,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "MinMaxObserver",
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "4": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn.out_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 165,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            0.0006827609031461179
+                        ],
+                        "zero_point": [
+                            148
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.8441921472549438,
+                            0.6734631061553955,
+                            0.7215516567230225,
+                            1.250807523727417,
+                            1.2269479036331177,
+                            0.7070020437240601,
+                            0.7414212822914124,
+                            1.0293961763381958,
+                            1.3132224082946777,
+                            0.6262384057044983,
+                            1.1616765260696411,
+                            1.2901126146316528,
+                            0.5255112051963806,
+                            0.8619773983955383,
+                            1.1792985200881958,
+                            0.7246308326721191
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.00037930699181742966,
+                            0.00014124519657343626,
+                            0.0004108152643311769,
+                            0.0003173276490997523,
+                            0.0003424036840442568,
+                            0.0003925739147234708,
+                            0.0004336285637691617,
+                            0.0007942286320030689,
+                            0.0003078484733123332,
+                            0.00036415716749615967,
+                            0.0002718089963309467,
+                            0.0005587885971181095,
+                            0.0005712821148335934,
+                            0.00039936500252224505,
+                            0.0004900978528894484,
+                            0.0006824227748438716
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            1.184564471244812,
+                            1.4848623275756836,
+                            1.3859021663665771,
+                            0.7994834780693054,
+                            0.8150305151939392,
+                            1.4144231081008911,
+                            1.3487608432769775,
+                            0.971443235874176,
+                            0.7614856362342834,
+                            1.596835970878601,
+                            0.8608248233795166,
+                            0.7751261591911316,
+                            1.9029089212417603,
+                            1.1601232290267944,
+                            0.8479617238044739,
+                            1.380013108253479
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 166,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            }
+        },
+        "nonq_op_infos": {
+            "0": {
+                "op_type": "<method 'size' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 120,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ],
+                "output_tensor_infos": []
+            },
+            "1": {
+                "op_type": "<method 'mul' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 129,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 130,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "2": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 131,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 132,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "3": {
+                "op_type": "<method 'transpose' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 132,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 133,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "4": {
+                "op_type": "<method 'contiguous' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 133,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 134,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "5": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 143,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 144,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "6": {
+                "op_type": "<method 'transpose' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 144,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 145,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "7": {
+                "op_type": "<method 'contiguous' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 145,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 146,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "8": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 130,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 147,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "9": {
+                "op_type": "<method 'transpose' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 147,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 148,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "10": {
+                "op_type": "<method 'contiguous' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 148,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 149,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "11": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 149,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 150,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ]
+            },
+            "12": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 134,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 151,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "13": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 146,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 152,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ]
+            },
+            "14": {
+                "op_type": "<method 'size' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 151,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": []
+            },
+            "15": {
+                "op_type": "<method 'transpose' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 151,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 153,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ]
+            },
+            "16": {
+                "op_type": "<method 'size' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 154,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": []
+            },
+            "17": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 154,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 155,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "18": {
+                "op_type": "<built-in method max of type object at 0x7fd865476560>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 156,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {
+                        "id": 157,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 158,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "19": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 158,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 159,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "20": {
+                "op_type": "<function softmax at 0x7fd863b841f0>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 159,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 160,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "21": {
+                "op_type": "<function dropout at 0x7fd863b82670>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 160,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 161,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ]
+            },
+            "22": {
+                "op_type": "<method 'size' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 162,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": []
+            },
+            "23": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 162,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 163,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "24": {
+                "op_type": "<method 'transpose' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 163,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 164,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "25": {
+                "op_type": "<method 'reshape' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 164,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 165,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8"
+                    }
+                ]
+            }
+        },
+        "layer_output_infos": [
+            {
+                "id": 166,
+                "orig_dtype": "torch.float32",
+                "inf_dtype": "torch.float32"
+            }
+        ]
+    },
+    "base_model:model:model:decoder:layers:2:self_attn:v_proj": {
+        "q_op_infos": {
+            "0": {
+                "op_type": "<built-in function linear>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn.v_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 124,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.01775776967406273
+                        ],
+                        "zero_point": [
+                            131
+                        ]
+                    },
+                    {
+                        "id": 135,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.00030197048909030855,
+                            0.0004389838723000139,
+                            0.00032464342075400054,
+                            0.0003311963810119778,
+                            0.00030156190041452646,
+                            0.0001560249220347032,
+                            0.00030205087387003005,
+                            0.00030933573725633323,
+                            0.0002607844944577664,
+                            0.00043516099685803056,
+                            0.0003899929579347372,
+                            0.00030722690280526876,
+                            0.0004186415462754667,
+                            0.0003425968752708286,
+                            0.00033600343158468604,
+                            0.00032240492873825133
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 136,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "MinMaxObserver",
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "1": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn.v_proj.lora_A.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 138,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            0.005629746709018946
+                        ],
+                        "zero_point": [
+                            131
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.3171026110649109,
+                            0.33609408140182495,
+                            0.30538907647132874,
+                            0.3807145655155182,
+                            0.44476813077926636,
+                            0.38373881578445435,
+                            0.3624895513057709,
+                            0.4135953187942505,
+                            0.32436317205429077,
+                            0.3506500720977783,
+                            0.3330007791519165,
+                            0.36063969135284424,
+                            0.3561692535877228,
+                            0.38044002652168274,
+                            0.3623844385147095,
+                            0.3346044421195984
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.005229481495916843,
+                            0.005087756551802158,
+                            0.005477722734212875,
+                            0.005216663237661123,
+                            0.004781876225024462,
+                            0.005361112765967846,
+                            0.005782869178801775,
+                            0.005476581398397684
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            3.1535534858703613,
+                            2.9753575325012207,
+                            3.2745113372802734,
+                            2.6266398429870605,
+                            2.2483625411987305,
+                            2.6059391498565674,
+                            2.758700132369995,
+                            2.4178223609924316,
+                            3.0829641819000244,
+                            2.8518459796905518,
+                            3.0029959678649902,
+                            2.772850751876831,
+                            2.8076539039611816,
+                            2.628535270690918,
+                            2.75950026512146,
+                            2.988603353500366
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 139,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            8.981955943454523e-06
+                        ],
+                        "zero_point": [
+                            130
+                        ]
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "2": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn.v_proj.lora_B.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 139,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            8.981955943454523e-06
+                        ],
+                        "zero_point": [
+                            130
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.0008957073441706598,
+                            0.0009056724957190454,
+                            0.001112349214963615,
+                            0.0009625894017517567,
+                            0.0008586375624872744,
+                            0.0008882852271199226,
+                            0.0011550098424777389,
+                            0.001077544060535729
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            1116.43603515625,
+                            1104.15185546875,
+                            898.998291015625,
+                            1038.8646240234375,
+                            1164.6357421875,
+                            1125.7645263671875,
+                            865.79345703125,
+                            928.0363159179688
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 140,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            }
+        },
+        "nonq_op_infos": {
+            "0": {
+                "op_type": "<method 'to' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn.v_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 124,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 137,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "1": {
+                "op_type": "<class 'torch.nn.modules.linear.Identity'>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn.v_proj.lora_dropout.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 137,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 138,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8"
+                    }
+                ]
+            },
+            "2": {
+                "op_type": "<method 'mul' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn.v_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 140,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 141,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "3": {
+                "op_type": "<method 'add_' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn.v_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 136,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {
+                        "id": 141,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 142,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "4": {
+                "op_type": "<method 'to' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn.v_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 142,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 143,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            }
+        },
+        "layer_output_infos": [
+            {
+                "id": 143,
+                "orig_dtype": "torch.float32",
+                "inf_dtype": "torch.float32"
+            }
+        ]
+    },
+    "base_model:model:model:decoder:layers:2:self_attn:q_proj": {
+        "q_op_infos": {
+            "0": {
+                "op_type": "<built-in function linear>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn.q_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 120,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.01775776967406273
+                        ],
+                        "zero_point": [
+                            131
+                        ]
+                    },
+                    {
+                        "id": 121,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.0003854333481285721,
+                            0.0002002624241868034,
+                            0.0002693389542400837,
+                            0.000398365780711174,
+                            0.00018276108312420547,
+                            0.0001984609116334468,
+                            0.00047398984315805137,
+                            0.00019450885883998126,
+                            0.0002813297032844275,
+                            0.00027399969985708594,
+                            0.000275527621852234,
+                            0.0002677437150850892,
+                            0.0002750060230027884,
+                            0.0003327021258883178,
+                            0.00046579609625041485,
+                            0.000409616157412529
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 122,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "MinMaxObserver",
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "1": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn.q_proj.lora_A.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 124,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            0.005674399435520172
+                        ],
+                        "zero_point": [
+                            130
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.31798771023750305,
+                            0.34436145424842834,
+                            0.3208657205104828,
+                            0.34916648268699646,
+                            0.4356110095977783,
+                            0.3874937891960144,
+                            0.37093812227249146,
+                            0.38457533717155457,
+                            0.31679439544677734,
+                            0.3490495979785919,
+                            0.33288922905921936,
+                            0.36058124899864197,
+                            0.34836524724960327,
+                            0.38592469692230225,
+                            0.35887610912323,
+                            0.34950464963912964
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.005451584700495005,
+                            0.005285411607474089,
+                            0.005357700865715742,
+                            0.005486763082444668,
+                            0.005549744237214327,
+                            0.0057990108616650105,
+                            0.005492669530212879,
+                            0.0054483977146446705
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            3.144775629043579,
+                            2.9039254188537598,
+                            3.1165685653686523,
+                            2.8639633655548096,
+                            2.295626163482666,
+                            2.580686569213867,
+                            2.6958673000335693,
+                            2.6002707481384277,
+                            3.1566214561462402,
+                            2.864922285079956,
+                            3.004002332687378,
+                            2.7732999324798584,
+                            2.8705503940582275,
+                            2.591179132461548,
+                            2.7864768505096436,
+                            2.861192226409912
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 125,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            9.729664270707872e-06
+                        ],
+                        "zero_point": [
+                            144
+                        ]
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "2": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn.q_proj.lora_B.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 125,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            9.729664270707872e-06
+                        ],
+                        "zero_point": [
+                            144
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.0008135975222103298,
+                            0.0010519020725041628,
+                            0.0010809052037075162,
+                            0.0010481273056939244,
+                            0.0010143211111426353,
+                            0.0009273464675061405,
+                            0.0009341556578874588,
+                            0.0007129005971364677
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            1229.1090087890625,
+                            950.6588745117188,
+                            925.1505126953125,
+                            954.0825805664062,
+                            985.8810424804688,
+                            1078.3455810546875,
+                            1070.4854736328125,
+                            1402.7200927734375
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 126,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            }
+        },
+        "nonq_op_infos": {
+            "0": {
+                "op_type": "<method 'to' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn.q_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 120,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 123,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "1": {
+                "op_type": "<class 'torch.nn.modules.linear.Identity'>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn.q_proj.lora_dropout.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 123,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 124,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8"
+                    }
+                ]
+            },
+            "2": {
+                "op_type": "<method 'mul' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn.q_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 126,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 127,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "3": {
+                "op_type": "<method 'add_' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn.q_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 122,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {
+                        "id": 127,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 128,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "4": {
+                "op_type": "<method 'to' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.2.self_attn.q_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 128,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 129,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            }
+        },
+        "layer_output_infos": [
+            {
+                "id": 129,
+                "orig_dtype": "torch.float32",
+                "inf_dtype": "torch.float32"
+            }
+        ]
+    },
+    "base_model:model:model:decoder:layers:3": {
+        "q_op_infos": {
+            "0": {
+                "op_type": "<method 'add' of 'torch._C._TensorBase' objects>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.3",
+                "input_tensor_infos": [
+                    {
+                        "id": 176,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            0.00037621482624672353
+                        ],
+                        "zero_point": [
+                            122
+                        ]
+                    },
+                    {
+                        "id": 224,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            7.752325473120436e-05
+                        ],
+                        "zero_point": [
+                            125
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 225,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "1": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.3.fc1",
+                "input_tensor_infos": [
+                    {
+                        "id": 227,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.0023776839952915907
+                        ],
+                        "zero_point": [
+                            133
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.07985354959964752,
+                            0.16083626449108124,
+                            0.08333823829889297,
+                            0.14618489146232605,
+                            0.1407022327184677,
+                            0.16764311492443085,
+                            0.1410415768623352,
+                            0.14249786734580994,
+                            0.16607335209846497,
+                            0.13353237509727478,
+                            0.08218377828598022,
+                            0.13758495450019836,
+                            0.10985197126865387,
+                            0.15455026924610138,
+                            0.164423406124115,
+                            0.11410682648420334
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.0024586771614849567,
+                            0.0020242577884346247,
+                            0.001968280179426074,
+                            0.00247385841794312
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            12.522924423217773,
+                            6.217503070831299,
+                            11.99929428100586,
+                            6.840651988983154,
+                            7.107207775115967,
+                            5.965052604675293,
+                            7.0901079177856445,
+                            7.017648696899414,
+                            6.021435737609863,
+                            7.488821029663086,
+                            12.167851448059082,
+                            7.2682366371154785,
+                            9.10315990447998,
+                            6.470386505126953,
+                            6.081859111785889,
+                            8.763717651367188
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 228,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "scale": [
+                            0.0015002207364887
+                        ],
+                        "zero_point": [
+                            138
+                        ]
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "MinMaxObserver",
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "2": {
+                "op_type": "<class 'torch.nn.modules.activation.ReLU'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.3.activation_fn",
+                "input_tensor_infos": [
+                    {
+                        "id": 228,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            0.0015002207364887
+                        ],
+                        "zero_point": [
+                            138
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 229,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.00033306205295957625
+                        ],
+                        "zero_point": [
+                            0
+                        ]
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "3": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.3.fc2",
+                "input_tensor_infos": [
+                    {
+                        "id": 229,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            0.00033306205295957625
+                        ],
+                        "zero_point": [
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.4834460914134979,
+                            0.7404611110687256,
+                            0.5941412448883057,
+                            0.4437545835971832
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.0004820904287043959,
+                            0.0004664862062782049,
+                            0.00027692882576957345,
+                            0.00027292605955153704,
+                            0.00012331640755292028,
+                            0.0004861857451032847,
+                            0.0005612521199509501,
+                            0.0002444250858388841,
+                            0.0005028269370086491,
+                            0.00029398142942227423,
+                            0.0004208429018035531,
+                            0.00038634889642708004,
+                            0.000549447606317699,
+                            0.0006100540049374104,
+                            0.000666111649479717,
+                            0.000535536149982363
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            2.0684828758239746,
+                            1.3505098819732666,
+                            1.6831014156341553,
+                            2.253497838973999
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 230,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "4": {
+                "op_type": "<method 'add' of 'torch._C._TensorBase' objects>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.3",
+                "input_tensor_infos": [
+                    {
+                        "id": 226,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            0.00034788335324265063
+                        ],
+                        "zero_point": [
+                            132
+                        ]
+                    },
+                    {
+                        "id": 231,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            5.832596798427403e-05
+                        ],
+                        "zero_point": [
+                            118
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 232,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            }
+        },
+        "nonq_op_infos": {
+            "0": {
+                "op_type": "<class 'torch.nn.modules.normalization.LayerNorm'>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn_layer_norm",
+                "input_tensor_infos": [
+                    {
+                        "id": 176,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 177,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ]
+            },
+            "1": {
+                "op_type": "<function dropout at 0x7fd863b82670>",
+                "fqn": "base_model.model.model.decoder.layers.3",
+                "input_tensor_infos": [
+                    {
+                        "id": 223,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 224,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "2": {
+                "op_type": "<method 'size' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3",
+                "input_tensor_infos": [
+                    {
+                        "id": 225,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": []
+            },
+            "3": {
+                "op_type": "<method 'reshape' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3",
+                "input_tensor_infos": [
+                    {
+                        "id": 225,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 226,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "4": {
+                "op_type": "<class 'torch.nn.modules.normalization.LayerNorm'>",
+                "fqn": "base_model.model.model.decoder.layers.3.final_layer_norm",
+                "input_tensor_infos": [
+                    {
+                        "id": 226,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 227,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ]
+            },
+            "5": {
+                "op_type": "<function dropout at 0x7fd863b82670>",
+                "fqn": "base_model.model.model.decoder.layers.3",
+                "input_tensor_infos": [
+                    {
+                        "id": 230,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 231,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "6": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3",
+                "input_tensor_infos": [
+                    {
+                        "id": 232,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 233,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            }
+        },
+        "layer_output_infos": [
+            {
+                "id": 233,
+                "orig_dtype": "torch.float32",
+                "inf_dtype": "torch.float32"
+            }
+        ]
+    },
+    "base_model:model:model:decoder:layers:3:self_attn": {
+        "q_op_infos": {
+            "0": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn.k_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 181,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            0.0027480784337967634
+                        ],
+                        "zero_point": [
+                            132
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.15305233001708984,
+                            0.12225167453289032,
+                            0.14288797974586487,
+                            0.13328514993190765,
+                            0.1680242121219635,
+                            0.14797469973564148,
+                            0.1489410549402237,
+                            0.1615593284368515,
+                            0.1862698644399643,
+                            0.11918998509645462,
+                            0.14913791418075562,
+                            0.1354290097951889,
+                            0.14345955848693848,
+                            0.15483605861663818,
+                            0.15613141655921936,
+                            0.11631940305233002
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.001720090745948255,
+                            0.0024139657616615295,
+                            0.0019263529684394598,
+                            0.0019102180376648903,
+                            0.001767429057508707,
+                            0.0020019139628857374,
+                            0.0013569797156378627,
+                            0.0020858272910118103,
+                            0.001683894544839859,
+                            0.0018227207474410534,
+                            0.0013690086780115962,
+                            0.0025352758821099997,
+                            0.0028395988047122955,
+                            0.0017747465753927827,
+                            0.002007675589993596,
+                            0.0024633959401398897
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            6.533712863922119,
+                            8.17984676361084,
+                            6.998489856719971,
+                            7.502711296081543,
+                            5.951523303985596,
+                            6.757911682128906,
+                            6.7140655517578125,
+                            6.189676761627197,
+                            5.368554592132568,
+                            8.38996696472168,
+                            6.705203056335449,
+                            7.383942604064941,
+                            6.970605850219727,
+                            6.458444118499756,
+                            6.404860973358154,
+                            8.597018241882324
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 188,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "1": {
+                "op_type": "<built-in method bmm of type object at 0x7fd865476560>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 207,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.0007488401024602354
+                        ],
+                        "zero_point": [
+                            120
+                        ]
+                    },
+                    {
+                        "id": 210,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.001735710073262453
+                        ],
+                        "zero_point": [
+                            115
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 211,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "MinMaxObserver",
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "2": {
+                "op_type": "<method 'add' of 'torch._C._TensorBase' objects>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 212,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            0.0001701119472272694
+                        ],
+                        "zero_point": [
+                            123
+                        ]
+                    },
+                    {
+                        "id": 41,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            1.3344405750530544e+36
+                        ],
+                        "zero_point": [
+                            255
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 213,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "3": {
+                "op_type": "<built-in method bmm of type object at 0x7fd865476560>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 218,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.003919653594493866
+                        ],
+                        "zero_point": [
+                            0
+                        ]
+                    },
+                    {
+                        "id": 209,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.0015115730930119753
+                        ],
+                        "zero_point": [
+                            113
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 219,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "MinMaxObserver",
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "4": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn.out_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 222,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            0.0006200977368280292
+                        ],
+                        "zero_point": [
+                            113
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.6995439529418945,
+                            0.6177797913551331,
+                            0.7721221446990967,
+                            0.6627070903778076,
+                            1.6795364618301392,
+                            0.7636286020278931,
+                            0.4094974994659424,
+                            0.8874781727790833,
+                            0.9102892875671387,
+                            1.1451750993728638,
+                            1.4108965396881104,
+                            0.7052625417709351,
+                            0.9994568228721619,
+                            1.100576400756836,
+                            1.8541733026504517,
+                            1.0649280548095703
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.00044207024620845914,
+                            0.0003401314897928387,
+                            0.0002646299544721842,
+                            0.0005266364896669984,
+                            0.0006902840686962008,
+                            0.0004130130400881171,
+                            0.0005498980171978474,
+                            0.0004560376692097634,
+                            0.0004986900603398681,
+                            0.0003417898842599243,
+                            0.0004254740779288113,
+                            0.00041787157533690333,
+                            0.00043513832497410476,
+                            0.00033653207356110215,
+                            0.0003935080021619797,
+                            0.0005104956217110157
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            1.4295027256011963,
+                            1.6186997890472412,
+                            1.2951319217681885,
+                            1.5089622735977173,
+                            0.5954023599624634,
+                            1.3095371723175049,
+                            2.4420173168182373,
+                            1.1267882585525513,
+                            1.098551869392395,
+                            0.873228907585144,
+                            0.7087692022323608,
+                            1.4179116487503052,
+                            1.000543475151062,
+                            0.9086148142814636,
+                            0.5393239259719849,
+                            0.9390305876731873
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 223,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            }
+        },
+        "nonq_op_infos": {
+            "0": {
+                "op_type": "<method 'size' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 177,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ],
+                "output_tensor_infos": []
+            },
+            "1": {
+                "op_type": "<method 'mul' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 186,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 187,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "2": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 188,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 189,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "3": {
+                "op_type": "<method 'transpose' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 189,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 190,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "4": {
+                "op_type": "<method 'contiguous' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 190,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 191,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "5": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 200,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 201,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "6": {
+                "op_type": "<method 'transpose' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 201,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 202,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "7": {
+                "op_type": "<method 'contiguous' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 202,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 203,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "8": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 187,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 204,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "9": {
+                "op_type": "<method 'transpose' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 204,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 205,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "10": {
+                "op_type": "<method 'contiguous' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 205,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 206,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "11": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 206,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 207,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ]
+            },
+            "12": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 191,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 208,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "13": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 203,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 209,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ]
+            },
+            "14": {
+                "op_type": "<method 'size' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 208,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": []
+            },
+            "15": {
+                "op_type": "<method 'transpose' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 208,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 210,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ]
+            },
+            "16": {
+                "op_type": "<method 'size' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 211,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": []
+            },
+            "17": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 211,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 212,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "18": {
+                "op_type": "<built-in method max of type object at 0x7fd865476560>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 213,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {
+                        "id": 214,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 215,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "19": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 215,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 216,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "20": {
+                "op_type": "<function softmax at 0x7fd863b841f0>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 216,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 217,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "21": {
+                "op_type": "<function dropout at 0x7fd863b82670>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 217,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 218,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ]
+            },
+            "22": {
+                "op_type": "<method 'size' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 219,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": []
+            },
+            "23": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 219,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 220,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "24": {
+                "op_type": "<method 'transpose' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 220,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 221,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "25": {
+                "op_type": "<method 'reshape' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 221,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 222,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8"
+                    }
+                ]
+            }
+        },
+        "layer_output_infos": [
+            {
+                "id": 223,
+                "orig_dtype": "torch.float32",
+                "inf_dtype": "torch.float32"
+            }
+        ]
+    },
+    "base_model:model:model:decoder:layers:3:self_attn:v_proj": {
+        "q_op_infos": {
+            "0": {
+                "op_type": "<built-in function linear>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn.v_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 181,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.01776798442006111
+                        ],
+                        "zero_point": [
+                            133
+                        ]
+                    },
+                    {
+                        "id": 192,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.0003324671706650406,
+                            0.00022315105888992548,
+                            0.0002932818024419248,
+                            0.00030679896008223295,
+                            0.0002626084315124899,
+                            0.00043297093361616135,
+                            0.00032347848173230886,
+                            0.0003563085338100791,
+                            0.0002481463016010821,
+                            0.0003380977432243526,
+                            0.0003385223390068859,
+                            0.0002827795979101211,
+                            0.0001963942195288837,
+                            0.00028943148208782077,
+                            0.0003743217675946653,
+                            0.00036169850500300527
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 193,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "MinMaxObserver",
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "1": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn.v_proj.lora_A.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 195,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            0.005664766300469637
+                        ],
+                        "zero_point": [
+                            134
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.32095256447792053,
+                            0.330251008272171,
+                            0.20045620203018188,
+                            0.36967453360557556,
+                            0.3964439630508423,
+                            0.36228129267692566,
+                            0.39195728302001953,
+                            0.38848578929901123,
+                            0.3446809649467468,
+                            0.34981322288513184,
+                            0.2928933799266815,
+                            0.361460417509079,
+                            0.348776251077652,
+                            0.38512811064720154,
+                            0.39673128724098206,
+                            0.27381011843681335
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.004655179567635059,
+                            0.005229951348155737,
+                            0.005511950235813856,
+                            0.004998452961444855,
+                            0.00595475547015667,
+                            0.0053747366182506084,
+                            0.005163250025361776,
+                            0.00517900800332427
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            3.11572527885437,
+                            3.0279998779296875,
+                            4.988620758056641,
+                            2.7050821781158447,
+                            2.5224244594573975,
+                            2.7602858543395996,
+                            2.5512983798980713,
+                            2.5740966796875,
+                            2.901233673095703,
+                            2.858668565750122,
+                            3.4142115116119385,
+                            2.766554594039917,
+                            2.8671674728393555,
+                            2.596538543701172,
+                            2.5205979347229004,
+                            3.6521661281585693
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 196,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            9.392422725795768e-06
+                        ],
+                        "zero_point": [
+                            128
+                        ]
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "2": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn.v_proj.lora_B.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 196,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            9.392422725795768e-06
+                        ],
+                        "zero_point": [
+                            128
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.0008344786474481225,
+                            0.0009075241396203637,
+                            0.001010999782010913,
+                            0.001130994874984026,
+                            0.0009487126371823251,
+                            0.0008356202160939574,
+                            0.0011216377606615424,
+                            0.001240230049006641
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            1198.35302734375,
+                            1101.8990478515625,
+                            989.119873046875,
+                            884.1773071289062,
+                            1054.0599365234375,
+                            1196.7159423828125,
+                            891.553466796875,
+                            806.302001953125
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 197,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            }
+        },
+        "nonq_op_infos": {
+            "0": {
+                "op_type": "<method 'to' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn.v_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 181,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 194,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "1": {
+                "op_type": "<class 'torch.nn.modules.linear.Identity'>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn.v_proj.lora_dropout.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 194,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 195,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8"
+                    }
+                ]
+            },
+            "2": {
+                "op_type": "<method 'mul' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn.v_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 197,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 198,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "3": {
+                "op_type": "<method 'add_' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn.v_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 193,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {
+                        "id": 198,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 199,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "4": {
+                "op_type": "<method 'to' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn.v_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 199,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 200,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            }
+        },
+        "layer_output_infos": [
+            {
+                "id": 200,
+                "orig_dtype": "torch.float32",
+                "inf_dtype": "torch.float32"
+            }
+        ]
+    },
+    "base_model:model:model:decoder:layers:3:self_attn:q_proj": {
+        "q_op_infos": {
+            "0": {
+                "op_type": "<built-in function linear>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn.q_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 177,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.01776798442006111
+                        ],
+                        "zero_point": [
+                            133
+                        ]
+                    },
+                    {
+                        "id": 178,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.00032423349330201745,
+                            0.000316593621391803,
+                            0.000278401275863871,
+                            0.0004554924671538174,
+                            0.0003137265157420188,
+                            0.0002551494399085641,
+                            0.00033229144173674285,
+                            0.00037298008101060987,
+                            0.00018664839444682002,
+                            0.00038735457928851247,
+                            0.00029549546889029443,
+                            0.0003679211949929595,
+                            0.0003933409752789885,
+                            0.000295131525490433,
+                            0.00048211432294920087,
+                            0.00038136306102387607
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 179,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "MinMaxObserver",
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "1": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn.q_proj.lora_A.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 181,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            0.005755296908318996
+                        ],
+                        "zero_point": [
+                            130
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.31676962971687317,
+                            0.34011387825012207,
+                            0.33628129959106445,
+                            0.3565284311771393,
+                            0.4056679308414459,
+                            0.3763771653175354,
+                            0.3095501661300659,
+                            0.3858562707901001,
+                            0.3497072458267212,
+                            0.337135910987854,
+                            0.3139989972114563,
+                            0.3360980153083801,
+                            0.3577878177165985,
+                            0.3620110750198364,
+                            0.397707998752594,
+                            0.31274789571762085
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.0053594219498336315,
+                            0.005633404012769461,
+                            0.005202163010835648,
+                            0.004862114321440458,
+                            0.004991317167878151,
+                            0.0058771464973688126,
+                            0.0048659383319318295,
+                            0.0053062173537909985
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            3.1568682193756104,
+                            2.9401917457580566,
+                            2.973700761795044,
+                            2.8048255443573,
+                            2.4650704860687256,
+                            2.656909465789795,
+                            3.2304940223693848,
+                            2.5916385650634766,
+                            2.859534740447998,
+                            2.966162919998169,
+                            3.184723377227783,
+                            2.9753224849700928,
+                            2.794952630996704,
+                            2.7623465061187744,
+                            2.5144073963165283,
+                            3.1974635124206543
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 182,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            8.640432497486472e-06
+                        ],
+                        "zero_point": [
+                            121
+                        ]
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "2": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn.q_proj.lora_B.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 182,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            8.640432497486472e-06
+                        ],
+                        "zero_point": [
+                            121
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.001176572171971202,
+                            0.0009582844795659184,
+                            0.0010004665236920118,
+                            0.0011581374565139413,
+                            0.0009539962629787624,
+                            0.0009674145840108395,
+                            0.0011028836015611887,
+                            0.0008622322347946465
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            849.9266357421875,
+                            1043.531494140625,
+                            999.53369140625,
+                            863.4553833007812,
+                            1048.22216796875,
+                            1033.6829833984375,
+                            906.7140502929688,
+                            1159.7802734375
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 183,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            }
+        },
+        "nonq_op_infos": {
+            "0": {
+                "op_type": "<method 'to' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn.q_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 177,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 180,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "1": {
+                "op_type": "<class 'torch.nn.modules.linear.Identity'>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn.q_proj.lora_dropout.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 180,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 181,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8"
+                    }
+                ]
+            },
+            "2": {
+                "op_type": "<method 'mul' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn.q_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 183,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 184,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "3": {
+                "op_type": "<method 'add_' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn.q_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 179,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {
+                        "id": 184,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 185,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "4": {
+                "op_type": "<method 'to' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.3.self_attn.q_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 185,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 186,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            }
+        },
+        "layer_output_infos": [
+            {
+                "id": 186,
+                "orig_dtype": "torch.float32",
+                "inf_dtype": "torch.float32"
+            }
+        ]
+    },
+    "base_model:model:model:decoder:layers:4": {
+        "q_op_infos": {
+            "0": {
+                "op_type": "<method 'add' of 'torch._C._TensorBase' objects>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.4",
+                "input_tensor_infos": [
+                    {
+                        "id": 233,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            0.0003514998243190348
+                        ],
+                        "zero_point": [
+                            130
+                        ]
+                    },
+                    {
+                        "id": 281,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            8.57003906276077e-05
+                        ],
+                        "zero_point": [
+                            128
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 282,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "1": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.4.fc1",
+                "input_tensor_infos": [
+                    {
+                        "id": 284,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.002228383906185627
+                        ],
+                        "zero_point": [
+                            119
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.07490289211273193,
+                            0.08660875260829926,
+                            0.11810106784105301,
+                            0.14189991354942322,
+                            0.21989880502223969,
+                            0.11465814709663391,
+                            0.16941188275814056,
+                            0.12009169906377792,
+                            0.10226233303546906,
+                            0.142814502120018,
+                            0.10813327878713608,
+                            0.11895965784788132,
+                            0.121878482401371,
+                            0.11287350207567215,
+                            0.13685068488121033,
+                            0.09771235287189484
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.0020820628851652145,
+                            0.0020485082641243935,
+                            0.002014799742028117,
+                            0.0023746262304484844
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            13.350618362426758,
+                            11.54617691040039,
+                            8.467324256896973,
+                            7.047220706939697,
+                            4.54754638671875,
+                            8.721578598022461,
+                            5.902773857116699,
+                            8.326970100402832,
+                            9.77877140045166,
+                            7.002089977264404,
+                            9.247847557067871,
+                            8.406210899353027,
+                            8.204894065856934,
+                            8.859475135803223,
+                            7.307234287261963,
+                            10.23412036895752
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 285,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "scale": [
+                            0.001055107219144702
+                        ],
+                        "zero_point": [
+                            145
+                        ]
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "MinMaxObserver",
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "2": {
+                "op_type": "<class 'torch.nn.modules.activation.ReLU'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.4.activation_fn",
+                "input_tensor_infos": [
+                    {
+                        "id": 285,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            0.001055107219144702
+                        ],
+                        "zero_point": [
+                            145
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 286,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.000284639245364815
+                        ],
+                        "zero_point": [
+                            0
+                        ]
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "3": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.4.fc2",
+                "input_tensor_infos": [
+                    {
+                        "id": 286,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            0.000284639245364815
+                        ],
+                        "zero_point": [
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.6736321449279785,
+                            0.7133809328079224,
+                            0.6224557757377625,
+                            0.6026663780212402
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.0004203274438623339,
+                            0.0005461736582219601,
+                            0.00029452473972924054,
+                            0.000159776012878865,
+                            0.00043560945778153837,
+                            0.00014329873374663293,
+                            0.00041623887955211103,
+                            0.00016978861822281033,
+                            0.0005588481435552239,
+                            0.0005692706909030676,
+                            0.0005020407843403518,
+                            0.00035534577909857035,
+                            0.00012209215492475778,
+                            0.00042365113040432334,
+                            0.00031242144177667797,
+                            0.00043097708839923143
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            1.4844897985458374,
+                            1.401775598526001,
+                            1.6065398454666138,
+                            1.6592928171157837
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 287,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "4": {
+                "op_type": "<method 'add' of 'torch._C._TensorBase' objects>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.4",
+                "input_tensor_infos": [
+                    {
+                        "id": 283,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            0.0003883987374138087
+                        ],
+                        "zero_point": [
+                            119
+                        ]
+                    },
+                    {
+                        "id": 288,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            5.001319004804827e-05
+                        ],
+                        "zero_point": [
+                            98
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 289,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            }
+        },
+        "nonq_op_infos": {
+            "0": {
+                "op_type": "<class 'torch.nn.modules.normalization.LayerNorm'>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn_layer_norm",
+                "input_tensor_infos": [
+                    {
+                        "id": 233,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 234,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ]
+            },
+            "1": {
+                "op_type": "<function dropout at 0x7fd863b82670>",
+                "fqn": "base_model.model.model.decoder.layers.4",
+                "input_tensor_infos": [
+                    {
+                        "id": 280,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 281,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "2": {
+                "op_type": "<method 'size' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4",
+                "input_tensor_infos": [
+                    {
+                        "id": 282,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": []
+            },
+            "3": {
+                "op_type": "<method 'reshape' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4",
+                "input_tensor_infos": [
+                    {
+                        "id": 282,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 283,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "4": {
+                "op_type": "<class 'torch.nn.modules.normalization.LayerNorm'>",
+                "fqn": "base_model.model.model.decoder.layers.4.final_layer_norm",
+                "input_tensor_infos": [
+                    {
+                        "id": 283,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 284,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ]
+            },
+            "5": {
+                "op_type": "<function dropout at 0x7fd863b82670>",
+                "fqn": "base_model.model.model.decoder.layers.4",
+                "input_tensor_infos": [
+                    {
+                        "id": 287,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 288,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "6": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4",
+                "input_tensor_infos": [
+                    {
+                        "id": 289,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 290,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            }
+        },
+        "layer_output_infos": [
+            {
+                "id": 290,
+                "orig_dtype": "torch.float32",
+                "inf_dtype": "torch.float32"
+            }
+        ]
+    },
+    "base_model:model:model:decoder:layers:4:self_attn": {
+        "q_op_infos": {
+            "0": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn.k_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 238,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            0.0027999659068882465
+                        ],
+                        "zero_point": [
+                            126
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.1276211142539978,
+                            0.150820791721344,
+                            0.1714450716972351,
+                            0.16795159876346588,
+                            0.16124901175498962,
+                            0.18224044144153595,
+                            0.18983706831932068,
+                            0.1788574606180191,
+                            0.15811805427074432,
+                            0.1341456025838852,
+                            0.12334004789590836,
+                            0.1529984027147293,
+                            0.1920592486858368,
+                            0.18413232266902924,
+                            0.14898242056369781,
+                            0.14343814551830292
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.00161055289208889,
+                            0.002761982847005129,
+                            0.002112831687554717,
+                            0.001920697744935751,
+                            0.0022592521272599697,
+                            0.002328770002350211,
+                            0.0025834105908870697,
+                            0.001746615394949913,
+                            0.001962139271199703,
+                            0.002837864914909005,
+                            0.0014426681445911527,
+                            0.0011523402063176036,
+                            0.0017300972249358892,
+                            0.0019138624193146825,
+                            0.0012878895504400134,
+                            0.002227139426395297
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            7.835694313049316,
+                            6.630385875701904,
+                            5.832772254943848,
+                            5.954096794128418,
+                            6.2015886306762695,
+                            5.487256050109863,
+                            5.267674922943115,
+                            5.5910444259643555,
+                            6.32438850402832,
+                            7.454586982727051,
+                            8.107666969299316,
+                            6.53601598739624,
+                            5.206726551055908,
+                            5.430877208709717,
+                            6.7122015953063965,
+                            6.971645832061768
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 245,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "1": {
+                "op_type": "<built-in method bmm of type object at 0x7fd865476560>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 264,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.0007117515779100358
+                        ],
+                        "zero_point": [
+                            140
+                        ]
+                    },
+                    {
+                        "id": 267,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.0019945132080465555
+                        ],
+                        "zero_point": [
+                            119
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 268,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "MinMaxObserver",
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "2": {
+                "op_type": "<method 'add' of 'torch._C._TensorBase' objects>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 269,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            0.00019521928334143013
+                        ],
+                        "zero_point": [
+                            116
+                        ]
+                    },
+                    {
+                        "id": 41,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32",
+                        "force_dtype": "torch.float32",
+                        "scale": [
+                            1.3344405750530544e+36
+                        ],
+                        "zero_point": [
+                            255
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 270,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "3": {
+                "op_type": "<built-in method bmm of type object at 0x7fd865476560>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 275,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.003919653594493866
+                        ],
+                        "zero_point": [
+                            0
+                        ]
+                    },
+                    {
+                        "id": 266,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.0016270156484097242
+                        ],
+                        "zero_point": [
+                            146
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 276,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "MinMaxObserver",
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "4": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn.out_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 279,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            0.0007375198183581233
+                        ],
+                        "zero_point": [
+                            128
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.7722166180610657,
+                            0.5633851885795593,
+                            1.1684167385101318,
+                            0.3943912386894226,
+                            1.159185767173767,
+                            0.5480626225471497,
+                            0.6345553994178772,
+                            0.5357393026351929,
+                            0.4990648031234741,
+                            0.5183709263801575,
+                            0.999289870262146,
+                            1.1737982034683228,
+                            0.900857150554657,
+                            0.5510081648826599,
+                            0.6094800233840942,
+                            0.8488926291465759
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.0007356680580414832,
+                            0.000624767504632473,
+                            0.00041961250826716423,
+                            0.0003189104900229722,
+                            0.0006824223673902452,
+                            0.00028400454903021455,
+                            0.0003987895615864545,
+                            0.0007355051347985864,
+                            0.000533037877175957,
+                            0.0007393524865619838,
+                            0.0004391110851429403,
+                            0.0005651583196595311,
+                            0.0006558214081451297,
+                            0.00030508654890581965,
+                            0.0003671708982437849,
+                            0.00036240171175450087
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            1.294973373413086,
+                            1.77498459815979,
+                            0.8558589816093445,
+                            2.535553216934204,
+                            0.8626744747161865,
+                            1.8246090412139893,
+                            1.57590651512146,
+                            1.866579532623291,
+                            2.0037479400634766,
+                            1.9291205406188965,
+                            1.0007106065750122,
+                            0.8519351482391357,
+                            1.1100538969039917,
+                            1.8148550987243652,
+                            1.6407428979873657,
+                            1.178005337715149
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 280,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            }
+        },
+        "nonq_op_infos": {
+            "0": {
+                "op_type": "<method 'size' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 234,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ],
+                "output_tensor_infos": []
+            },
+            "1": {
+                "op_type": "<method 'mul' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 243,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 244,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "2": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 245,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 246,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "3": {
+                "op_type": "<method 'transpose' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 246,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 247,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "4": {
+                "op_type": "<method 'contiguous' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 247,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 248,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "5": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 257,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 258,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "6": {
+                "op_type": "<method 'transpose' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 258,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 259,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "7": {
+                "op_type": "<method 'contiguous' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 259,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 260,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "8": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 244,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 261,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "9": {
+                "op_type": "<method 'transpose' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 261,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 262,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "10": {
+                "op_type": "<method 'contiguous' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 262,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 263,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "11": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 263,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 264,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ]
+            },
+            "12": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 248,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 265,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "13": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 260,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 266,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ]
+            },
+            "14": {
+                "op_type": "<method 'size' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 265,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": []
+            },
+            "15": {
+                "op_type": "<method 'transpose' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 265,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 267,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ]
+            },
+            "16": {
+                "op_type": "<method 'size' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 268,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": []
+            },
+            "17": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 268,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 269,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "18": {
+                "op_type": "<built-in method max of type object at 0x7fd865476560>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 270,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {
+                        "id": 271,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 272,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "19": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 272,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 273,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "20": {
+                "op_type": "<function softmax at 0x7fd863b841f0>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 273,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 274,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "21": {
+                "op_type": "<function dropout at 0x7fd863b82670>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 274,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 275,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    }
+                ]
+            },
+            "22": {
+                "op_type": "<method 'size' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 276,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": []
+            },
+            "23": {
+                "op_type": "<method 'view' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 276,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 277,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "24": {
+                "op_type": "<method 'transpose' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 277,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 278,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "25": {
+                "op_type": "<method 'reshape' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn",
+                "input_tensor_infos": [
+                    {
+                        "id": 278,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {},
+                    {},
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 279,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8"
+                    }
+                ]
+            }
+        },
+        "layer_output_infos": [
+            {
+                "id": 280,
+                "orig_dtype": "torch.float32",
+                "inf_dtype": "torch.float32"
+            }
+        ]
+    },
+    "base_model:model:model:decoder:layers:4:self_attn:v_proj": {
+        "q_op_infos": {
+            "0": {
+                "op_type": "<built-in function linear>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn.v_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 238,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.01734682358801365
+                        ],
+                        "zero_point": [
+                            133
+                        ]
+                    },
+                    {
+                        "id": 249,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.00031136625329963863,
+                            0.00025668463786132634,
+                            0.00022895813162904233,
+                            0.00041868723928928375,
+                            0.00046801360440440476,
+                            0.000544839771464467,
+                            0.0002859718806575984,
+                            0.0003757727099582553,
+                            0.00036266579991206527,
+                            0.0003396574466023594,
+                            0.00031236352515406907,
+                            0.0002318342449143529,
+                            0.00039146305061876774,
+                            0.00027779644005931914,
+                            0.00029864496900700033,
+                            0.00038601880078203976
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 250,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "MinMaxObserver",
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "1": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn.v_proj.lora_A.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 252,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            0.005635845009237528
+                        ],
+                        "zero_point": [
+                            131
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.31827637553215027,
+                            0.3578762412071228,
+                            0.33076614141464233,
+                            0.38460636138916016,
+                            0.40154778957366943,
+                            0.36107155680656433,
+                            0.40281784534454346,
+                            0.35139355063438416,
+                            0.33984655141830444,
+                            0.3377038836479187,
+                            0.3539571166038513,
+                            0.3015575408935547,
+                            0.36707204580307007,
+                            0.3956300914287567,
+                            0.3773953318595886,
+                            0.35525140166282654
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.005779150407761335,
+                            0.0048353704623878,
+                            0.005278879776597023,
+                            0.005413893144577742,
+                            0.005464703775942326,
+                            0.005715600214898586,
+                            0.005475110374391079,
+                            0.00549249816685915
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            3.141923427581787,
+                            2.794262170791626,
+                            3.0232841968536377,
+                            2.6000609397888184,
+                            2.490363597869873,
+                            2.769534111022949,
+                            2.4825117588043213,
+                            2.8458120822906494,
+                            2.942504644393921,
+                            2.9611740112304688,
+                            2.8252010345458984,
+                            3.3161168098449707,
+                            2.7242608070373535,
+                            2.527613639831543,
+                            2.6497411727905273,
+                            2.8149077892303467
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 253,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            9.803168723010458e-06
+                        ],
+                        "zero_point": [
+                            123
+                        ]
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "2": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn.v_proj.lora_B.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 253,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            9.803168723010458e-06
+                        ],
+                        "zero_point": [
+                            123
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.0008598009590059519,
+                            0.0009210532298311591,
+                            0.0007724169990979135,
+                            0.0008119846461340785,
+                            0.0008297572494484484,
+                            0.0009486194467172027,
+                            0.0009887740015983582,
+                            0.0009047275525517762
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            1163.0599365234375,
+                            1085.713623046875,
+                            1294.637451171875,
+                            1231.5504150390625,
+                            1205.1717529296875,
+                            1054.16357421875,
+                            1011.3534545898438,
+                            1105.30517578125
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 254,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            }
+        },
+        "nonq_op_infos": {
+            "0": {
+                "op_type": "<method 'to' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn.v_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 238,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 251,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "1": {
+                "op_type": "<class 'torch.nn.modules.linear.Identity'>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn.v_proj.lora_dropout.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 251,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 252,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8"
+                    }
+                ]
+            },
+            "2": {
+                "op_type": "<method 'mul' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn.v_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 254,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 255,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "3": {
+                "op_type": "<method 'add_' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn.v_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 250,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {
+                        "id": 255,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 256,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "4": {
+                "op_type": "<method 'to' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn.v_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 256,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 257,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            }
+        },
+        "layer_output_infos": [
+            {
+                "id": 257,
+                "orig_dtype": "torch.float32",
+                "inf_dtype": "torch.float32"
+            }
+        ]
+    },
+    "base_model:model:model:decoder:layers:4:self_attn:q_proj": {
+        "q_op_infos": {
+            "0": {
+                "op_type": "<built-in function linear>",
+                "op_type_is_module": false,
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn.q_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 234,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.01734682358801365
+                        ],
+                        "zero_point": [
+                            133
+                        ]
+                    },
+                    {
+                        "id": 235,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "force_dtype": "torch.quint8",
+                        "scale": [
+                            0.0002663393097463995,
+                            0.0001794756535673514,
+                            0.00038752422551624477,
+                            0.00040039693703874946,
+                            0.0003284724662080407,
+                            0.0003813150688074529,
+                            0.00030346630956046283,
+                            0.0003574831353034824,
+                            0.00039425952127203345,
+                            0.00027267029508948326,
+                            0.0005633418913930655,
+                            0.0002701474877540022,
+                            0.00029551019542850554,
+                            0.00038041899097152054,
+                            0.00028876157011836767,
+                            0.0003570202679838985
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [],
+                "output_tensor_infos": [
+                    {
+                        "id": 236,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "MinMaxObserver",
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_tensor_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": false,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "1": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn.q_proj.lora_A.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 238,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            0.005501109641045332
+                        ],
+                        "zero_point": [
+                            129
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.30547964572906494,
+                            0.35993775725364685,
+                            0.31513577699661255,
+                            0.34536346793174744,
+                            0.44191431999206543,
+                            0.35816141963005066,
+                            0.3525455892086029,
+                            0.32803699374198914,
+                            0.34046927094459534,
+                            0.3439047634601593,
+                            0.26700085401535034,
+                            0.35134607553482056,
+                            0.35045164823532104,
+                            0.3664116859436035,
+                            0.3242016136646271,
+                            0.31810393929481506
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.005165941547602415,
+                            0.005216381512582302,
+                            0.005039858631789684,
+                            0.0055467900820076466,
+                            0.005347891245037317,
+                            0.005455384962260723,
+                            0.005077300127595663,
+                            0.0038714618422091007
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            3.273540496826172,
+                            2.7782580852508545,
+                            3.1732351779937744,
+                            2.8955001831054688,
+                            2.2628822326660156,
+                            2.79203724861145,
+                            2.836512565612793,
+                            3.0484366416931152,
+                            2.9371225833892822,
+                            2.9077816009521484,
+                            3.7453062534332275,
+                            2.8461966514587402,
+                            2.8534607887268066,
+                            2.729170560836792,
+                            3.084500312805176,
+                            3.1436264514923096
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 239,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            9.286790373153053e-06
+                        ],
+                        "zero_point": [
+                            122
+                        ]
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            },
+            "2": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn.q_proj.lora_B.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 239,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            9.286790373153053e-06
+                        ],
+                        "zero_point": [
+                            122
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.0010128314606845379,
+                            0.0009598570759408176,
+                            0.0008829228463582695,
+                            0.0010532870655879378,
+                            0.0010632226476445794,
+                            0.00080936832819134,
+                            0.0009245102410204709,
+                            0.0015908911591395736
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07,
+                            1.1920928955078125e-07
+                        ],
+                        "zero_point": [
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            987.3311157226562,
+                            1041.82177734375,
+                            1132.601806640625,
+                            949.4088134765625,
+                            940.5367431640625,
+                            1235.5313720703125,
+                            1081.65380859375,
+                            628.5784912109375
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 240,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            }
+        },
+        "nonq_op_infos": {
+            "0": {
+                "op_type": "<method 'to' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn.q_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 234,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 237,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "1": {
+                "op_type": "<class 'torch.nn.modules.linear.Identity'>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn.q_proj.lora_dropout.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 237,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 238,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8"
+                    }
+                ]
+            },
+            "2": {
+                "op_type": "<method 'mul' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn.q_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 240,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 241,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "3": {
+                "op_type": "<method 'add_' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn.q_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 236,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {
+                        "id": 241,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 242,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            },
+            "4": {
+                "op_type": "<method 'to' of 'torch._C._TensorBase' objects>",
+                "fqn": "base_model.model.model.decoder.layers.4.self_attn.q_proj",
+                "input_tensor_infos": [
+                    {
+                        "id": 242,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    },
+                    {}
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 243,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ]
+            }
+        },
+        "layer_output_infos": [
+            {
+                "id": 243,
+                "orig_dtype": "torch.float32",
+                "inf_dtype": "torch.float32"
+            }
+        ]
+    },
+    "base_model:model:score": {
+        "q_op_infos": {
+            "0": {
+                "op_type": "<class 'torch.nn.modules.linear.Linear'>",
+                "op_type_is_module": true,
+                "fqn": "base_model.model.score.modules_to_save.default",
+                "input_tensor_infos": [
+                    {
+                        "id": 291,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.quint8",
+                        "force_dtype": "torch.qint8",
+                        "scale": [
+                            0.002082008868455887
+                        ],
+                        "zero_point": [
+                            121
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            0.09728745371103287,
+                            0.1161823719739914,
+                            0.11396356672048569,
+                            0.11526038497686386,
+                            0.1379472017288208,
+                            0.07854942232370377,
+                            0.11264046281576157,
+                            0.11592700332403183,
+                            0.06852123886346817,
+                            0.12406256049871445,
+                            0.13584022223949432,
+                            0.11282354593276978,
+                            0.04839572310447693,
+                            0.16329161822795868,
+                            0.15682533383369446,
+                            0.13011035323143005
+                        ]
+                    }
+                ],
+                "weight_tensor_infos": [
+                    {
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.qint8",
+                        "scale": [
+                            0.002189199673011899,
+                            0.001974712824448943
+                        ],
+                        "zero_point": [
+                            0,
+                            0
+                        ],
+                        "smooth_quant_scaling_factor": [
+                            10.278817176818848,
+                            8.607157707214355,
+                            8.774734497070312,
+                            8.676008224487305,
+                            7.249150276184082,
+                            12.730838775634766,
+                            8.87780475616455,
+                            8.626117706298828,
+                            14.594016075134277,
+                            8.060449600219727,
+                            7.3615899085998535,
+                            8.863398551940918,
+                            20.662982940673828,
+                            6.1240129470825195,
+                            6.37652063369751,
+                            7.685783386230469
+                        ]
+                    }
+                ],
+                "output_tensor_infos": [
+                    {
+                        "id": 292,
+                        "orig_dtype": "torch.float32",
+                        "inf_dtype": "torch.float32"
+                    }
+                ],
+                "activation_observer": {
+                    "name": "SmoothQuantActivationObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.quint8",
+                    "qscheme": "torch.per_tensor_affine",
+                    "reduce_range": false,
+                    "quant_min": 0,
+                    "quant_max": 255,
+                    "alpha": 0.5,
+                    "act_observer": {
+                        "name": "HistogramObserver",
+                        "bins": 2048,
+                        "upsample_rate": 128,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_tensor_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "act_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": -1,
+                        "dtype": "torch.quint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": 0,
+                        "quant_max": 255
+                    }
+                },
+                "weight_observer": {
+                    "name": "SmoothQuantWeightObserver",
+                    "smooth_quant_enabled": true,
+                    "dtype": "torch.qint8",
+                    "qscheme": "torch.per_channel_symmetric",
+                    "reduce_range": false,
+                    "quant_min": -128,
+                    "quant_max": 127,
+                    "alpha": 0.5,
+                    "wei_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 0,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_symmetric",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    },
+                    "wei_ic_observer": {
+                        "name": "PerChannelMinMaxObserver",
+                        "ch_axis": 1,
+                        "dtype": "torch.qint8",
+                        "qscheme": "torch.per_channel_affine",
+                        "reduce_range": false,
+                        "quant_min": -128,
+                        "quant_max": 127
+                    }
+                }
+            }
+        },
+        "nonq_op_infos": {},
+        "layer_output_infos": [
+            {
+                "id": 292,
+                "orig_dtype": "torch.float32",
+                "inf_dtype": "torch.float32"
+            }
+        ]
+    }
+}
\ No newline at end of file
diff --git a/test/algorithm/test_smooth_quant.py b/test/algorithm/test_smooth_quant.py
index 4b4201edcc3..3f0cb63a4d9 100644
--- a/test/algorithm/test_smooth_quant.py
+++ b/test/algorithm/test_smooth_quant.py
@@ -27,6 +27,7 @@
     import intel_extension_for_pytorch as ipex
 
     TEST_IPEX = True
+    IPEX_VERSION = Version(ipex.__version__)
 except:
     TEST_IPEX = False
 
@@ -891,8 +892,8 @@ def calib_func(model):
             )
             self.assertTrue(torch.allclose(inc_sq_weight_scale, ipex_sq_weight_scale))
             # set a big atol to avoid random issue
-            self.assertTrue(torch.allclose(ipex_out, inc_out, atol=1e-02))
-            self.assertTrue(torch.allclose(output1, inc_out, atol=1e-02))
+            self.assertTrue(torch.allclose(ipex_out, inc_out, atol=2e-02))
+            self.assertTrue(torch.allclose(output1, inc_out, atol=2e-02))
 
         class CalibDataloader:
             def __init__(self):
@@ -915,7 +916,7 @@ def __iter__(self):
         )
         output2 = q_model.model(input_ids)
         # set a big atol to avoid random issue
-        self.assertTrue(torch.allclose(output1, output2, atol=1e-02))
+        self.assertTrue(torch.allclose(output1, output2, atol=2e-02))
 
         conf = PostTrainingQuantConfig(
             backend="ipex",
@@ -931,7 +932,7 @@ def __iter__(self):
         )
         output2 = q_model.model(input_ids)
         # set a big atol to avoid random issue
-        self.assertTrue(torch.allclose(output1, output2, atol=1e-02))
+        self.assertTrue(torch.allclose(output1, output2, atol=2e-02))
 
 
 class TestSqSkipOp(unittest.TestCase):
@@ -1288,5 +1289,143 @@ def test_sq_auto_mem_usage(self):
         assert (mem_use1 - mem_use0) <= 2.0
 
 
+class TestPeftModel(unittest.TestCase):
+    def test_peft_model_fixed_alpha(self):
+        import peft
+
+        model_id = "peft-internal-testing/tiny_OPTForSequenceClassification-lora"
+        model = peft.AutoPeftModelForSequenceClassification.from_pretrained(model_id)
+        example_input = torch.ones(1, 12, dtype=torch.long)
+        out1 = model(example_input)
+
+        def calib_func(model):
+            model(example_input)
+
+        sq = TorchSmoothQuant(model, example_inputs=example_input, q_func=calib_func)
+        sq.transform(alpha=0.5, folding=False)
+        self.assertTrue(isinstance(model.base_model.model.model.decoder.layers[0].self_attn.v_proj, SQLinearWrapper))
+        self.assertTrue(
+            isinstance(
+                model.base_model.model.model.decoder.layers[0].self_attn.v_proj.sq_linear.lora_A.default,
+                SQLinearWrapper,
+            )
+        )  # Linear in Linear
+        self.assertTrue(
+            isinstance(model.base_model.model.score.original_module, torch.nn.Linear)
+        )  # Linear that is not called in calibration
+
+    def test_peft_model_auto_alpha(self):
+        import peft
+
+        model_id = "peft-internal-testing/tiny_OPTForSequenceClassification-lora"
+        model = peft.AutoPeftModelForSequenceClassification.from_pretrained(model_id, torchscript=True)
+        example_input = torch.ones(1, 12, dtype=torch.long)
+        out1 = model(example_input)
+
+        def calib_func(model):
+            model(example_input)
+
+        # folding=False
+        sq = TorchSmoothQuant(model, example_inputs=example_input, q_func=calib_func)
+        sq.transform(alpha="auto", folding=False)
+        self.assertTrue(isinstance(model.base_model.model.model.decoder.layers[0].self_attn.v_proj, SQLinearWrapper))
+        self.assertTrue(
+            isinstance(
+                model.base_model.model.model.decoder.layers[0].self_attn.v_proj.sq_linear.lora_A.default,
+                SQLinearWrapper,
+            )
+        )  # Linear in Linear
+        self.assertTrue(
+            isinstance(model.base_model.model.score.original_module, torch.nn.Linear)
+        )  # Linear that is not called in calibration
+
+        # folding=True
+        model = peft.AutoPeftModelForSequenceClassification.from_pretrained(model_id, torchscript=True)
+        example_input = torch.ones(1, 12, dtype=torch.long)
+        out1 = model(example_input)
+
+        def calib_func(model):
+            model(example_input)
+
+        sq = TorchSmoothQuant(model, example_inputs=example_input, q_func=calib_func)
+        sq.transform(alpha="auto", folding=True)
+        self.assertTrue(isinstance(model.base_model.model.model.decoder.layers[0].self_attn.v_proj, torch.nn.Linear))
+        self.assertTrue(
+            isinstance(model.base_model.model.model.decoder.layers[0].self_attn.v_proj.lora_A.default, torch.nn.Linear)
+        )  # Linear in Linear
+
+    def test_peft_model_quantization(self):
+        import peft
+
+        model_id = "peft-internal-testing/tiny_OPTForSequenceClassification-lora"
+        model = peft.AutoPeftModelForSequenceClassification.from_pretrained(model_id)
+        # model.base_model.model.model.decoder.layers[0].self_attn.v_proj.lora_B.default.weight is Zero
+        # peft model is needed to be trained first.
+        example_input = torch.ones(1, 12, dtype=torch.long)
+        out1 = model(example_input)
+
+        def calib_func(model):
+            model(example_input)
+
+        from neural_compressor import PostTrainingQuantConfig, quantization
+
+        recipes = {"smooth_quant": True, "smooth_quant_args": {"alpha": 0.5}}
+        conf = PostTrainingQuantConfig(
+            excluded_precisions=["bf16"],
+            recipes=recipes,
+            example_inputs=example_input,
+        )
+        q_model = quantization.fit(
+            model,
+            conf,
+            calib_func=calib_func,
+        )
+        decoder = q_model.model.base_model.model.model.decoder
+        self.assertTrue(isinstance(decoder.layers[0].self_attn.v_proj, SQLinearWrapper))
+        self.assertTrue(
+            isinstance(
+                decoder.layers[0].self_attn.v_proj.sq_linear.module.lora_A.default,
+                SQLinearWrapper,
+            )
+        )  # Linear in Linear
+        self.assertTrue(
+            isinstance(q_model.model.base_model.model.score.original_module, torch.nn.Linear)
+        )  # Linear that is not called in calibration
+
+    @unittest.skipIf(
+        IPEX_VERSION.release <= Version("2.1.0").release and ipex.__version__ != "2.1.0+cpu",
+        "Please use Intel extension for Pytorch version higher or equal to 2.1.0",
+    )
+    def test_peft_model_quantization_ipex(self):
+        import peft
+
+        model_id = "peft-internal-testing/tiny_OPTForSequenceClassification-lora"
+        model = peft.AutoPeftModelForSequenceClassification.from_pretrained(model_id, torchscript=True)
+        # model.base_model.model.model.decoder.layers[0].self_attn.v_proj.lora_B.default.weight is Zero
+        # peft model is needed to be trained first.
+        example_input = torch.ones(1, 12, dtype=torch.long)
+        out1 = model(example_input)[0]
+
+        def calib_func(model):
+            model(example_input)
+
+        from neural_compressor import PostTrainingQuantConfig, quantization
+
+        recipes = {"smooth_quant": True, "smooth_quant_args": {"alpha": 0.5}}
+        conf = PostTrainingQuantConfig(
+            backend="ipex",  # IPEX will got error now, will enhance it.
+            excluded_precisions=["bf16"],
+            op_name_dict={".*": {"activation": {"algorithm": "minmax"}}},
+            recipes=recipes,
+            example_inputs=example_input,
+        )
+        q_model = quantization.fit(
+            model,
+            conf,
+            calib_func=calib_func,
+        )
+        out2 = q_model.model(example_input)[0]
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/test/requirements.txt b/test/requirements.txt
index 7e4499f31f4..616dbe385dc 100644
--- a/test/requirements.txt
+++ b/test/requirements.txt
@@ -10,6 +10,7 @@ onnx
 onnxruntime
 onnxruntime-extensions; python_version < '3.11'
 optimum
+peft
 tensorflow-addons
 tf2onnx
 tf_slim