diff --git a/docs/source/quantization_weight_only.md b/docs/source/quantization_weight_only.md
index 50f5913893e..13200d536c8 100644
--- a/docs/source/quantization_weight_only.md
+++ b/docs/source/quantization_weight_only.md
@@ -87,6 +87,8 @@ Notes:
 | use_max_length | False | Whether to align all calibration data to fixed length, which equals to pad_max_length. |
 | block_size | 128 | Execute GPTQ quantization per block, block shape = [$C_{out}$, block_size] |
 | static_groups | False | Whether to calculate group wise quantization parameters in advance. This option mitigate actorder's extra computational requirements |
+| true_sequential | False | Whether to quantize layers within a transformer block in their original execution order. This can lead to higher accuracy at the cost of a slower overall quantization process. |
+| lm_head | False | Whether to quantize the lm_head (the output linear layer at the end of the language model). |

 **Note:** Neural compressor provides `Unsigned integer for asymmetric quantization` and `Signed integer for symmetric quantization`. Please follow the below section to compress the low bit data type for saving.
diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py
index ce4b7f9ab4f..477ad75b7e1 100644
--- a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py
+++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py
@@ -77,6 +77,8 @@ this should align with your model config, \
                     and your dataset builder args: args.pad_max_length')
 parser.add_argument('--gptq_static_groups', action='store_true', help='Use determined group to do quantization')
+parser.add_argument('--gptq_true_sequential', action='store_true', help="Whether to run GPTQ in true_sequential mode.")
+parser.add_argument('--gptq_lm_head', action='store_true', help="Whether to use GPTQ to quantize the output layer (lm_head) of the LLM.")
 # ==============code generation args===========
 parser.add_argument("--code_generation", action="store_true")
 parser.add_argument("--n_samples", default=200, type=int)
@@ -278,7 +280,8 @@ def calib_func(prepared_model):
             'use_max_length': args.gptq_use_max_length,
             'pad_max_length': args.gptq_pad_max_length,
             'static_groups': args.gptq_static_groups,
-            "enable_mse_search": args.woq_enable_mse_search,
+            "true_sequential": args.gptq_true_sequential,
+            "lm_head": args.gptq_lm_head,
         }
         # GPTQ: use assistive functions to modify calib_dataloader and calib_func
         # TEQ: set calib_func=None, use default training func as calib_func
diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py
index db79fe05c7f..2a368665739 100644
--- a/neural_compressor/adaptor/pytorch.py
+++ b/neural_compressor/adaptor/pytorch.py
@@ -4722,6 +4722,8 @@ def gptq_quantize(self, model, tune_cfg, dataloader):
             "act_order": self.recipes["gptq_args"].get("act_order", False),
             "block_size": self.recipes["gptq_args"].get("block_size", True),
             "static_groups": self.recipes["gptq_args"].get("static_groups", False),
+            "true_sequential": self.recipes["gptq_args"].get("true_sequential", False),
+            "lm_head": self.recipes["gptq_args"].get("lm_head", False),
         }
         nsamples = self.recipes["gptq_args"].get("nsamples", 128)
         use_max_length = self.recipes["gptq_args"].get("use_max_length", False)
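The two new keys are consumed through `recipes["gptq_args"]`, exactly like the existing GPTQ recipe options and like the updated unit test further below. A minimal sketch of the user-facing flow (`build_model()` and `calib_dataloader` are placeholders, not part of this patch):

```python
# Minimal sketch: enabling the new GPTQ options through recipes.
# `build_model()` and `calib_dataloader` are user-supplied placeholders.
from neural_compressor import PostTrainingQuantConfig, quantization

conf = PostTrainingQuantConfig(
    approach="weight_only",
    op_type_dict={
        ".*": {  # match all supported ops
            "weight": {"bits": 4, "group_size": 128, "scheme": "sym", "algorithm": "GPTQ"},
        },
    },
    recipes={
        "gptq_args": {
            "percdamp": 0.01,
            "use_max_length": True,
            "pad_max_length": 512,
            "true_sequential": True,  # quantize layers of each block in execution order
            "lm_head": True,  # also quantize the output projection after the last block
        },
    },
)
q_model = quantization.fit(build_model(), conf, calib_dataloader=calib_dataloader)
```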
diff --git a/neural_compressor/adaptor/torch_utils/gptq.py b/neural_compressor/adaptor/torch_utils/gptq.py
index 49ac8695598..2f1c6cc0582 100644
--- a/neural_compressor/adaptor/torch_utils/gptq.py
+++ b/neural_compressor/adaptor/torch_utils/gptq.py
@@ -89,6 +89,7 @@ def trace_gptq_target_blocks(module, module_types=[torch.nn.ModuleList, torch.nn
             "transformers": {}, Dict# TODO
         }
     """
+    find_transformers = False
     if type(module).__name__ == "MixFormerSequentialForCausalLM":  # pragma: no cover
         gptq_related_blocks = {
             "embeddings": {},
@@ -118,12 +119,19 @@ def trace_gptq_target_blocks(module, module_types=[torch.nn.ModuleList, torch.nn
         }
         for n, m in module.named_modules():
             if type(m) in module_types:
+                # the transformer block list is found
                 gptq_related_blocks["transformers_name"] = n
                 gptq_related_blocks["transformers"] = m
-                return gptq_related_blocks
+                find_transformers = True
+                # return gptq_related_blocks
+            elif is_leaf(m) and not find_transformers:
+                gptq_related_blocks["embeddings"][n] = m
+            elif n.find(gptq_related_blocks["transformers_name"]) == -1 and find_transformers:
+                # this leaf module no longer belongs to the transformer blocks
+                gptq_related_blocks["transformers_post"]["name"] = n
+                gptq_related_blocks["transformers_post"]["layer"] = m
             else:
-                if is_leaf(m):
-                    gptq_related_blocks["embeddings"][n] = m
+                continue
     return gptq_related_blocks
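With this change the tracer no longer returns at the first `ModuleList` hit; it keeps walking the module tree and records the last leaf module found after the block stack (typically the lm_head) under `transformers_post`. An illustrative sketch of the resulting dict for a GPT-J-style model (the module names are examples, not taken from this patch):

```python
# Illustrative result of trace_gptq_target_blocks for a GPT-J-style model (names are examples only).
gptq_related_blocks = {
    "embeddings": {"transformer.wte": ...},  # leaf modules encountered before the block list
    "transformers_name": "transformer.h",  # name of the torch.nn.ModuleList holding the decoder blocks
    "transformers": ...,  # the ModuleList itself
    "transformers_post": {"name": "lm_head", "layer": ...},  # last leaf module outside the blocks
}
```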
@@ -234,6 +242,7 @@ def __init__(
         self.sym_default = False
         self.act_order_default = False
         self.static_groups_default = False
+        self.true_sequential_default = None
         self.perchannel_default = True
         self.mse_default = False
         self.check_layer_config()
@@ -411,6 +420,9 @@ def check_layer_config(self):
                 tmp_weight_config[name]["static_groups"] = self.weight_config.get(
                     "static_groups", self.static_groups_default
                 )
+                tmp_weight_config[name]["true_sequential"] = self.weight_config.get(
+                    "true_sequential", self.true_sequential_default
+                )
                 tmp_weight_config[name]["perchannel"] = self.weight_config.get("perchannel", self.perchannel_default)
                 tmp_weight_config[name]["mse"] = self.weight_config.get("mse", self.mse_default)
         self.weight_config = tmp_weight_config
@@ -425,6 +437,9 @@ def check_layer_config(self):
                 self.weight_config[layer_name]["static_groups"] = config.get(
                     "static_groups", self.static_groups_default
                 )
+                self.weight_config[layer_name]["true_sequential"] = config.get(
+                    "true_sequential", self.true_sequential_default
+                )
                 self.weight_config[layer_name]["perchannel"] = config.get("perchannel", self.perchannel_default)
                 self.weight_config[layer_name]["mse"] = config.get("mse", self.mse_default)
@@ -544,6 +559,38 @@ def update_blockwise_hidden_states(self, outs):
         else:
             self.cache_positional_arguments[0] = outs[:]

+    def find_true_sequential_config(self):
+        for layer_name in self.weight_config:
+            if self.weight_config[layer_name].get("true_sequential", None) is not None:
+                return self.weight_config[layer_name]["true_sequential"]
+        return False
+
+    def find_lm_head_config(self):
+        for layer_name in self.weight_config:
+            if self.weight_config[layer_name].get("lm_head", None) is not None:
+                return self.weight_config[layer_name]["lm_head"]
+        return False
+
+    def analyze_true_sequential(self, module, inputs=None):
+        # obtain the depth of each linear layer in this block
+        # obtain all linear layers' names
+        layers = find_layers(module)
+        layers = list(layers)
+        # group layers into sequentials
+        # case 1: query, key and value are computed by one fused matrix (e.g. BLOOM's query_key_value)
+        if "q" in layers[0].lower() and "k" in layers[0].lower():
+            qkv_layers = [layers[0]]
+            post_qkv_layers = layers[1:]
+        else:
+            # case 2: query, key and value are computed by separate matrices
+            qkv_layers = layers[0:3]
+            post_qkv_layers = layers[3:]
+        layers.clear()
+        layers.append(qkv_layers)
+        for layer in post_qkv_layers:
+            layers.append([layer])
+        return layers
+
     @torch.no_grad()
     def execute_quantization(self, means=None, stds=None, model_path=None):
         """Run quantization."""
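As a quick illustration of what `analyze_true_sequential` produces (hypothetical GPT-J-style layer names; only the grouping logic is reproduced here):

```python
# Hypothetical layer names inside one decoder block; the grouping mirrors analyze_true_sequential.
layers = ["attn.q_proj", "attn.k_proj", "attn.v_proj", "attn.out_proj", "mlp.fc_in", "mlp.fc_out"]
# q/k/v are separate projections here (case 2), so the first three form one group
# and every later layer becomes its own group:
groups = [layers[0:3]] + [[name] for name in layers[3:]]
print(groups)
# [['attn.q_proj', 'attn.k_proj', 'attn.v_proj'], ['attn.out_proj'], ['mlp.fc_in'], ['mlp.fc_out']]
# For a fused projection such as BLOOM's "query_key_value" (case 1), the first group is that single layer.
```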
@@ -554,6 +601,11 @@ def execute_quantization(self, means=None, stds=None, model_path=None):
         # Step2: run gptq quantization in a transformer block-wise manner.
         gptq_config = {}
+
+        self.true_sequential = self.find_true_sequential_config()
+        # automatically derive the true_sequential grouping from the first block
+        true_sequential_map = self.analyze_true_sequential(self.gptq_related_blocks["transformers"][0])
+        logger.info(f"Sequential Name: {true_sequential_map}")
         tblock_length = len(self.gptq_related_blocks["transformers"])
         for block_idx in range(tblock_length):
             logger.info(f"Quantizing layer {block_idx + 1} / {tblock_length}..")
@@ -565,75 +617,209 @@ def execute_quantization(self, means=None, stds=None, model_path=None):
             # Step2.1: obtain all layers (Linear, Conv2d, etc) in the block which can be quantized.
             sub_layers = find_layers(transformer_block)
             sub_layers_to_quant = {}
+            # add true sequential options
+            if self.true_sequential is not None and self.true_sequential:
+                sequentials = true_sequential_map
+            else:
+                sequentials = [list(sub_layers.keys())]
+            # start to process every layer in each sequential group
+            for sequential in sequentials:
+                logger.info(f"Current quantization sequential: {sequential}")
+                sub_layers_to_quant = {}
+                sequential_layers = {n: sub_layers[n] for n in sequential}
+                for layer_name, layer_obj in sequential_layers.items():
+                    # filter sub_layers with included layer_names in self.weight_config
+                    full_layer_name = self.get_full_layer_name(layer_name, block_idx)
+                    # if self.weight_config.get(full_layer_name, None) == None:
+                    if self.get_layer_config(full_layer_name) is None:
+                        logger.warning(
+                            f"{full_layer_name} can be quantized " + "but excluded from quantization configs."
+                        )
+                    else:
+                        sub_layers_to_quant[layer_name] = layer_obj
+                del sequential_layers
+                sequential_layers = sub_layers_to_quant
+                # Step 2.2: Initialize GPTQ quantizers for collected layers.
+                gptq_for_this_block = {}
+                # initialize gptq quantizer for every layer in a transformer block
+                for layer_name in sequential_layers:
+                    # weight_config_this_layer = self.weight_config.get(
+                    #     self.get_full_layer_name(layer_name, block_idx), None
+                    # )
+                    full_layer_name = self.get_full_layer_name(layer_name, block_idx)
+                    weight_config_this_layer = self.get_layer_config(full_layer_name)
+                    if self.layer_wise:
+                        from ..torch_utils.layer_wise_quant.utils import load_value
+
+                        W = load_value(self.model, full_layer_name + ".weight", model_path)
+                    else:
+                        W = sequential_layers[layer_name].weight.data.clone()
+
+                    gptq_for_this_block[layer_name] = GPTQ(sequential_layers[layer_name], W, self.device)
+                    # gptq_for_this_block[layer_name].quantizer = Quantizer()
+                    gptq_for_this_block[layer_name].quantizer.configure(
+                        weight_config_this_layer["wbits"],
+                        weight_config_this_layer["perchannel"],
+                        weight_config_this_layer["sym"],
+                        weight_config_this_layer["mse"],
+                    )
+
+                # Step 2.3: modify forward functions to hook inputs data (used in gptq execution)
+                def add_batch(_name):
+                    def tmp(_, inp, out):
+                        gptq_for_this_block[_name].add_batch(inp[0].data, out.data)  # noqa: F821
+
+                    return tmp
+
+                handles = []  # register handles which add inputs and outputs to gptq object
+                for layer_name in sequential_layers:
+                    handles.append(sequential_layers[layer_name].register_forward_hook(add_batch(layer_name)))
+                idx = self.cache_key_arguments.pop("i")
+                for j in range(len(self.dataloader)):
+                    cache_keyword_batch = self.gather_single_batch_from_dict(self.cache_key_arguments, j)
+                    cache_positional_batch = self.gather_single_batch_from_list(self.cache_positional_arguments, j)
+                    out = transformer_block(*cache_positional_batch, **cache_keyword_batch)
+                    out = self.track_hidden_states(out)
+                self.cache_key_arguments["i"] = idx
+                for h in handles:
+                    h.remove()
+                # Step 2.4: everything is prepared, so start quantization!
+                for layer_name in sequential_layers:
+                    # weight_config_this_layer = self.weight_config.get(
+                    #     self.get_full_layer_name(layer_name, block_idx), None
+                    # )
+                    weight_config_this_layer = self.get_layer_config(self.get_full_layer_name(layer_name, block_idx))
+                    logger.info(f"Quantizing layer {layer_name}")
+                    if self.layer_wise:
+                        from ..torch_utils.layer_wise_quant.utils import load_value
+
+                        full_layer_name = self.get_full_layer_name(layer_name, block_idx)
+                        W = load_value(self.model, full_layer_name + ".weight", model_path)
+                    else:
+                        W = sequential_layers[layer_name].weight.data.clone()
+                    scale, zp, Q = gptq_for_this_block[layer_name].fasterquant(
+                        W,
+                        blocksize=weight_config_this_layer["block_size"],
+                        percdamp=weight_config_this_layer["percdamp"],
+                        groupsize=weight_config_this_layer["group_size"],
+                        act_order=weight_config_this_layer["act_order"],
+                        static_groups=weight_config_this_layer["static_groups"],
+                    )
+                    if self.layer_wise:
+                        from ..torch_utils.layer_wise_quant.utils import (
+                            LWQ_WORKSPACE,
+                            clean_module_weight,
+                            load_value,
+                            set_module_tensor_to_device,
+                        )
+
+                        sub_layer = sequential_layers[layer_name]
+                        full_layer_name = self.get_full_layer_name(layer_name, block_idx)
+                        for n, p in sub_layer.named_parameters():
+                            param_name = full_layer_name + "." + n
+                            if n == "weight":
+                                set_module_tensor_to_device(self.model, param_name, self.device, Q)
+                            else:
+                                value = load_value(self.model, param_name, model_path)
+                                set_module_tensor_to_device(self.model, param_name, self.device, value)
+                        # sub_layer.weight.data = Q
+                        torch.save(sub_layer.state_dict(), LWQ_WORKSPACE + f"/{full_layer_name}.pt")
+                        clean_module_weight(sub_layer)
+                        del Q
+                        gc.collect()
+                    else:
+                        sequential_layers[layer_name].weight.data = Q
+                    gptq_config[self.get_full_layer_name(layer_name, block_idx)] = {"scale": scale}
+                    if not weight_config_this_layer["sym"]:
+                        gptq_config[self.get_full_layer_name(layer_name, block_idx)]["zero"] = zp
+                    if weight_config_this_layer["act_order"] and not weight_config_this_layer["static_groups"]:
+                        # save perm for restoring the weights, but only when static_groups is not enabled.
+                        gptq_config[self.get_full_layer_name(layer_name, block_idx)]["perm"] = gptq_for_this_block[
+                            layer_name
+                        ].perm
+                    gptq_for_this_block[layer_name].free()
+
+            # Step 2.5: replace output data with quantized weights
+            outs = []
+            idx = self.cache_key_arguments.pop("i")
+            for j in range(len(self.dataloader)):
+                cache_keyword_batch = self.gather_single_batch_from_dict(self.cache_key_arguments, j)
+                cache_positional_batch = self.gather_single_batch_from_list(self.cache_positional_arguments, j)
+                out = transformer_block(*cache_positional_batch, **cache_keyword_batch)
+                out = self.track_hidden_states(out)
+                outs.append(out)
+            self.cache_key_arguments["i"] = idx
+            if self.layer_wise:
+                self.gptq_related_blocks["transformers"][block_idx] = transformer_block
+            else:
+                self.gptq_related_blocks["transformers"][block_idx] = transformer_block.cpu()
+            del gptq_for_this_block
+            torch.cuda.empty_cache()
+            # iteratively replace the input with output, thus layerwise quantization can continue.
+            self.update_blockwise_hidden_states(outs)
+            logger.info("------------------------------")
+
+        # do the post transformer blocks quantization (e.g. lm_head)
+        do_post_transformer_quant = self.find_lm_head_config()
+        if do_post_transformer_quant:
+            logger.info("Quantizing post transformer layers")
+            # the input should be self.cache_key_arguments and self.cache_positional_arguments
+            sub_layers = find_layers(self.gptq_related_blocks["transformers_post"]["layer"])
+            sub_layers_to_quant = {}
             for layer_name, layer_obj in sub_layers.items():
                 # filter sub_layers with included layer_names in self.weight_config
-                full_layer_name = self.get_full_layer_name(layer_name, block_idx)
+                full_layer_name = self.gptq_related_blocks["transformers_post"]["name"]
                 # if self.weight_config.get(full_layer_name, None) == None:
                 if self.get_layer_config(full_layer_name) is None:
                     logger.warning(f"{full_layer_name} can be quantized " + "but excluded from quantization configs.")
                 else:
-                    sub_layers_to_quant[layer_name] = layer_obj
+                    sub_layers_to_quant[full_layer_name] = layer_obj
             del sub_layers
             sub_layers = sub_layers_to_quant
-            # Step 2.2: Initialize GPTQ quantizers for collected layers.
-            gptq_for_this_block = {}
-            # initialize gptq quantizer for every layer in a transformer block
+            gptq_post_block = {}
+
+            def add_batch_post(_name):
+                def tmp(_, inp, out):
+                    gptq_post_block[_name].add_batch(inp[0].data, out.data)
+
+                return tmp
+
             for layer_name in sub_layers:
-                # weight_config_this_layer = self.weight_config.get(
-                #     self.get_full_layer_name(layer_name, block_idx), None
-                # )
-                full_layer_name = self.get_full_layer_name(layer_name, block_idx)
+                full_layer_name = self.gptq_related_blocks["transformers_post"]["name"]
                 weight_config_this_layer = self.get_layer_config(full_layer_name)
-                if self.layer_wise:
-                    from ..torch_utils.layer_wise_quant.utils import load_value
+                W = sub_layers[layer_name].weight.data.clone()
-                    W = load_value(self.model, full_layer_name + ".weight", model_path)
-                else:
-                    W = sub_layers[layer_name].weight.data.clone()
-
-                gptq_for_this_block[layer_name] = GPTQ(sub_layers[layer_name], W, self.device)
+                gptq_post_block[layer_name] = GPTQ(sub_layers[layer_name], W, self.device)
                 # gptq_for_this_block[layer_name].quantizer = Quantizer()
-                gptq_for_this_block[layer_name].quantizer.configure(
+                gptq_post_block[layer_name].quantizer.configure(
                     weight_config_this_layer["wbits"],
                     weight_config_this_layer["perchannel"],
                     weight_config_this_layer["sym"],
                     weight_config_this_layer["mse"],
                 )
-
-            # Step 2.3: modify forward functions to hook inputs data (used in gptq execution)
-            def add_batch(_name):
-                def tmp(_, inp, out):
-                    gptq_for_this_block[_name].add_batch(inp[0].data, out.data)  # noqa: F821
-
-                return tmp
-
+            # register hooks that feed calibration data to the gptq quantizer
             handles = []  # register handles which add inputs and outputs to gptq object
             for layer_name in sub_layers:
-                handles.append(sub_layers[layer_name].register_forward_hook(add_batch(layer_name)))
-            idx = self.cache_key_arguments.pop("i")
+                handles.append(sub_layers[layer_name].register_forward_hook(add_batch_post(layer_name)))
             for j in range(len(self.dataloader)):
-                cache_keyword_batch = self.gather_single_batch_from_dict(self.cache_key_arguments, j)
-                cache_positional_batch = self.gather_single_batch_from_list(self.cache_positional_arguments, j)
-                out = transformer_block(*cache_positional_batch, **cache_keyword_batch)
-                out = self.track_hidden_states(out)
-            self.cache_key_arguments["i"] = idx
+                if "hidden_states" in self.cache_key_arguments:
+                    out = sub_layers[layer_name](self.cache_key_arguments["hidden_states"][j])
+                else:
+                    out = sub_layers[layer_name](self.cache_positional_arguments[0][j])
+
+            # if "hidden_states" in self.cache_key_arguments:
+            #     self.cache_key_arguments["hidden_states"] = outs[:]
+            # else:
+            #     self.cache_positional_arguments[0] = outs[:]
+            # perform the inference process
+
             for h in handles:
                 h.remove()
-            # Step 2.4: everything is prepared, so start quantization!
-            for layer_name in sub_layers:
-                # weight_config_this_layer = self.weight_config.get(
-                #     self.get_full_layer_name(layer_name, block_idx), None
-                # )
-                weight_config_this_layer = self.get_layer_config(self.get_full_layer_name(layer_name, block_idx))
-                logger.info(f"Quantizing layer {layer_name}")
-                if self.layer_wise:
-                    from ..torch_utils.layer_wise_quant.utils import load_value
-
-                    full_layer_name = self.get_full_layer_name(layer_name, block_idx)
-                    W = load_value(self.model, full_layer_name + ".weight", model_path)
-                else:
-                    W = sub_layers[layer_name].weight.data.clone()
-                scale, zp, Q = gptq_for_this_block[layer_name].fasterquant(
+            for layer_name in sub_layers:
+                full_layer_name = self.gptq_related_blocks["transformers_post"]["name"]
+                weight_config_this_layer = self.get_layer_config(full_layer_name)
+                scale, zp, Q = gptq_post_block[layer_name].fasterquant(
                     W,
                     blocksize=weight_config_this_layer["block_size"],
                     percdamp=weight_config_this_layer["percdamp"],
@@ -641,59 +827,15 @@ def tmp(_, inp, out):
                     act_order=weight_config_this_layer["act_order"],
                     static_groups=weight_config_this_layer["static_groups"],
                 )
-                if self.layer_wise:
-                    from ..torch_utils.layer_wise_quant.utils import (
-                        LWQ_WORKSPACE,
-                        clean_module_weight,
-                        load_value,
-                        set_module_tensor_to_device,
-                    )
-
-                    sub_layer = sub_layers[layer_name]
-                    full_layer_name = self.get_full_layer_name(layer_name, block_idx)
-                    for n, p in sub_layer.named_parameters():
-                        param_name = full_layer_name + "." + n
-                        if n == "weight":
-                            set_module_tensor_to_device(self.model, param_name, self.device, Q)
-                        else:
-                            value = load_value(self.model, param_name, model_path)
-                            set_module_tensor_to_device(self.model, param_name, self.device, value)
-                    # sub_layer.weight.data = Q
-                    torch.save(sub_layer.state_dict(), LWQ_WORKSPACE + f"/{full_layer_name}.pt")
-                    clean_module_weight(sub_layer)
-                    del Q
-                    gc.collect()
-                else:
-                    sub_layers[layer_name].weight.data = Q
-                gptq_config[self.get_full_layer_name(layer_name, block_idx)] = {"scale": scale}
+                sub_layers[layer_name].weight.data = Q
+                # save the quantization results
+                gptq_config[full_layer_name] = {"scale": scale}
                 if not weight_config_this_layer["sym"]:
-                    gptq_config[self.get_full_layer_name(layer_name, block_idx)]["zero"] = zp
+                    gptq_config[full_layer_name]["zero"] = zp
                 if weight_config_this_layer["act_order"] and not weight_config_this_layer["static_groups"]:
                     # save perm for restoring the weights, but only when static_groups is not enabled.
-                    gptq_config[self.get_full_layer_name(layer_name, block_idx)]["perm"] = gptq_for_this_block[
-                        layer_name
-                    ].perm
-                gptq_for_this_block[layer_name].free()
-
-            # Step 2.5: replace output data with quantized weights
-            outs = []
-            idx = self.cache_key_arguments.pop("i")
-            for j in range(len(self.dataloader)):
-                cache_keyword_batch = self.gather_single_batch_from_dict(self.cache_key_arguments, j)
-                cache_positional_batch = self.gather_single_batch_from_list(self.cache_positional_arguments, j)
-                out = transformer_block(*cache_positional_batch, **cache_keyword_batch)
-                out = self.track_hidden_states(out)
-                outs.append(out)
-            self.cache_key_arguments["i"] = idx
-            if self.layer_wise:
-                self.gptq_related_blocks["transformers"][block_idx] = transformer_block
-            else:
-                self.gptq_related_blocks["transformers"][block_idx] = transformer_block.cpu()
-            del gptq_for_this_block
-            torch.cuda.empty_cache()
-            # iteratively replace the input with output, thus layerwise quantization can continue.
-            self.update_blockwise_hidden_states(outs)
-            logger.info("------------------------------")
+                    gptq_config[full_layer_name]["perm"] = gptq_post_block[full_layer_name].perm
+                gptq_post_block[layer_name].free()

         logger.info("Quantization done")
         # self.model.config.use_cache = self.use_cache
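For orientation, the mapping collected in `gptq_config` after this change looks roughly like the sketch below; the layer names are illustrative (GPT-J-style, matching the test below), and the optional keys follow the conditions in the code above:

```python
# Rough shape of gptq_config after quantization (tensor values omitted).
gptq_config = {
    "transformer.h.0.attn.k_proj": {"scale": ...},  # "zero" is added when sym=False,
    "transformer.h.0.mlp.fc_in": {"scale": ...},    # "perm" when act_order=True and static_groups=False
    "lm_head": {"scale": ...},  # present only when the lm_head option is enabled
}
```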
diff --git a/test/adaptor/pytorch_adaptor/test_weight_only_adaptor_pytorch.py b/test/adaptor/pytorch_adaptor/test_weight_only_adaptor_pytorch.py
index 8bcacd65cff..ca4a011907a 100644
--- a/test/adaptor/pytorch_adaptor/test_weight_only_adaptor_pytorch.py
+++ b/test/adaptor/pytorch_adaptor/test_weight_only_adaptor_pytorch.py
@@ -522,7 +522,15 @@ def __iter__(self):
                 },
             },
             recipes={
-                "gptq_args": {"percdamp": 0.01, "act_order": False, "use_max_length": True, "pad_max_length": 512},
+                "gptq_args": {
+                    "percdamp": 0.01,
+                    "act_order": False,
+                    "use_max_length": True,
+                    "pad_max_length": 512,
+                    "static_groups": True,
+                    "true_sequential": True,
+                    "lm_head": True,
+                },
             },
         )
@@ -537,7 +545,7 @@ def __iter__(self):
         )
         q_model.save("saved")
         out1 = q_model.model(input)
-        self.assertTrue(torch.allclose(out1[0], out0[0], atol=1e-02))
+        self.assertTrue(torch.allclose(out1[0], out0[0], atol=1e-01))
         compressed_model = q_model.export_compressed_model(use_optimum_format=False)
         out2 = compressed_model(input)
         torch.save(compressed_model.state_dict(), "saved/compressed_model.pt")
diff --git a/test/quantization/test_weight_only_quantization.py b/test/quantization/test_weight_only_quantization.py
index b990f9aee9f..f5577eea7c4 100644
--- a/test/quantization/test_weight_only_quantization.py
+++ b/test/quantization/test_weight_only_quantization.py
@@ -154,6 +154,7 @@ def __iter__(self):
                 "sym": True,
                 "percdamp": 0.01,
                 "perchannel": False,
+                "lm_head": True,
             },
             "transformer.h.1.attn.k_proj": {
                 "wbits": 3,