
Commit

explicitly call torch.no_grad() (#239)
Co-authored-by: LRL-ModelCloud <lrl@modelcloud.ai>
LRL-ModelCloud authored Jul 16, 2024
1 parent d6109a9 commit c75eb02
Showing 2 changed files with 9 additions and 18 deletions.
26 changes: 8 additions & 18 deletions gptqmodel/models/base.py
@@ -150,18 +150,6 @@ def _convert_tensor_to_list(tensor):
         return new_calibration_dataset_batched

-    def quantize(
-        self,
-        calibration_dataset: List[Dict[str, Union[List[int], torch.LongTensor]]],
-        batch_size: int = 1,
-        calibration_enable_gpu_cache: bool = True,
-    ):
-        if isinstance(self.quantize_config, AutoRoundQuantizeConfig):
-            self._quantize(calibration_dataset, batch_size, calibration_enable_gpu_cache)
-        else:
-            with torch.inference_mode():
-                self._quantize(calibration_dataset, batch_size, calibration_enable_gpu_cache)
-
     def _quantize(
         self,
         calibration_dataset: List[Dict[str, Union[List[int], torch.LongTensor]]],
         batch_size: int = 1,
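The deleted wrapper shows why this commit exists: `torch.inference_mode()` had to be skipped for `AutoRoundQuantizeConfig`, because tensors created under inference mode can never re-enter autograd, and AutoRound-style quantization needs gradients. Replacing the wrapper with explicit `torch.no_grad()` at the forward-call sites (the next two hunks) removes the special case. A minimal illustrative sketch of the semantic difference, not code from this repository:

```python
import torch

layer = torch.nn.Linear(4, 4)
x = torch.randn(1, 4)

with torch.no_grad():
    a = layer(x)  # tracking suspended; `a` is still an ordinary tensor

with torch.inference_mode():
    b = layer(x)  # `b` is permanently flagged as an inference tensor

a.requires_grad_(True)      # fine: `a` can join autograd later
try:
    b.requires_grad_(True)  # RuntimeError: inference tensors never can
except RuntimeError as err:
    print(err)
```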
@@ -440,7 +428,8 @@ def tmp(_, inp, out):
                     additional_layer_inputs["position_ids"] = layer_position_ids
                 for k, v in layer_input_kwargs[j].items():
                     additional_layer_inputs[k] = nested_move_to(v, cur_layer_device)
-                layer(*layer_input, **additional_layer_inputs)
+                with torch.no_grad():
+                    layer(*layer_input, **additional_layer_inputs)
             for h in handles:
                 h.remove()

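For orientation, the hunk's context (`def tmp(_, inp, out)`) is a forward hook: this first pass runs each layer only so the hooks can record calibration statistics, so wrapping the call in `torch.no_grad()` drops an autograd graph that was previously built for nothing. A hedged sketch of that hook-then-forward pattern; the function and statistic here are hypothetical stand-ins (the real hook presumably forwards `inp`/`out` to the `add_batch` method shown in the second file):

```python
import torch

def collect_stats(module: torch.nn.Module, batches) -> list:
    stats = []

    def tmp(_, inp, out):
        # hypothetical bookkeeping; outputs themselves are never kept
        stats.append(inp[0].detach().pow(2).mean().item())

    handle = module.register_forward_hook(tmp)
    for batch in batches:
        with torch.no_grad():  # same scoping the commit adds
            module(batch)
    handle.remove()
    return stats
```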
@@ -490,11 +479,12 @@ def tmp(_, inp, out):
                     additional_layer_inputs["position_ids"] = layer_position_ids
                 for k, v in layer_input_kwargs[j].items():
                     additional_layer_inputs[k] = nested_move_to(v, cur_layer_device)
-                layer_output = move_to(
-                    layer(*layer_input, **additional_layer_inputs)[0],
-                    cur_layer_device if calibration_enable_gpu_cache else CPU,
-                )
-                layer_outputs.append([layer_output])
+                with torch.no_grad():
+                    layer_output = move_to(
+                        layer(*layer_input, **additional_layer_inputs)[0],
+                        cur_layer_device if calibration_enable_gpu_cache else CPU,
+                    )
+                    layer_outputs.append([layer_output])

             layers[i] = move_to(layer, CPU if force_layer_back_to_cpu else cur_layer_device)
             del layer
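This second pass reruns the quantized layer to produce the inputs for the next layer, so here the output is kept, and `calibration_enable_gpu_cache` decides whether the cached activations stay on the current layer's device or spill to CPU. A rough sketch of that placement choice under the new `no_grad` scoping; `move_to` is a stand-in for the repository's own helper:

```python
import torch

CPU = torch.device("cpu")

def move_to(t: torch.Tensor, device: torch.device) -> torch.Tensor:
    # stand-in for gptqmodel's helper: relocate only when necessary
    return t if t.device == device else t.to(device)

def forward_and_cache(layer, layer_input, cur_layer_device, gpu_cache: bool):
    with torch.no_grad():  # mirrors the commit: cached activations carry no graph
        layer_output = move_to(
            layer(*layer_input)[0],  # element 0 of the layer's output tuple
            cur_layer_device if gpu_cache else CPU,
        )
    return [layer_output]
```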
1 change: 1 addition & 0 deletions gptqmodel/quantization/gptq.py
@@ -61,6 +61,7 @@ def add_batch(self, inp, out):
         # self.H += 2 / self.nsamples * inp.matmul(inp.t())
         self.H += inp.matmul(inp.t())

+    @torch.inference_mode()
     def fasterquant(
         self,
         blocksize=128,
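Decorating `fasterquant` applies the guard to every call. Unlike the calibration forward passes, the weight-reconstruction math in this method never has to feed a gradient-based quantizer, so the stricter `inference_mode` is safe here. A small sketch of the decorator form; the body is a placeholder rounding, not the real GPTQ update:

```python
import torch

class Toy:
    @torch.inference_mode()  # grad tracking fully disabled inside the method
    def fasterquant(self, W: torch.Tensor, blocksize: int = 128) -> torch.Tensor:
        assert not torch.is_grad_enabled()
        scale = W.abs().max() / 7           # placeholder 4-bit-style scale
        return torch.round(W / scale) * scale

print(Toy().fasterquant(torch.randn(4, 4)))
```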
