Skip to content

Commit

Permalink
Adapt to _convert_weight_to_int4pack new behavior
Browse files (browse the repository at this point in the history)
  • Loading branch information
manuelcandales committed Jul 25, 2024
1 parent e5df48e commit 4595971
Showing 1 changed file with 2 additions and 0 deletions.
2 changes: 2 additions & 0 deletions torchao/quantization/GPTQ.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -720,6 +720,8 @@ def _create_quantized_state_dict(
self.precision, # dtype for scales_and_zeros
)
# TODO: just get the device from mod.weight.device?
w_cpu = w_int4x8.cpu()
w_int4x8 = (w_cpu[::, ::2] << 4 | w_cpu[::, 1::2]).to(torch.uint8)
weight_int4pack = torch.ops.aten._convert_weight_to_int4pack(w_int4x8.to(self.device), self.inner_k_tiles)
cur_state_dict[f"{fqn}.weight"] = weight_int4pack.to(self.device)
cur_state_dict[f"{fqn}.scales_and_zeros"] = scales_and_zeros.to(self.device)
Expand Down

0 comments on commit 4595971

Please sign in to comment.