Making sure int4 weight only supports CPU as well
Summary:
We want to deprecate the int4 weight-only quantizer in torchchat, so we are making sure CPU is also supported.

Test Plan:
python test/dtypes/test_affine_quantized.py
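
For context, a minimal sketch of how int4 weight-only quantization is typically applied through torchao's quantize_ API; the toy model and the group_size value below are illustrative assumptions, not part of this commit:

    import torch
    from torchao.quantization import quantize_, int4_weight_only

    # Toy model; int4 weight-only quantization packs linear weights into the
    # tensor core tiled layout this commit touches.
    model = torch.nn.Sequential(torch.nn.Linear(128, 256)).to(torch.bfloat16)

    # After this change, the call below should work with the model on CPU as
    # well as CUDA (group_size=32 is an illustrative choice).
    quantize_(model, int4_weight_only(group_size=32))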

jerryzh168 committed Oct 30, 2024
1 parent 85ec209 commit 5755483
Showing 2 changed files with 28 additions and 2 deletions.
23 changes: 23 additions & 0 deletions test/dtypes/test_affine_quantized.py
@@ -133,7 +133,30 @@ def test_print_quantized_module(self, apply_quant):
         assert "AffineQuantizedTensor" in str(ql)
 
 
+class TestAffineQuantizedBasic(TestCase):
+    COMMON_DEVICES = ["cpu"] + (["cuda"] if torch.cuda.is_available() else [])
+    COMMON_DTYPES = [torch.bfloat16]
+
+    @common_utils.parametrize("apply_quant", get_quantization_functions(False, True))
+    @common_utils.parametrize("device", COMMON_DEVICES)
+    @common_utils.parametrize("dtype", COMMON_DTYPES)
+    def test_flatten_unflatten(self, apply_quant, device, dtype):
+        l = torch.nn.Linear(128, 256, dtype=dtype, device=device)
+        ql = apply_quant(l)
+        lp_tensor = ql.weight
+        tensor_data_name_dict, tensor_attributes = lp_tensor.__tensor_flatten__()
+        tensor_data_dict = {name: getattr(lp_tensor, name) for name in tensor_data_name_dict}
+        outer_size = lp_tensor.size()
+        outer_stride = lp_tensor.stride()
+        reconstructed = type(lp_tensor).__tensor_unflatten__(tensor_data_dict, tensor_attributes, outer_size, outer_stride)
+        example_inputs = (torch.randn(32, 128, dtype=dtype, device=device),)
+        ref = ql(*example_inputs)
+        ql.weight = torch.nn.Parameter(reconstructed, requires_grad=False)
+        reconstruct_res = ql(*example_inputs)
+        self.assertEqual(reconstruct_res, ref)
+
 common_utils.instantiate_parametrized_tests(TestAffineQuantized)
+common_utils.instantiate_parametrized_tests(TestAffineQuantizedBasic)
 
 
 if __name__ == "__main__":
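
For readers unfamiliar with the protocol the new test exercises: __tensor_flatten__ returns the attribute names of the inner plain tensors plus a tuple of non-tensor attributes, and __tensor_unflatten__ rebuilds the subclass from those pieces. A generic round-trip helper, sketched under that assumption (the helper name is ours, not torchao's):

    import torch

    def roundtrip_via_flatten(t: torch.Tensor) -> torch.Tensor:
        # Names of the inner plain tensors, plus opaque (non-tensor) attributes.
        inner_names, attributes = t.__tensor_flatten__()
        inner = {name: getattr(t, name) for name in inner_names}
        # Rebuild the subclass; outer size/stride describe the wrapper tensor.
        return type(t).__tensor_unflatten__(inner, attributes, t.size(), t.stride())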
7 changes: 5 additions & 2 deletions torchao/dtypes/affine_quantized_tensor.py
@@ -1268,8 +1268,11 @@ def from_plain(
     def to(self, *args, **kwargs):
         kwargs = self._get_to_kwargs(*args, **kwargs)
         device = kwargs["device"]
-        if not is_device("cuda", device):
-            raise ValueError(f"TensorCoreTiledAQTTensorImpl is only available for cuda device, can't convert to {device}")
+        # The tensor core tiled layout supports both CPU and CUDA, but it does
+        # not support conversion between the two devices; in the future we should
+        # use separate layouts for CPU and CUDA: https://github.com/pytorch/ao/issues/1117
+        if not is_device(torch.device(self.device).type, device):
+            raise ValueError(f"TensorCoreTiledAQTTensorImpl does not support conversion from {self.device} to {device}")
         return self.__class__(
             self.packed_weight.to(device),
             self.scale_and_zero.to(device),
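
To see the behavior the new check enforces, a hedged sketch (the setup below is an assumption, not code from this commit): cross-device conversion of the packed layout now raises an explicit error on both CPU and CUDA, while same-device .to() calls still work:

    import torch
    from torchao.quantization import quantize_, int4_weight_only

    model = torch.nn.Linear(128, 256, dtype=torch.bfloat16, device="cuda")
    quantize_(model, int4_weight_only())

    try:
        model.to("cpu")  # cross-device conversion of the packed layout
    except ValueError as e:
        print(e)  # TensorCoreTiledAQTTensorImpl does not support conversion from ... to cpu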
