Skip to content

Commit

Permalink
Fix fp8-all-gather buck errors (pytorch#912)
Browse files Browse the repository at this point in the history
Summary: Pull Request resolved: pytorch#912

Reviewed By: vkuzo

Differential Revision: D63048850
  • Loading branch information
y-sq authored and facebook-github-bot committed Sep 25, 2024
1 parent 2dea315 commit cc3c8ea
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 4 deletions.
2 changes: 1 addition & 1 deletion test/float8/test_fsdp2/test_fsdp2.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from torchao.float8.config import CastConfig, Float8LinearConfig, ScalingType
from torchao.float8.float8_linear_utils import convert_to_float8_training
from torchao.float8.fsdp_utils import WeightWithDynamicFloat8CastTensor
-from fsdp2_common import check_parity_bf16_mp, check_parity_no_mp
+from torchao.testing.float8.fsdp2_utils import check_parity_bf16_mp, check_parity_no_mp
from torch.distributed._composable.fsdp import fully_shard, MixedPrecisionPolicy
from torch.distributed._tensor import DTensor
from torch.testing._internal.common_cuda import TEST_CUDA
Expand Down
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ def check_parity_no_mp(
precompute_float8_dynamic_scale_for_fsdp(model)

if compile_transformer_block:
-test_cls.assertEqual(losses[0], losses[1], atol=1e-4, rtol=1e-4)
+test_cls.assertEqual(losses[0], losses[1], atol=1e-4, rtol=1e-4, msg = f"iter: {iter_idx}, loss-ref: {losses[0]}, loss-fp8: {losses[1]}")
else:
-test_cls.assertEqual(losses[0], losses[1])
+test_cls.assertEqual(losses[0], losses[1], msg = f"iter: {iter_idx}, loss-ref: {losses[0]}, loss-fp8: {losses[1]}")


def check_parity_bf16_mp(
Expand Down Expand Up @@ -86,4 +86,4 @@ def check_parity_bf16_mp(
ref_model.parameters(), ref_model_bf16.parameters()
):
param_bf16.detach().copy_(param_fp32)
-test_cls.assertEqual(losses[0], losses[1])
+test_cls.assertEqual(losses[0], losses[1], msg = f"iter: {iter_idx}, loss-ref: {losses[0]}, loss-fp8: {losses[1]}")

0 comments on commit cc3c8ea

Please sign in to comment.