diff --git a/distributed/tensor_parallelism/fsdp_tp_example.py b/distributed/tensor_parallelism/fsdp_tp_example.py index dbab48c1b8..15052780c5 100644 --- a/distributed/tensor_parallelism/fsdp_tp_example.py +++ b/distributed/tensor_parallelism/fsdp_tp_example.py @@ -87,7 +87,7 @@ dp_mesh = device_mesh["dp"] # For TP, input needs to be same across all TP ranks. -# while for SP, input can be different across all ranks. +# while for DP, input can be different across all ranks. # We will use dp_rank for setting the random seed # to mimic the behavior of the dataloader. dp_rank = dp_mesh.get_local_rank()