diff --git a/colossalai/communication/__init__.py b/colossalai/legacy/communication/__init__.py
similarity index 53%
rename from colossalai/communication/__init__.py
rename to colossalai/legacy/communication/__init__.py
index 220481b7af15..88ad0487b785 100644
--- a/colossalai/communication/__init__.py
+++ b/colossalai/legacy/communication/__init__.py
@@ -1,9 +1,17 @@
-from .collective import all_gather, reduce_scatter, all_reduce, broadcast, reduce
-from .p2p import (send_forward, send_forward_recv_forward, send_backward_recv_forward, send_backward,
-                  send_backward_recv_backward, send_forward_recv_backward, send_forward_backward_recv_forward_backward,
-                  recv_forward, recv_backward)
+from .collective import all_gather, all_reduce, broadcast, reduce, reduce_scatter
+from .p2p import (
+    recv_backward,
+    recv_forward,
+    send_backward,
+    send_backward_recv_backward,
+    send_backward_recv_forward,
+    send_forward,
+    send_forward_backward_recv_forward_backward,
+    send_forward_recv_backward,
+    send_forward_recv_forward,
+)
 from .ring import ring_forward
-from .utils import send_obj_meta, recv_obj_meta
+from .utils import recv_obj_meta, send_obj_meta
 
 __all__ = [
     'all_gather',
diff --git a/colossalai/communication/collective.py b/colossalai/legacy/communication/collective.py
similarity index 100%
rename from colossalai/communication/collective.py
rename to colossalai/legacy/communication/collective.py
diff --git a/colossalai/communication/p2p.py b/colossalai/legacy/communication/p2p.py
similarity index 100%
rename from colossalai/communication/p2p.py
rename to colossalai/legacy/communication/p2p.py
diff --git a/colossalai/communication/p2p_v2.py b/colossalai/legacy/communication/p2p_v2.py
similarity index 100%
rename from colossalai/communication/p2p_v2.py
rename to colossalai/legacy/communication/p2p_v2.py
diff --git a/colossalai/communication/ring.py b/colossalai/legacy/communication/ring.py
similarity index 100%
rename from colossalai/communication/ring.py
rename to colossalai/legacy/communication/ring.py
diff --git a/colossalai/communication/utils.py b/colossalai/legacy/communication/utils.py
similarity index 100%
rename from colossalai/communication/utils.py
rename to colossalai/legacy/communication/utils.py
diff --git a/colossalai/legacy/engine/schedule/_pipeline_schedule.py b/colossalai/legacy/engine/schedule/_pipeline_schedule.py
index 88b54ce6af0f..4571fd679e8c 100644
--- a/colossalai/legacy/engine/schedule/_pipeline_schedule.py
+++ b/colossalai/legacy/engine/schedule/_pipeline_schedule.py
@@ -6,7 +6,7 @@
 
 import torch.cuda
 
-import colossalai.communication as comm
+import colossalai.legacy.communication as comm
 from colossalai.amp.naive_amp import NaiveAMPModel
 from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
diff --git a/colossalai/legacy/engine/schedule/_pipeline_schedule_v2.py b/colossalai/legacy/engine/schedule/_pipeline_schedule_v2.py
index 9e7372b675ce..385c615372f5 100644
--- a/colossalai/legacy/engine/schedule/_pipeline_schedule_v2.py
+++ b/colossalai/legacy/engine/schedule/_pipeline_schedule_v2.py
@@ -5,10 +5,10 @@
 
 import torch.cuda
 
-import colossalai.communication.p2p_v2 as comm
-from colossalai import engine
+import colossalai.legacy.communication.p2p_v2 as comm
 from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
+from colossalai.legacy.engine import Engine
 from colossalai.utils.cuda import get_current_device
 
 from ._pipeline_schedule import PipelineSchedule
@@ -60,7 +60,7 @@ def data_process_func(stage_output, dataloader_output):
     """
 
     def forward_backward_step(self,
-                              engine: engine.Engine,
+                              engine: Engine,
                               data_iter: Iterable,
                               forward_only=False,
                               return_loss=True,
diff --git a/colossalai/legacy/trainer/hooks/_metric_hook.py b/colossalai/legacy/trainer/hooks/_metric_hook.py
index d0598c240181..f1bd19387cb5 100644
--- a/colossalai/legacy/trainer/hooks/_metric_hook.py
+++ b/colossalai/legacy/trainer/hooks/_metric_hook.py
@@ -7,9 +7,9 @@
 import torch
 import torch.distributed as dist
 
-from colossalai.communication import all_reduce
 from colossalai.context import ParallelMode
 from colossalai.core import global_context as gpc
+from colossalai.legacy.communication import all_reduce
 from colossalai.legacy.registry import HOOKS
 from colossalai.utils import get_current_device, is_no_pp_or_last_stage
 
diff --git a/colossalai/nn/layer/parallel_1d/layers.py b/colossalai/nn/layer/parallel_1d/layers.py
index 7b129009e4f0..c0a169c1596f 100644
--- a/colossalai/nn/layer/parallel_1d/layers.py
+++ b/colossalai/nn/layer/parallel_1d/layers.py
@@ -10,11 +10,11 @@
 from torch import Tensor
 from torch.nn.parameter import Parameter
 
-from colossalai.communication import broadcast
 from colossalai.context import ParallelMode, seed
 from colossalai.core import global_context as gpc
 from colossalai.global_variables import tensor_parallel_env as env
 from colossalai.kernel import LayerNorm
+from colossalai.legacy.communication import broadcast
 from colossalai.legacy.registry import LAYERS
 from colossalai.nn import init as init
 from colossalai.utils.checkpointing import (
diff --git a/colossalai/nn/layer/parallel_2d/_operation.py b/colossalai/nn/layer/parallel_2d/_operation.py
index 306577dbd933..fa9b49bcf53f 100644
--- a/colossalai/nn/layer/parallel_2d/_operation.py
+++ b/colossalai/nn/layer/parallel_2d/_operation.py
@@ -2,13 +2,14 @@
 
 import torch
 import torch.distributed as dist
-from colossalai.communication.collective import (all_gather, all_reduce, reduce, reduce_scatter)
-from colossalai.context.parallel_mode import ParallelMode
-from colossalai.core import global_context as gpc
-from colossalai.utils import get_current_device
 from torch import Tensor
 from torch.cuda.amp import custom_bwd, custom_fwd
+
+from colossalai.context.parallel_mode import ParallelMode
+from colossalai.core import global_context as gpc
 from colossalai.global_variables import tensor_parallel_env as env
+from colossalai.legacy.communication.collective import all_gather, all_reduce, reduce, reduce_scatter
+from colossalai.utils import get_current_device
 
 
 def matmul_2d(
@@ -226,9 +227,9 @@ def forward(
         col_group = gpc.get_group(col_parallel_mode)
 
         src_a = summa_dim * row_rank + data_parallel_rank * pipeline_parallel_size * tensor_parallel_size + \
-            pipeline_parallel_rank * tensor_parallel_size
+                pipeline_parallel_rank * tensor_parallel_size
         src_b = col_rank + data_parallel_rank * pipeline_parallel_size * tensor_parallel_size + \
-            pipeline_parallel_rank * tensor_parallel_size
+                pipeline_parallel_rank * tensor_parallel_size
 
         opa = [None] * 2
         opb = [None] * 2
@@ -351,9 +352,9 @@ def forward(
         col_group = gpc.get_group(col_parallel_mode)
 
         src_b = col_rank + data_parallel_rank * pipeline_parallel_size * tensor_parallel_size + \
-            pipeline_parallel_rank * tensor_parallel_size
+                pipeline_parallel_rank * tensor_parallel_size
         src_c = summa_dim * row_rank + data_parallel_rank * pipeline_parallel_size * tensor_parallel_size + \
-            pipeline_parallel_rank * tensor_parallel_size
+                pipeline_parallel_rank * tensor_parallel_size
 
         opb = [None] * 2
         opr = [None] * 2
@@ -484,9 +485,9 @@ def forward(
         col_group = gpc.get_group(col_parallel_mode)
 
         src_a = summa_dim * row_rank + data_parallel_rank * pipeline_parallel_size * tensor_parallel_size + \
-            pipeline_parallel_rank * tensor_parallel_size
+                pipeline_parallel_rank * tensor_parallel_size
         src_c = col_rank + data_parallel_rank * pipeline_parallel_size * tensor_parallel_size + \
-            pipeline_parallel_rank * tensor_parallel_size
+                pipeline_parallel_rank * tensor_parallel_size
 
         opa = [None] * 2
         opr = [None] * 2
diff --git a/colossalai/nn/layer/parallel_2d/layers.py b/colossalai/nn/layer/parallel_2d/layers.py
index 1a01d5437aab..b458d15c78e7 100644
--- a/colossalai/nn/layer/parallel_2d/layers.py
+++ b/colossalai/nn/layer/parallel_2d/layers.py
@@ -8,10 +8,10 @@
 from torch import Tensor
 from torch.nn import Parameter
 
-from colossalai.communication import broadcast
 from colossalai.context import ParallelMode, seed
 from colossalai.core import global_context as gpc
 from colossalai.global_variables import tensor_parallel_env as env
+from colossalai.legacy.communication import broadcast
 from colossalai.legacy.registry import LAYERS
 from colossalai.nn import init as init
 from colossalai.utils.checkpointing import gather_tensor_parallel_state_dict, partition_tensor_parallel_state_dict
diff --git a/colossalai/nn/layer/parallel_2p5d/_operation.py b/colossalai/nn/layer/parallel_2p5d/_operation.py
index 5a0f537cd6d9..55defa4a328d 100644
--- a/colossalai/nn/layer/parallel_2p5d/_operation.py
+++ b/colossalai/nn/layer/parallel_2p5d/_operation.py
@@ -2,12 +2,13 @@
 
 import torch
 import torch.distributed as dist
-from colossalai.communication.collective import (all_gather, all_reduce, reduce_scatter)
+from torch import Tensor
+from torch.cuda.amp import custom_bwd, custom_fwd
+
 from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
+from colossalai.legacy.communication.collective import all_gather, all_reduce, reduce_scatter
 from colossalai.utils import get_current_device
-from torch import Tensor
-from torch.cuda.amp import custom_bwd, custom_fwd
 
 
 def get_parallel_group(parallel_mode: ParallelMode):
diff --git a/colossalai/nn/layer/parallel_2p5d/layers.py b/colossalai/nn/layer/parallel_2p5d/layers.py
index 62c4292fdfd7..04acc2bb0f4c 100644
--- a/colossalai/nn/layer/parallel_2p5d/layers.py
+++ b/colossalai/nn/layer/parallel_2p5d/layers.py
@@ -8,10 +8,10 @@
 from torch import Tensor
 from torch.nn import Parameter
 
-from colossalai.communication import broadcast
 from colossalai.context import ParallelMode, seed
 from colossalai.core import global_context as gpc
 from colossalai.global_variables import tensor_parallel_env as env
+from colossalai.legacy.communication import broadcast
 from colossalai.legacy.registry import LAYERS
 from colossalai.nn import init as init
 from colossalai.utils.checkpointing import (
diff --git a/colossalai/nn/layer/parallel_3d/_operation.py b/colossalai/nn/layer/parallel_3d/_operation.py
index 5dc9a242851f..ca0b0e62783a 100755
--- a/colossalai/nn/layer/parallel_3d/_operation.py
+++ b/colossalai/nn/layer/parallel_3d/_operation.py
@@ -7,10 +7,10 @@
 from torch import Tensor
 from torch.cuda.amp import custom_bwd, custom_fwd
 
-from colossalai.communication import all_gather, all_reduce, broadcast, reduce, reduce_scatter
 from colossalai.constants import INPUT_GROUP_3D, WEIGHT_GROUP_3D
 from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
+from colossalai.legacy.communication import all_gather, all_reduce, broadcast, reduce, reduce_scatter
 
 from ._utils import get_parallel_mode_from_env, push_async_grad
 
diff --git a/colossalai/nn/layer/parallel_3d/layers.py b/colossalai/nn/layer/parallel_3d/layers.py
index 7d940aa27564..2861b53013e1 100644
--- a/colossalai/nn/layer/parallel_3d/layers.py
+++ b/colossalai/nn/layer/parallel_3d/layers.py
@@ -8,11 +8,11 @@
 from torch import Tensor
 from torch.nn import Parameter
 
-from colossalai.communication import all_reduce, broadcast
 from colossalai.constants import INPUT_GROUP_3D, INPUT_X_WEIGHT_3D, OUTPUT_GROUP_3D, OUTPUT_X_WEIGHT_3D, WEIGHT_GROUP_3D
 from colossalai.context import ParallelMode, seed
 from colossalai.core import global_context as gpc
 from colossalai.global_variables import tensor_parallel_env as env
+from colossalai.legacy.communication import all_reduce, broadcast
 from colossalai.legacy.registry import LAYERS
 from colossalai.nn import init as init
 from colossalai.nn.layer.base_layer import ParallelLayer
diff --git a/colossalai/nn/layer/parallel_sequence/_operation.py b/colossalai/nn/layer/parallel_sequence/_operation.py
index fc80494224c6..d03102527caa 100644
--- a/colossalai/nn/layer/parallel_sequence/_operation.py
+++ b/colossalai/nn/layer/parallel_sequence/_operation.py
@@ -3,13 +3,13 @@
 
 import torch
 from torch import distributed as dist
+from torch.cuda.amp import custom_bwd, custom_fwd
 
-from colossalai.communication import ring_forward
 from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
-from colossalai.nn.layer.parallel_sequence._utils import _calc_incoming_device_range, _calc_current_device_range
+from colossalai.legacy.communication import ring_forward
+from colossalai.nn.layer.parallel_sequence._utils import _calc_current_device_range, _calc_incoming_device_range
 from colossalai.utils import get_current_device
-from torch.cuda.amp import custom_bwd, custom_fwd
 
 
 class RingQK(torch.autograd.Function):
diff --git a/tests/test_comm/test_boardcast_send_recv_v2.py b/tests/test_legacy/test_comm/test_boardcast_send_recv_v2.py
similarity index 93%
rename from tests/test_comm/test_boardcast_send_recv_v2.py
rename to tests/test_legacy/test_comm/test_boardcast_send_recv_v2.py
index 253f6f21cd80..c5fb049fe93f 100644
--- a/tests/test_comm/test_boardcast_send_recv_v2.py
+++ b/tests/test_legacy/test_comm/test_boardcast_send_recv_v2.py
@@ -1,10 +1,10 @@
 import pytest
 import torch
 
-from colossalai.communication.p2p_v2 import _recv_object, _send_object
 from colossalai.context import ParallelMode
 from colossalai.core import global_context as gpc
 from colossalai.initialize import launch
+from colossalai.legacy.communication.p2p_v2 import _recv_object, _send_object
 from colossalai.logging import disable_existing_loggers
 from colossalai.testing import rerun_if_address_is_in_use, spawn
 
diff --git a/tests/test_comm/test_comm.py b/tests/test_legacy/test_comm/test_comm.py
similarity index 96%
rename from tests/test_comm/test_comm.py
rename to tests/test_legacy/test_comm/test_comm.py
index 747596bd2ded..3251d8d46f0b 100644
--- a/tests/test_comm/test_comm.py
+++ b/tests/test_legacy/test_comm/test_comm.py
@@ -2,10 +2,10 @@
 
 import torch
 import torch.distributed as dist
 
-from colossalai.communication import all_gather, all_reduce, reduce_scatter
 from colossalai.context import ParallelMode
 from colossalai.core import global_context as gpc
 from colossalai.initialize import launch
+from colossalai.legacy.communication import all_gather, all_reduce, reduce_scatter
 from colossalai.testing import rerun_if_address_is_in_use, spawn
 from colossalai.utils import get_current_device
diff --git a/tests/test_comm/test_object_list_p2p.py b/tests/test_legacy/test_comm/test_object_list_p2p.py
similarity index 98%
rename from tests/test_comm/test_object_list_p2p.py
rename to tests/test_legacy/test_comm/test_object_list_p2p.py
index e9d7630c1543..f50982ee1c2d 100644
--- a/tests/test_comm/test_object_list_p2p.py
+++ b/tests/test_legacy/test_comm/test_object_list_p2p.py
@@ -1,7 +1,10 @@
 import pytest
 import torch
 
-from colossalai.communication.p2p import (
+from colossalai.context import ParallelMode
+from colossalai.core import global_context as gpc
+from colossalai.initialize import launch
+from colossalai.legacy.communication.p2p import (
     recv_backward,
     recv_forward,
     send_backward,
@@ -9,9 +12,6 @@
     send_forward,
     send_forward_recv_backward,
 )
-from colossalai.context import ParallelMode
-from colossalai.core import global_context as gpc
-from colossalai.initialize import launch
 from colossalai.testing import rerun_if_address_is_in_use, spawn
 
 CONFIG = dict(parallel=dict(pipeline=2))
diff --git a/tests/test_comm/test_object_list_p2p_v2.py b/tests/test_legacy/test_comm/test_object_list_p2p_v2.py
similarity index 97%
rename from tests/test_comm/test_object_list_p2p_v2.py
rename to tests/test_legacy/test_comm/test_object_list_p2p_v2.py
index cae38385b6e1..040c63322f2b 100644
--- a/tests/test_comm/test_object_list_p2p_v2.py
+++ b/tests/test_legacy/test_comm/test_object_list_p2p_v2.py
@@ -1,10 +1,10 @@
 import pytest
 import torch
 
-from colossalai.communication.p2p_v2 import recv_backward, recv_forward, send_backward, send_forward
 from colossalai.context import ParallelMode
 from colossalai.core import global_context as gpc
 from colossalai.initialize import launch
+from colossalai.legacy.communication.p2p_v2 import recv_backward, recv_forward, send_backward, send_forward
 from colossalai.logging import disable_existing_loggers
 from colossalai.testing import rerun_if_address_is_in_use, spawn
 
diff --git a/tests/test_legacy/test_trainer/test_pipeline/test_p2p.py b/tests/test_legacy/test_trainer/test_pipeline/test_p2p.py
index 8ad366133d18..5fb678525bb3 100644
--- a/tests/test_legacy/test_trainer/test_pipeline/test_p2p.py
+++ b/tests/test_legacy/test_trainer/test_pipeline/test_p2p.py
@@ -5,7 +5,10 @@
 import torch
 import torch.distributed as dist
 
-from colossalai.communication import (
+from colossalai.context.parallel_mode import ParallelMode
+from colossalai.core import global_context as gpc
+from colossalai.initialize import launch
+from colossalai.legacy.communication import (
     recv_backward,
     recv_forward,
     recv_obj_meta,
@@ -15,9 +18,6 @@
     send_forward_recv_backward,
     send_obj_meta,
 )
-from colossalai.context.parallel_mode import ParallelMode
-from colossalai.core import global_context as gpc
-from colossalai.initialize import launch
 from colossalai.logging import get_dist_logger
 from colossalai.testing import rerun_if_address_is_in_use, spawn
 from colossalai.utils import get_current_device