diff --git a/colossalai/communication/__init__.py b/colossalai/legacy/communication/__init__.py
similarity index 53%
rename from colossalai/communication/__init__.py
rename to colossalai/legacy/communication/__init__.py
index 220481b7af15..88ad0487b785 100644
--- a/colossalai/communication/__init__.py
+++ b/colossalai/legacy/communication/__init__.py
@@ -1,9 +1,17 @@
-from .collective import all_gather, reduce_scatter, all_reduce, broadcast, reduce
-from .p2p import (send_forward, send_forward_recv_forward, send_backward_recv_forward, send_backward,
-                  send_backward_recv_backward, send_forward_recv_backward, send_forward_backward_recv_forward_backward,
-                  recv_forward, recv_backward)
+from .collective import all_gather, all_reduce, broadcast, reduce, reduce_scatter
+from .p2p import (
+    recv_backward,
+    recv_forward,
+    send_backward,
+    send_backward_recv_backward,
+    send_backward_recv_forward,
+    send_forward,
+    send_forward_backward_recv_forward_backward,
+    send_forward_recv_backward,
+    send_forward_recv_forward,
+)
 from .ring import ring_forward
-from .utils import send_obj_meta, recv_obj_meta
+from .utils import recv_obj_meta, send_obj_meta
 
 __all__ = [
     'all_gather',
diff --git a/colossalai/communication/collective.py b/colossalai/legacy/communication/collective.py
similarity index 100%
rename from colossalai/communication/collective.py
rename to colossalai/legacy/communication/collective.py
diff --git a/colossalai/communication/p2p.py b/colossalai/legacy/communication/p2p.py
similarity index 100%
rename from colossalai/communication/p2p.py
rename to colossalai/legacy/communication/p2p.py
diff --git a/colossalai/communication/p2p_v2.py b/colossalai/legacy/communication/p2p_v2.py
similarity index 100%
rename from colossalai/communication/p2p_v2.py
rename to colossalai/legacy/communication/p2p_v2.py
diff --git a/colossalai/communication/ring.py b/colossalai/legacy/communication/ring.py
similarity index 100%
rename from colossalai/communication/ring.py
rename to colossalai/legacy/communication/ring.py
diff --git a/colossalai/communication/utils.py b/colossalai/legacy/communication/utils.py
similarity index 100%
rename from colossalai/communication/utils.py
rename to colossalai/legacy/communication/utils.py
diff --git a/colossalai/legacy/engine/schedule/_pipeline_schedule.py b/colossalai/legacy/engine/schedule/_pipeline_schedule.py
index 88b54ce6af0f..4571fd679e8c 100644
--- a/colossalai/legacy/engine/schedule/_pipeline_schedule.py
+++ b/colossalai/legacy/engine/schedule/_pipeline_schedule.py
@@ -6,7 +6,7 @@
 
 import torch.cuda
 
-import colossalai.communication as comm
+import colossalai.legacy.communication as comm
 from colossalai.amp.naive_amp import NaiveAMPModel
 from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
diff --git a/colossalai/legacy/engine/schedule/_pipeline_schedule_v2.py b/colossalai/legacy/engine/schedule/_pipeline_schedule_v2.py
index 9e7372b675ce..385c615372f5 100644
--- a/colossalai/legacy/engine/schedule/_pipeline_schedule_v2.py
+++ b/colossalai/legacy/engine/schedule/_pipeline_schedule_v2.py
@@ -5,10 +5,10 @@
 
 import torch.cuda
 
-import colossalai.communication.p2p_v2 as comm
-from colossalai import engine
+import colossalai.legacy.communication.p2p_v2 as comm
 from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
+from colossalai.legacy.engine import Engine
 from colossalai.utils.cuda import get_current_device
 
 from ._pipeline_schedule import PipelineSchedule
@@ -60,7 +60,7 @@ def data_process_func(stage_output, dataloader_output):
     """
 
     def forward_backward_step(self,
-                              engine: engine.Engine,
+                              engine: Engine,
                               data_iter: Iterable,
                               forward_only=False,
                               return_loss=True,
diff --git a/colossalai/legacy/trainer/hooks/_metric_hook.py b/colossalai/legacy/trainer/hooks/_metric_hook.py
index d0598c240181..f1bd19387cb5 100644
--- a/colossalai/legacy/trainer/hooks/_metric_hook.py
+++ b/colossalai/legacy/trainer/hooks/_metric_hook.py
@@ -7,9 +7,9 @@
 import torch
 import torch.distributed as dist
 
-from colossalai.communication import all_reduce
 from colossalai.context import ParallelMode
 from colossalai.core import global_context as gpc
+from colossalai.legacy.communication import all_reduce
 from colossalai.legacy.registry import HOOKS
 from colossalai.utils import get_current_device, is_no_pp_or_last_stage
 
diff --git a/colossalai/nn/layer/parallel_1d/layers.py b/colossalai/nn/layer/parallel_1d/layers.py
index 7b129009e4f0..c0a169c1596f 100644
--- a/colossalai/nn/layer/parallel_1d/layers.py
+++ b/colossalai/nn/layer/parallel_1d/layers.py
@@ -10,11 +10,11 @@
 from torch import Tensor
 from torch.nn.parameter import Parameter
 
-from colossalai.communication import broadcast
 from colossalai.context import ParallelMode, seed
 from colossalai.core import global_context as gpc
 from colossalai.global_variables import tensor_parallel_env as env
 from colossalai.kernel import LayerNorm
+from colossalai.legacy.communication import broadcast
 from colossalai.legacy.registry import LAYERS
 from colossalai.nn import init as init
 from colossalai.utils.checkpointing import (
diff --git a/colossalai/nn/layer/parallel_2d/_operation.py b/colossalai/nn/layer/parallel_2d/_operation.py
index 306577dbd933..fa9b49bcf53f 100644
--- a/colossalai/nn/layer/parallel_2d/_operation.py
+++ b/colossalai/nn/layer/parallel_2d/_operation.py
@@ -2,13 +2,14 @@
 
 import torch
 import torch.distributed as dist
-from colossalai.communication.collective import (all_gather, all_reduce, reduce, reduce_scatter)
-from colossalai.context.parallel_mode import ParallelMode
-from colossalai.core import global_context as gpc
-from colossalai.utils import get_current_device
 from torch import Tensor
 from torch.cuda.amp import custom_bwd, custom_fwd
+
+from colossalai.context.parallel_mode import ParallelMode
+from colossalai.core import global_context as gpc
 from colossalai.global_variables import tensor_parallel_env as env
+from colossalai.legacy.communication.collective import all_gather, all_reduce, reduce, reduce_scatter
+from colossalai.utils import get_current_device
 
 
 def matmul_2d(
@@ -226,9 +227,9 @@ def forward(
         col_group = gpc.get_group(col_parallel_mode)
 
         src_a = summa_dim * row_rank + data_parallel_rank * pipeline_parallel_size * tensor_parallel_size + \
-            pipeline_parallel_rank * tensor_parallel_size
+                pipeline_parallel_rank * tensor_parallel_size
         src_b = col_rank + data_parallel_rank * pipeline_parallel_size * tensor_parallel_size + \
-            pipeline_parallel_rank * tensor_parallel_size
+                pipeline_parallel_rank * tensor_parallel_size
 
         opa = [None] * 2
         opb = [None] * 2
@@ -351,9 +352,9 @@ def forward(
         col_group = gpc.get_group(col_parallel_mode)
 
         src_b = col_rank + data_parallel_rank * pipeline_parallel_size * tensor_parallel_size + \
-            pipeline_parallel_rank * tensor_parallel_size
+                pipeline_parallel_rank * tensor_parallel_size
         src_c = summa_dim * row_rank + data_parallel_rank * pipeline_parallel_size * tensor_parallel_size + \
-            pipeline_parallel_rank * tensor_parallel_size
+                pipeline_parallel_rank * tensor_parallel_size
 
         opb = [None] * 2
         opr = [None] * 2
@@ -484,9 +485,9 @@ def forward(
         col_group = gpc.get_group(col_parallel_mode)
 
         src_a = summa_dim * row_rank + data_parallel_rank * pipeline_parallel_size * tensor_parallel_size + \
-            pipeline_parallel_rank * tensor_parallel_size
+                pipeline_parallel_rank * tensor_parallel_size
         src_c = col_rank + data_parallel_rank * pipeline_parallel_size * tensor_parallel_size + \
-            pipeline_parallel_rank * tensor_parallel_size
+                pipeline_parallel_rank * tensor_parallel_size
 
         opa = [None] * 2
         opr = [None] * 2
diff --git a/colossalai/nn/layer/parallel_2d/layers.py b/colossalai/nn/layer/parallel_2d/layers.py
index 1a01d5437aab..b458d15c78e7 100644
--- a/colossalai/nn/layer/parallel_2d/layers.py
+++ b/colossalai/nn/layer/parallel_2d/layers.py
@@ -8,10 +8,10 @@
 from torch import Tensor
 from torch.nn import Parameter
 
-from colossalai.communication import broadcast
 from colossalai.context import ParallelMode, seed
 from colossalai.core import global_context as gpc
 from colossalai.global_variables import tensor_parallel_env as env
+from colossalai.legacy.communication import broadcast
 from colossalai.legacy.registry import LAYERS
 from colossalai.nn import init as init
 from colossalai.utils.checkpointing import gather_tensor_parallel_state_dict, partition_tensor_parallel_state_dict
diff --git a/colossalai/nn/layer/parallel_2p5d/_operation.py b/colossalai/nn/layer/parallel_2p5d/_operation.py
index 5a0f537cd6d9..55defa4a328d 100644
--- a/colossalai/nn/layer/parallel_2p5d/_operation.py
+++ b/colossalai/nn/layer/parallel_2p5d/_operation.py
@@ -2,12 +2,13 @@
 
 import torch
 import torch.distributed as dist
-from colossalai.communication.collective import (all_gather, all_reduce, reduce_scatter)
+from torch import Tensor
+from torch.cuda.amp import custom_bwd, custom_fwd
+
 from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
+from colossalai.legacy.communication.collective import all_gather, all_reduce, reduce_scatter
 from colossalai.utils import get_current_device
-from torch import Tensor
-from torch.cuda.amp import custom_bwd, custom_fwd
 
 
 def get_parallel_group(parallel_mode: ParallelMode):
diff --git a/colossalai/nn/layer/parallel_2p5d/layers.py b/colossalai/nn/layer/parallel_2p5d/layers.py
index 62c4292fdfd7..04acc2bb0f4c 100644
--- a/colossalai/nn/layer/parallel_2p5d/layers.py
+++ b/colossalai/nn/layer/parallel_2p5d/layers.py
@@ -8,10 +8,10 @@
 from torch import Tensor
 from torch.nn import Parameter
 
-from colossalai.communication import broadcast
 from colossalai.context import ParallelMode, seed
 from colossalai.core import global_context as gpc
 from colossalai.global_variables import tensor_parallel_env as env
+from colossalai.legacy.communication import broadcast
 from colossalai.legacy.registry import LAYERS
 from colossalai.nn import init as init
 from colossalai.utils.checkpointing import (
diff --git a/colossalai/nn/layer/parallel_3d/_operation.py b/colossalai/nn/layer/parallel_3d/_operation.py
index 5dc9a242851f..ca0b0e62783a 100755
--- a/colossalai/nn/layer/parallel_3d/_operation.py
+++ b/colossalai/nn/layer/parallel_3d/_operation.py
@@ -7,10 +7,10 @@
 from torch import Tensor
 from torch.cuda.amp import custom_bwd, custom_fwd
 
-from colossalai.communication import all_gather, all_reduce, broadcast, reduce, reduce_scatter
 from colossalai.constants import INPUT_GROUP_3D, WEIGHT_GROUP_3D
 from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
+from colossalai.legacy.communication import all_gather, all_reduce, broadcast, reduce, reduce_scatter
 
 from ._utils import get_parallel_mode_from_env, push_async_grad
 
diff --git a/colossalai/nn/layer/parallel_3d/layers.py b/colossalai/nn/layer/parallel_3d/layers.py
index 7d940aa27564..2861b53013e1 100644
--- a/colossalai/nn/layer/parallel_3d/layers.py
+++ b/colossalai/nn/layer/parallel_3d/layers.py
@@ -8,11 +8,11 @@
 from torch import Tensor
 from torch.nn import Parameter
 
-from colossalai.communication import all_reduce, broadcast
 from colossalai.constants import INPUT_GROUP_3D, INPUT_X_WEIGHT_3D, OUTPUT_GROUP_3D, OUTPUT_X_WEIGHT_3D, WEIGHT_GROUP_3D
 from colossalai.context import ParallelMode, seed
 from colossalai.core import global_context as gpc
 from colossalai.global_variables import tensor_parallel_env as env
+from colossalai.legacy.communication import all_reduce, broadcast
 from colossalai.legacy.registry import LAYERS
 from colossalai.nn import init as init
 from colossalai.nn.layer.base_layer import ParallelLayer
diff --git a/colossalai/nn/layer/parallel_sequence/_operation.py b/colossalai/nn/layer/parallel_sequence/_operation.py
index fc80494224c6..d03102527caa 100644
--- a/colossalai/nn/layer/parallel_sequence/_operation.py
+++ b/colossalai/nn/layer/parallel_sequence/_operation.py
@@ -3,13 +3,13 @@
 
 import torch
 from torch import distributed as dist
+from torch.cuda.amp import custom_bwd, custom_fwd
 
-from colossalai.communication import ring_forward
 from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
-from colossalai.nn.layer.parallel_sequence._utils import _calc_incoming_device_range, _calc_current_device_range
+from colossalai.legacy.communication import ring_forward
+from colossalai.nn.layer.parallel_sequence._utils import _calc_current_device_range, _calc_incoming_device_range
 from colossalai.utils import get_current_device
-from torch.cuda.amp import custom_bwd, custom_fwd
 
 
 class RingQK(torch.autograd.Function):
diff --git a/tests/test_comm/test_boardcast_send_recv_v2.py b/tests/test_legacy/test_comm/test_boardcast_send_recv_v2.py
similarity index 93%
rename from tests/test_comm/test_boardcast_send_recv_v2.py
rename to tests/test_legacy/test_comm/test_boardcast_send_recv_v2.py
index 253f6f21cd80..c5fb049fe93f 100644
--- a/tests/test_comm/test_boardcast_send_recv_v2.py
+++ b/tests/test_legacy/test_comm/test_boardcast_send_recv_v2.py
@@ -1,10 +1,10 @@
 import pytest
 import torch
 
-from colossalai.communication.p2p_v2 import _recv_object, _send_object
 from colossalai.context import ParallelMode
 from colossalai.core import global_context as gpc
 from colossalai.initialize import launch
+from colossalai.legacy.communication.p2p_v2 import _recv_object, _send_object
 from colossalai.logging import disable_existing_loggers
 from colossalai.testing import rerun_if_address_is_in_use, spawn
 
diff --git a/tests/test_comm/test_comm.py b/tests/test_legacy/test_comm/test_comm.py
similarity index 96%
rename from tests/test_comm/test_comm.py
rename to tests/test_legacy/test_comm/test_comm.py
index 747596bd2ded..3251d8d46f0b 100644
--- a/tests/test_comm/test_comm.py
+++ b/tests/test_legacy/test_comm/test_comm.py
@@ -2,10 +2,10 @@
 
 import torch
 import torch.distributed as dist
 
-from colossalai.communication import all_gather, all_reduce, reduce_scatter
 from colossalai.context import ParallelMode
 from colossalai.core import global_context as gpc
 from colossalai.initialize import launch
+from colossalai.legacy.communication import all_gather, all_reduce, reduce_scatter
 from colossalai.testing import rerun_if_address_is_in_use, spawn
 from colossalai.utils import get_current_device
diff --git a/tests/test_comm/test_object_list_p2p.py b/tests/test_legacy/test_comm/test_object_list_p2p.py
similarity index 98%
rename from tests/test_comm/test_object_list_p2p.py
rename to tests/test_legacy/test_comm/test_object_list_p2p.py
index e9d7630c1543..f50982ee1c2d 100644
--- a/tests/test_comm/test_object_list_p2p.py
+++ b/tests/test_legacy/test_comm/test_object_list_p2p.py
@@ -1,7 +1,10 @@
 import pytest
 import torch
 
-from colossalai.communication.p2p import (
+from colossalai.context import ParallelMode
+from colossalai.core import global_context as gpc
+from colossalai.initialize import launch
+from colossalai.legacy.communication.p2p import (
     recv_backward,
     recv_forward,
     send_backward,
@@ -9,9 +12,6 @@
     send_forward,
     send_forward_recv_backward,
 )
-from colossalai.context import ParallelMode
-from colossalai.core import global_context as gpc
-from colossalai.initialize import launch
 from colossalai.testing import rerun_if_address_is_in_use, spawn
 
 CONFIG = dict(parallel=dict(pipeline=2))
diff --git a/tests/test_comm/test_object_list_p2p_v2.py b/tests/test_legacy/test_comm/test_object_list_p2p_v2.py
similarity index 97%
rename from tests/test_comm/test_object_list_p2p_v2.py
rename to tests/test_legacy/test_comm/test_object_list_p2p_v2.py
index cae38385b6e1..040c63322f2b 100644
--- a/tests/test_comm/test_object_list_p2p_v2.py
+++ b/tests/test_legacy/test_comm/test_object_list_p2p_v2.py
@@ -1,10 +1,10 @@
 import pytest
 import torch
 
-from colossalai.communication.p2p_v2 import recv_backward, recv_forward, send_backward, send_forward
 from colossalai.context import ParallelMode
 from colossalai.core import global_context as gpc
 from colossalai.initialize import launch
+from colossalai.legacy.communication.p2p_v2 import recv_backward, recv_forward, send_backward, send_forward
 from colossalai.logging import disable_existing_loggers
 from colossalai.testing import rerun_if_address_is_in_use, spawn
 
diff --git a/tests/test_legacy/test_trainer/test_pipeline/test_p2p.py b/tests/test_legacy/test_trainer/test_pipeline/test_p2p.py
index 8ad366133d18..5fb678525bb3 100644
--- a/tests/test_legacy/test_trainer/test_pipeline/test_p2p.py
+++ b/tests/test_legacy/test_trainer/test_pipeline/test_p2p.py
@@ -5,7 +5,10 @@
 import torch
 import torch.distributed as dist
 
-from colossalai.communication import (
+from colossalai.context.parallel_mode import ParallelMode
+from colossalai.core import global_context as gpc
+from colossalai.initialize import launch
+from colossalai.legacy.communication import (
     recv_backward,
     recv_forward,
     recv_obj_meta,
@@ -15,9 +18,6 @@
     send_forward_recv_backward,
     send_obj_meta,
 )
-from colossalai.context.parallel_mode import ParallelMode
-from colossalai.core import global_context as gpc
-from colossalai.initialize import launch
 from colossalai.logging import get_dist_logger
 from colossalai.testing import rerun_if_address_is_in_use, spawn
 from colossalai.utils import get_current_device