diff --git a/configs/recognition/csn/ircsn_ig65m_pretrained_bnfrozen_r152_32x2x1_58e_kinetics400_rgb.py b/configs/recognition/csn/ircsn_ig65m_pretrained_bnfrozen_r152_32x2x1_58e_kinetics400_rgb.py
new file mode 100644
index 0000000000..3dfa6726e8
--- /dev/null
+++ b/configs/recognition/csn/ircsn_ig65m_pretrained_bnfrozen_r152_32x2x1_58e_kinetics400_rgb.py
@@ -0,0 +1,123 @@
+# model settings
+model = dict(
+    type='Recognizer3D',
+    backbone=dict(
+        type='ResNet3dCSN',
+        pretrained2d=False,
+        pretrained=  # noqa: E251
+        'https://openmmlab.oss-accelerate.aliyuncs.com/mmaction/recognition/csn/ircsn_from_scratch_r152_ig65m_20200807-771c4135.pth',  # noqa: E501
+        depth=152,
+        with_pool2=False,
+        bottleneck_mode='ir',
+        norm_eval=True,
+        bn_frozen=True,
+        zero_init_residual=False),
+    cls_head=dict(
+        type='I3DHead',
+        num_classes=400,
+        in_channels=2048,
+        spatial_type='avg',
+        dropout_ratio=0.5,
+        init_std=0.01))
+# model training and testing settings
+train_cfg = None
+test_cfg = dict(average_clips=None)
+# dataset settings
+dataset_type = 'RawframeDataset'
+data_root = 'data/kinetics400/rawframes_train'
+data_root_val = 'data/kinetics400/rawframes_val'
+ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes.txt'
+ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
+ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
+train_pipeline = [
+    dict(type='SampleFrames', clip_len=32, frame_interval=2, num_clips=1),
+    dict(type='FrameSelector'),
+    dict(type='Resize', scale=(-1, 256)),
+    dict(type='RandomResizedCrop'),
+    dict(type='Resize', scale=(224, 224), keep_ratio=False),
+    dict(type='Flip', flip_ratio=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='FormatShape', input_format='NCTHW'),
+    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
+    dict(type='ToTensor', keys=['imgs', 'label'])
+]
+val_pipeline = [
+    dict(
+        type='SampleFrames',
+        clip_len=32,
+        frame_interval=2,
+        num_clips=1,
+        test_mode=True),
+    dict(type='FrameSelector'),
+    dict(type='Resize', scale=(-1, 256)),
+    dict(type='CenterCrop', crop_size=224),
+    dict(type='Flip', flip_ratio=0),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='FormatShape', input_format='NCTHW'),
+    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
+    dict(type='ToTensor', keys=['imgs'])
+]
+test_pipeline = [
+    dict(
+        type='SampleFrames',
+        clip_len=32,
+        frame_interval=2,
+        num_clips=10,
+        test_mode=True),
+    dict(type='FrameSelector'),
+    dict(type='Resize', scale=(-1, 256)),
+    dict(type='ThreeCrop', crop_size=256),
+    dict(type='Flip', flip_ratio=0),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='FormatShape', input_format='NCTHW'),
+    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
+    dict(type='ToTensor', keys=['imgs'])
+]
+data = dict(
+    videos_per_gpu=3,
+    workers_per_gpu=4,
+    train=dict(
+        type=dataset_type,
+        ann_file=ann_file_train,
+        data_prefix=data_root,
+        pipeline=train_pipeline),
+    val=dict(
+        type=dataset_type,
+        ann_file=ann_file_val,
+        data_prefix=data_root_val,
+        pipeline=val_pipeline),
+    test=dict(
+        type=dataset_type,
+        ann_file=ann_file_test,
+        data_prefix=data_root_val,
+        pipeline=test_pipeline))
+# optimizer
+optimizer = dict(
+    type='SGD', lr=0.0005, momentum=0.9, weight_decay=0.0001)  # lr for 32 gpus
+optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
+# learning policy
+lr_config = dict(
+    policy='step',
+    step=[32, 48],
+    warmup='linear',
+    warmup_ratio=0.1,
+    warmup_by_epoch=True,
+    warmup_iters=16)
+total_epochs = 58
+checkpoint_config = dict(interval=2)
+evaluation = dict(
+    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'], topk=(1, 5))
+log_config = dict(
+    interval=20,
+    hooks=[dict(type='TextLoggerHook'),
+           dict(type='TensorboardLoggerHook')])
+# runtime settings
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/ircsn_ig65m_pretrained_bnfrozen_r152_32x2x1_58e_kinetics400_rgb'  # noqa: E501
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
+find_unused_parameters = True
diff --git a/configs/recognition/csn/ircsn_ig65m_pretrained_r152_32x2x1_58e_kinetics400_rgb.py b/configs/recognition/csn/ircsn_ig65m_pretrained_r152_32x2x1_58e_kinetics400_rgb.py
new file mode 100644
index 0000000000..3eb832dd49
--- /dev/null
+++ b/configs/recognition/csn/ircsn_ig65m_pretrained_r152_32x2x1_58e_kinetics400_rgb.py
@@ -0,0 +1,121 @@
+# model settings
+model = dict(
+    type='Recognizer3D',
+    backbone=dict(
+        type='ResNet3dCSN',
+        pretrained2d=False,
+        pretrained=  # noqa: E251
+        'https://openmmlab.oss-accelerate.aliyuncs.com/mmaction/recognition/csn/ircsn_from_scratch_r152_ig65m_20200807-771c4135.pth',  # noqa: E501
+        depth=152,
+        with_pool2=False,
+        bottleneck_mode='ir',
+        norm_eval=False,
+        zero_init_residual=False),
+    cls_head=dict(
+        type='I3DHead',
+        num_classes=400,
+        in_channels=2048,
+        spatial_type='avg',
+        dropout_ratio=0.5,
+        init_std=0.01))
+# model training and testing settings
+train_cfg = None
+test_cfg = dict(average_clips=None)
+# dataset settings
+dataset_type = 'RawframeDataset'
+data_root = 'data/kinetics400/rawframes_train'
+data_root_val = 'data/kinetics400/rawframes_val'
+ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes.txt'
+ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
+ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
+train_pipeline = [
+    dict(type='SampleFrames', clip_len=32, frame_interval=2, num_clips=1),
+    dict(type='FrameSelector'),
+    dict(type='Resize', scale=(-1, 256)),
+    dict(type='RandomResizedCrop'),
+    dict(type='Resize', scale=(224, 224), keep_ratio=False),
+    dict(type='Flip', flip_ratio=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='FormatShape', input_format='NCTHW'),
+    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
+    dict(type='ToTensor', keys=['imgs', 'label'])
+]
+val_pipeline = [
+    dict(
+        type='SampleFrames',
+        clip_len=32,
+        frame_interval=2,
+        num_clips=1,
+        test_mode=True),
+    dict(type='FrameSelector'),
+    dict(type='Resize', scale=(-1, 256)),
+    dict(type='CenterCrop', crop_size=224),
+    dict(type='Flip', flip_ratio=0),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='FormatShape', input_format='NCTHW'),
+    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
+    dict(type='ToTensor', keys=['imgs'])
+]
+test_pipeline = [
+    dict(
+        type='SampleFrames',
+        clip_len=32,
+        frame_interval=2,
+        num_clips=10,
+        test_mode=True),
+    dict(type='FrameSelector'),
+    dict(type='Resize', scale=(-1, 256)),
+    dict(type='ThreeCrop', crop_size=256),
+    dict(type='Flip', flip_ratio=0),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='FormatShape', input_format='NCTHW'),
+    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
+    dict(type='ToTensor', keys=['imgs'])
+]
+data = dict(
+    videos_per_gpu=3,
+    workers_per_gpu=4,
+    train=dict(
+        type=dataset_type,
+        ann_file=ann_file_train,
+        data_prefix=data_root,
+        pipeline=train_pipeline),
+    val=dict(
+        type=dataset_type,
+        ann_file=ann_file_val,
+        data_prefix=data_root_val,
+        pipeline=val_pipeline),
+    test=dict(
+        type=dataset_type,
+        ann_file=ann_file_test,
+        data_prefix=data_root_val,
+        pipeline=test_pipeline))
+# optimizer
+optimizer = dict(
+    type='SGD', lr=0.0005, momentum=0.9, weight_decay=0.0001)  # lr for 32 gpus
+optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
+# learning policy
+lr_config = dict(
+    policy='step',
+    step=[32, 48],
+    warmup='linear',
+    warmup_ratio=0.1,
+    warmup_by_epoch=True,
+    warmup_iters=16)
+total_epochs = 58
+checkpoint_config = dict(interval=2)
+evaluation = dict(
+    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'], topk=(1, 5))
+log_config = dict(
+    interval=20,
+    hooks=[dict(type='TextLoggerHook'),
+           dict(type='TensorboardLoggerHook')])
+# runtime settings
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/ircsn_ig65m_pretrained_r152_32x2x1_58e_kinetics400_rgb'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
diff --git a/mmaction/models/__init__.py b/mmaction/models/__init__.py
index 1b4be9e983..5726187f56 100644
--- a/mmaction/models/__init__.py
+++ b/mmaction/models/__init__.py
@@ -1,5 +1,5 @@
-from .backbones import (ResNet, ResNet2Plus1d, ResNet3d, ResNet3dSlowFast,
-                        ResNet3dSlowOnly, ResNetTSM)
+from .backbones import (ResNet, ResNet2Plus1d, ResNet3d, ResNet3dCSN,
+                        ResNet3dSlowFast, ResNet3dSlowOnly, ResNetTSM)
 from .builder import (build_backbone, build_head, build_localizer, build_model,
                       build_recognizer)
 from .common import Conv2plus1d
@@ -18,5 +18,5 @@
     'ResNet3dSlowFast', 'SlowFastHead', 'Conv2plus1d', 'ResNet3dSlowOnly',
     'BCELossWithLogits', 'LOCALIZERS', 'build_localizer', 'PEM', 'TEM',
     'BinaryLogisticRegressionLoss', 'BMN', 'BMNLoss', 'build_model',
-    'OHEMHingeLoss', 'SSNLoss'
+    'OHEMHingeLoss', 'SSNLoss', 'ResNet3dCSN'
 ]
diff --git a/mmaction/models/backbones/__init__.py b/mmaction/models/backbones/__init__.py
index cf703d8dda..1e27f2ee6c 100644
--- a/mmaction/models/backbones/__init__.py
+++ b/mmaction/models/backbones/__init__.py
@@ -1,11 +1,12 @@
 from .resnet import ResNet
 from .resnet2plus1d import ResNet2Plus1d
 from .resnet3d import ResNet3d
+from .resnet3d_csn import ResNet3dCSN
 from .resnet3d_slowfast import ResNet3dSlowFast
 from .resnet3d_slowonly import ResNet3dSlowOnly
 from .resnet_tsm import ResNetTSM
 
 __all__ = [
     'ResNet', 'ResNet3d', 'ResNetTSM', 'ResNet2Plus1d', 'ResNet3dSlowFast',
-    'ResNet3dSlowOnly'
+    'ResNet3dSlowOnly', 'ResNet3dCSN'
 ]
diff --git a/mmaction/models/backbones/resnet3d.py b/mmaction/models/backbones/resnet3d.py
index 682c140a77..1d95a95df1 100644
--- a/mmaction/models/backbones/resnet3d.py
+++ b/mmaction/models/backbones/resnet3d.py
@@ -368,8 +368,10 @@ class ResNet3d(nn.Module):
         non_local (Sequence[int]): Determine whether to apply non-local module
             in the corresponding block of each stages. Default: (0, 0, 0, 0).
         non_local_cfg (dict): Config for non-local module. Default: ``dict()``.
-        zero_init_residual (bool): Whether to use zero initialization for
-            residual block, Default: True.
+        zero_init_residual (bool):
+            Whether to use zero initialization for residual block,
+            Default: True.
+        kwargs (dict, optional): Keyword arguments for "make_res_layer".
""" arch_settings = { @@ -405,7 +407,8 @@ def __init__(self, with_cp=False, non_local=(0, 0, 0, 0), non_local_cfg=dict(), - zero_init_residual=True): + zero_init_residual=True, + **kwargs): super().__init__() if depth not in self.arch_settings: raise KeyError(f'invalid depth {depth} for resnet') @@ -467,7 +470,8 @@ def __init__(self, non_local_cfg=self.non_local_cfg, inflate=self.stage_inflations[i], inflate_style=self.inflate_style, - with_cp=with_cp) + with_cp=with_cp, + **kwargs) self.inplanes = planes * self.block.expansion layer_name = f'layer{i + 1}' self.add_module(layer_name, res_layer) @@ -492,7 +496,8 @@ def make_res_layer(self, norm_cfg=None, act_cfg=None, conv_cfg=None, - with_cp=False): + with_cp=False, + **kwargs): """Build residual layer for ResNet3D. Args: @@ -565,7 +570,8 @@ def make_res_layer(self, norm_cfg=norm_cfg, conv_cfg=conv_cfg, act_cfg=act_cfg, - with_cp=with_cp)) + with_cp=with_cp, + **kwargs)) inplanes = planes * block.expansion for i in range(1, blocks): layers.append( @@ -583,7 +589,8 @@ def make_res_layer(self, norm_cfg=norm_cfg, conv_cfg=conv_cfg, act_cfg=act_cfg, - with_cp=with_cp)) + with_cp=with_cp, + **kwargs)) return nn.Sequential(*layers) diff --git a/mmaction/models/backbones/resnet3d_csn.py b/mmaction/models/backbones/resnet3d_csn.py new file mode 100644 index 0000000000..d34683f404 --- /dev/null +++ b/mmaction/models/backbones/resnet3d_csn.py @@ -0,0 +1,148 @@ +import torch.nn as nn +from mmcv.cnn import ConvModule +from mmcv.utils import _BatchNorm + +from ..registry import BACKBONES +from .resnet3d import Bottleneck3d, ResNet3d + + +class CSNBottleneck3d(Bottleneck3d): + """Channel-Separated Bottleneck Block. + + This module is proposed in + "Video Classification with Channel-Separated Convolutional Networks" + Link: https://arxiv.org/pdf/1711.11248.pdf + + Args: + inplanes (int): Number of channels for the input in first conv3d layer. + planes (int): Number of channels produced by some norm/conv3d layers. + bottleneck_mode (str): Determine which ways to factorize a 3D + bottleneck block using channel-separated convolutional networks. + If set to 'ip', it will replace the 3x3x3 conv2 layer with a + 1x1x1 traditional convolution and a 3x3x3 depthwise + convolution, i.e., Interaction-preserved channel-separated + bottleneck block. + If set to 'ir', it will replace the 3x3x3 conv2 layer with a + 3x3x3 depthwise convolution, which is derived from preserved + bottleneck block by removing the extra 1x1x1 convolution, + i.e., Interaction-reduced channel-separated bottleneck block. + Default: 'ir'. + args (position arguments): Position arguments for Bottleneck. + kwargs (dict, optional): Keyword arguments for Bottleneck. 
+ """ + + def __init__(self, + inplanes, + planes, + *args, + bottleneck_mode='ir', + **kwargs): + super(CSNBottleneck3d, self).__init__(inplanes, planes, *args, + **kwargs) + self.bottleneck_mode = bottleneck_mode + conv2 = [] + if self.bottleneck_mode == 'ip': + conv2.append( + nn.Conv3d(planes, planes, kernel_size=1, stride=1, bias=False)) + conv2_kernel_size = self.conv2.conv.kernel_size + conv2_stride = self.conv2.conv.stride + conv2_padding = self.conv2.conv.padding + conv2_dilation = self.conv2.conv.dilation + conv2_bias = True if self.conv2.conv.bias else False + self.conv2 = ConvModule( + planes, + planes, + conv2_kernel_size, + stride=conv2_stride, + padding=conv2_padding, + dilation=conv2_dilation, + bias=conv2_bias, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + groups=planes) + conv2.append(self.conv2) + self.conv2 = nn.Sequential(*conv2) + + +@BACKBONES.register_module() +class ResNet3dCSN(ResNet3d): + """ResNet backbone for CSN. + + Args: + depth (int): Depth of ResNetCSN, from {18, 34, 50, 101, 152}. + pretrained (str | None): Name of pretrained model. + temporal_strides (tuple[int]): + Temporal strides of residual blocks of each stage. + Default: (1, 2, 2, 2). + conv1_kernel (tuple[int]): Kernel size of the first conv layer. + Default: (3, 7, 7). + conv1_stride_t (int): Temporal stride of the first conv layer. + Default: 1. + pool1_stride_t (int): Temporal stride of the first pooling layer. + Default: 1. + norm_cfg (dict): Config for norm layers. required keys are `type` and + `requires_grad`. + Default: dict(type='BN3d', requires_grad=True, eps=1e-3). + inflate_style (str): `3x1x1` or `1x1x1`. which determines the kernel + sizes and padding strides for conv1 and conv2 in each block. + Default: '3x3x3'. + bottleneck_mode (str): Determine which ways to factorize a 3D + bottleneck block using channel-separated convolutional networks. + If set to 'ip', it will replace the 3x3x3 conv2 layer with a + 1x1x1 traditional convolution and a 3x3x3 depthwise + convolution, i.e., Interaction-preserved channel-separated + bottleneck block. + If set to 'ir', it will replace the 3x3x3 conv2 layer with a + 3x3x3 depthwise convolution, which is derived from preserved + bottleneck block by removing the extra 1x1x1 convolution, + i.e., Interaction-reduced channel-separated bottleneck block. + Default: 'ip'. + kwargs (dict, optional): Key arguments for "make_res_layer". 
+ """ + + def __init__(self, + depth, + pretrained, + temporal_strides=(1, 2, 2, 2), + conv1_kernel=(3, 7, 7), + conv1_stride_t=1, + pool1_stride_t=1, + norm_cfg=dict(type='BN3d', requires_grad=True, eps=1e-3), + inflate_style='3x3x3', + bottleneck_mode='ir', + bn_frozen=False, + **kwargs): + self.arch_settings = { + # 18: (BasicBlock3d, (2, 2, 2, 2)), + # 34: (BasicBlock3d, (3, 4, 6, 3)), + 50: (CSNBottleneck3d, (3, 4, 6, 3)), + 101: (CSNBottleneck3d, (3, 4, 23, 3)), + 152: (CSNBottleneck3d, (3, 8, 36, 3)) + } + self.bn_frozen = bn_frozen + if bottleneck_mode not in ['ip', 'ir']: + raise ValueError(f'Bottleneck mode must be "ip" or "ir",' + f'but got {bottleneck_mode}.') + super(ResNet3dCSN, self).__init__( + depth, + pretrained, + temporal_strides=temporal_strides, + conv1_kernel=conv1_kernel, + conv1_stride_t=conv1_stride_t, + pool1_stride_t=pool1_stride_t, + norm_cfg=norm_cfg, + inflate_style=inflate_style, + bottleneck_mode=bottleneck_mode, + **kwargs) + + def train(self, mode=True): + super(ResNet3d, self).train() + self._freeze_stages() + if mode and self.norm_eval: + for m in self.modules(): + if isinstance(m, _BatchNorm): + m.eval() + if self.bn_frozen: + for param in m.parameters(): + param.requires_grad = False diff --git a/tests/test_models/test_backbone.py b/tests/test_models/test_backbone.py index 79f835c06d..6946db23b9 100644 --- a/tests/test_models/test_backbone.py +++ b/tests/test_models/test_backbone.py @@ -6,8 +6,8 @@ import torch.nn as nn from mmcv.utils import _BatchNorm -from mmaction.models import (ResNet, ResNet2Plus1d, ResNet3d, ResNet3dSlowFast, - ResNet3dSlowOnly, ResNetTSM) +from mmaction.models import (ResNet, ResNet2Plus1d, ResNet3d, ResNet3dCSN, + ResNet3dSlowFast, ResNet3dSlowOnly, ResNetTSM) from mmaction.models.backbones.resnet_tsm import NL3DWrapper @@ -736,6 +736,68 @@ def test_slowonly_backbone(): assert feat.shape == torch.Size([1, 2048, 8, 2, 2]) +def test_resnet_csn_backbone(): + """Test resnet_csn backbone.""" + with pytest.raises(ValueError): + # Bottleneck mode must be "ip" or "ir" + ResNet3dCSN(152, None, bottleneck_mode='id') + + input_shape = (2, 3, 6, 64, 64) + imgs = _demo_inputs(input_shape) + + resnet3d_csn_frozen = ResNet3dCSN( + 152, None, bn_frozen=True, norm_eval=True) + resnet3d_csn_frozen.train() + for m in resnet3d_csn_frozen.modules(): + if isinstance(m, _BatchNorm): + for param in m.parameters(): + assert param.requires_grad is False + + # Interaction-preserved channel-separated bottleneck block + resnet3d_csn_ip = ResNet3dCSN(152, None, bottleneck_mode='ip') + resnet3d_csn_ip.init_weights() + resnet3d_csn_ip.train() + for i, layer_name in enumerate(resnet3d_csn_ip.res_layers): + layers = getattr(resnet3d_csn_ip, layer_name) + num_blocks = resnet3d_csn_ip.stage_blocks[i] + assert len(layers) == num_blocks + for layer in layers: + assert isinstance(layer.conv2, nn.Sequential) + assert len(layer.conv2) == 2 + assert layer.conv2[1].groups == layer.planes + if torch.__version__ == 'parrots': + if torch.cuda.is_available(): + resnet3d_csn_ip = resnet3d_csn_ip.cuda() + imgs_gpu = imgs.cuda() + feat = resnet3d_csn_ip(imgs_gpu) + assert feat.shape == torch.Size([2, 2048, 1, 2, 2]) + else: + feat = resnet3d_csn_ip(imgs) + assert feat.shape == torch.Size([2, 2048, 1, 2, 2]) + + # Interaction-reduced channel-separated bottleneck block + resnet3d_csn_ir = ResNet3dCSN(152, None, bottleneck_mode='ir') + resnet3d_csn_ir.init_weights() + resnet3d_csn_ir.train() + for i, layer_name in enumerate(resnet3d_csn_ir.res_layers): + layers = 
getattr(resnet3d_csn_ir, layer_name) + num_blocks = resnet3d_csn_ir.stage_blocks[i] + assert len(layers) == num_blocks + for layer in layers: + assert isinstance(layer.conv2, nn.Sequential) + assert len(layer.conv2) == 1 + assert layer.conv2[0].groups == layer.planes + if torch.__version__ == 'parrots': + if torch.cuda.is_available(): + resnet3d_csn_ir = resnet3d_csn_ir.cuda() + imgs_gpu = imgs.cuda() + feat = resnet3d_csn_ir(imgs_gpu) + assert feat.shape == torch.Size([2, 2048, 1, 2, 2]) + else: + feat = resnet3d_csn_ir(imgs) + assert feat.shape == torch.Size([2, 2048, 1, 2, 2]) + + def _demo_inputs(input_shape=(1, 3, 64, 64)): """Create a superset of inputs needed to run backbone. diff --git a/tests/test_models/test_recognizers.py b/tests/test_models/test_recognizers.py index 96a730745e..8ce86d6ec3 100644 --- a/tests/test_models/test_recognizers.py +++ b/tests/test_models/test_recognizers.py @@ -242,6 +242,46 @@ def test_tsm(): recognizer(one_img, None, return_loss=False) +def test_csn(): + model, train_cfg, test_cfg = _get_recognizer_cfg( + 'csn/ircsn_ig65m_pretrained_r152_32x2x1_58e_kinetics400_rgb.py') + model['backbone']['pretrained2d'] = False + model['backbone']['pretrained'] = None + + recognizer = build_recognizer( + model, train_cfg=train_cfg, test_cfg=test_cfg) + + input_shape = (1, 3, 3, 8, 32, 32) + demo_inputs = generate_demo_inputs(input_shape, '3D') + + imgs = demo_inputs['imgs'] + gt_labels = demo_inputs['gt_labels'] + + # parrots 3dconv is only implemented on gpu + if torch.__version__ == 'parrots': + if torch.cuda.is_available(): + recognizer = recognizer.cuda() + imgs = imgs.cuda() + gt_labels = gt_labels.cuda() + losses = recognizer(imgs, gt_labels) + assert isinstance(losses, dict) + + # Test forward test + with torch.no_grad(): + img_list = [img[None, :] for img in imgs] + for one_img in img_list: + recognizer(one_img, None, return_loss=False) + else: + losses = recognizer(imgs, gt_labels) + assert isinstance(losses, dict) + + # Test forward test + with torch.no_grad(): + img_list = [img[None, :] for img in imgs] + for one_img in img_list: + recognizer(one_img, None, return_loss=False) + + def generate_demo_inputs(input_shape=(1, 3, 3, 224, 224), model_type='2D'): """Create a superset of inputs needed to run test or train batches.
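Note on the `lr_config` shared by both configs: with `warmup_by_epoch=True`, `warmup_iters=16` counts epochs, so the rate ramps linearly from `warmup_ratio * lr` (5e-05) up to `lr` (0.0005) over the first 16 epochs, and `policy='step'` then decays it tenfold at epochs 32 and 48 of the 58 total. A minimal sketch of that curve, assuming mmcv's linear-warmup formula; the helper name `lr_at_epoch` is ours, for illustration only:

def lr_at_epoch(epoch,
                base_lr=0.0005,
                warmup_iters=16,
                warmup_ratio=0.1,
                steps=(32, 48),
                gamma=0.1):
    """Approximate per-epoch lr for the step policy with linear warmup."""
    # step decay: multiply by gamma once per milestone already passed
    regular_lr = base_lr * gamma**sum(epoch >= s for s in steps)
    if epoch >= warmup_iters:
        return regular_lr
    # mmcv linear warmup: lr * (1 - (1 - progress) * (1 - warmup_ratio))
    k = (1 - epoch / warmup_iters) * (1 - warmup_ratio)
    return regular_lr * (1 - k)

for e in (0, 8, 16, 32, 48, 57):
    print(e, lr_at_epoch(e))  # 5e-05 at epoch 0, 5e-06 from epoch 48 on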
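Note on `bottleneck_mode` in `CSNBottleneck3d` above: both variants swap the dense 3x3x3 `conv2` for a depthwise one (`groups == channels`); 'ip' keeps an extra 1x1x1 pointwise convolution in front of it to preserve channel interactions, while 'ir' drops it. A self-contained sketch of just this factorization in plain PyTorch; `make_csn_conv2` is a hypothetical helper for illustration, and the real block builds the depthwise part with mmcv's `ConvModule`, so it also carries BN and ReLU:

import torch
import torch.nn as nn

def make_csn_conv2(planes, mode='ir'):
    # depthwise 3x3x3: each channel is convolved independently
    depthwise = nn.Conv3d(
        planes, planes, kernel_size=3, padding=1, groups=planes, bias=False)
    if mode == 'ip':  # interaction-preserved: pointwise 1x1x1 + depthwise
        pointwise = nn.Conv3d(planes, planes, kernel_size=1, bias=False)
        return nn.Sequential(pointwise, depthwise)
    if mode == 'ir':  # interaction-reduced: depthwise only
        return nn.Sequential(depthwise)
    raise ValueError(f'Bottleneck mode must be "ip" or "ir", but got {mode}.')

x = torch.randn(1, 64, 4, 8, 8)  # [N, C, T, H, W]
for mode in ('ip', 'ir'):
    conv2 = make_csn_conv2(64, mode)
    n_params = sum(p.numel() for p in conv2.parameters())
    # a dense 3x3x3 conv would hold 64 * 64 * 27 = 110592 weights; the
    # depthwise conv has 64 * 27 = 1728, plus 64 * 64 = 4096 pointwise in 'ip'
    print(mode, tuple(conv2(x).shape), n_params)  # ip: 5824, ir: 1728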
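Finally, a hedged usage sketch mirroring what `test_resnet_csn_backbone` asserts, assuming this PR's mmaction is importable; depth 50 and a small input are chosen only to keep the forward pass light, whereas the configs above train depth 152 from the IG-65M checkpoint:

import torch
from mmcv.utils import _BatchNorm

from mmaction.models import ResNet3dCSN

backbone = ResNet3dCSN(
    depth=50,
    pretrained=None,  # the configs point at the IG-65M checkpoint URL instead
    bottleneck_mode='ir',
    norm_eval=True,
    bn_frozen=True,
    zero_init_residual=False)
backbone.init_weights()
# the overridden train() keeps BN in eval mode and, with bn_frozen=True,
# also sets requires_grad=False on every BN weight and bias
backbone.train()
assert all(not p.requires_grad
           for m in backbone.modules() if isinstance(m, _BatchNorm)
           for p in m.parameters())

feat = backbone(torch.randn(1, 3, 8, 64, 64))  # [N, C, T, H, W]
print(feat.shape)  # expect torch.Size([1, 2048, 1, 2, 2]) as in the unit test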