From 139c1e0f62e8212d6695c7a55cb8f0ebbcc47637 Mon Sep 17 00:00:00 2001 From: xusu Date: Sun, 7 Feb 2021 15:37:46 +0800 Subject: [PATCH 1/7] Add configs and some stats. --- configs/recognition/slowfast/README.md | 5 +- ...owfast_r101_4x16x1_256e_kinetics400_rgb.py | 136 +++++++++++++++++ ...lowfast_r101_8x8x1_256e_kinetics400_rgb.py | 137 ++++++++++++++++++ ...owfast_r152_4x16x1_256e_kinetics400_rgb.py | 136 +++++++++++++++++ ...lowfast_r152_8x8x1_256e_kinetics400_rgb.py | 137 ++++++++++++++++++ 5 files changed, 550 insertions(+), 1 deletion(-) create mode 100644 configs/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb.py create mode 100644 configs/recognition/slowfast/slowfast_r101_8x8x1_256e_kinetics400_rgb.py create mode 100644 configs/recognition/slowfast/slowfast_r152_4x16x1_256e_kinetics400_rgb.py create mode 100644 configs/recognition/slowfast/slowfast_r152_8x8x1_256e_kinetics400_rgb.py diff --git a/configs/recognition/slowfast/README.md b/configs/recognition/slowfast/README.md index 1ed8080863..3ac61de23d 100644 --- a/configs/recognition/slowfast/README.md +++ b/configs/recognition/slowfast/README.md @@ -25,7 +25,10 @@ |[slowfast_r50_4x16x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r50_4x16x1_256e_kinetics400_rgb.py) |short-side 320|8x3| ResNet50|None |75.64|92.3|1.6 ((32+4)x10x3 frames)|6203|[ckpt](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_4x16x1_256e_kinetics400_rgb/slowfast_r50_4x16x1_256e_kinetics400_rgb_20200704-bcde7ed7.pth)| [log](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_4x16x1_256e_kinetics400_rgb/20200704_232901.log)| [json](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_4x16x1_256e_kinetics400_rgb/20200704_232901.log.json)| |[slowfast_r50_8x8x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb.py) |short-side 256|8x4| ResNet50 |None |75.61|92.34|x|9062|[ckpt](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_256p_8x8x1_256e_kinetics400_rgb/slowfast_r50_256p_8x8x1_256e_kinetics400_rgb_20200810-863812c2.pth)|[log](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_256p_8x8x1_256e_kinetics400_rgb/20200731_151537.log)|[json](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_256p_8x8x1_256e_kinetics400_rgb/20200731_151537.log.json)| |[slowfast_r50_8x8x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb.py) |short-side 320|8x3| ResNet50 |None|76.94|92.8|1.3 ((32+8)x10x3 frames)|9062| [ckpt](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb/slowfast_r50_8x8x1_256e_kinetics400_rgb_20200716-73547d2b.pth) | [log](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb/20200716_192653.log)| [json](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb/20200716_192653.log.json)| - +|[slowfast_r101_4x16x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x1| ResNet101 |None||||| [ckpt]() | [log]()| [json]()| +|[slowfast_r101_8x8x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x1| ResNet101 |None||||| [ckpt]() | [log]()| [json]()| +|[slowfast_r152_4x16x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r152_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x1| ResNet152 |None||||| [ckpt]() | [log]()| [json]()| +|[slowfast_r152_8x8x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r152_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x1| ResNet152 |None||||| [ckpt]() | [log]()| [json]()| Notes: 1. The **gpus** indicates the number of gpu we used to get the checkpoint. It is noteworthy that the configs we provide are used for 8 gpus as default. diff --git a/configs/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb.py b/configs/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb.py new file mode 100644 index 0000000000..ef70bbaffe --- /dev/null +++ b/configs/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb.py @@ -0,0 +1,136 @@ +model = dict( + type='Recognizer3D', + backbone=dict( + type='ResNet3dSlowFast', + pretrained=None, + resample_rate=8, # tau + speed_ratio=8, # alpha + channel_ratio=8, # beta_inv + slow_pathway=dict( + type='resnet3d', + depth=101, + pretrained=None, + lateral=True, + conv1_kernel=(1, 7, 7), + dilations=(1, 1, 1, 1), + conv1_stride_t=1, + pool1_stride_t=1, + inflate=(0, 0, 1, 1), + norm_eval=False), + fast_pathway=dict( + type='resnet3d', + depth=50, + pretrained=None, + lateral=False, + base_channels=8, + conv1_kernel=(5, 7, 7), + conv1_stride_t=1, + pool1_stride_t=1, + norm_eval=False)), + cls_head=dict( + type='SlowFastHead', + in_channels=2304, # 2048+256 + num_classes=400, + spatial_type='avg', + dropout_ratio=0.5)) +train_cfg = None +test_cfg = dict(average_clips='prob') +dataset_type = 'RawframeDataset' +data_root = 'data/kinetics400/rawframes_train' +data_root_val = 'data/kinetics400/rawframes_val' +ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes.txt' +ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes.txt' +ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes.txt' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False) +train_pipeline = [ + dict(type='SampleFrames', clip_len=32, frame_interval=2, num_clips=1), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='RandomResizedCrop'), + dict(type='Resize', scale=(224, 224), keep_ratio=False), + dict(type='Flip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs', 'label']) +] +val_pipeline = [ + dict( + type='SampleFrames', + clip_len=32, + frame_interval=2, + num_clips=1, + test_mode=True), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='CenterCrop', crop_size=224), + dict(type='Flip', flip_ratio=0), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs']) +] +test_pipeline = [ + dict( + type='SampleFrames', + clip_len=32, + frame_interval=2, + num_clips=10, + test_mode=True), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='ThreeCrop', crop_size=256), + dict(type='Flip', flip_ratio=0), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs']) +] +data = dict( + videos_per_gpu=8, + workers_per_gpu=4, + train=dict( + type=dataset_type, + ann_file=ann_file_train, + data_prefix=data_root, + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=ann_file_val, + data_prefix=data_root_val, + pipeline=val_pipeline), + test=dict( + type=dataset_type, + ann_file=ann_file_test, + data_prefix=data_root_val, + pipeline=test_pipeline)) +# optimizer +optimizer = dict( + type='SGD', lr=0.1, momentum=0.9, + weight_decay=0.0001) # this lr is used for 8 gpus +optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2)) +# learning policy +lr_config = dict( + policy='CosineAnnealing', + min_lr=0, + warmup='linear', + warmup_by_epoch=True, + warmup_iters=34) +total_epochs = 256 +checkpoint_config = dict(interval=4) +workflow = [('train', 1)] +evaluation = dict( + interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy']) +log_config = dict( + interval=20, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook'), + ]) +dist_params = dict(backend='nccl') +log_level = 'INFO' +work_dir = './work_dirs/slowfast_r101_3d_4x16x1_256e_kinetics400_rgb' +load_from = None +resume_from = None +find_unused_parameters = False diff --git a/configs/recognition/slowfast/slowfast_r101_8x8x1_256e_kinetics400_rgb.py b/configs/recognition/slowfast/slowfast_r101_8x8x1_256e_kinetics400_rgb.py new file mode 100644 index 0000000000..add27189ee --- /dev/null +++ b/configs/recognition/slowfast/slowfast_r101_8x8x1_256e_kinetics400_rgb.py @@ -0,0 +1,137 @@ +model = dict( + type='Recognizer3D', + backbone=dict( + type='ResNet3dSlowFast', + pretrained=None, + resample_rate=4, # tau + speed_ratio=4, # alpha + channel_ratio=8, # beta_inv + slow_pathway=dict( + type='resnet3d', + depth=101, + pretrained=None, + lateral=True, + fusion_kernel=7, + conv1_kernel=(1, 7, 7), + dilations=(1, 1, 1, 1), + conv1_stride_t=1, + pool1_stride_t=1, + inflate=(0, 0, 1, 1), + norm_eval=False), + fast_pathway=dict( + type='resnet3d', + depth=50, + pretrained=None, + lateral=False, + base_channels=8, + conv1_kernel=(5, 7, 7), + conv1_stride_t=1, + pool1_stride_t=1, + norm_eval=False)), + cls_head=dict( + type='SlowFastHead', + in_channels=2304, # 2048+256 + num_classes=400, + spatial_type='avg', + dropout_ratio=0.5)) +train_cfg = None +test_cfg = dict(average_clips='prob') +dataset_type = 'RawframeDataset' +data_root = 'data/kinetics400/rawframes_train' +data_root_val = 'data/kinetics400/rawframes_val' +ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes.txt' +ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes.txt' +ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes.txt' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False) +train_pipeline = [ + dict(type='SampleFrames', clip_len=32, frame_interval=2, num_clips=1), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='RandomResizedCrop'), + dict(type='Resize', scale=(224, 224), keep_ratio=False), + dict(type='Flip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs', 'label']) +] +val_pipeline = [ + dict( + type='SampleFrames', + clip_len=32, + frame_interval=2, + num_clips=1, + test_mode=True), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='CenterCrop', crop_size=224), + dict(type='Flip', flip_ratio=0), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs']) +] +test_pipeline = [ + dict( + type='SampleFrames', + clip_len=32, + frame_interval=2, + num_clips=10, + test_mode=True), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='ThreeCrop', crop_size=256), + dict(type='Flip', flip_ratio=0), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs']) +] +data = dict( + videos_per_gpu=8, + workers_per_gpu=4, + train=dict( + type=dataset_type, + ann_file=ann_file_train, + data_prefix=data_root, + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=ann_file_val, + data_prefix=data_root_val, + pipeline=val_pipeline), + test=dict( + type=dataset_type, + ann_file=ann_file_test, + data_prefix=data_root_val, + pipeline=test_pipeline)) +# optimizer +optimizer = dict( + type='SGD', lr=0.1, momentum=0.9, + weight_decay=0.0001) # this lr is used for 8 gpus +optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2)) +# learning policy +lr_config = dict( + policy='CosineAnnealing', + min_lr=0, + warmup='linear', + warmup_by_epoch=True, + warmup_iters=34) +total_epochs = 256 +checkpoint_config = dict(interval=4) +workflow = [('train', 1)] +evaluation = dict( + interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy']) +log_config = dict( + interval=20, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook'), + ]) +dist_params = dict(backend='nccl') +log_level = 'INFO' +work_dir = './work_dirs/slowfast_r101_3d_8x8x1_256e_kinetics400_rgb' +load_from = None +resume_from = None +find_unused_parameters = False diff --git a/configs/recognition/slowfast/slowfast_r152_4x16x1_256e_kinetics400_rgb.py b/configs/recognition/slowfast/slowfast_r152_4x16x1_256e_kinetics400_rgb.py new file mode 100644 index 0000000000..9e9167c2a6 --- /dev/null +++ b/configs/recognition/slowfast/slowfast_r152_4x16x1_256e_kinetics400_rgb.py @@ -0,0 +1,136 @@ +model = dict( + type='Recognizer3D', + backbone=dict( + type='ResNet3dSlowFast', + pretrained=None, + resample_rate=8, # tau + speed_ratio=8, # alpha + channel_ratio=8, # beta_inv + slow_pathway=dict( + type='resnet3d', + depth=152, + pretrained=None, + lateral=True, + conv1_kernel=(1, 7, 7), + dilations=(1, 1, 1, 1), + conv1_stride_t=1, + pool1_stride_t=1, + inflate=(0, 0, 1, 1), + norm_eval=False), + fast_pathway=dict( + type='resnet3d', + depth=50, + pretrained=None, + lateral=False, + base_channels=8, + conv1_kernel=(5, 7, 7), + conv1_stride_t=1, + pool1_stride_t=1, + norm_eval=False)), + cls_head=dict( + type='SlowFastHead', + in_channels=2304, # 2048+256 + num_classes=400, + spatial_type='avg', + dropout_ratio=0.5)) +train_cfg = None +test_cfg = dict(average_clips='prob', max_testing_views=8) +dataset_type = 'RawframeDataset' +data_root = 'data/kinetics400/rawframes_train' +data_root_val = 'data/kinetics400/rawframes_val' +ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes.txt' +ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes.txt' +ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes.txt' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False) +train_pipeline = [ + dict(type='SampleFrames', clip_len=32, frame_interval=2, num_clips=1), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='RandomResizedCrop'), + dict(type='Resize', scale=(224, 224), keep_ratio=False), + dict(type='Flip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs', 'label']) +] +val_pipeline = [ + dict( + type='SampleFrames', + clip_len=32, + frame_interval=2, + num_clips=1, + test_mode=True), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='CenterCrop', crop_size=224), + dict(type='Flip', flip_ratio=0), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs']) +] +test_pipeline = [ + dict( + type='SampleFrames', + clip_len=32, + frame_interval=2, + num_clips=10, + test_mode=True), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='ThreeCrop', crop_size=256), + dict(type='Flip', flip_ratio=0), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs']) +] +data = dict( + videos_per_gpu=8, + workers_per_gpu=4, + train=dict( + type=dataset_type, + ann_file=ann_file_train, + data_prefix=data_root, + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=ann_file_val, + data_prefix=data_root_val, + pipeline=val_pipeline), + test=dict( + type=dataset_type, + ann_file=ann_file_test, + data_prefix=data_root_val, + pipeline=test_pipeline)) +# optimizer +optimizer = dict( + type='SGD', lr=0.1, momentum=0.9, + weight_decay=0.0001) # this lr is used for 8 gpus +optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2)) +# learning policy +lr_config = dict( + policy='CosineAnnealing', + min_lr=0, + warmup='linear', + warmup_by_epoch=True, + warmup_iters=34) +total_epochs = 256 +checkpoint_config = dict(interval=4) +workflow = [('train', 1)] +evaluation = dict( + interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy']) +log_config = dict( + interval=20, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook'), + ]) +dist_params = dict(backend='nccl') +log_level = 'INFO' +work_dir = './work_dirs/slowfast_r152_3d_4x16x1_256e_kinetics400_rgb' +load_from = None +resume_from = None +find_unused_parameters = False diff --git a/configs/recognition/slowfast/slowfast_r152_8x8x1_256e_kinetics400_rgb.py b/configs/recognition/slowfast/slowfast_r152_8x8x1_256e_kinetics400_rgb.py new file mode 100644 index 0000000000..5e60b6a3e2 --- /dev/null +++ b/configs/recognition/slowfast/slowfast_r152_8x8x1_256e_kinetics400_rgb.py @@ -0,0 +1,137 @@ +model = dict( + type='Recognizer3D', + backbone=dict( + type='ResNet3dSlowFast', + pretrained=None, + resample_rate=4, # tau + speed_ratio=4, # alpha + channel_ratio=8, # beta_inv + slow_pathway=dict( + type='resnet3d', + depth=152, + pretrained=None, + lateral=True, + fusion_kernel=7, + conv1_kernel=(1, 7, 7), + dilations=(1, 1, 1, 1), + conv1_stride_t=1, + pool1_stride_t=1, + inflate=(0, 0, 1, 1), + norm_eval=False), + fast_pathway=dict( + type='resnet3d', + depth=50, + pretrained=None, + lateral=False, + base_channels=8, + conv1_kernel=(5, 7, 7), + conv1_stride_t=1, + pool1_stride_t=1, + norm_eval=False)), + cls_head=dict( + type='SlowFastHead', + in_channels=2304, # 2048+256 + num_classes=400, + spatial_type='avg', + dropout_ratio=0.5)) +train_cfg = None +test_cfg = dict(average_clips='prob') +dataset_type = 'RawframeDataset' +data_root = 'data/kinetics400/rawframes_train' +data_root_val = 'data/kinetics400/rawframes_val' +ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes.txt' +ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes.txt' +ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes.txt' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False) +train_pipeline = [ + dict(type='SampleFrames', clip_len=32, frame_interval=2, num_clips=1), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='RandomResizedCrop'), + dict(type='Resize', scale=(224, 224), keep_ratio=False), + dict(type='Flip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs', 'label']) +] +val_pipeline = [ + dict( + type='SampleFrames', + clip_len=32, + frame_interval=2, + num_clips=1, + test_mode=True), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='CenterCrop', crop_size=224), + dict(type='Flip', flip_ratio=0), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs']) +] +test_pipeline = [ + dict( + type='SampleFrames', + clip_len=32, + frame_interval=2, + num_clips=10, + test_mode=True), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='ThreeCrop', crop_size=256), + dict(type='Flip', flip_ratio=0), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs']) +] +data = dict( + videos_per_gpu=8, + workers_per_gpu=4, + train=dict( + type=dataset_type, + ann_file=ann_file_train, + data_prefix=data_root, + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=ann_file_val, + data_prefix=data_root_val, + pipeline=val_pipeline), + test=dict( + type=dataset_type, + ann_file=ann_file_test, + data_prefix=data_root_val, + pipeline=test_pipeline)) +# optimizer +optimizer = dict( + type='SGD', lr=0.1, momentum=0.9, + weight_decay=0.0001) # this lr is used for 8 gpus +optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2)) +# learning policy +lr_config = dict( + policy='CosineAnnealing', + min_lr=0, + warmup='linear', + warmup_by_epoch=True, + warmup_iters=34) +total_epochs = 256 +checkpoint_config = dict(interval=4) +workflow = [('train', 1)] +evaluation = dict( + interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy']) +log_config = dict( + interval=20, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook'), + ]) +dist_params = dict(backend='nccl') +log_level = 'INFO' +work_dir = './work_dirs/slowfast_r152_3d_8x8x1_256e_kinetics400_rgb' +load_from = None +resume_from = None +find_unused_parameters = False From 52a6e51bd1d4ecdb295f3dc31aca86b7e38413ea Mon Sep 17 00:00:00 2001 From: xusu Date: Sun, 7 Feb 2021 18:58:44 +0800 Subject: [PATCH 2/7] Update. --- configs/recognition/slowfast/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/recognition/slowfast/README.md b/configs/recognition/slowfast/README.md index 3ac61de23d..3197eb74f7 100644 --- a/configs/recognition/slowfast/README.md +++ b/configs/recognition/slowfast/README.md @@ -25,9 +25,9 @@ |[slowfast_r50_4x16x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r50_4x16x1_256e_kinetics400_rgb.py) |short-side 320|8x3| ResNet50|None |75.64|92.3|1.6 ((32+4)x10x3 frames)|6203|[ckpt](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_4x16x1_256e_kinetics400_rgb/slowfast_r50_4x16x1_256e_kinetics400_rgb_20200704-bcde7ed7.pth)| [log](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_4x16x1_256e_kinetics400_rgb/20200704_232901.log)| [json](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_4x16x1_256e_kinetics400_rgb/20200704_232901.log.json)| |[slowfast_r50_8x8x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb.py) |short-side 256|8x4| ResNet50 |None |75.61|92.34|x|9062|[ckpt](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_256p_8x8x1_256e_kinetics400_rgb/slowfast_r50_256p_8x8x1_256e_kinetics400_rgb_20200810-863812c2.pth)|[log](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_256p_8x8x1_256e_kinetics400_rgb/20200731_151537.log)|[json](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_256p_8x8x1_256e_kinetics400_rgb/20200731_151537.log.json)| |[slowfast_r50_8x8x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb.py) |short-side 320|8x3| ResNet50 |None|76.94|92.8|1.3 ((32+8)x10x3 frames)|9062| [ckpt](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb/slowfast_r50_8x8x1_256e_kinetics400_rgb_20200716-73547d2b.pth) | [log](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb/20200716_192653.log)| [json](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb/20200716_192653.log.json)| -|[slowfast_r101_4x16x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x1| ResNet101 |None||||| [ckpt]() | [log]()| [json]()| +|[slowfast_r101_4x16x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x1| ResNet101 |None|76.69|93.07||| [ckpt]() | [log]()| [json]()| |[slowfast_r101_8x8x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x1| ResNet101 |None||||| [ckpt]() | [log]()| [json]()| -|[slowfast_r152_4x16x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r152_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x1| ResNet152 |None||||| [ckpt]() | [log]()| [json]()| +|[slowfast_r152_4x16x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r152_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x1| ResNet152 |None|77.13|93.20||| [ckpt]() | [log]()| [json]()| |[slowfast_r152_8x8x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r152_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x1| ResNet152 |None||||| [ckpt]() | [log]()| [json]()| Notes: From cb7676d10e35f9c31493ec8e5f578f97beaac0fe Mon Sep 17 00:00:00 2001 From: xusu Date: Tue, 23 Feb 2021 16:34:36 +0800 Subject: [PATCH 3/7] Update r101 8x8. --- configs/recognition/slowfast/README.md | 7 +- ...lowfast_r101_8x8x1_256e_kinetics400_rgb.py | 2 +- ...t_r101_r50_4x16x1_256e_kinetics400_rgb.py} | 0 ...lowfast_r152_8x8x1_256e_kinetics400_rgb.py | 137 ------------------ ...t_r152_r50_4x16x1_256e_kinetics400_rgb.py} | 0 5 files changed, 4 insertions(+), 142 deletions(-) rename configs/recognition/slowfast/{slowfast_r101_4x16x1_256e_kinetics400_rgb.py => slowfast_r101_r50_4x16x1_256e_kinetics400_rgb.py} (100%) delete mode 100644 configs/recognition/slowfast/slowfast_r152_8x8x1_256e_kinetics400_rgb.py rename configs/recognition/slowfast/{slowfast_r152_4x16x1_256e_kinetics400_rgb.py => slowfast_r152_r50_4x16x1_256e_kinetics400_rgb.py} (100%) diff --git a/configs/recognition/slowfast/README.md b/configs/recognition/slowfast/README.md index 3197eb74f7..ef87a26ec3 100644 --- a/configs/recognition/slowfast/README.md +++ b/configs/recognition/slowfast/README.md @@ -25,10 +25,9 @@ |[slowfast_r50_4x16x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r50_4x16x1_256e_kinetics400_rgb.py) |short-side 320|8x3| ResNet50|None |75.64|92.3|1.6 ((32+4)x10x3 frames)|6203|[ckpt](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_4x16x1_256e_kinetics400_rgb/slowfast_r50_4x16x1_256e_kinetics400_rgb_20200704-bcde7ed7.pth)| [log](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_4x16x1_256e_kinetics400_rgb/20200704_232901.log)| [json](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_4x16x1_256e_kinetics400_rgb/20200704_232901.log.json)| |[slowfast_r50_8x8x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb.py) |short-side 256|8x4| ResNet50 |None |75.61|92.34|x|9062|[ckpt](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_256p_8x8x1_256e_kinetics400_rgb/slowfast_r50_256p_8x8x1_256e_kinetics400_rgb_20200810-863812c2.pth)|[log](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_256p_8x8x1_256e_kinetics400_rgb/20200731_151537.log)|[json](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_256p_8x8x1_256e_kinetics400_rgb/20200731_151537.log.json)| |[slowfast_r50_8x8x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb.py) |short-side 320|8x3| ResNet50 |None|76.94|92.8|1.3 ((32+8)x10x3 frames)|9062| [ckpt](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb/slowfast_r50_8x8x1_256e_kinetics400_rgb_20200716-73547d2b.pth) | [log](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb/20200716_192653.log)| [json](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb/20200716_192653.log.json)| -|[slowfast_r101_4x16x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x1| ResNet101 |None|76.69|93.07||| [ckpt]() | [log]()| [json]()| -|[slowfast_r101_8x8x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x1| ResNet101 |None||||| [ckpt]() | [log]()| [json]()| -|[slowfast_r152_4x16x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r152_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x1| ResNet152 |None|77.13|93.20||| [ckpt]() | [log]()| [json]()| -|[slowfast_r152_8x8x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r152_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x1| ResNet152 |None||||| [ckpt]() | [log]()| [json]()| +|[slowfast_r101_r50_4x16x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x1| ResNet101 + ResNet50 |None|76.69|93.07||16628| [ckpt]() | [log]()| [json]()| +|[slowfast_r101_8x8x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x4| ResNet101 |None|77.90|93.51||25994| [ckpt]() | [log]()| [json]()| +|[slowfast_r152_r50_4x16x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r152_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x1| ResNet152 + ResNet50 |None|77.13|93.20||10077| [ckpt]() | [log]()| [json]()| Notes: 1. The **gpus** indicates the number of gpu we used to get the checkpoint. It is noteworthy that the configs we provide are used for 8 gpus as default. diff --git a/configs/recognition/slowfast/slowfast_r101_8x8x1_256e_kinetics400_rgb.py b/configs/recognition/slowfast/slowfast_r101_8x8x1_256e_kinetics400_rgb.py index add27189ee..e69afafd9e 100644 --- a/configs/recognition/slowfast/slowfast_r101_8x8x1_256e_kinetics400_rgb.py +++ b/configs/recognition/slowfast/slowfast_r101_8x8x1_256e_kinetics400_rgb.py @@ -20,7 +20,7 @@ norm_eval=False), fast_pathway=dict( type='resnet3d', - depth=50, + depth=101, pretrained=None, lateral=False, base_channels=8, diff --git a/configs/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb.py b/configs/recognition/slowfast/slowfast_r101_r50_4x16x1_256e_kinetics400_rgb.py similarity index 100% rename from configs/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb.py rename to configs/recognition/slowfast/slowfast_r101_r50_4x16x1_256e_kinetics400_rgb.py diff --git a/configs/recognition/slowfast/slowfast_r152_8x8x1_256e_kinetics400_rgb.py b/configs/recognition/slowfast/slowfast_r152_8x8x1_256e_kinetics400_rgb.py deleted file mode 100644 index 5e60b6a3e2..0000000000 --- a/configs/recognition/slowfast/slowfast_r152_8x8x1_256e_kinetics400_rgb.py +++ /dev/null @@ -1,137 +0,0 @@ -model = dict( - type='Recognizer3D', - backbone=dict( - type='ResNet3dSlowFast', - pretrained=None, - resample_rate=4, # tau - speed_ratio=4, # alpha - channel_ratio=8, # beta_inv - slow_pathway=dict( - type='resnet3d', - depth=152, - pretrained=None, - lateral=True, - fusion_kernel=7, - conv1_kernel=(1, 7, 7), - dilations=(1, 1, 1, 1), - conv1_stride_t=1, - pool1_stride_t=1, - inflate=(0, 0, 1, 1), - norm_eval=False), - fast_pathway=dict( - type='resnet3d', - depth=50, - pretrained=None, - lateral=False, - base_channels=8, - conv1_kernel=(5, 7, 7), - conv1_stride_t=1, - pool1_stride_t=1, - norm_eval=False)), - cls_head=dict( - type='SlowFastHead', - in_channels=2304, # 2048+256 - num_classes=400, - spatial_type='avg', - dropout_ratio=0.5)) -train_cfg = None -test_cfg = dict(average_clips='prob') -dataset_type = 'RawframeDataset' -data_root = 'data/kinetics400/rawframes_train' -data_root_val = 'data/kinetics400/rawframes_val' -ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes.txt' -ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes.txt' -ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes.txt' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False) -train_pipeline = [ - dict(type='SampleFrames', clip_len=32, frame_interval=2, num_clips=1), - dict(type='RawFrameDecode'), - dict(type='Resize', scale=(-1, 256)), - dict(type='RandomResizedCrop'), - dict(type='Resize', scale=(224, 224), keep_ratio=False), - dict(type='Flip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='FormatShape', input_format='NCTHW'), - dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), - dict(type='ToTensor', keys=['imgs', 'label']) -] -val_pipeline = [ - dict( - type='SampleFrames', - clip_len=32, - frame_interval=2, - num_clips=1, - test_mode=True), - dict(type='RawFrameDecode'), - dict(type='Resize', scale=(-1, 256)), - dict(type='CenterCrop', crop_size=224), - dict(type='Flip', flip_ratio=0), - dict(type='Normalize', **img_norm_cfg), - dict(type='FormatShape', input_format='NCTHW'), - dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), - dict(type='ToTensor', keys=['imgs']) -] -test_pipeline = [ - dict( - type='SampleFrames', - clip_len=32, - frame_interval=2, - num_clips=10, - test_mode=True), - dict(type='RawFrameDecode'), - dict(type='Resize', scale=(-1, 256)), - dict(type='ThreeCrop', crop_size=256), - dict(type='Flip', flip_ratio=0), - dict(type='Normalize', **img_norm_cfg), - dict(type='FormatShape', input_format='NCTHW'), - dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), - dict(type='ToTensor', keys=['imgs']) -] -data = dict( - videos_per_gpu=8, - workers_per_gpu=4, - train=dict( - type=dataset_type, - ann_file=ann_file_train, - data_prefix=data_root, - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=ann_file_val, - data_prefix=data_root_val, - pipeline=val_pipeline), - test=dict( - type=dataset_type, - ann_file=ann_file_test, - data_prefix=data_root_val, - pipeline=test_pipeline)) -# optimizer -optimizer = dict( - type='SGD', lr=0.1, momentum=0.9, - weight_decay=0.0001) # this lr is used for 8 gpus -optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2)) -# learning policy -lr_config = dict( - policy='CosineAnnealing', - min_lr=0, - warmup='linear', - warmup_by_epoch=True, - warmup_iters=34) -total_epochs = 256 -checkpoint_config = dict(interval=4) -workflow = [('train', 1)] -evaluation = dict( - interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy']) -log_config = dict( - interval=20, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook'), - ]) -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/slowfast_r152_3d_8x8x1_256e_kinetics400_rgb' -load_from = None -resume_from = None -find_unused_parameters = False diff --git a/configs/recognition/slowfast/slowfast_r152_4x16x1_256e_kinetics400_rgb.py b/configs/recognition/slowfast/slowfast_r152_r50_4x16x1_256e_kinetics400_rgb.py similarity index 100% rename from configs/recognition/slowfast/slowfast_r152_4x16x1_256e_kinetics400_rgb.py rename to configs/recognition/slowfast/slowfast_r152_r50_4x16x1_256e_kinetics400_rgb.py From 073ea67c03297457a3922556021f5b7335e20427 Mon Sep 17 00:00:00 2001 From: xusu Date: Wed, 24 Feb 2021 14:21:45 +0800 Subject: [PATCH 4/7] Minor. --- configs/recognition/slowfast/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/configs/recognition/slowfast/README.md b/configs/recognition/slowfast/README.md index ef87a26ec3..62f18869f3 100644 --- a/configs/recognition/slowfast/README.md +++ b/configs/recognition/slowfast/README.md @@ -28,6 +28,7 @@ |[slowfast_r101_r50_4x16x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x1| ResNet101 + ResNet50 |None|76.69|93.07||16628| [ckpt]() | [log]()| [json]()| |[slowfast_r101_8x8x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x4| ResNet101 |None|77.90|93.51||25994| [ckpt]() | [log]()| [json]()| |[slowfast_r152_r50_4x16x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r152_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x1| ResNet152 + ResNet50 |None|77.13|93.20||10077| [ckpt]() | [log]()| [json]()| + Notes: 1. The **gpus** indicates the number of gpu we used to get the checkpoint. It is noteworthy that the configs we provide are used for 8 gpus as default. From d7c170f314dd7d06312e342da5a04c9b69b3b867 Mon Sep 17 00:00:00 2001 From: xusu Date: Thu, 25 Feb 2021 12:00:36 +0800 Subject: [PATCH 5/7] Update url. --- configs/recognition/slowfast/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/configs/recognition/slowfast/README.md b/configs/recognition/slowfast/README.md index 62f18869f3..54a81a03f3 100644 --- a/configs/recognition/slowfast/README.md +++ b/configs/recognition/slowfast/README.md @@ -25,9 +25,9 @@ |[slowfast_r50_4x16x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r50_4x16x1_256e_kinetics400_rgb.py) |short-side 320|8x3| ResNet50|None |75.64|92.3|1.6 ((32+4)x10x3 frames)|6203|[ckpt](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_4x16x1_256e_kinetics400_rgb/slowfast_r50_4x16x1_256e_kinetics400_rgb_20200704-bcde7ed7.pth)| [log](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_4x16x1_256e_kinetics400_rgb/20200704_232901.log)| [json](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_4x16x1_256e_kinetics400_rgb/20200704_232901.log.json)| |[slowfast_r50_8x8x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb.py) |short-side 256|8x4| ResNet50 |None |75.61|92.34|x|9062|[ckpt](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_256p_8x8x1_256e_kinetics400_rgb/slowfast_r50_256p_8x8x1_256e_kinetics400_rgb_20200810-863812c2.pth)|[log](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_256p_8x8x1_256e_kinetics400_rgb/20200731_151537.log)|[json](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_256p_8x8x1_256e_kinetics400_rgb/20200731_151537.log.json)| |[slowfast_r50_8x8x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb.py) |short-side 320|8x3| ResNet50 |None|76.94|92.8|1.3 ((32+8)x10x3 frames)|9062| [ckpt](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb/slowfast_r50_8x8x1_256e_kinetics400_rgb_20200716-73547d2b.pth) | [log](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb/20200716_192653.log)| [json](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb/20200716_192653.log.json)| -|[slowfast_r101_r50_4x16x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x1| ResNet101 + ResNet50 |None|76.69|93.07||16628| [ckpt]() | [log]()| [json]()| -|[slowfast_r101_8x8x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x4| ResNet101 |None|77.90|93.51||25994| [ckpt]() | [log]()| [json]()| -|[slowfast_r152_r50_4x16x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r152_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x1| ResNet152 + ResNet50 |None|77.13|93.20||10077| [ckpt]() | [log]()| [json]()| +|[slowfast_r101_r50_4x16x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x1| ResNet101 + ResNet50 |None|76.69|93.07||16628| [ckpt](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb/slowfast_r101_4x16x1_256e_kinetics400_rgb_20210218-d8b58813.pth) | [log](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb/20210118_133528.log)| [json](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb/20210118_133528.log.json)| +|[slowfast_r101_8x8x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x4| ResNet101 |None|77.90|93.51||25994| [ckpt](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r101_8x8x1_256e_kinetics400_rgb/slowfast_r101_8x8x1_256e_kinetics400_rgb_20210218-0dd54025.pth) | [log](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r101_8x8x1_256e_kinetics400_rgb/20210218_121513.log)| [json](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r101_8x8x1_256e_kinetics400_rgb/20210218_121513.log.json)| +|[slowfast_r152_r50_4x16x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r152_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x1| ResNet152 + ResNet50 |None|77.13|93.20||10077| [ckpt](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r152_4x16x1_256e_kinetics400_rgb/slowfast_r152_4x16x1_256e_kinetics400_rgb_20210122-bdeb6b87.pth) | [log](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r152_4x16x1_256e_kinetics400_rgb/20210122_131321.log)| [json](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r152_4x16x1_256e_kinetics400_rgb/20210122_131321.log.json)| Notes: From fa439aeed43f69a5738e9b59d26137bee2d411d9 Mon Sep 17 00:00:00 2001 From: xusu Date: Thu, 25 Feb 2021 12:03:01 +0800 Subject: [PATCH 6/7] Update changelog. --- docs/changelog.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/changelog.md b/docs/changelog.md index 899a5e6455..a594718e52 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -27,6 +27,7 @@ **ModelZoo** - Add TSM-MobileNetV2 for Kinetics400 ([#415](https://github.com/open-mmlab/mmaction2/pull/415)) +- Add deeper SlowFast models ([#605](https://github.com/open-mmlab/mmaction2/pull/605)) ### 0.11.0 (31/01/2021) From 27f5eb310e6fcf31d47aacb90019562079c856f1 Mon Sep 17 00:00:00 2001 From: xusu Date: Thu, 25 Feb 2021 18:51:16 +0800 Subject: [PATCH 7/7] Fix typos and workdir. --- configs/recognition/slowfast/README.md | 6 +++--- .../slowfast/slowfast_r101_8x8x1_256e_kinetics400_rgb.py | 2 +- .../slowfast_r101_r50_4x16x1_256e_kinetics400_rgb.py | 2 +- .../slowfast_r152_r50_4x16x1_256e_kinetics400_rgb.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/configs/recognition/slowfast/README.md b/configs/recognition/slowfast/README.md index 54a81a03f3..ba00f13269 100644 --- a/configs/recognition/slowfast/README.md +++ b/configs/recognition/slowfast/README.md @@ -25,9 +25,9 @@ |[slowfast_r50_4x16x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r50_4x16x1_256e_kinetics400_rgb.py) |short-side 320|8x3| ResNet50|None |75.64|92.3|1.6 ((32+4)x10x3 frames)|6203|[ckpt](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_4x16x1_256e_kinetics400_rgb/slowfast_r50_4x16x1_256e_kinetics400_rgb_20200704-bcde7ed7.pth)| [log](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_4x16x1_256e_kinetics400_rgb/20200704_232901.log)| [json](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_4x16x1_256e_kinetics400_rgb/20200704_232901.log.json)| |[slowfast_r50_8x8x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb.py) |short-side 256|8x4| ResNet50 |None |75.61|92.34|x|9062|[ckpt](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_256p_8x8x1_256e_kinetics400_rgb/slowfast_r50_256p_8x8x1_256e_kinetics400_rgb_20200810-863812c2.pth)|[log](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_256p_8x8x1_256e_kinetics400_rgb/20200731_151537.log)|[json](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_256p_8x8x1_256e_kinetics400_rgb/20200731_151537.log.json)| |[slowfast_r50_8x8x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb.py) |short-side 320|8x3| ResNet50 |None|76.94|92.8|1.3 ((32+8)x10x3 frames)|9062| [ckpt](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb/slowfast_r50_8x8x1_256e_kinetics400_rgb_20200716-73547d2b.pth) | [log](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb/20200716_192653.log)| [json](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb/20200716_192653.log.json)| -|[slowfast_r101_r50_4x16x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x1| ResNet101 + ResNet50 |None|76.69|93.07||16628| [ckpt](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb/slowfast_r101_4x16x1_256e_kinetics400_rgb_20210218-d8b58813.pth) | [log](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb/20210118_133528.log)| [json](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb/20210118_133528.log.json)| -|[slowfast_r101_8x8x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x4| ResNet101 |None|77.90|93.51||25994| [ckpt](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r101_8x8x1_256e_kinetics400_rgb/slowfast_r101_8x8x1_256e_kinetics400_rgb_20210218-0dd54025.pth) | [log](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r101_8x8x1_256e_kinetics400_rgb/20210218_121513.log)| [json](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r101_8x8x1_256e_kinetics400_rgb/20210218_121513.log.json)| -|[slowfast_r152_r50_4x16x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r152_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x1| ResNet152 + ResNet50 |None|77.13|93.20||10077| [ckpt](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r152_4x16x1_256e_kinetics400_rgb/slowfast_r152_4x16x1_256e_kinetics400_rgb_20210122-bdeb6b87.pth) | [log](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r152_4x16x1_256e_kinetics400_rgb/20210122_131321.log)| [json](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r152_4x16x1_256e_kinetics400_rgb/20210122_131321.log.json)| +|[slowfast_r101_r50_4x16x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r101_r50_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x1| ResNet101 + ResNet50 |None|76.69|93.07||16628| [ckpt](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb/slowfast_r101_4x16x1_256e_kinetics400_rgb_20210218-d8b58813.pth) | [log](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb/20210118_133528.log)| [json](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb/20210118_133528.log.json)| +|[slowfast_r101_8x8x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r101_8x8x1_256e_kinetics400_rgb.py) |short-side 256|8x4| ResNet101 |None|77.90|93.51||25994| [ckpt](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r101_8x8x1_256e_kinetics400_rgb/slowfast_r101_8x8x1_256e_kinetics400_rgb_20210218-0dd54025.pth) | [log](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r101_8x8x1_256e_kinetics400_rgb/20210218_121513.log)| [json](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r101_8x8x1_256e_kinetics400_rgb/20210218_121513.log.json)| +|[slowfast_r152_r50_4x16x1_256e_kinetics400_rgb](/configs/recognition/slowfast/slowfast_r152_r50_4x16x1_256e_kinetics400_rgb.py) |short-side 256|8x1| ResNet152 + ResNet50 |None|77.13|93.20||10077| [ckpt](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r152_4x16x1_256e_kinetics400_rgb/slowfast_r152_4x16x1_256e_kinetics400_rgb_20210122-bdeb6b87.pth) | [log](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r152_4x16x1_256e_kinetics400_rgb/20210122_131321.log)| [json](https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r152_4x16x1_256e_kinetics400_rgb/20210122_131321.log.json)| Notes: diff --git a/configs/recognition/slowfast/slowfast_r101_8x8x1_256e_kinetics400_rgb.py b/configs/recognition/slowfast/slowfast_r101_8x8x1_256e_kinetics400_rgb.py index e69afafd9e..79259e419c 100644 --- a/configs/recognition/slowfast/slowfast_r101_8x8x1_256e_kinetics400_rgb.py +++ b/configs/recognition/slowfast/slowfast_r101_8x8x1_256e_kinetics400_rgb.py @@ -131,7 +131,7 @@ ]) dist_params = dict(backend='nccl') log_level = 'INFO' -work_dir = './work_dirs/slowfast_r101_3d_8x8x1_256e_kinetics400_rgb' +work_dir = './work_dirs/slowfast_r101_8x8x1_256e_kinetics400_rgb' load_from = None resume_from = None find_unused_parameters = False diff --git a/configs/recognition/slowfast/slowfast_r101_r50_4x16x1_256e_kinetics400_rgb.py b/configs/recognition/slowfast/slowfast_r101_r50_4x16x1_256e_kinetics400_rgb.py index ef70bbaffe..a74dc94f26 100644 --- a/configs/recognition/slowfast/slowfast_r101_r50_4x16x1_256e_kinetics400_rgb.py +++ b/configs/recognition/slowfast/slowfast_r101_r50_4x16x1_256e_kinetics400_rgb.py @@ -130,7 +130,7 @@ ]) dist_params = dict(backend='nccl') log_level = 'INFO' -work_dir = './work_dirs/slowfast_r101_3d_4x16x1_256e_kinetics400_rgb' +work_dir = './work_dirs/slowfast_r101_r50_4x16x1_256e_kinetics400_rgb' load_from = None resume_from = None find_unused_parameters = False diff --git a/configs/recognition/slowfast/slowfast_r152_r50_4x16x1_256e_kinetics400_rgb.py b/configs/recognition/slowfast/slowfast_r152_r50_4x16x1_256e_kinetics400_rgb.py index 9e9167c2a6..a581bae143 100644 --- a/configs/recognition/slowfast/slowfast_r152_r50_4x16x1_256e_kinetics400_rgb.py +++ b/configs/recognition/slowfast/slowfast_r152_r50_4x16x1_256e_kinetics400_rgb.py @@ -130,7 +130,7 @@ ]) dist_params = dict(backend='nccl') log_level = 'INFO' -work_dir = './work_dirs/slowfast_r152_3d_4x16x1_256e_kinetics400_rgb' +work_dir = './work_dirs/slowfast_r152_r50_4x16x1_256e_kinetics400_rgb' load_from = None resume_from = None find_unused_parameters = False