-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
083ea33
commit c4a6b39
Showing
8 changed files
with
515 additions
and
13 deletions.
There are no files selected for viewing
123 changes: 123 additions & 0 deletions
123
configs/recognition/csn/ircsn_ig65m_pretrained_bnfrozen_r152_32x2x1_58e_kinetics400_rgb.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
# model settings
# 3D recognizer: a ResNet3dCSN backbone (depth 152, 'ir' bottleneck mode)
# followed by an I3DHead classifier over 400 classes.
model = dict(
    type='Recognizer3D',
    backbone=dict(
        type='ResNet3dCSN',
        # The weights below are loaded from a URL rather than inflated from
        # a 2D checkpoint (pretrained2d=False).
        pretrained2d=False,
        pretrained=  # noqa: E251
        'https://openmmlab.oss-accelerate.aliyuncs.com/mmaction/recognition/csn/ircsn_from_scratch_r152_ig65m_20200807-771c4135.pth',  # noqa: E501
        depth=152,
        with_pool2=False,
        bottleneck_mode='ir',
        # "bnfrozen" variant of the config: both norm flags are switched on.
        # NOTE(review): exact semantics are defined by ResNet3dCSN -- confirm
        # there before changing either flag.
        norm_eval=True,
        bn_frozen=True,
        zero_init_residual=False),
    cls_head=dict(
        type='I3DHead',
        num_classes=400,
        in_channels=2048,
        spatial_type='avg',
        dropout_ratio=0.5,
        init_std=0.01))
# model training and testing settings
train_cfg = None
test_cfg = dict(average_clips=None)
# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/kinetics400/rawframes_train'
data_root_val = 'data/kinetics400/rawframes_val'
ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes.txt'
ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
# Testing currently reuses the validation list.
ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
# Per-channel normalisation statistics shared by all three pipelines.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
# Training: one 32-frame clip (frame interval 2) with random resized crop
# and random horizontal flip, resized to 224x224.
train_pipeline = [
    dict(type='SampleFrames', clip_len=32, frame_interval=2, num_clips=1),
    dict(type='FrameSelector'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
# Validation: one clip in test mode, 224 center crop, no flipping.
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=32,
        frame_interval=2,
        num_clips=1,
        test_mode=True),
    dict(type='FrameSelector'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Flip', flip_ratio=0),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
# Testing: ten clips per video in test mode, each with a 256 three-crop.
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=32,
        frame_interval=2,
        num_clips=10,
        test_mode=True),
    dict(type='FrameSelector'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Flip', flip_ratio=0),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=3,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        # Use the dedicated test annotation variable so that overriding
        # ann_file_test actually takes effect (same path as val today).
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
# optimizer
# NOTE(review): the original note reads "0.0005 for 32g" -- presumably this
# learning rate was tuned for a 32-GPU run; rescale it for other setups.
optimizer = dict(
    type='SGD', lr=0.0005, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
# learning policy
# Step schedule with milestones at epochs 32 and 48; linear warmup whose
# length (warmup_iters=16) is counted in epochs because warmup_by_epoch=True.
lr_config = dict(
    policy='step',
    step=[32, 48],
    warmup='linear',
    warmup_ratio=0.1,
    warmup_by_epoch=True,
    warmup_iters=16)
total_epochs = 58
checkpoint_config = dict(interval=2)
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'], topk=(1, 5))
log_config = dict(
    interval=20,
    hooks=[dict(type='TextLoggerHook'),
           dict(type='TensorboardLoggerHook')])
# runtime settings
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/ircsn_ig65m_pretrained_bnfrozen_r152_32x2x1_58e_kinetics400_rgb'  # noqa: E501
load_from = None
resume_from = None
workflow = [('train', 1)]
find_unused_parameters = True
121 changes: 121 additions & 0 deletions
121
configs/recognition/csn/ircsn_ig65m_pretrained_r152_32x2x1_58e_kinetics400_rgb.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
# model settings
# 3D recognizer: a ResNet3dCSN backbone (depth 152, 'ir' bottleneck mode)
# followed by an I3DHead classifier over 400 classes.
model = dict(
    type='Recognizer3D',
    backbone=dict(
        type='ResNet3dCSN',
        # The weights below are loaded from a URL rather than inflated from
        # a 2D checkpoint (pretrained2d=False).
        pretrained2d=False,
        pretrained=  # noqa: E251
        'https://openmmlab.oss-accelerate.aliyuncs.com/mmaction/recognition/csn/ircsn_from_scratch_r152_ig65m_20200807-771c4135.pth',  # noqa: E501
        depth=152,
        with_pool2=False,
        bottleneck_mode='ir',
        # This variant leaves norm_eval off and does not set bn_frozen.
        norm_eval=False,
        zero_init_residual=False),
    cls_head=dict(
        type='I3DHead',
        num_classes=400,
        in_channels=2048,
        spatial_type='avg',
        dropout_ratio=0.5,
        init_std=0.01))
# model training and testing settings
train_cfg = None
test_cfg = dict(average_clips=None)
# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/kinetics400/rawframes_train'
data_root_val = 'data/kinetics400/rawframes_val'
ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes.txt'
ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
# Testing currently reuses the validation list.
ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
# Per-channel normalisation statistics shared by all three pipelines.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
# Training: one 32-frame clip (frame interval 2) with random resized crop
# and random horizontal flip, resized to 224x224.
train_pipeline = [
    dict(type='SampleFrames', clip_len=32, frame_interval=2, num_clips=1),
    dict(type='FrameSelector'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
# Validation: one clip in test mode, 224 center crop, no flipping.
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=32,
        frame_interval=2,
        num_clips=1,
        test_mode=True),
    dict(type='FrameSelector'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Flip', flip_ratio=0),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
# Testing: ten clips per video in test mode, each with a 256 three-crop.
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=32,
        frame_interval=2,
        num_clips=10,
        test_mode=True),
    dict(type='FrameSelector'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Flip', flip_ratio=0),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=3,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        # Use the dedicated test annotation variable so that overriding
        # ann_file_test actually takes effect (same path as val today).
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
# optimizer
# NOTE(review): the original note reads "0.0005 for 32g" -- presumably this
# learning rate was tuned for a 32-GPU run; rescale it for other setups.
optimizer = dict(
    type='SGD', lr=0.0005, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
# learning policy
# Step schedule with milestones at epochs 32 and 48; linear warmup whose
# length (warmup_iters=16) is counted in epochs because warmup_by_epoch=True.
lr_config = dict(
    policy='step',
    step=[32, 48],
    warmup='linear',
    warmup_ratio=0.1,
    warmup_by_epoch=True,
    warmup_iters=16)
total_epochs = 58
checkpoint_config = dict(interval=2)
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'], topk=(1, 5))
log_config = dict(
    interval=20,
    hooks=[dict(type='TextLoggerHook'),
           dict(type='TensorboardLoggerHook')])
# runtime settings
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/ircsn_ig65m_pretrained_r152_32x2x1_58e_kinetics400_rgb'
load_from = None
resume_from = None
workflow = [('train', 1)]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,12 @@ | ||
from .resnet import ResNet
from .resnet2plus1d import ResNet2Plus1d
from .resnet3d import ResNet3d
from .resnet3d_csn import ResNet3dCSN
from .resnet3d_slowfast import ResNet3dSlowFast
from .resnet3d_slowonly import ResNet3dSlowOnly
from .resnet_tsm import ResNetTSM

# Public backbone classes re-exported by this package. The stale pre-change
# entry line is dropped so that adjacent string literals are not implicitly
# concatenated into a bogus export name.
__all__ = [
    'ResNet', 'ResNet3d', 'ResNetTSM', 'ResNet2Plus1d', 'ResNet3dSlowFast',
    'ResNet3dSlowOnly', 'ResNet3dCSN'
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.