diff --git a/configs/detection/_base_/models/slowonly_r50.py b/configs/detection/_base_/models/slowonly_r50.py
index b21d631ed5..9db201bdb9 100644
--- a/configs/detection/_base_/models/slowonly_r50.py
+++ b/configs/detection/_base_/models/slowonly_r50.py
@@ -4,7 +4,10 @@
backbone=dict(
type='ResNet3dSlowOnly',
depth=50,
- pretrained=None,
+ pretrained=(
+ 'https://download.openmmlab.com/mmaction/recognition/slowonly/'
+ 'slowonly_r50_4x16x1_256e_kinetics400_rgb/'
+ 'slowonly_r50_4x16x1_256e_kinetics400_rgb_20200704-a69556c6.pth'),
pretrained2d=False,
lateral=False,
num_stages=4,
diff --git a/configs/detection/acrn/README.md b/configs/detection/acrn/README.md
index 9ff2156507..a1016ba3d9 100644
--- a/configs/detection/acrn/README.md
+++ b/configs/detection/acrn/README.md
@@ -20,25 +20,23 @@ Current state-of-the-art approaches for spatio-temporal action localization rely
### AVA2.1
-| Model | Modality | Pretrained | Backbone | Input | gpus | mAP | log | ckpt |
-| :-------------------------------------------------------------------------------: | :------: | :----------: | :------: | :---: | :--: | :---: | :------------------------------------: | :-------------------------------------: |
-| [slowfast_acrn_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb8_ava_rgb](/configs/detection/acrn/slowfast_acrn_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb8_ava_rgb.py) | RGB | Kinetics-400 | ResNet50 | 32x2 | 8 | 27.58 | [log](https://download.openmmlab.com/) | [ckpt](https://download.openmmlab.com/) |
+| frame sampling strategy | resolution | gpus | backbone | pretrain | mAP | gpu_mem(M) | config | ckpt | log |
+| :---------------------: | :--------: | :--: | :---------------: | :----------: | :---: | :--------: | :---------------------------------------: | :-------------------------------------: | :-------------------------------------: |
+| 8x8x1 | raw | 8 | SlowFast ResNet50 | Kinetics-400 | 27.58 | 15263 | [config](/configs/detection/acrn/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava21-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/detection/acrn/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava21-rgb/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava21-rgb_20220906-0dae1a90.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/detection/acrn/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava21-rgb/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava21-rgb.log) |
### AVA2.2
-| Model | Modality | Pretrained | Backbone | Input | gpus | mAP | log | ckpt |
-| :-------------------------------------------------------------------------------: | :------: | :----------: | :------: | :---: | :--: | :---: | :------------------------------------: | :-------------------------------------: |
-| [slowfast_acrn_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb8_ava22_rgb](/configs/detection/acrn/slowfast_acrn_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb8_ava22_rgb.py) | RGB | Kinetics-400 | ResNet50 | 32x2 | 8 | 27.63 | [log](https://download.openmmlab.com/) | [ckpt](https://download.openmmlab.com/) |
+| frame sampling strategy | resolution | gpus | backbone | pretrain | mAP | gpu_mem(M) | config | ckpt | log |
+| :---------------------: | :--------: | :--: | :---------------: | :----------: | :---: | :--------: | :---------------------------------------: | :-------------------------------------: | :-------------------------------------: |
+| 8x8x1 | raw | 8 | SlowFast ResNet50 | Kinetics-400 | 27.63 | 15263 | [config](/configs/detection/acrn/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava22-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/detection/acrn/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava22-rgb/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava22-rgb_20220906-0dae1a90.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/detection/acrn/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava22-rgb/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava22-rgb.log) |
-:::{note}
+Note:
1. The **gpus** indicates the number of gpu we used to get the checkpoint.
According to the [Linear Scaling Rule](https://arxiv.org/abs/1706.02677), you may set the learning rate proportional to the batch size if you use different GPUs or videos per GPU,
e.g., lr=0.01 for 4 GPUs x 2 video/gpu and lr=0.08 for 16 GPUs x 4 video/gpu.
-:::
-
-For more details on data preparation, you can refer to AVA in [Data Preparation](/docs/data_preparation.md).
+For more details on data preparation, you can refer to [AVA Data Preparation](/tools/data/ava/README.md).
## Train
@@ -51,11 +49,11 @@ python tools/train.py ${CONFIG_FILE} [optional arguments]
Example: train ACRN with SlowFast backbone on AVA in a deterministic option.
```shell
-python tools/train.py configs/detection/acrn/slowfast_acrn_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb8_ava_rgb.py \
+python tools/train.py configs/detection/acrn/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava21-rgb.py \
--cfg-options randomness.seed=0 randomness.deterministic=True
```
-For more details and optional arguments infos, you can refer to **Training setting** part in [getting_started](/docs/getting_started.md#training-setting).
+For more details and optional arguments infos, you can refer to the **Training** part in the [Training and Test Tutorial](/docs/en/user_guides/4_train_test.md).
## Test
@@ -65,13 +63,14 @@ You can use the following command to test a model.
python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
```
-Example: test ACRN with SlowFast backbone.
+Example: test ACRN with SlowFast backbone on AVA and dump the result to a pkl file.
```shell
-python tools/test.py configs/detection/acrn/slowfast_acrn_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb8_ava_rgb.py checkpoints/SOME_CHECKPOINT.pth
+python tools/test.py configs/detection/acrn/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava21-rgb.py \
+ checkpoints/SOME_CHECKPOINT.pth --dump result.pkl
```
-For more details and optional arguments infos, you can refer to **Test a dataset** part in [getting_started](/docs/getting_started.md#test-a-dataset) .
+For more details and optional arguments infos, you can refer to the **Test** part in the [Training and Test Tutorial](/docs/en/user_guides/4_train_test.md).
## Citation
diff --git a/configs/detection/acrn/README_zh-CN.md b/configs/detection/acrn/README_zh-CN.md
deleted file mode 100644
index a8b8794ea1..0000000000
--- a/configs/detection/acrn/README_zh-CN.md
+++ /dev/null
@@ -1,81 +0,0 @@
-# ACRN
-
-## 简介
-
-
-
-```BibTeX
-@inproceedings{gu2018ava,
- title={Ava: A video dataset of spatio-temporally localized atomic visual actions},
- author={Gu, Chunhui and Sun, Chen and Ross, David A and Vondrick, Carl and Pantofaru, Caroline and Li, Yeqing and Vijayanarasimhan, Sudheendra and Toderici, George and Ricco, Susanna and Sukthankar, Rahul and others},
- booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
- pages={6047--6056},
- year={2018}
-}
-```
-
-
-
-```BibTeX
-@inproceedings{sun2018actor,
- title={Actor-centric relation network},
- author={Sun, Chen and Shrivastava, Abhinav and Vondrick, Carl and Murphy, Kevin and Sukthankar, Rahul and Schmid, Cordelia},
- booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
- pages={318--334},
- year={2018}
-}
-```
-
-## 模型库
-
-### AVA2.1
-
-| 配置文件 | 模态 | 预训练 | 主干网络 | 输入 | GPU 数量 | mAP | log | json | ckpt |
-| :--------------------------------------: | :--: | :----------: | :------: | :--: | :------: | :--: | :-------------------------------------: | :--------------------------------------: | :---------------------------------------: |
-| [slowfast_acrn_kinetics_pretrained_r50_8x8x1_cosine_10e_ava_rgb](/configs/detection/acrn/slowfast_acrn_kinetics_pretrained_r50_8x8x1_cosine_10e_ava_rgb.py) | RGB | Kinetics-400 | ResNet50 | 32x2 | 8 | 27.1 | [log](https://download.openmmlab.com/mmaction/detection/acrn/slowfast_acrn_kinetics_pretrained_r50_8x8x1_cosine_10e_ava_rgb/slowfast_acrn_kinetics_pretrained_r50_8x8x1_cosine_10e_ava_rgb.log) | [json](https://download.openmmlab.com/mmaction/detection/acrn/slowfast_acrn_kinetics_pretrained_r50_8x8x1_cosine_10e_ava_rgb/slowfast_acrn_kinetics_pretrained_r50_8x8x1_cosine_10e_ava_rgb.json) | [ckpt](https://download.openmmlab.com/mmaction/detection/acrn/slowfast_acrn_kinetics_pretrained_r50_8x8x1_cosine_10e_ava_rgb/slowfast_acrn_kinetics_pretrained_r50_8x8x1_cosine_10e_ava_rgb-49b07bf2.pth) |
-
-### AVA2.2
-
-| 配置文件 | 模态 | 预训练 | 主干网络 | 输入 | GPU 数量 | mAP | log | json | ckpt |
-| :--------------------------------------: | :--: | :----------: | :------: | :--: | :------: | :--: | :-------------------------------------: | :--------------------------------------: | :---------------------------------------: |
-| [slowfast_acrn_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb](/configs/detection/acrn/slowfast_acrn_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.py) | RGB | Kinetics-400 | ResNet50 | 32x2 | 8 | 27.8 | [log](https://download.openmmlab.com/mmaction/detection/acrn/slowfast_acrn_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb/slowfast_acrn_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.log) | [json](https://download.openmmlab.com/mmaction/detection/acrn/slowfast_acrn_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb/slowfast_acrn_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.json) | [ckpt](https://download.openmmlab.com/mmaction/detection/acrn/slowfast_acrn_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb/slowfast_acrn_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb-2be32625.pth) |
-
-- 注:
-
-1. 这里的 **GPU 数量** 指的是得到模型权重文件对应的 GPU 个数。默认地,MMAction2 所提供的配置文件对应使用 8 块 GPU 进行训练的情况。
- 依据 [线性缩放规则](https://arxiv.org/abs/1706.02677),当用户使用不同数量的 GPU 或者每块 GPU 处理不同视频个数时,需要根据批大小等比例地调节学习率。
- 如,lr=0.01 对应 4 GPUs x 2 video/gpu,以及 lr=0.08 对应 16 GPUs x 4 video/gpu。
-
-对于数据集准备的细节,用户可参考 [数据准备](/docs_zh_CN/data_preparation.md)。
-
-## 如何训练
-
-用户可以使用以下指令进行模型训练。
-
-```shell
-python tools/train.py ${CONFIG_FILE} [optional arguments]
-```
-
-例如:在 AVA 数据集上训练 ACRN 辅以 SlowFast 主干网络,并定期验证。
-
-```shell
-python tools/train.py configs/detection/acrn/slowfast_acrn_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.py --validate
-```
-
-更多训练细节,可参考 [基础教程](/docs_zh_CN/getting_started.md#训练配置) 中的 **训练配置** 部分。
-
-## 如何测试
-
-用户可以使用以下指令进行模型测试。
-
-```shell
-python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
-```
-
-例如:在 AVA 上测试 ACRN 辅以 SlowFast 主干网络,并将结果存为 csv 文件。
-
-```shell
-python tools/test.py configs/detection/acrn/slowfast_acrn_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.py checkpoints/SOME_CHECKPOINT.pth --eval mAP --out results.csv
-```
-
-更多测试细节,可参考 [基础教程](/docs_zh_CN/getting_started.md#测试某个数据集) 中的 **测试某个数据集** 部分。
diff --git a/configs/detection/acrn/metafile.yml b/configs/detection/acrn/metafile.yml
index 27c4d2d761..969c8fdbf8 100644
--- a/configs/detection/acrn/metafile.yml
+++ b/configs/detection/acrn/metafile.yml
@@ -6,8 +6,8 @@ Collections:
Title: "Actor-Centric Relation Network"
Models:
- - Name: slowfast_acrn_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb8_ava_rgb
- Config: configs/detection/ava/slowfast_acrn_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb8_ava_rgb.py
+ - Name: slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava21-rgb
+ Config: configs/detection/acrn/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava21-rgb.py
In Collection: ACRN
Metadata:
Architecture: ResNet50
@@ -23,9 +23,11 @@ Models:
Task: Action Detection
Metrics:
mAP: 27.58
+ Training Log: https://download.openmmlab.com/mmaction/v1.0/detection/acrn/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava21-rgb/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava21-rgb.log
+ Weights: https://download.openmmlab.com/mmaction/v1.0/detection/acrn/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava21-rgb/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava21-rgb_20220906-0dae1a90.pth
- - Name: slowfast_acrn_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb8_ava22_rgb
- Config: configs/detection/ava/slowfast_acrn_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb8_ava22_rgb.py
+ - Name: slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava22-rgb
+ Config: configs/detection/acrn/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava22-rgb.py
In Collection: ACRN
Metadata:
Architecture: ResNet50
@@ -41,3 +43,5 @@ Models:
Task: Action Detection
Metrics:
mAP: 27.63
+ Training Log: https://download.openmmlab.com/mmaction/v1.0/detection/acrn/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava22-rgb/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava22-rgb.log
+ Weights: https://download.openmmlab.com/mmaction/v1.0/detection/acrn/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava22-rgb/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava22-rgb_20220906-0dae1a90.pth
diff --git a/configs/detection/acrn/slowfast_acrn_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb8_ava_rgb.py b/configs/detection/acrn/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava21-rgb.py
similarity index 94%
rename from configs/detection/acrn/slowfast_acrn_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb8_ava_rgb.py
rename to configs/detection/acrn/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava21-rgb.py
index 547a719c9c..641364bcce 100644
--- a/configs/detection/acrn/slowfast_acrn_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb8_ava_rgb.py
+++ b/configs/detection/acrn/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava21-rgb.py
@@ -7,7 +7,10 @@
_delete_=True,
type='ResNet3dSlowFast',
_scope_='mmaction',
- pretrained=None,
+ pretrained=(
+ 'https://download.openmmlab.com/mmaction/recognition/slowfast/'
+ 'slowfast_r50_8x8x1_256e_kinetics400_rgb/'
+ 'slowfast_r50_8x8x1_256e_kinetics400_rgb_20200716-73547d2b.pth'),
resample_rate=4,
speed_ratio=4,
channel_ratio=8,
@@ -134,7 +137,3 @@
optim_wrapper = dict(
optimizer=dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.00001),
clip_grad=dict(max_norm=40, norm_type=2))
-
-load_from = ('https://download.openmmlab.com/mmaction/recognition/slowfast/'
- 'slowfast_r50_8x8x1_256e_kinetics400_rgb/'
- 'slowfast_r50_8x8x1_256e_kinetics400_rgb_20200716-73547d2b.pth')
diff --git a/configs/detection/acrn/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava22-rgb.py b/configs/detection/acrn/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava22-rgb.py
new file mode 100644
index 0000000000..02992c654a
--- /dev/null
+++ b/configs/detection/acrn/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava22-rgb.py
@@ -0,0 +1,73 @@
+_base_ = [('slowfast-acrn_kinetics400-pretrained-r50'
+ '_8xb8-8x8x1-cosine-10e_ava21-rgb.py')]
+
+dataset_type = 'AVADataset'
+data_root = 'data/ava/rawframes'
+anno_root = 'data/ava/annotations'
+
+ann_file_train = f'{anno_root}/ava_train_v2.2.csv'
+ann_file_val = f'{anno_root}/ava_val_v2.2.csv'
+
+exclude_file_train = f'{anno_root}/ava_train_excluded_timestamps_v2.2.csv'
+exclude_file_val = f'{anno_root}/ava_val_excluded_timestamps_v2.2.csv'
+
+label_file = f'{anno_root}/ava_action_list_v2.2_for_activitynet_2019.pbtxt'
+
+proposal_file_train = (f'{anno_root}/ava_dense_proposals_train.FAIR.'
+ 'recall_93.9.pkl')
+proposal_file_val = f'{anno_root}/ava_dense_proposals_val.FAIR.recall_93.9.pkl'
+
+train_pipeline = [
+ dict(type='SampleAVAFrames', clip_len=32, frame_interval=2),
+ dict(type='RawFrameDecode'),
+ dict(type='RandomRescale', scale_range=(256, 320)),
+ dict(type='RandomCrop', size=256),
+ dict(type='Flip', flip_ratio=0.5),
+ dict(type='FormatShape', input_format='NCTHW', collapse=True),
+ dict(type='PackActionInputs')
+]
+# The testing is w/o. any cropping / flipping
+val_pipeline = [
+ dict(
+ type='SampleAVAFrames', clip_len=32, frame_interval=2, test_mode=True),
+ dict(type='RawFrameDecode'),
+ dict(type='Resize', scale=(-1, 256)),
+ dict(type='FormatShape', input_format='NCTHW', collapse=True),
+ dict(type='PackActionInputs')
+]
+
+train_dataloader = dict(
+ batch_size=8,
+ num_workers=8,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ ann_file=ann_file_train,
+ exclude_file=exclude_file_train,
+ pipeline=train_pipeline,
+ label_file=label_file,
+ proposal_file=proposal_file_train,
+ data_prefix=dict(img=data_root)))
+val_dataloader = dict(
+ batch_size=1,
+ num_workers=8,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=False),
+ dataset=dict(
+ type=dataset_type,
+ ann_file=ann_file_val,
+ exclude_file=exclude_file_val,
+ pipeline=val_pipeline,
+ label_file=label_file,
+ proposal_file=proposal_file_val,
+ data_prefix=dict(img=data_root),
+ test_mode=True))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+ type='AVAMetric',
+ ann_file=ann_file_val,
+ label_file=label_file,
+ exclude_file=exclude_file_val)
+test_evaluator = val_evaluator
diff --git a/configs/detection/acrn/slowfast_acrn_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb8_ava22_rgb.py b/configs/detection/acrn/slowfast_acrn_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb8_ava22_rgb.py
deleted file mode 100644
index 7c1424c73e..0000000000
--- a/configs/detection/acrn/slowfast_acrn_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb8_ava22_rgb.py
+++ /dev/null
@@ -1,140 +0,0 @@
-_base_ = [
- '../../_base_/default_runtime.py', '../_base_/models/slowonly_r50.py'
-]
-
-model = dict(
- backbone=dict(
- _delete_=True,
- type='ResNet3dSlowFast',
- _scope_='mmaction',
- pretrained=None,
- resample_rate=4,
- speed_ratio=4,
- channel_ratio=8,
- slow_pathway=dict(
- type='resnet3d',
- depth=50,
- pretrained=None,
- lateral=True,
- fusion_kernel=7,
- conv1_kernel=(1, 7, 7),
- dilations=(1, 1, 1, 1),
- conv1_stride_t=1,
- pool1_stride_t=1,
- inflate=(0, 0, 1, 1),
- spatial_strides=(1, 2, 2, 1)),
- fast_pathway=dict(
- type='resnet3d',
- depth=50,
- pretrained=None,
- lateral=False,
- base_channels=8,
- conv1_kernel=(5, 7, 7),
- conv1_stride_t=1,
- pool1_stride_t=1,
- spatial_strides=(1, 2, 2, 1))),
- roi_head=dict(
- shared_head=dict(type='ACRNHead', in_channels=4608, out_channels=2304),
- bbox_head=dict(in_channels=2304)))
-
-dataset_type = 'AVADataset'
-data_root = 'data/ava/rawframes'
-anno_root = 'data/ava/annotations'
-
-ann_file_train = f'{anno_root}/ava_train_v2.2.csv'
-ann_file_val = f'{anno_root}/ava_val_v2.2.csv'
-
-exclude_file_train = f'{anno_root}/ava_train_excluded_timestamps_v2.2.csv'
-exclude_file_val = f'{anno_root}/ava_val_excluded_timestamps_v2.2.csv'
-
-label_file = f'{anno_root}/ava_action_list_v2.2_for_activitynet_2019.pbtxt'
-
-proposal_file_train = (f'{anno_root}/ava_dense_proposals_train.FAIR.'
- 'recall_93.9.pkl')
-proposal_file_val = f'{anno_root}/ava_dense_proposals_val.FAIR.recall_93.9.pkl'
-
-train_pipeline = [
- dict(type='SampleAVAFrames', clip_len=32, frame_interval=2),
- dict(type='RawFrameDecode'),
- dict(type='RandomRescale', scale_range=(256, 320)),
- dict(type='RandomCrop', size=256),
- dict(type='Flip', flip_ratio=0.5),
- dict(type='FormatShape', input_format='NCTHW', collapse=True),
- dict(type='PackActionInputs')
-]
-# The testing is w/o. any cropping / flipping
-val_pipeline = [
- dict(
- type='SampleAVAFrames', clip_len=32, frame_interval=2, test_mode=True),
- dict(type='RawFrameDecode'),
- dict(type='Resize', scale=(-1, 256)),
- dict(type='FormatShape', input_format='NCTHW', collapse=True),
- dict(type='PackActionInputs')
-]
-
-train_dataloader = dict(
- batch_size=8,
- num_workers=8,
- persistent_workers=True,
- sampler=dict(type='DefaultSampler', shuffle=True),
- dataset=dict(
- type=dataset_type,
- ann_file=ann_file_train,
- exclude_file=exclude_file_train,
- pipeline=train_pipeline,
- label_file=label_file,
- proposal_file=proposal_file_train,
- data_prefix=dict(img=data_root)))
-val_dataloader = dict(
- batch_size=1,
- num_workers=8,
- persistent_workers=True,
- sampler=dict(type='DefaultSampler', shuffle=False),
- dataset=dict(
- type=dataset_type,
- ann_file=ann_file_val,
- exclude_file=exclude_file_val,
- pipeline=val_pipeline,
- label_file=label_file,
- proposal_file=proposal_file_val,
- data_prefix=dict(img=data_root),
- test_mode=True))
-test_dataloader = val_dataloader
-
-val_evaluator = dict(
- type='AVAMetric',
- ann_file=ann_file_val,
- label_file=label_file,
- exclude_file=exclude_file_val)
-test_evaluator = val_evaluator
-
-train_cfg = dict(
- type='EpochBasedTrainLoop', max_epochs=10, val_begin=1, val_interval=1)
-val_cfg = dict(type='ValLoop')
-test_cfg = dict(type='TestLoop')
-
-param_scheduler = [
- dict(
- type='LinearLR',
- start_factor=0.1,
- by_epoch=True,
- begin=0,
- end=2,
- convert_to_iter_based=True),
- dict(
- type='CosineAnnealingLR',
- T_max=8,
- eta_min=0,
- by_epoch=True,
- begin=2,
- end=10,
- convert_to_iter_based=True)
-]
-
-optim_wrapper = dict(
- optimizer=dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.00001),
- clip_grad=dict(max_norm=40, norm_type=2))
-
-load_from = ('https://download.openmmlab.com/mmaction/recognition/slowfast/'
- 'slowfast_r50_8x8x1_256e_kinetics400_rgb/'
- 'slowfast_r50_8x8x1_256e_kinetics400_rgb_20200716-73547d2b.pth')
diff --git a/configs/detection/ava/README.md b/configs/detection/ava/README.md
index 7d6122649e..21088251da 100644
--- a/configs/detection/ava/README.md
+++ b/configs/detection/ava/README.md
@@ -37,34 +37,35 @@ AVA, with its realistic scene and action complexity, exposes the intrinsic diffi
### AVA2.1
-| Model | Modality | Pretrained | Backbone | Input | gpus | Resolution | mAP | log | ckpt |
-| :----------------------------------------------------------------: | :------: | :----------: | :-------: | :---: | :--: | :------------: | :---: | :------------------------------------: | :-------------------------------------: |
-| [slowonly_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb](/configs/detection/ava/slowonly_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb.py) | RGB | Kinetics-400 | ResNet50 | 4x16 | 8 | short-side 256 | 20.76 | [log](https://download.openmmlab.com/) | [ckpt](https://download.openmmlab.com/) |
-| [slowonly_nl_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb](/configs/detection/ava/slowonly_nl_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb.py) | RGB | Kinetics-400 | ResNet50 | 4x16 | 8 | short-side 256 | 21.49 | [log](https://download.openmmlab.com/) | [ckpt](https://download.openmmlab.com/) |
-| [slowonly_nl_kinetics400_pretrained_r50_8x8x1_20e_8xb16_ava_rgb](/configs/detection/ava/slowonly_nl_kinetics400_pretrained_r50_8x8x1_20e_8xb16_ava_rgb.py) | RGB | Kinetics-400 | ResNet50 | 8x8 | 8 | short-side 256 | 23.74 | [log](https://download.openmmlab.com/) | [ckpt](https://download.openmmlab.com/) |
-| [slowonly_kinetics400_pretrained_r101_8x8x1_20e_8xb16_ava_rgb](/configs/detection/ava/slowonly_kinetics400_pretrained_r101_8x8x1_20e_8xb16_ava_rgb.py) | RGB | Kinetics-400 | ResNet101 | 8x8 | 8 | short-side 256 | 24.82 | [log](https://download.openmmlab.com/) | [ckpt](https://download.openmmlab.com/) |
-| [slowfast_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb](/configs/detection/ava/slowfast_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb.py) | RGB | Kinetics-400 | ResNet50 | 32x2 | 8 | short-side 256 | 24.27 | [log](https://download.openmmlab.com/) | [ckpt](https://download.openmmlab.com/) |
-| [slowfast_context_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb](/configs/detection/ava/slowfast_context_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb.py) | RGB | Kinetics-400 | ResNet50 | 32x2 | 8 | short-side 256 | 25.25 | [log](https://download.openmmlab.com/) | [ckpt](https://download.openmmlab.com/) |
-| [slowfast_kinetics400_pretrained_r50_8x8x1_20e_8xb8_ava_rgb](/configs/detection/ava/slowfast_kinetics400_pretrained_r50_8x8x1_20e_8xb8_ava_rgb.py) | RGB | Kinetics-400 | ResNet50 | 32x2 | 8 | short-side 256 | 25.73 | [log](https://download.openmmlab.com/) | [ckpt](https://download.openmmlab.com/) |
+| frame sampling strategy | resolution | gpus | backbone | pretrain | mAP | gpu_mem(M) | config | ckpt | log |
+| :---------------------: | :--------: | :--: | :----------------------------------: | :----------: | :---: | :--------: | :---------------------------------: | :-------------------------------: | :------------------------------: |
+| 4x16x1 | raw | 8 | SlowOnly ResNet50 | Kinetics-400 | 20.76 | 8503 | [config](/configs/detection/ava/slowonly_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowonly_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb/slowonly_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb_20220906-953ef5fe.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowonly_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb/slowonly_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.log) |
+| 4x16x1 | raw | 8 | SlowOnly ResNet50 | Kinetics-700 | 22.77 | 8503 | [config](/configs/detection/ava/slowonly_kinetics700-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowonly_kinetics700-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb/slowonly_kinetics700-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb_20220906-b3b6d44e.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowonly_kinetics700-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb/slowonly_kinetics700-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.log) |
+| 4x16x1 | raw | 8 | SlowOnly ResNet50 (NonLocalEmbedGauss) | Kinetics-400 | 21.49 | 11870 | [config](/configs/detection/ava/slowonly_kinetics400-pretrained-r50-nl_8xb16-4x16x1-20e_ava21-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowonly_kinetics400-pretrained-r50-nl_8xb16-4x16x1-20e_ava21-rgb/slowonly_kinetics400-pretrained-r50-nl_8xb16-4x16x1-20e_ava21-rgb_20220906-5ae3f91b.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowonly_kinetics400-pretrained-r50-nl_8xb16-4x16x1-20e_ava21-rgb/slowonly_kinetics400-pretrained-r50-nl_8xb16-4x16x1-20e_ava21-rgb.log) |
+| 8x8x1 | raw | 8 | SlowOnly ResNet50 (NonLocalEmbedGauss) | Kinetics-400 | 23.74 | 25375 | [config](/configs/detection/ava/slowonly_kinetics400-pretrained-r50-nl_8xb16-8x8x1-20e_ava21-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowonly_kinetics400-pretrained-r50-nl_8xb16-8x8x1-20e_ava21-rgb/slowonly_kinetics400-pretrained-r50-nl_8xb16-8x8x1-20e_ava21-rgb_20220906-9760eadb.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowonly_kinetics400-pretrained-r50-nl_8xb16-8x8x1-20e_ava21-rgb/slowonly_kinetics400-pretrained-r50-nl_8xb16-8x8x1-20e_ava21-rgb.log) |
+| 8x8x1 | raw | 8 | SlowOnly ResNet101 | Kinetics-400 | 24.82 | 23477 | [config](/configs/detection/ava/slowonly_kinetics400-pretrained-r101_8xb16-8x8x1-20e_ava21-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowonly_kinetics400-pretrained-r101_8xb16-8x8x1-20e_ava21-rgb/slowonly_kinetics400-pretrained-r101_8xb16-8x8x1-20e_ava21-rgb_20220906-43f16877.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowonly_kinetics400-pretrained-r101_8xb16-8x8x1-20e_ava21-rgb/slowonly_kinetics400-pretrained-r101_8xb16-8x8x1-20e_ava21-rgb.log) |
+| 4x16x1 | raw | 8 | SlowFast ResNet50 | Kinetics-400 | 24.27 | 18616 | [config](/configs/detection/ava/slowfast_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowfast_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb/slowfast_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb_20220906-5180ea3c.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowfast_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb/slowfast_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.log) |
+| 4x16x1 | raw | 8 | SlowFast ResNet50 (with context) | Kinetics-400 | 25.25 | 18616 | [config](/configs/detection/ava/slowfast_kinetics400-pretrained-r50-context_8xb16-4x16x1-20e_ava21-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowfast_kinetics400-pretrained-r50-context_8xb16-4x16x1-20e_ava21-rgb/slowfast_kinetics400-pretrained-r50-context_8xb16-4x16x1-20e_ava21-rgb_20220906-5bb4f6f2.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowfast_kinetics400-pretrained-r50-context_8xb16-4x16x1-20e_ava21-rgb/slowfast_kinetics400-pretrained-r50-context_8xb16-4x16x1-20e_ava21-rgb.log) |
+| 8x8x1 | raw | 8 | SlowFast ResNet50 | Kinetics-400 | 25.73 | 13802 | [config](/configs/detection/ava/slowfast_kinetics400-pretrained-r50_8xb8-8x8x1-20e_ava21-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowfast_kinetics400-pretrained-r50_8xb8-8x8x1-20e_ava21-rgb/slowfast_kinetics400-pretrained-r50_8xb8-8x8x1-20e_ava21-rgb_20220906-39133ec7.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowfast_kinetics400-pretrained-r50_8xb8-8x8x1-20e_ava21-rgb/slowfast_kinetics400-pretrained-r50_8xb8-8x8x1-20e_ava21-rgb.log) |
### AVA2.2
-| Model | Modality | Pretrained | Backbone | Input | gpus | mAP | log | ckpt |
-| :-------------------------------------------------------------------------------: | :------: | :----------: | :------: | :---: | :--: | :---: | :------------------------------------: | :-------------------------------------: |
-| [slowfast_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb6_ava22_rgb](/configs/detection/ava/slowfast_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb6_ava22_rgb.py) | RGB | Kinetics-400 | ResNet50 | 32x2 | 8 | 25.98 | [log](https://download.openmmlab.com/) | [ckpt](https://download.openmmlab.com/) |
-| [slowfast_temporal_max_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb](/configs/detection/ava/slowfast_temporal_max_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.py) | RGB | Kinetics-400 | ResNet50 | 32x2 | 8 | 26.38 | [log](https://download.openmmlab.com/) | [ckpt](https://download.openmmlab.com/) |
-| [slowfast_temporal_max_focal_alpha3_gamma1_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb6_ava22_rgb](/configs/detection/ava/slowfast_temporal_max_focal_alpha3_gamma1_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb6_ava22_rgb.py) | RGB | Kinetics-400 | ResNet50 | 32x2 | 8 | 26.59 | [log](https://download.openmmlab.com/) | [ckpt](https://download.openmmlab.com/) |
+| frame sampling strategy | resolution | gpus | backbone | pretrain | mAP | gpu_mem(M) | config | ckpt | log |
+| :---------------------: | :--------: | :--: | :----------------------------------: | :----------: | :---: | :--------: | :---------------------------------: | :-------------------------------: | :------------------------------: |
+| 8x8x1 | raw | 8 | SlowFast ResNet50 | Kinetics-400 | 25.82 | 10484 | [config](/configs/detection/ava/slowfast_kinetics400-pretrained-r50_8xb6-8x8x1-cosine-10e_ava22-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowfast_kinetics400-pretrained-r50_8xb6-8x8x1-cosine-10e_ava22-rgb/slowfast_kinetics400-pretrained-r50_8xb6-8x8x1-cosine-10e_ava22-rgb_20220906-d934a48f.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowfast_kinetics400-pretrained-r50_8xb6-8x8x1-cosine-10e_ava22-rgb/slowfast_kinetics400-pretrained-r50_8xb6-8x8x1-cosine-10e_ava22-rgb.log) |
+| 8x8x1 | raw | 8 | SlowFast ResNet50 (temporal-max) | Kinetics-400 | 26.32 | 10484 | [config](/configs/detection/ava/slowfast_kinetics400-pretrained-r50-temporal-max_8xb6-8x8x1-cosine-10e_ava22-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowfast_kinetics400-pretrained-r50-temporal-max_8xb6-8x8x1-cosine-10e_ava22-rgb/slowfast_kinetics400-pretrained-r50-temporal-max_8xb6-8x8x1-cosine-10e_ava22-rgb_20220906-13a9078e.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowfast_kinetics400-pretrained-r50-temporal-max_8xb6-8x8x1-cosine-10e_ava22-rgb/slowfast_kinetics400-pretrained-r50-temporal-max_8xb6-8x8x1-cosine-10e_ava22-rgb.log) |
+| 8x8x1 | raw | 8 | SlowFast ResNet50 (temporal-max, focal loss) | Kinetics-400 | 26.58 | 10484 | [config](/configs/detection/ava/slowfast_kinetics400-pretrained-r50-temporal-max-focal-alpha3-gamma1_8xb6-8x8x1-cosine-10e_ava22-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowfast_kinetics400-pretrained-r50-temporal-max-focal-alpha3-gamma1_8xb6-8x8x1-cosine-10e_ava22-rgb/slowfast_kinetics400-pretrained-r50-temporal-max-focal-alpha3-gamma1_8xb6-8x8x1-cosine-10e_ava22-rgb_20220906-dd59e26f.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowfast_kinetics400-pretrained-r50-temporal-max-focal-alpha3-gamma1_8xb6-8x8x1-cosine-10e_ava22-rgb/slowfast_kinetics400-pretrained-r50-temporal-max-focal-alpha3-gamma1_8xb6-8x8x1-cosine-10e_ava22-rgb.log) |
-:::{note}
+Note:
1. The **gpus** indicates the number of gpu we used to get the checkpoint.
According to the [Linear Scaling Rule](https://arxiv.org/abs/1706.02677), you may set the learning rate proportional to the batch size if you use different GPUs or videos per GPU,
e.g., lr=0.01 for 4 GPUs x 2 video/gpu and lr=0.08 for 16 GPUs x 4 video/gpu.
-2. **Context** indicates that using both RoI feature and global pooled feature for classification, which leads to around 1% mAP improvement in general.
+2. **With context** indicates using both the RoI feature and the global pooled feature for classification, which leads to around 1% mAP improvement in general.
:::
-For more details on data preparation, you can refer to AVA in [Data Preparation](/docs/data_preparation.md).
+For more details on data preparation, you can refer to [AVA Data Preparation](/tools/data/ava/README.md).
## Train
@@ -77,11 +78,11 @@ python tools/train.py ${CONFIG_FILE} [optional arguments]
Example: train the SlowOnly model on AVA in a deterministic option.
```shell
-python tools/train.py configs/detection/ava/slowonly_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb.py \
+python tools/train.py configs/detection/ava/slowonly_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.py \
--cfg-options randomness.seed=0 randomness.deterministic=True
```
-For more details and optional arguments infos, you can refer to **Training setting** part in [getting_started](/docs/getting_started.md#training-setting) .
+For more details, you can refer to the **Training** part in the [Training and Test Tutorial](/docs/en/user_guides/4_train_test.md).
## Test
@@ -91,13 +92,14 @@ You can use the following command to test a model.
python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
```
-Example: test the SlowOnly model on AVA.
+Example: test the SlowOnly model on AVA and dump the result to a pkl file.
```shell
-python tools/test.py configs/detection/ava/slowonly_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb.py checkpoints/SOME_CHECKPOINT.pth
+python tools/test.py configs/detection/ava/slowonly_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.py \
+ checkpoints/SOME_CHECKPOINT.pth --dump result.pkl
```
-For more details and optional arguments infos, you can refer to **Test a dataset** part in [getting_started](/docs/getting_started.md#test-a-dataset) .
+For more details, you can refer to the **Test** part in the [Training and Test Tutorial](/docs/en/user_guides/4_train_test.md).
## Citation
diff --git a/configs/detection/ava/README_zh-CN.md b/configs/detection/ava/README_zh-CN.md
deleted file mode 100644
index 5569fb6911..0000000000
--- a/configs/detection/ava/README_zh-CN.md
+++ /dev/null
@@ -1,129 +0,0 @@
-# AVA
-
-
-
-
-
-## 简介
-
-
-
-```BibTeX
-@inproceedings{gu2018ava,
- title={Ava: A video dataset of spatio-temporally localized atomic visual actions},
- author={Gu, Chunhui and Sun, Chen and Ross, David A and Vondrick, Carl and Pantofaru, Caroline and Li, Yeqing and Vijayanarasimhan, Sudheendra and Toderici, George and Ricco, Susanna and Sukthankar, Rahul and others},
- booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
- pages={6047--6056},
- year={2018}
-}
-```
-
-
-
-```BibTeX
-@article{duan2020omni,
- title={Omni-sourced Webly-supervised Learning for Video Recognition},
- author={Duan, Haodong and Zhao, Yue and Xiong, Yuanjun and Liu, Wentao and Lin, Dahua},
- journal={arXiv preprint arXiv:2003.13042},
- year={2020}
-}
-```
-
-
-
-```BibTeX
-@inproceedings{feichtenhofer2019slowfast,
- title={Slowfast networks for video recognition},
- author={Feichtenhofer, Christoph and Fan, Haoqi and Malik, Jitendra and He, Kaiming},
- booktitle={Proceedings of the IEEE international conference on computer vision},
- pages={6202--6211},
- year={2019}
-}
-```
-
-## 模型库
-
-### AVA2.1
-
-| 配置文件 | 模态 | 预训练 | 主干网络 | 输入 | GPU 数量 | 分辨率 | mAP | log | json | ckpt |
-| :-----------------------------------: | :--: | :----------: | :-------: | :--: | :------: | :------: | :---: | :-----------------------------------: | :------------------------------------: | :------------------------------------: |
-| [slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb](/configs/detection/ava/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb.py) | RGB | Kinetics-400 | ResNet50 | 4x16 | 8 | 短边 256 | 20.1 | [log](https://download.openmmlab.com/mmaction/detection/ava/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_20201127.log) | [json](https://download.openmmlab.com/mmaction/detection/ava/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_20201127.json) | [ckpt](https://download.openmmlab.com/mmaction/detection/ava/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_20201217-40061d5f.pth) |
-| [slowonly_omnisource_pretrained_r50_4x16x1_20e_ava_rgb](/configs/detection/ava/slowonly_omnisource_pretrained_r50_4x16x1_20e_ava_rgb.py) | RGB | OmniSource | ResNet50 | 4x16 | 8 | 短边 256 | 21.8 | [log](https://download.openmmlab.com/mmaction/detection/ava/slowonly_omnisource_pretrained_r50_4x16x1_20e_ava_rgb/slowonly_omnisource_pretrained_r50_4x16x1_20e_ava_rgb_20201127.log) | [json](https://download.openmmlab.com/mmaction/detection/ava/slowonly_omnisource_pretrained_r50_4x16x1_20e_ava_rgb/slowonly_omnisource_pretrained_r50_4x16x1_20e_ava_rgb_20201127.json) | [ckpt](https://download.openmmlab.com/mmaction/detection/ava/slowonly_omnisource_pretrained_r50_4x16x1_20e_ava_rgb/slowonly_omnisource_pretrained_r50_4x16x1_20e_ava_rgb_20201217-0c6d2e98.pth) |
-| [slowonly_nl_kinetics_pretrained_r50_4x16x1_10e_ava_rgb](/configs/detection/ava/slowonly_nl_kinetics_pretrained_r50_4x16x1_10e_ava_rgb.py) | RGB | Kinetics-400 | ResNet50 | 4x16 | 8 | 短边 256 | 21.75 | [log](https://download.openmmlab.com/mmaction/detection/ava/slowonly_nl_kinetics_pretrained_r50_4x16x1_10e_ava_rgb/20210316_122517.log) | [json](https://download.openmmlab.com/mmaction/detection/ava/slowonly_nl_kinetics_pretrained_r50_4x16x1_10e_ava_rgb/20210316_122517.log.json) | [ckpt](https://download.openmmlab.com/mmaction/detection/ava/slowonly_nl_kinetics_pretrained_r50_4x16x1_10e_ava_rgb/slowonly_nl_kinetics_pretrained_r50_4x16x1_10e_ava_rgb_20210316-959829ec.pth) |
-| [slowonly_nl_kinetics_pretrained_r50_8x8x1_10e_ava_rgb](/configs/detection/ava/slowonly_nl_kinetics_pretrained_r50_8x8x1_10e_ava_rgb.py) | RGB | Kinetics-400 | ResNet50 | 8x8 | 8x2 | 短边 256 | 23.79 | [log](https://download.openmmlab.com/mmaction/detection/ava/slowonly_nl_kinetics_pretrained_r50_8x8x1_10e_ava_rgb/20210316_122517.log) | [json](https://download.openmmlab.com/mmaction/detection/ava/slowonly_nl_kinetics_pretrained_r50_8x8x1_10e_ava_rgb/20210316_122517.log.json) | [ckpt](https://download.openmmlab.com/mmaction/detection/ava/slowonly_nl_kinetics_pretrained_r50_8x8x1_10e_ava_rgb/slowonly_nl_kinetics_pretrained_r50_8x8x1_10e_ava_rgb_20210316-5742e4dd.pth) |
-| [slowonly_kinetics_pretrained_r101_8x8x1_20e_ava_rgb](/configs/detection/ava/slowonly_kinetics_pretrained_r101_8x8x1_20e_ava_rgb.py) | RGB | Kinetics-400 | ResNet101 | 8x8 | 8x2 | 短边 256 | 24.6 | [log](https://download.openmmlab.com/mmaction/detection/ava/slowonly_kinetics_pretrained_r101_8x8x1_20e_ava_rgb/slowonly_kinetics_pretrained_r101_8x8x1_20e_ava_rgb_20201127.log) | [json](https://download.openmmlab.com/mmaction/detection/ava/slowonly_kinetics_pretrained_r101_8x8x1_20e_ava_rgb/slowonly_kinetics_pretrained_r101_8x8x1_20e_ava_rgb_20201127.json) | [ckpt](https://download.openmmlab.com/mmaction/detection/ava/slowonly_kinetics_pretrained_r101_8x8x1_20e_ava_rgb/slowonly_kinetics_pretrained_r101_8x8x1_20e_ava_rgb_20201217-1c9b4117.pth) |
-| [slowonly_omnisource_pretrained_r101_8x8x1_20e_ava_rgb](/configs/detection/ava/slowonly_omnisource_pretrained_r101_8x8x1_20e_ava_rgb.py) | RGB | OmniSource | ResNet101 | 8x8 | 8x2 | 短边 256 | 25.9 | [log](https://download.openmmlab.com/mmaction/detection/ava/slowonly_omnisource_pretrained_r101_8x8x1_20e_ava_rgb/slowonly_omnisource_pretrained_r101_8x8x1_20e_ava_rgb_20201127.log) | [json](https://download.openmmlab.com/mmaction/detection/ava/slowonly_omnisource_pretrained_r101_8x8x1_20e_ava_rgb/slowonly_omnisource_pretrained_r101_8x8x1_20e_ava_rgb_20201127.json) | [ckpt](https://download.openmmlab.com/mmaction/detection/ava/slowonly_omnisource_pretrained_r101_8x8x1_20e_ava_rgb/slowonly_omnisource_pretrained_r101_8x8x1_20e_ava_rgb_20201217-16378594.pth) |
-| [slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb](/configs/detection/ava/slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb.py) | RGB | Kinetics-400 | ResNet50 | 32x2 | 8x2 | 短边 256 | 24.4 | [log](https://download.openmmlab.com/mmaction/detection/ava/slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb/slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_20201217.log) | [json](https://download.openmmlab.com/mmaction/detection/ava/slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb/slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_20201217.json) | [ckpt](https://download.openmmlab.com/mmaction/detection/ava/slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb/slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_20201217-6e7c704d.pth) |
-| [slowfast_context_kinetics_pretrained_r50_4x16x1_20e_ava_rgb](/configs/detection/ava/slowfast_context_kinetics_pretrained_r50_4x16x1_20e_ava_rgb.py) | RGB | Kinetics-400 | ResNet50 | 32x2 | 8x2 | 短边 256 | 25.4 | [log](https://download.openmmlab.com/mmaction/detection/ava/slowfast_context_kinetics_pretrained_r50_4x16x1_20e_ava_rgb/slowfast_context_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_20201222.log) | [json](https://download.openmmlab.com/mmaction/detection/ava/slowfast_context_kinetics_pretrained_r50_4x16x1_20e_ava_rgb/slowfast_context_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_20201222.json) | [ckpt](https://download.openmmlab.com/mmaction/detection/ava/slowfast_context_kinetics_pretrained_r50_4x16x1_20e_ava_rgb/slowfast_context_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_20201222-f4d209c9.pth) |
-| [slowfast_kinetics_pretrained_r50_8x8x1_20e_ava_rgb](/configs/detection/ava/slowfast_kinetics_pretrained_r50_8x8x1_20e_ava_rgb.py) | RGB | Kinetics-400 | ResNet50 | 32x2 | 8x2 | 短边 256 | 25.5 | [log](https://download.openmmlab.com/mmaction/detection/ava/slowfast_kinetics_pretrained_r50_8x8x1_20e_ava_rgb/slowfast_kinetics_pretrained_r50_8x8x1_20e_ava_rgb_20201217.log) | [json](https://download.openmmlab.com/mmaction/detection/ava/slowfast_kinetics_pretrained_r50_8x8x1_20e_ava_rgb/slowfast_kinetics_pretrained_r50_8x8x1_20e_ava_rgb_20201217.json) | [ckpt](https://download.openmmlab.com/mmaction/detection/ava/slowfast_kinetics_pretrained_r50_8x8x1_20e_ava_rgb/slowfast_kinetics_pretrained_r50_8x8x1_20e_ava_rgb_20201217-ae225e97.pth) |
-
-### AVA2.2
-
-| 配置文件 | 模态 | 预训练 | 主干网络 | 输入 | GPU 数量 | mAP | log | json | ckpt |
-| :--------------------------------------: | :--: | :----------: | :------: | :--: | :------: | :--: | :-------------------------------------: | :--------------------------------------: | :---------------------------------------: |
-| [slowfast_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb](/configs/detection/ava/slowfast_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.py) | RGB | Kinetics-400 | ResNet50 | 32x2 | 8 | 26.1 | [log](https://download.openmmlab.com/mmaction/detection/ava/slowfast_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb/slowfast_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.log) | [json](https://download.openmmlab.com/mmaction/detection/ava/slowfast_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb/slowfast_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.json) | [ckpt](https://download.openmmlab.com/mmaction/detection/ava/slowfast_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb/slowfast_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb-b987b516.pth) |
-| [slowfast_temporal_max_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb](/configs/detection/ava/slowfast_temporal_max_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.py) | RGB | Kinetics-400 | ResNet50 | 32x2 | 8 | 26.4 | [log](https://download.openmmlab.com/mmaction/detection/ava/slowfast_temporal_max_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb/slowfast_temporal_max_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.log) | [json](https://download.openmmlab.com/mmaction/detection/ava/slowfast_temporal_max_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb/slowfast_temporal_max_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.json) | [ckpt](https://download.openmmlab.com/mmaction/detection/ava/slowfast_temporal_max_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb/slowfast_temporal_max_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb-874e0845.pth) |
-| [slowfast_temporal_max_focal_alpha3_gamma1_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb](/configs/detection/ava/slowfast_temporal_max_focal_alpha3_gamma1_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.py) | RGB | Kinetics-400 | ResNet50 | 32x2 | 8 | 26.8 | [log](https://download.openmmlab.com/mmaction/detection/ava/slowfast_temporal_max_focal_alpha3_gamma1_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb/slowfast_temporal_max_focal_alpha3_gamma1_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.log) | [json](https://download.openmmlab.com/mmaction/detection/ava/slowfast_temporal_max_focal_alpha3_gamma1_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb/slowfast_temporal_max_focal_alpha3_gamma1_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.json) | [ckpt](https://download.openmmlab.com/mmaction/detection/ava/slowfast_temporal_max_focal_alpha3_gamma1_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb/slowfast_temporal_max_focal_alpha3_gamma1_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb-345618cd.pth) |
-
-注:
-
-1. 这里的 **GPU 数量** 指的是得到模型权重文件对应的 GPU 个数。默认地,MMAction2 所提供的配置文件对应使用 8 块 GPU 进行训练的情况。
- 依据 [线性缩放规则](https://arxiv.org/abs/1706.02677),当用户使用不同数量的 GPU 或者每块 GPU 处理不同视频个数时,需要根据批大小等比例地调节学习率。
- 如,lr=0.01 对应 4 GPUs x 2 video/gpu,以及 lr=0.08 对应 16 GPUs x 4 video/gpu。
-2. **Context** 表示同时使用 RoI 特征与全局特征进行分类,可带来约 1% mAP 的提升。
-
-对于数据集准备的细节,用户可参考 [数据准备](/docs_zh_CN/data_preparation.md)。
-
-## 如何训练
-
-用户可以使用以下指令进行模型训练。
-
-```shell
-python tools/train.py ${CONFIG_FILE} [optional arguments]
-```
-
-例如:在 AVA 数据集上训练 SlowOnly,并定期验证。
-
-```shell
-python tools/train.py configs/detection/ava/slowonly_kinetics_pretrained_r50_8x8x1_20e_ava_rgb.py --validate
-```
-
-更多训练细节,可参考 [基础教程](/docs_zh_CN/getting_started.md#训练配置) 中的 **训练配置** 部分。
-
-### 训练 AVA 数据集中的自定义类别
-
-用户可以训练 AVA 数据集中的自定义类别。AVA 中不同类别的样本量很不平衡:其中有超过 100000 样本的类别: `stand`/`listen to (a person)`/`talk to (e.g., self, a person, a group)`/`watch (a person)`,也有样本较少的类别(半数类别不足 500 样本)。大多数情况下,仅使用样本较少的类别进行训练将在这些类别上得到更好精度。
-
-训练 AVA 数据集中的自定义类别包含 3 个步骤:
-
-1. 从原先的类别中选择希望训练的类别,将其填写至配置文件的 `custom_classes` 域中。其中 `0` 不表示具体的动作类别,不应被选择。
-2. 将 `num_classes` 设置为 `num_classes = len(custom_classes) + 1`。
- - 在新的类别到编号的对应中,编号 `0` 仍对应原类别 `0`,编号 `i` (i > 0) 对应原类别 `custom_classes[i-1]`。
- - 配置文件中 3 处涉及 `num_classes` 需要修改:`model -> roi_head -> bbox_head -> num_classes`, `data -> train -> num_classes`, `data -> val -> num_classes`.
- - 若 `num_classes <= 5`, 配置文件 `BBoxHeadAVA` 中的 `topk` 参数应被修改。`topk` 的默认值为 `(3, 5)`,`topk` 中的所有元素应小于 `num_classes`。
-3. 确认所有自定义类别在 `label_file` 中。
-
-以 `slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb` 为例,这一配置文件训练所有 AP 在 `(0.1, 0.3)` 间的类别(这里的 AP 为 AVA 80 类训出模型的表现),即 `[3, 6, 10, 27, 29, 38, 41, 48, 51, 53, 54, 59, 61, 64, 70, 72]`。下表列出了自定义类别训练的模型精度:
-
-| 训练类别 | mAP (自定义类别) | 配置文件 | log | json | ckpt |
-| :--------: | :----------------: | :-----------------------------------------: | :----------------------------------------: | :-----------------------------------------: | :------------------------------------------: |
-| 全部 80 类 | 0.1948 | [slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb](/configs/detection/ava/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb.py) | [log](https://download.openmmlab.com/mmaction/detection/ava/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_20201127.log) | [json](https://download.openmmlab.com/mmaction/detection/ava/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_20201127.json) | [ckpt](https://download.openmmlab.com/mmaction/detection/ava/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_20201217-40061d5f.pth) |
-| 自定义类别 | 0.3311 | [slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_custom_classes](/configs/detection/ava/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_custom_classes.py) | [log](https://download.openmmlab.com/mmaction/detection/ava/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_custom_classes/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_custom_classes.log) | [json](https://download.openmmlab.com/mmaction/detection/ava/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_custom_classes/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_custom_classes.json) | [ckpt](https://download.openmmlab.com/mmaction/detection/ava/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_custom_classes/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_custom_classes-4ab80419.pth) |
-| 全部 80 类 | 0.1864 | [slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb](/configs/detection/ava/slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb.py) | [log](https://download.openmmlab.com/mmaction/detection/ava/slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb/slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_20201217.log) | [json](https://download.openmmlab.com/mmaction/detection/ava/slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb/slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_20201217.json) | [ckpt](https://download.openmmlab.com/mmaction/detection/ava/slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb/slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_20201217-6e7c704d.pth) |
-| 自定义类别 | 0.3785 | [slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_custom_classes](/configs/detection/ava/slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_custom_classes.py) | [log](https://download.openmmlab.com/mmaction/detection/ava/slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_custom_classes/slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_custom_classes_20210305.log) | [json](https://download.openmmlab.com/mmaction/detection/ava/slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_custom_classes/slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_custom_classes_20210305.json) | [ckpt](https://download.openmmlab.com/mmaction/detection/ava/slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_custom_classes/slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_custom_classes_20210305-c6225546.pth) |
-
-## 如何测试
-
-用户可以使用以下指令进行模型测试。
-
-```shell
-python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
-```
-
-例如:在 AVA 上测试 SlowOnly 模型,并将结果存为 csv 文件。
-
-```shell
-python tools/test.py configs/detection/ava/slowonly_kinetics_pretrained_r50_8x8x1_20e_ava_rgb.py checkpoints/SOME_CHECKPOINT.pth --eval mAP --out results.csv
-```
-
-更多测试细节,可参考 [基础教程](/docs_zh_CN/getting_started.md#测试某个数据集) 中的 **测试某个数据集** 部分。
diff --git a/configs/detection/ava/metafile.yml b/configs/detection/ava/metafile.yml
index e989a8b5ea..0f5e365ab9 100644
--- a/configs/detection/ava/metafile.yml
+++ b/configs/detection/ava/metafile.yml
@@ -6,8 +6,8 @@ Collections:
Title: "AVA: A Video Dataset of Spatio-temporally Localized Atomic Visual Actions"
Models:
- - Name: slowonly_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb
- Config: configs/detection/ava/slowonly_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb.py
+ - Name: slowonly_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb
+ Config: configs/detection/ava/slowonly_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.py
In Collection: AVA
Metadata:
Architecture: ResNet50
@@ -23,9 +23,31 @@ Models:
Task: Action Detection
Metrics:
mAP: 20.76
+ Training Log: https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowonly_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb/slowonly_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.log
+ Weights: https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowonly_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb/slowonly_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb_20220906-953ef5fe.pth
- - Name: slowonly_nl_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb
- Config: configs/detection/ava/slowonly_nl_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb.py
+ - Name: slowonly_kinetics700-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb
+ Config: configs/detection/ava/slowonly_kinetics700-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.py
+ In Collection: AVA
+ Metadata:
+ Architecture: ResNet50
+ Batch Size: 16
+ Epochs: 20
+ Pretrained: Kinetics-700
+ Resolution: short-side 320
+ Training Data: AVA v2.1
+ Training Resources: 8 GPUs
+ Modality: RGB
+ Results:
+ - Dataset: AVA v2.1
+ Task: Action Detection
+ Metrics:
+ mAP: 22.77
+ Training Log: https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowonly_kinetics700-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb/slowonly_kinetics700-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.log
+ Weights: https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowonly_kinetics700-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb/slowonly_kinetics700-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb_20220906-b3b6d44e.pth
+
+ - Name: slowonly_kinetics400-pretrained-r50-nl_8xb16-4x16x1-20e_ava21-rgb
+ Config: configs/detection/ava/slowonly_kinetics400-pretrained-r50-nl_8xb16-4x16x1-20e_ava21-rgb.py
In Collection: AVA
Metadata:
Architecture: ResNet50
@@ -41,9 +63,11 @@ Models:
Task: Action Detection
Metrics:
mAP: 21.49
+ Training Log: https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowonly_kinetics400-pretrained-r50-nl_8xb16-4x16x1-20e_ava21-rgb/slowonly_kinetics400-pretrained-r50-nl_8xb16-4x16x1-20e_ava21-rgb.log
+ Weights: https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowonly_kinetics400-pretrained-r50-nl_8xb16-4x16x1-20e_ava21-rgb/slowonly_kinetics400-pretrained-r50-nl_8xb16-4x16x1-20e_ava21-rgb_20220906-5ae3f91b.pth
- - Name: slowonly_nl_kinetics400_pretrained_r50_8x8x1_20e_8xb16_ava_rgb
- Config: configs/detection/ava/slowonly_nl_kinetics400_pretrained_r50_8x8x1_20e_8xb16_ava_rgb.py
+ - Name: slowonly_kinetics400-pretrained-r50-nl_8xb16-8x8x1-20e_ava21-rgb
+ Config: configs/detection/ava/slowonly_kinetics400-pretrained-r50-nl_8xb16-8x8x1-20e_ava21-rgb.py
In Collection: AVA
Metadata:
Architecture: ResNet50
@@ -59,9 +83,11 @@ Models:
Task: Action Detection
Metrics:
mAP: 23.47
+ Training Log: https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowonly_kinetics400-pretrained-r50-nl_8xb16-8x8x1-20e_ava21-rgb/slowonly_kinetics400-pretrained-r50-nl_8xb16-8x8x1-20e_ava21-rgb.log
+ Weights: https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowonly_kinetics400-pretrained-r50-nl_8xb16-8x8x1-20e_ava21-rgb/slowonly_kinetics400-pretrained-r50-nl_8xb16-8x8x1-20e_ava21-rgb_20220906-9760eadb.pth
- - Name: slowonly_kinetics400_pretrained_r101_8x8x1_20e_8xb16_ava_rgb
- Config: configs/detection/ava/slowonly_kinetics400_pretrained_r101_8x8x1_20e_8xb16_ava_rgb.py
+ - Name: slowonly_kinetics400-pretrained-r101_8xb16-8x8x1-20e_ava21-rgb
+ Config: configs/detection/ava/slowonly_kinetics400-pretrained-r101_8xb16-8x8x1-20e_ava21-rgb.py
In Collection: AVA
Metadata:
Architecture: ResNet101
@@ -77,9 +103,11 @@ Models:
Task: Action Detection
Metrics:
mAP: 24.82
+ Training Log: https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowonly_kinetics400-pretrained-r101_8xb16-8x8x1-20e_ava21-rgb/slowonly_kinetics400-pretrained-r101_8xb16-8x8x1-20e_ava21-rgb.log
+ Weights: https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowonly_kinetics400-pretrained-r101_8xb16-8x8x1-20e_ava21-rgb/slowonly_kinetics400-pretrained-r101_8xb16-8x8x1-20e_ava21-rgb_20220906-43f16877.pth
- - Name: slowfast_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb
- Config: configs/detection/ava/slowfast_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb.py
+ - Name: slowfast_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb
+ Config: configs/detection/ava/slowfast_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.py
In Collection: AVA
Metadata:
Architecture: ResNet50
@@ -95,9 +123,11 @@ Models:
Task: Action Detection
Metrics:
mAP: 24.27
+ Training Log: https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowfast_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb/slowfast_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.log
+ Weights: https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowfast_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb/slowfast_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb_20220906-5180ea3c.pth
- - Name: slowfast_context_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb
- Config: configs/detection/ava/slowfast_context_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb.py
+ - Name: slowfast_kinetics400-pretrained-r50-context_8xb16-4x16x1-20e_ava21-rgb
+ Config: configs/detection/ava/slowfast_kinetics400-pretrained-r50-context_8xb16-4x16x1-20e_ava21-rgb.py
In Collection: AVA
Metadata:
Architecture: ResNet50
@@ -113,9 +143,11 @@ Models:
Task: Action Detection
Metrics:
mAP: 25.25
+ Training Log: https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowfast_kinetics400-pretrained-r50-context_8xb16-4x16x1-20e_ava21-rgb/slowfast_kinetics400-pretrained-r50-context_8xb16-4x16x1-20e_ava21-rgb.log
+ Weights: https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowfast_kinetics400-pretrained-r50-context_8xb16-4x16x1-20e_ava21-rgb/slowfast_kinetics400-pretrained-r50-context_8xb16-4x16x1-20e_ava21-rgb_20220906-5bb4f6f2.pth
- - Name: slowfast_kinetics400_pretrained_r50_8x8x1_20e_8xb8_ava_rgb
- Config: configs/detection/ava/slowfast_kinetics400_pretrained_r50_8x8x1_20e_8xb8_ava_rgb.py
+ - Name: slowfast_kinetics400-pretrained-r50_8xb8-8x8x1-20e_ava21-rgb
+ Config: configs/detection/ava/slowfast_kinetics400-pretrained-r50_8xb8-8x8x1-20e_ava21-rgb.py
In Collection: AVA
Metadata:
Architecture: ResNet50
@@ -131,9 +163,11 @@ Models:
Task: Action Detection
Metrics:
mAP: 25.73
+ Training Log: https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowfast_kinetics400-pretrained-r50_8xb8-8x8x1-20e_ava21-rgb/slowfast_kinetics400-pretrained-r50_8xb8-8x8x1-20e_ava21-rgb.log
+ Weights: https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowfast_kinetics400-pretrained-r50_8xb8-8x8x1-20e_ava21-rgb/slowfast_kinetics400-pretrained-r50_8xb8-8x8x1-20e_ava21-rgb_20220906-39133ec7.pth
- - Name: slowfast_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb6_ava22_rgb
- Config: configs/detection/ava/slowfast_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb6_ava22_rgb.py
+ - Name: slowfast_kinetics400-pretrained-r50_8xb6-8x8x1-cosine-10e_ava22-rgb
+ Config: configs/detection/ava/slowfast_kinetics400-pretrained-r50_8xb6-8x8x1-cosine-10e_ava22-rgb.py
In Collection: AVA
Metadata:
Architecture: ResNet50
@@ -149,9 +183,11 @@ Models:
Task: Action Detection
Metrics:
mAP: 25.98
+ Training Log: https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowfast_kinetics400-pretrained-r50_8xb6-8x8x1-cosine-10e_ava22-rgb/slowfast_kinetics400-pretrained-r50_8xb6-8x8x1-cosine-10e_ava22-rgb.log
+ Weights: https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowfast_kinetics400-pretrained-r50_8xb6-8x8x1-cosine-10e_ava22-rgb/slowfast_kinetics400-pretrained-r50_8xb6-8x8x1-cosine-10e_ava22-rgb_20220906-d934a48f.pth
- - Name: slowfast_temporal_max_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb
- Config: configs/detection/ava/slowfast_temporal_max_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.py
+ - Name: slowfast_kinetics400-pretrained-r50-temporal-max_8xb6-8x8x1-cosine-10e_ava22-rgb
+ Config: configs/detection/ava/slowfast_kinetics400-pretrained-r50-temporal-max_8xb6-8x8x1-cosine-10e_ava22-rgb.py
In Collection: AVA
Metadata:
Architecture: ResNet50
@@ -167,9 +203,11 @@ Models:
Task: Action Detection
Metrics:
mAP: 26.38
+ Training Log: https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowfast_kinetics400-pretrained-r50-temporal-max_8xb6-8x8x1-cosine-10e_ava22-rgb/slowfast_kinetics400-pretrained-r50-temporal-max_8xb6-8x8x1-cosine-10e_ava22-rgb.log
+ Weights: https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowfast_kinetics400-pretrained-r50-temporal-max_8xb6-8x8x1-cosine-10e_ava22-rgb/slowfast_kinetics400-pretrained-r50-temporal-max_8xb6-8x8x1-cosine-10e_ava22-rgb_20220906-13a9078e.pth
- - Name: slowfast_temporal_max_focal_alpha3_gamma1_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb6_ava22_rgb
- Config: configs/detection/ava/slowfast_temporal_max_focal_alpha3_gamma1_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb6_ava22_rgb.py
+ - Name: slowfast_kinetics400-pretrained-r50-temporal-max-focal-alpha3-gamma1_8xb6-8x8x1-cosine-10e_ava22-rgb
+ Config: configs/detection/ava/slowfast_kinetics400-pretrained-r50-temporal-max-focal-alpha3-gamma1_8xb6-8x8x1-cosine-10e_ava22-rgb.py
In Collection: AVA
Metadata:
Architecture: ResNet50
@@ -185,3 +223,5 @@ Models:
Task: Action Detection
Metrics:
mAP: 26.59
+ Training Log: https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowfast_kinetics400-pretrained-r50-temporal-max-focal-alpha3-gamma1_8xb6-8x8x1-cosine-10e_ava22-rgb/slowfast_kinetics400-pretrained-r50-temporal-max-focal-alpha3-gamma1_8xb6-8x8x1-cosine-10e_ava22-rgb.log
+ Weights: https://download.openmmlab.com/mmaction/v1.0/detection/ava/slowfast_kinetics400-pretrained-r50-temporal-max-focal-alpha3-gamma1_8xb6-8x8x1-cosine-10e_ava22-rgb/slowfast_kinetics400-pretrained-r50-temporal-max-focal-alpha3-gamma1_8xb6-8x8x1-cosine-10e_ava22-rgb_20220906-dd59e26f.pth
diff --git a/configs/detection/ava/slowfast_context_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb.py b/configs/detection/ava/slowfast_kinetics400-pretrained-r50-context_8xb16-4x16x1-20e_ava21-rgb.py
similarity index 61%
rename from configs/detection/ava/slowfast_context_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb.py
rename to configs/detection/ava/slowfast_kinetics400-pretrained-r50-context_8xb16-4x16x1-20e_ava21-rgb.py
index 3ac1b0dc90..b75c7cabfd 100644
--- a/configs/detection/ava/slowfast_context_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb.py
+++ b/configs/detection/ava/slowfast_kinetics400-pretrained-r50-context_8xb16-4x16x1-20e_ava21-rgb.py
@@ -1,4 +1,4 @@
-_base_ = ['slowfast_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb.py']
+_base_ = ['slowfast_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.py']
model = dict(
roi_head=dict(
diff --git a/configs/detection/ava/slowfast_kinetics400-pretrained-r50-temporal-max-focal-alpha3-gamma1_8xb6-8x8x1-cosine-10e_ava22-rgb.py b/configs/detection/ava/slowfast_kinetics400-pretrained-r50-temporal-max-focal-alpha3-gamma1_8xb6-8x8x1-cosine-10e_ava22-rgb.py
new file mode 100644
index 0000000000..758b578634
--- /dev/null
+++ b/configs/detection/ava/slowfast_kinetics400-pretrained-r50-temporal-max-focal-alpha3-gamma1_8xb6-8x8x1-cosine-10e_ava22-rgb.py
@@ -0,0 +1,8 @@
+_base_ = [
+ 'slowfast_kinetics400-pretrained-r50_8xb6-8x8x1-cosine-10e_ava22-rgb.py'
+]
+
+model = dict(
+ roi_head=dict(
+ bbox_roi_extractor=dict(temporal_pool_mode='max'),
+ bbox_head=dict(focal_alpha=3.0, focal_gamma=1.0)))
diff --git a/configs/detection/ava/slowfast_kinetics400-pretrained-r50-temporal-max_8xb6-8x8x1-cosine-10e_ava22-rgb.py b/configs/detection/ava/slowfast_kinetics400-pretrained-r50-temporal-max_8xb6-8x8x1-cosine-10e_ava22-rgb.py
new file mode 100644
index 0000000000..9b750c4925
--- /dev/null
+++ b/configs/detection/ava/slowfast_kinetics400-pretrained-r50-temporal-max_8xb6-8x8x1-cosine-10e_ava22-rgb.py
@@ -0,0 +1,5 @@
+_base_ = [
+ 'slowfast_kinetics400-pretrained-r50_8xb6-8x8x1-cosine-10e_ava22-rgb.py'
+]
+
+model = dict(roi_head=dict(bbox_roi_extractor=dict(temporal_pool_mode='max')))
diff --git a/configs/detection/ava/slowfast_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb.py b/configs/detection/ava/slowfast_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.py
similarity index 90%
rename from configs/detection/ava/slowfast_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb.py
rename to configs/detection/ava/slowfast_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.py
index 0870dfd8e6..8b5550aec0 100644
--- a/configs/detection/ava/slowfast_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb.py
+++ b/configs/detection/ava/slowfast_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.py
@@ -1,11 +1,14 @@
-_base_ = ['slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb.py']
+_base_ = ['slowonly_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.py']
model = dict(
backbone=dict(
_delete_=True,
type='ResNet3dSlowFast',
_scope_='mmaction',
- pretrained=None,
+ pretrained=(
+ 'https://download.openmmlab.com/mmaction/recognition/slowfast/'
+ 'slowfast_r50_4x16x1_256e_kinetics400_rgb/'
+ 'slowfast_r50_4x16x1_256e_kinetics400_rgb_20200704-bcde7ed7.pth'),
resample_rate=8,
speed_ratio=8,
channel_ratio=8,
@@ -96,7 +99,3 @@
data_prefix=dict(img=data_root),
test_mode=True))
test_dataloader = val_dataloader
-
-load_from = ('https://download.openmmlab.com/mmaction/recognition/slowfast/'
- 'slowfast_r50_4x16x1_256e_kinetics400_rgb/'
- 'slowfast_r50_4x16x1_256e_kinetics400_rgb_20200704-bcde7ed7.pth')
diff --git a/configs/detection/ava/slowfast_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb6_ava22_rgb.py b/configs/detection/ava/slowfast_kinetics400-pretrained-r50_8xb6-8x8x1-cosine-10e_ava22-rgb.py
similarity index 89%
rename from configs/detection/ava/slowfast_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb6_ava22_rgb.py
rename to configs/detection/ava/slowfast_kinetics400-pretrained-r50_8xb6-8x8x1-cosine-10e_ava22-rgb.py
index 29fb7eb2d2..3cbf483e57 100644
--- a/configs/detection/ava/slowfast_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb6_ava22_rgb.py
+++ b/configs/detection/ava/slowfast_kinetics400-pretrained-r50_8xb6-8x8x1-cosine-10e_ava22-rgb.py
@@ -1,4 +1,11 @@
-_base_ = ['slowfast_kinetics400_pretrained_r50_8x8x1_20e_8xb8_ava_rgb.py']
+_base_ = ['slowfast_kinetics400-pretrained-r50_8xb8-8x8x1-20e_ava21-rgb.py']
+
+model = dict(
+ backbone=dict(
+ pretrained=(
+ 'https://download.openmmlab.com/mmaction/recognition/slowfast/'
+ 'slowfast_r50_8x8x1_256e_kinetics400_rgb/'
+ 'slowfast_r50_8x8x1_256e_kinetics400_rgb_20200716-73547d2b.pth')))
dataset_type = 'AVADataset'
data_root = 'data/ava/rawframes'
@@ -95,7 +102,3 @@
optim_wrapper = dict(
optimizer=dict(type='SGD', lr=0.075, momentum=0.9, weight_decay=0.00001),
clip_grad=dict(max_norm=40, norm_type=2))
-
-load_from = ('https://download.openmmlab.com/mmaction/recognition/slowfast/'
- 'slowfast_r50_8x8x1_256e_kinetics400_rgb/'
- 'slowfast_r50_8x8x1_256e_kinetics400_rgb_20200716-73547d2b.pth')
diff --git a/configs/detection/ava/slowfast_kinetics400_pretrained_r50_8x8x1_20e_8xb8_ava_rgb.py b/configs/detection/ava/slowfast_kinetics400-pretrained-r50_8xb8-8x8x1-20e_ava21-rgb.py
similarity index 77%
rename from configs/detection/ava/slowfast_kinetics400_pretrained_r50_8x8x1_20e_8xb8_ava_rgb.py
rename to configs/detection/ava/slowfast_kinetics400-pretrained-r50_8xb8-8x8x1-20e_ava21-rgb.py
index 47d2b9f1df..c1f8b4d6e0 100644
--- a/configs/detection/ava/slowfast_kinetics400_pretrained_r50_8x8x1_20e_8xb8_ava_rgb.py
+++ b/configs/detection/ava/slowfast_kinetics400-pretrained-r50_8xb8-8x8x1-20e_ava21-rgb.py
@@ -1,8 +1,14 @@
-_base_ = ['slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb.py']
+_base_ = ['slowfast_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.py']
model = dict(
backbone=dict(
- resample_rate=4, speed_ratio=4, slow_pathway=dict(fusion_kernel=7)))
+ resample_rate=4,
+ speed_ratio=4,
+ slow_pathway=dict(fusion_kernel=7),
+        pretrained=(
+ 'https://download.openmmlab.com/mmaction/recognition/slowfast/'
+ 'slowfast_r50_8x8x1_256e_kinetics400_rgb/'
+ 'slowfast_r50_8x8x1_256e_kinetics400_rgb_20200716-73547d2b.pth')))
dataset_type = 'AVADataset'
data_root = 'data/ava/rawframes'
@@ -39,10 +45,6 @@
proposal_file=proposal_file_train,
data_prefix=dict(img=data_root)))
-load_from = ('https://download.openmmlab.com/mmaction/recognition/slowfast/'
- 'slowfast_r50_8x8x1_256e_kinetics400_rgb/'
- 'slowfast_r50_8x8x1_256e_kinetics400_rgb_20200716-73547d2b.pth')
-
optim_wrapper = dict(
optimizer=dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.00001),
clip_grad=dict(max_norm=40, norm_type=2))
diff --git a/configs/detection/ava/slowfast_temporal_max_focal_alpha3_gamma1_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb6_ava22_rgb.py b/configs/detection/ava/slowfast_temporal_max_focal_alpha3_gamma1_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb6_ava22_rgb.py
deleted file mode 100644
index b90e82cc9d..0000000000
--- a/configs/detection/ava/slowfast_temporal_max_focal_alpha3_gamma1_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb6_ava22_rgb.py
+++ /dev/null
@@ -1,12 +0,0 @@
-_base_ = [
- 'slowfast_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb6_ava22_rgb.py'
-]
-
-model = dict(
- roi_head=dict(
- bbox_roi_extractor=dict(temporal_pool_mode='max'),
- bbox_head=dict(focal_alpha=3.0, focal_gamma=1.0)))
-
-load_from = ('https://download.openmmlab.com/mmaction/recognition/slowfast/'
- 'slowfast_r50_8x8x1_256e_kinetics400_rgb/'
- 'slowfast_r50_8x8x1_256e_kinetics400_rgb_20200716-73547d2b.pth')
diff --git a/configs/detection/ava/slowfast_temporal_max_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb6_ava22_rgb.py b/configs/detection/ava/slowfast_temporal_max_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb6_ava22_rgb.py
deleted file mode 100644
index 8207d1c604..0000000000
--- a/configs/detection/ava/slowfast_temporal_max_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb6_ava22_rgb.py
+++ /dev/null
@@ -1,9 +0,0 @@
-_base_ = [
- 'slowfast_kinetics400_pretrained_r50_8x8x1_cosine_10e_8xb6_ava22_rgb.py'
-]
-
-model = dict(roi_head=dict(bbox_roi_extractor=dict(temporal_pool_mode='max')))
-
-load_from = ('https://download.openmmlab.com/mmaction/recognition/slowfast/'
- 'slowfast_r50_8x8x1_256e_kinetics400_rgb/'
- 'slowfast_r50_8x8x1_256e_kinetics400_rgb_20200716-73547d2b.pth')
diff --git a/configs/detection/ava/slowonly_nl_kinetics400_pretrained_r50_8x8x1_20e_8xb16_ava_rgb.py b/configs/detection/ava/slowonly_kinetics400-pretrained-r101_8xb16-8x8x1-20e_ava21-rgb.py
similarity index 86%
rename from configs/detection/ava/slowonly_nl_kinetics400_pretrained_r50_8x8x1_20e_8xb16_ava_rgb.py
rename to configs/detection/ava/slowonly_kinetics400-pretrained-r101_8xb16-8x8x1-20e_ava21-rgb.py
index a588c5d5b9..815e61c2fc 100644
--- a/configs/detection/ava/slowonly_nl_kinetics400_pretrained_r50_8x8x1_20e_8xb16_ava_rgb.py
+++ b/configs/detection/ava/slowonly_kinetics400-pretrained-r101_8xb16-8x8x1-20e_ava21-rgb.py
@@ -1,4 +1,12 @@
-_base_ = ['slowonly_nl_kinetics400_pretrained_r50_4x16x1_20e_8x16_ava_rgb.py']
+_base_ = ['slowonly_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.py']
+
+model = dict(
+ backbone=dict(
+ depth=101,
+ pretrained=(
+ 'https://download.openmmlab.com/mmaction/recognition/slowonly/'
+ 'omni/slowonly_r101_without_omni_8x8x1_kinetics400_rgb_'
+ '20200926-0c730aef.pth')))
dataset_type = 'AVADataset'
data_root = 'data/ava/rawframes'
@@ -62,9 +70,3 @@
data_prefix=dict(img=data_root),
test_mode=True))
test_dataloader = val_dataloader
-
-load_from = (
- 'https://download.openmmlab.com/mmaction/recognition/slowonly/'
- 'slowonly_nl_embedded_gaussian_r50_8x8x1_150e_kinetics400_rgb/'
- 'slowonly_nl_embedded_gaussian_r50_8x8x1_150e_kinetics400_rgb_20210308-e8dd9e82.pth' # noqa: E501
-)
diff --git a/configs/detection/ava/slowonly_kinetics400-pretrained-r50-nl_8xb16-4x16x1-20e_ava21-rgb.py b/configs/detection/ava/slowonly_kinetics400-pretrained-r50-nl_8xb16-4x16x1-20e_ava21-rgb.py
new file mode 100644
index 0000000000..43b0fa1a28
--- /dev/null
+++ b/configs/detection/ava/slowonly_kinetics400-pretrained-r50-nl_8xb16-4x16x1-20e_ava21-rgb.py
@@ -0,0 +1,16 @@
+_base_ = ['slowonly_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.py']
+
+model = dict(
+ backbone=dict(
+ pretrained=(
+ 'https://download.openmmlab.com/mmaction/recognition/slowonly/'
+ 'slowonly_nl_embedded_gaussian_r50_4x16x1_150e_kinetics400_rgb/'
+ 'slowonly_nl_embedded_gaussian_r50_4x16x1_150e_kinetics400_rgb_'
+ '20210308-0d6e5a69.pth'),
+ norm_cfg=dict(type='BN3d', requires_grad=True),
+ non_local=((0, 0, 0), (1, 0, 1, 0), (1, 0, 1, 0, 1, 0), (0, 0, 0)),
+ non_local_cfg=dict(
+ sub_sample=True,
+ use_scale=True,
+ norm_cfg=dict(type='BN3d', requires_grad=True),
+ mode='embedded_gaussian')))
diff --git a/configs/detection/ava/slowonly_kinetics400_pretrained_r101_8x8x1_20e_8xb16_ava_rgb.py b/configs/detection/ava/slowonly_kinetics400-pretrained-r50-nl_8xb16-8x8x1-20e_ava21-rgb.py
similarity index 84%
rename from configs/detection/ava/slowonly_kinetics400_pretrained_r101_8x8x1_20e_8xb16_ava_rgb.py
rename to configs/detection/ava/slowonly_kinetics400-pretrained-r50-nl_8xb16-8x8x1-20e_ava21-rgb.py
index 6285efe789..a962f10c11 100644
--- a/configs/detection/ava/slowonly_kinetics400_pretrained_r101_8x8x1_20e_8xb16_ava_rgb.py
+++ b/configs/detection/ava/slowonly_kinetics400-pretrained-r50-nl_8xb16-8x8x1-20e_ava21-rgb.py
@@ -1,6 +1,14 @@
-_base_ = ['slowonly_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb.py']
+_base_ = [
+ 'slowonly_kinetics400-pretrained-r50-nl_8xb16-4x16x1-20e_ava21-rgb.py'
+]
-model = dict(backbone=dict(depth=101))
+model = dict(
+ backbone=dict(
+ pretrained=(
+ 'https://download.openmmlab.com/mmaction/recognition/slowonly/'
+ 'slowonly_nl_embedded_gaussian_r50_8x8x1_150e_kinetics400_rgb/'
+ 'slowonly_nl_embedded_gaussian_r50_8x8x1_150e_kinetics400_rgb_'
+ '20210308-e8dd9e82.pth')))
dataset_type = 'AVADataset'
data_root = 'data/ava/rawframes'
@@ -64,7 +72,3 @@
data_prefix=dict(img=data_root),
test_mode=True))
test_dataloader = val_dataloader
-
-load_from = ('https://download.openmmlab.com/mmaction/recognition/slowonly/'
- 'omni/slowonly_r101_without_omni_8x8x1_'
- 'kinetics400_rgb_20200926-0c730aef.pth')
diff --git a/configs/detection/ava/slowonly_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb.py b/configs/detection/ava/slowonly_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.py
similarity index 93%
rename from configs/detection/ava/slowonly_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb.py
rename to configs/detection/ava/slowonly_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.py
index 20f047924b..ec107941b3 100644
--- a/configs/detection/ava/slowonly_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb.py
+++ b/configs/detection/ava/slowonly_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.py
@@ -92,7 +92,3 @@
optim_wrapper = dict(
optimizer=dict(type='SGD', lr=0.2, momentum=0.9, weight_decay=0.00001),
clip_grad=dict(max_norm=40, norm_type=2))
-
-load_from = ('https://download.openmmlab.com/mmaction/recognition/slowonly/'
- 'slowonly_r50_4x16x1_256e_kinetics400_rgb/'
- 'slowonly_r50_4x16x1_256e_kinetics400_rgb_20200704-a69556c6.pth')
diff --git a/configs/detection/ava/slowonly_kinetics700-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.py b/configs/detection/ava/slowonly_kinetics700-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.py
new file mode 100644
index 0000000000..c9e10def96
--- /dev/null
+++ b/configs/detection/ava/slowonly_kinetics700-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.py
@@ -0,0 +1,9 @@
+_base_ = ['slowonly_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.py']
+
+model = dict(
+ backbone=dict(
+ pretrained=(
+ 'https://download.openmmlab.com/mmaction/v1.0/recognition/slowonly'
+ '/slowonly_imagenet-pretrained-r50_8xb16-4x16x1-steplr-150e_'
+            'kinetics700-rgb/slowonly_imagenet-pretrained-r50_8xb16-4x16x1-'
+ 'steplr-150e_kinetics700-rgb_20220901-f73b3e89.pth')))
diff --git a/configs/detection/ava/slowonly_nl_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb.py b/configs/detection/ava/slowonly_nl_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb.py
deleted file mode 100644
index f68304ec08..0000000000
--- a/configs/detection/ava/slowonly_nl_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb.py
+++ /dev/null
@@ -1,17 +0,0 @@
-_base_ = ['slowonly_kinetics400_pretrained_r50_4x16x1_20e_8xb16_ava_rgb.py']
-
-model = dict(
- backbone=dict(
- norm_cfg=dict(type='BN3d', requires_grad=True),
- non_local=((0, 0, 0), (1, 0, 1, 0), (1, 0, 1, 0, 1, 0), (0, 0, 0)),
- non_local_cfg=dict(
- sub_sample=True,
- use_scale=True,
- norm_cfg=dict(type='BN3d', requires_grad=True),
- mode='embedded_gaussian')))
-
-load_from = (
- 'https://download.openmmlab.com/mmaction/recognition/slowonly/'
- 'slowonly_nl_embedded_gaussian_r50_4x16x1_150e_kinetics400_rgb/'
- 'slowonly_nl_embedded_gaussian_r50_4x16x1_150e_kinetics400_rgb_20210308-0d6e5a69.pth' # noqa: E501
-)
diff --git a/configs/detection/lfb/README.md b/configs/detection/lfb/README.md
new file mode 100644
index 0000000000..1d33a7d7e9
--- /dev/null
+++ b/configs/detection/lfb/README.md
@@ -0,0 +1,129 @@
+# LFB
+
+[Long-term feature banks for detailed video understanding](https://openaccess.thecvf.com/content_CVPR_2019/html/Wu_Long-Term_Feature_Banks_for_Detailed_Video_Understanding_CVPR_2019_paper.html)
+
+
+
+## Abstract
+
+
+
+To understand the world, we humans constantly need to relate the present to the past, and put events in context. In this paper, we enable existing video models to do the same. We propose a long-term feature bank---supportive information extracted over the entire span of a video---to augment state-of-the-art video models that otherwise would only view short clips of 2-5 seconds. Our experiments demonstrate that augmenting 3D convolutional networks with a long-term feature bank yields state-of-the-art results on three challenging video datasets: AVA, EPIC-Kitchens, and Charades.
+
+
+
+
+
+
+
+## Results and Models
+
+### AVA2.1
+
+| frame sampling strategy | resolution | gpus | backbone | pretrain | mAP | gpu_mem(M) | config | ckpt | log |
+| :---------------------: | :--------: | :--: | :----------------------------------: | :----------: | :---: | :--------: | :---------------------------------: | :-------------------------------: | :------------------------------: |
+| 4x16x1 | raw | 8 | SlowOnly ResNet50 (with Nonlocal LFB) | Kinetics-400 | 24.05 | 8620 | [config](/configs/detection/lfb/slowonly-lfb-nl_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/detection/lfb/slowonly-lfb-nl_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb/slowonly-lfb-nl_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb_20220906-4c5b9f25.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/detection/lfb/slowonly-lfb-nl_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb/slowonly-lfb-nl_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb.log) |
+| 4x16x1 | raw | 8 | SlowOnly ResNet50 (with Max LFB) | Kinetics-400 | 22.15 | 8425 | [config](/configs/detection/lfb/slowonly-lfb-max_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/detection/lfb/slowonly-lfb-max_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb/slowonly-lfb-max_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb_20220906-4963135b.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/detection/lfb/slowonly-lfb-max_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb/slowonly-lfb-max_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb.log) |
+
+Note:
+
+1. The **gpus** indicates the number of gpu we used to get the checkpoint.
+ According to the [Linear Scaling Rule](https://arxiv.org/abs/1706.02677), you may set the learning rate proportional to the batch size if you use different GPUs or videos per GPU,
+ e.g., lr=0.01 for 4 GPUs x 2 video/gpu and lr=0.08 for 16 GPUs x 4 video/gpu.
+2. We use `slowonly_r50_4x16x1` instead of `I3D-R50-NL` in the original paper as the backbone of LFB, but we have achieved the similar improvement: (ours: 20.1 -> 24.05 vs. author: 22.1 -> 25.8).
+3. Because the long-term features are randomly sampled in testing, the test accuracy may have some differences.
+4. Before train or test lfb, you need to infer feature bank with the [slowonly-lfb_ava-pretrained-r50_infer-4x16x1_ava21-rgb.py](/configs/detection/lfb/slowonly-lfb_ava-pretrained-r50_infer-4x16x1_ava21-rgb.py). For more details on infer feature bank, you can refer to [Train](#Train) part.
+5. You can also download long-term feature bank from [AVA_train_val_float32_lfb](https://download.openmmlab.com/mmaction/detection/lfb/AVA_train_val_float32_lfb.rar) or [AVA_train_val_float16_lfb](https://download.openmmlab.com/mmaction/detection/lfb/AVA_train_val_float16_lfb.rar), and then put them on `lfb_prefix_path`.
+6. The ROIHead now supports single-label classification (i.e. the network outputs at most
+   one label per actor). This can be done by (a) setting multilabel=False during training and
+   (b) setting the test_cfg.rcnn.action_thr for testing.
+
+## Train
+
+### a. Infer long-term feature bank for training
+
+Before train or test lfb, you need to infer long-term feature bank first.
+
+Specifically, run the test on the training, validation, testing dataset with the config file [slowonly-lfb_ava-pretrained-r50_infer-4x16x1_ava21-rgb.py](/configs/detection/lfb/slowonly-lfb_ava-pretrained-r50_infer-4x16x1_ava21-rgb.py) (The config file will only infer the feature bank of training dataset and you need set `dataset_mode = 'val'` to infer the feature bank of validation dataset in the config file.), and the shared head [LFBInferHead](/mmaction/models/roi_heads/shared_heads/lfb_infer_head.py) will generate the feature bank.
+
+A long-term feature bank file of AVA training and validation datasets with float32 precision occupies 3.3 GB. If store the features with float16 precision, the feature bank occupies 1.65 GB.
+
+You can use the following command to infer feature bank of AVA training and validation dataset and the feature bank will be stored in `lfb_prefix_path/lfb_train.pkl` and `lfb_prefix_path/lfb_val.pkl`.
+
+```shell
+# set `dataset_mode = 'train'` in lfb_slowonly_r50_ava_infer.py
+python tools/test.py slowonly-lfb_ava-pretrained-r50_infer-4x16x1_ava21-rgb.py \
+ checkpoints/YOUR_BASELINE_CHECKPOINT.pth --eval mAP
+
+# set `dataset_mode = 'val'` in lfb_slowonly_r50_ava_infer.py
+python tools/test.py slowonly-lfb_ava-pretrained-r50_infer-4x16x1_ava21-rgb.py \
+ checkpoints/YOUR_BASELINE_CHECKPOINT.pth --eval mAP
+```
+
+We use [slowonly_r50_4x16x1 checkpoint](https://download.openmmlab.com/mmaction/detection/ava/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_20201217-40061d5f.pth) from [slowonly_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb](/configs/detection/ava/slowonly_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.py) to infer feature bank.
+
+### b. Train LFB
+
+You can use the following command to train a model.
+
+```shell
+python tools/train.py ${CONFIG_FILE} [optional arguments]
+```
+
+Example: train LFB model on AVA with half-precision long-term feature bank.
+
+```shell
+python tools/train.py configs/detection/lfb/slowonly-lfb-nl_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb.py \
+ --validate --seed 0 --deterministic
+```
+
+For more details and optional arguments infos, you can refer to the **Training** part in the [Training and Test Tutorial](/docs/en/user_guides/4_train_test.md).
+
+## Test
+
+### a. Infer long-term feature bank for testing
+
+Before train or test lfb, you also need to infer long-term feature bank first. If you have generated the feature bank file, you can skip it.
+
+The step is the same with **Infer long-term feature bank for training** part in [Train](#Train).
+
+### b. Test LFB
+
+You can use the following command to test a model.
+
+```shell
+python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
+```
+
+Example: test LFB model on AVA with half-precision long-term feature bank and dump the result to a pkl file.
+
+```shell
+python tools/test.py configs/detection/lfb/slowonly-lfb-nl_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb.py \
+ checkpoints/SOME_CHECKPOINT.pth --dump result.pkl
+```
+
+For more details, you can refer to the **Test** part in the [Training and Test Tutorial](/docs/en/user_guides/4_train_test.md).
+
+## Citation
+
+
+
+```BibTeX
+@inproceedings{gu2018ava,
+ title={Ava: A video dataset of spatio-temporally localized atomic visual actions},
+ author={Gu, Chunhui and Sun, Chen and Ross, David A and Vondrick, Carl and Pantofaru, Caroline and Li, Yeqing and Vijayanarasimhan, Sudheendra and Toderici, George and Ricco, Susanna and Sukthankar, Rahul and others},
+ booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
+ pages={6047--6056},
+ year={2018}
+}
+```
+
+```BibTeX
+@inproceedings{wu2019long,
+ title={Long-term feature banks for detailed video understanding},
+ author={Wu, Chao-Yuan and Feichtenhofer, Christoph and Fan, Haoqi and He, Kaiming and Krahenbuhl, Philipp and Girshick, Ross},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={284--293},
+ year={2019}
+}
+```
diff --git a/configs/detection/lfb/metafile.yml b/configs/detection/lfb/metafile.yml
new file mode 100644
index 0000000000..055032ad18
--- /dev/null
+++ b/configs/detection/lfb/metafile.yml
@@ -0,0 +1,47 @@
+Collections:
+- Name: LFB
+ README: configs/detection/lfb/README.md
+ Paper:
+ URL: https://arxiv.org/abs/1812.05038
+ Title: "Long-Term Feature Banks for Detailed Video Understanding"
+
+Models:
+ - Name: slowonly-lfb-nl_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb
+ Config: configs/detection/lfb/slowonly-lfb-nl_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb.py
+ In Collection: LFB
+ Metadata:
+ Architecture: ResNet50
+ Batch Size: 12
+ Epochs: 20
+ Pretrained: Kinetics-400
+ Resolution: short-side 320
+ Training Data: AVA v2.1
+ Training Resources: 8 GPUs
+ Modality: RGB
+ Results:
+ - Dataset: AVA v2.1
+ Task: Action Detection
+ Metrics:
+ mAP: 24.05
+ Training Log: https://download.openmmlab.com/mmaction/v1.0/detection/lfb/slowonly-lfb-nl_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb/slowonly-lfb-nl_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb.log
+ Weights: https://download.openmmlab.com/mmaction/v1.0/detection/lfb/slowonly-lfb-nl_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb/slowonly-lfb-nl_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb_20220906-4c5b9f25.pth
+
+ - Name: slowonly-lfb-max_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb
+    Config: configs/detection/lfb/slowonly-lfb-max_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb.py
+ In Collection: LFB
+ Metadata:
+ Architecture: ResNet50
+ Batch Size: 12
+ Epochs: 20
+ Pretrained: Kinetics-400
+ Resolution: short-side 320
+ Training Data: AVA v2.1
+ Training Resources: 8 GPUs
+ Modality: RGB
+ Results:
+ - Dataset: AVA v2.1
+ Task: Action Detection
+ Metrics:
+ mAP: 22.15
+ Training Log: https://download.openmmlab.com/mmaction/v1.0/detection/lfb/slowonly-lfb-max_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb/slowonly-lfb-max_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb.log
+ Weights: https://download.openmmlab.com/mmaction/v1.0/detection/lfb/slowonly-lfb-max_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb/slowonly-lfb-max_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb_20220906-4963135b.pth
diff --git a/configs/detection/lfb/lfb_max_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb.py b/configs/detection/lfb/slowonly-lfb-max_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb.py
similarity index 58%
rename from configs/detection/lfb/lfb_max_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb.py
rename to configs/detection/lfb/slowonly-lfb-max_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb.py
index 6d451eb15c..7bedd34cd6 100644
--- a/configs/detection/lfb/lfb_max_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb.py
+++ b/configs/detection/lfb/slowonly-lfb-max_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb.py
@@ -1,4 +1,6 @@
-_base_ = ['lfb_nl_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb.py']
+_base_ = [
+ 'slowonly-lfb-nl_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb.py'
+]
model = dict(
roi_head=dict(
diff --git a/configs/detection/lfb/lfb_nl_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb.py b/configs/detection/lfb/slowonly-lfb-nl_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb.py
similarity index 88%
rename from configs/detection/lfb/lfb_nl_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb.py
rename to configs/detection/lfb/slowonly-lfb-nl_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb.py
index 93416e9740..2da2bd3a7c 100644
--- a/configs/detection/lfb/lfb_nl_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb.py
+++ b/configs/detection/lfb/slowonly-lfb-nl_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb.py
@@ -4,6 +4,7 @@
# model settings
lfb_prefix_path = 'data/ava/lfb_half'
+
max_num_sampled_feat = 5
window_size = 60
lfb_channels = 2048
@@ -49,13 +50,9 @@
'recall_93.9.pkl')
proposal_file_val = f'{anno_root}/ava_dense_proposals_val.FAIR.recall_93.9.pkl'
-file_client_args = dict(
- io_backend='petrel',
- path_mapping=dict({'data/ava': 's3://openmmlab/datasets/action/ava'}))
-
train_pipeline = [
dict(type='SampleAVAFrames', clip_len=4, frame_interval=16),
- dict(type='RawFrameDecode', **file_client_args),
+ dict(type='RawFrameDecode'),
dict(type='RandomRescale', scale_range=(256, 320)),
dict(type='RandomCrop', size=256),
dict(type='Flip', flip_ratio=0.5),
@@ -66,7 +63,7 @@
val_pipeline = [
dict(
type='SampleAVAFrames', clip_len=4, frame_interval=16, test_mode=True),
- dict(type='RawFrameDecode', **file_client_args),
+ dict(type='RawFrameDecode'),
dict(type='Resize', scale=(-1, 256)),
dict(type='FormatShape', input_format='NCTHW', collapse=True),
dict(type='PackActionInputs')
@@ -100,7 +97,7 @@
label_file=label_file,
proposal_file=proposal_file_val,
data_prefix=dict(img=data_root),
- person_det_score_thr=0.9,
+ person_det_score_thr=0.85,
test_mode=True))
test_dataloader = val_dataloader
@@ -109,7 +106,8 @@
type='AVAMetric',
ann_file=ann_file_val,
label_file=label_file,
- exclude_file=exclude_file_val)
+ exclude_file=exclude_file_val,
+ action_thr=0.0)
test_evaluator = val_evaluator
default_hooks = dict(checkpoint=dict(interval=3, max_keep_ckpts=3))
@@ -139,8 +137,3 @@
optim_wrapper = dict(
optimizer=dict(type='SGD', lr=0.15, momentum=0.9, weight_decay=1e-05),
clip_grad=dict(max_norm=20, norm_type=2))
-
-find_unused_parameters = False
-load_from = ('https://download.openmmlab.com/mmaction/recognition/slowonly/'
- 'slowonly_r50_4x16x1_256e_kinetics400_rgb/'
- 'slowonly_r50_4x16x1_256e_kinetics400_rgb_20200704-a69556c6.pth')
diff --git a/configs/detection/lfb/lfb_infer.py b/configs/detection/lfb/slowonly-lfb_ava-pretrained-r50_infer-4x16x1_ava21-rgb.py
similarity index 100%
rename from configs/detection/lfb/lfb_infer.py
rename to configs/detection/lfb/slowonly-lfb_ava-pretrained-r50_infer-4x16x1_ava21-rgb.py