feat: Update crowdhuman dataset configs.

Peterande · Nov 16, 2024 · 8ed01a1 · 8ed01a1
1 parent 414be07
commit 8ed01a1
Show file tree

Hide file tree

Showing 11 changed files with 381 additions and 9 deletions.
diff --git a/README.md b/README.md
@@ -239,6 +239,13 @@ python tools/resize_obj365.py --base_dir ${BASE_DIR}
     ```
 
 
+</details>
+
+<details>
+<summary>CrowdHuman</summary>
+
+Download the CrowdHuman dataset in COCO format here: [url](https://aistudio.baidu.com/datasetdetail/231455)
+
 </details>
 
 <details>

diff --git a/README_cn.md b/README_cn.md
@@ -234,6 +234,13 @@ python tools/resize_obj365.py --base_dir ${BASE_DIR}
     ```
 
 
+</details>
+
+<details>
+<summary>CrowdHuman</summary>
+
+在此下载 COCO 格式的数据集：[链接](https://aistudio.baidu.com/datasetdetail/231455)
+
 </details>
 
 <details>

diff --git a/README_ja.md b/README_ja.md
@@ -242,6 +242,13 @@ python tools/resize_obj365.py --base_dir ${BASE_DIR}
     ```
 
 
+</details>
+
+<details>
+<summary>CrowdHuman</summary>
+
+こちらからCOCOフォーマットのデータセットをダウンロードしてください：[リンク](https://aistudio.baidu.com/datasetdetail/231455)
+
 </details>
 
 <details>

diff --git a/configs/dataset/crowdhuman_detection.yml b/configs/dataset/crowdhuman_detection.yml
@@ -0,0 +1,41 @@
+task: detection  # top-level settings of the CrowdHuman dataset config
+
+evaluator:
+  type: CocoEvaluator
+  iou_types: ['bbox', ]  # only the 'bbox' IoU type is listed
+
+num_classes: 1 # your dataset classes; NOTE(review): confirm against the category ids in the annotation JSON
+remap_mscoco_category: False  # presumably disables MS-COCO category-id remapping — confirm
+
+train_dataloader:  # training loader; reads the CrowdHuman train split
+  type: DataLoader
+  dataset:
+    type: CocoDetection
+    img_folder: /data/CrowdHuman/coco/CrowdHuman_train  # absolute path; adjust to your local copy
+    ann_file: /data/CrowdHuman/coco/Chuman-train.json  # absolute path; adjust to your local copy
+    return_masks: False
+    transforms:
+      type: Compose
+      ops: ~  # null here; presumably supplied by a config that includes this file — confirm
+  shuffle: True
+  num_workers: 4
+  drop_last: True
+  collate_fn:
+    type: BatchImageCollateFunction
+
+
+val_dataloader:  # validation loader; reads the CrowdHuman val split
+  type: DataLoader
+  dataset:
+    type: CocoDetection
+    img_folder: /data/CrowdHuman/coco/CrowdHuman_val  # absolute path; adjust to your local copy
+    ann_file: /data/CrowdHuman/coco/Chuman-val.json  # absolute path; adjust to your local copy
+    return_masks: False
+    transforms:
+      type: Compose
+      ops: ~  # null here; presumably supplied by a config that includes this file — confirm
+  shuffle: False  # unlike train_dataloader, which sets shuffle: True
+  num_workers: 4
+  drop_last: False  # unlike train_dataloader, which sets drop_last: True
+  collate_fn:
+    type: BatchImageCollateFunction
diff --git a/configs/dfine/crowdhuman/dfine_hgnetv2_l_ch.yml b/configs/dfine/crowdhuman/dfine_hgnetv2_l_ch.yml
@@ -0,0 +1,44 @@
+__include__: [  # base configs; the relative paths resolve from this file's directory
+  '../../dataset/crowdhuman_detection.yml',  # -> configs/dataset/crowdhuman_detection.yml
+  '../../runtime.yml',
+  '../include/dataloader.yml',
+  '../include/optimizer.yml',
+  '../include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_l_crowdhuman
+
+
+HGNetv2:  # HGNetv2 settings for the L-size config
+  name: 'B4'
+  return_idx: [1, 2, 3]
+  freeze_stem_only: True
+  freeze_at: 0
+  freeze_norm: True
+
+optimizer:  # AdamW groups; each `params` is a regex, presumably matched against parameter names
+  type: AdamW
+  params:
+    -
+      params: '^(?=.*backbone)(?!.*(?:norm|bn)).*$'  # backbone, excluding names containing norm or bn
+      lr: 0.0000125
+    -
+      params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'  # encoder/decoder norm or bn
+      weight_decay: 0.
+
+  lr: 0.00025  # values below apply where a group does not set its own
+  betas: [0.9, 0.999]
+  weight_decay: 0.000125
+
+
+# Increase to search for the optimal ema
+epoches: 140  # 20 more than policy.epoch / stop_epoch below
+train_dataloader:  # nested keys under the train_dataloader defined in the dataset config
+  dataset:
+    transforms:
+      policy:
+        epoch: 120  # same value as collate_fn.stop_epoch
+  collate_fn:
+    stop_epoch: 120
+    ema_restart_decay: 0.9999
+    base_size_repeat: 4
diff --git a/configs/dfine/crowdhuman/dfine_hgnetv2_m_ch.yml b/configs/dfine/crowdhuman/dfine_hgnetv2_m_ch.yml
@@ -0,0 +1,60 @@
+__include__: [  # base configs; the relative paths resolve from this file's directory
+  '../../dataset/crowdhuman_detection.yml',  # -> configs/dataset/crowdhuman_detection.yml
+  '../../runtime.yml',
+  '../include/dataloader.yml',
+  '../include/optimizer.yml',
+  '../include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_m_crowdhuman
+
+
+DFINE:
+  backbone: HGNetv2  # refers to the HGNetv2 section below
+
+HGNetv2:  # HGNetv2 settings for the M-size config
+  name: 'B2'
+  return_idx: [1, 2, 3]
+  freeze_at: -1
+  freeze_norm: False
+  use_lab: True
+
+DFINETransformer:
+  num_layers: 4  # 5 6
+  eval_idx: -1  # -2 -3
+
+HybridEncoder:
+  in_channels: [384, 768, 1536]  # three entries, one per return_idx entry above
+  hidden_dim: 256
+  depth_mult: 0.67
+
+optimizer:  # AdamW groups; each `params` is a regex, presumably matched against parameter names
+  type: AdamW
+  params:
+    -
+      params: '^(?=.*backbone)(?!.*(?:norm|bn)).*$'  # backbone, excluding names containing norm or bn
+      lr: 0.000025
+    -
+      params: '^(?=.*backbone)(?=.*(?:norm|bn)).*$'  # backbone names containing norm or bn
+      lr: 0.000025
+      weight_decay: 0.
+    -
+      params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'  # encoder/decoder norm, bn or bias
+      weight_decay: 0.
+
+  lr: 0.00025  # values below apply where a group does not set its own
+  betas: [0.9, 0.999]
+  weight_decay: 0.000125
+
+
+# Increase to search for the optimal ema
+epoches: 220  # 20 more than policy.epoch / stop_epoch below
+train_dataloader:  # nested keys under the train_dataloader defined in the dataset config
+  dataset:
+    transforms:
+      policy:
+        epoch: 200  # same value as collate_fn.stop_epoch
+  collate_fn:
+    stop_epoch: 200
+    ema_restart_decay: 0.9999
+    base_size_repeat: 6
diff --git a/configs/dfine/crowdhuman/dfine_hgnetv2_n_ch.yml b/configs/dfine/crowdhuman/dfine_hgnetv2_n_ch.yml
@@ -0,0 +1,82 @@
+__include__: [  # base configs; the relative paths resolve from this file's directory
+  '../../dataset/crowdhuman_detection.yml',  # -> configs/dataset/crowdhuman_detection.yml
+  '../../runtime.yml',
+  '../include/dataloader.yml',
+  '../include/optimizer.yml',
+  '../include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_n_crowdhuman
+
+
+DFINE:
+  backbone: HGNetv2  # refers to the HGNetv2 section below
+
+HGNetv2:  # HGNetv2 settings for the N-size config
+  name: 'B0'
+  return_idx: [2, 3]  # two indices; the encoder/decoder lists below also have two entries
+  freeze_at: -1
+  freeze_norm: False
+  use_lab: True
+
+
+HybridEncoder:
+  in_channels: [512, 1024]
+  feat_strides: [16, 32]
+
+  # intra
+  hidden_dim: 128
+  use_encoder_idx: [1]
+  dim_feedforward: 512
+
+  # cross
+  expansion: 0.34
+  depth_mult: 0.5
+
+
+DFINETransformer:
+  feat_channels: [128, 128]  # each entry equals HybridEncoder.hidden_dim above
+  feat_strides: [16, 32]  # same list as HybridEncoder.feat_strides
+  hidden_dim: 128
+  dim_feedforward: 512
+  num_levels: 2  # equals the length of feat_channels / feat_strides
+
+  num_layers: 3
+  eval_idx: -1
+
+  num_points: [6, 6]  # one entry per level (num_levels: 2)
+
+optimizer:  # AdamW groups; each `params` is a regex, presumably matched against parameter names
+  type: AdamW
+  params:
+    -
+      params: '^(?=.*backbone)(?!.*(?:norm|bn)).*$'  # backbone, excluding names containing norm or bn
+      lr: 0.0004
+    -
+      params: '^(?=.*backbone)(?=.*(?:norm|bn)).*$'  # backbone names containing norm or bn
+      lr: 0.0004
+      weight_decay: 0.
+    -
+      params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'  # encoder/decoder norm, bn or bias
+      weight_decay: 0.
+
+  lr: 0.0008  # values below apply where a group does not set its own
+  betas: [0.9, 0.999]
+  weight_decay: 0.0001
+
+
+# Increase to search for the optimal ema
+epoches: 220  # 20 more than policy.epoch / stop_epoch below
+train_dataloader:  # nested keys under the train_dataloader defined in the dataset config
+  total_batch_size: 128
+  dataset:
+    transforms:
+      policy:
+        epoch: 200  # same value as collate_fn.stop_epoch
+  collate_fn:
+    stop_epoch: 200
+    ema_restart_decay: 0.9999
+    base_size_repeat: ~  # null; the other size configs in this commit set an integer here
+
+val_dataloader:
+  total_batch_size: 256  # twice the training total_batch_size
diff --git a/configs/dfine/crowdhuman/dfine_hgnetv2_s_ch.yml b/configs/dfine/crowdhuman/dfine_hgnetv2_s_ch.yml
@@ -0,0 +1,65 @@
+__include__: [  # base configs; the relative paths resolve from this file's directory
+  '../../dataset/crowdhuman_detection.yml',  # -> configs/dataset/crowdhuman_detection.yml
+  '../../runtime.yml',
+  '../include/dataloader.yml',
+  '../include/optimizer.yml',
+  '../include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_s_crowdhuman
+
+
+DFINE:
+  backbone: HGNetv2  # refers to the HGNetv2 section below
+
+HGNetv2:  # HGNetv2 settings for the S-size config
+  name: 'B0'
+  return_idx: [1, 2, 3]
+  freeze_at: -1
+  freeze_norm: False
+  use_lab: True
+
+DFINETransformer:
+  num_layers: 3  # 4 5 6
+  eval_idx: -1  # -2 -3 -4
+
+HybridEncoder:
+  in_channels: [256, 512, 1024]  # three entries, one per return_idx entry above
+  hidden_dim: 256
+  depth_mult: 0.34
+  expansion: 0.5
+
+optimizer:  # AdamW groups; each `params` is a regex, presumably matched against parameter names
+  type: AdamW
+  params:
+    -
+      params: '^(?=.*backbone)(?!.*(?:norm|bn)).*$'  # backbone, excluding names containing norm or bn
+      lr: 0.0002
+    -
+      params: '^(?=.*backbone)(?=.*(?:norm|bn)).*$'  # backbone names containing norm or bn
+      lr: 0.0002
+      weight_decay: 0.
+    -
+      params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'  # encoder/decoder norm, bn or bias
+      weight_decay: 0.
+
+  lr: 0.0004  # values below apply where a group does not set its own
+  betas: [0.9, 0.999]
+  weight_decay: 0.0001
+
+
+# Increase to search for the optimal ema
+epoches: 220  # 20 more than policy.epoch / stop_epoch below
+train_dataloader:  # nested keys under the train_dataloader defined in the dataset config
+  total_batch_size: 64
+  dataset:
+    transforms:
+      policy:
+        epoch: 200  # same value as collate_fn.stop_epoch
+  collate_fn:
+    stop_epoch: 200
+    ema_restart_decay: 0.9999
+    base_size_repeat: 20
+
+val_dataloader:
+  total_batch_size: 128  # twice the training total_batch_size
diff --git a/configs/dfine/crowdhuman/dfine_hgnetv2_x_ch.yml b/configs/dfine/crowdhuman/dfine_hgnetv2_x_ch.yml
@@ -0,0 +1,55 @@
+__include__: [  # base configs; the relative paths resolve from this file's directory
+  '../../dataset/crowdhuman_detection.yml',  # -> configs/dataset/crowdhuman_detection.yml
+  '../../runtime.yml',
+  '../include/dataloader.yml',
+  '../include/optimizer.yml',
+  '../include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_x_crowdhuman
+
+
+DFINE:
+  backbone: HGNetv2  # refers to the HGNetv2 section below
+
+HGNetv2:  # HGNetv2 settings for the X-size config
+  name: 'B5'
+  return_idx: [1, 2, 3]
+  freeze_stem_only: True
+  freeze_at: 0
+  freeze_norm: True
+
+HybridEncoder:
+  hidden_dim: 384
+  dim_feedforward: 2048
+
+DFINETransformer:
+  feat_channels: [384, 384, 384]  # each entry equals HybridEncoder.hidden_dim above
+  reg_scale: 8
+
+optimizer:  # AdamW groups; each `params` is a regex, presumably matched against parameter names
+  type: AdamW
+  params:
+    -
+      params: '^(?=.*backbone)(?!.*(?:norm|bn)).*$'  # backbone, excluding names containing norm or bn
+      lr: 0.0000025
+    -
+      params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'  # encoder/decoder norm or bn
+      weight_decay: 0.
+
+  lr: 0.00025  # values below apply where a group does not set its own
+  betas: [0.9, 0.999]
+  weight_decay: 0.000125
+
+
+# Increase to search for the optimal ema
+epoches: 140  # 20 more than policy.epoch / stop_epoch below
+train_dataloader:  # nested keys under the train_dataloader defined in the dataset config
+  dataset:
+    transforms:
+      policy:
+        epoch: 120  # same value as collate_fn.stop_epoch
+  collate_fn:
+    stop_epoch: 120
+    ema_restart_decay: 0.9998  # the L/M/N/S configs in this commit use 0.9999
+    base_size_repeat: 3