From 53ba6d83ae40effc705681312fb15f5e7eab9a44 Mon Sep 17 00:00:00 2001 From: "daiwenxun.vendor" Date: Wed, 9 Nov 2022 17:10:42 +0800 Subject: [PATCH 1/5] add sync buffers hook to default hooks --- configs/_base_/default_runtime.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/configs/_base_/default_runtime.py b/configs/_base_/default_runtime.py index 92fb66701a..3a64a0750f 100644 --- a/configs/_base_/default_runtime.py +++ b/configs/_base_/default_runtime.py @@ -6,7 +6,8 @@ logger=dict(type='LoggerHook', interval=20, ignore_last=False), param_scheduler=dict(type='ParamSchedulerHook'), checkpoint=dict(type='CheckpointHook', interval=1, save_best='auto'), - sampler_seed=dict(type='DistSamplerSeedHook')) + sampler_seed=dict(type='DistSamplerSeedHook'), + sync_buffers=dict(type='SyncBuffersHook')) env_cfg = dict( cudnn_benchmark=False, From 713fa70731740a1590eba91adf2ac1574139b89f Mon Sep 17 00:00:00 2001 From: "daiwenxun.vendor" Date: Wed, 9 Nov 2022 20:23:11 +0800 Subject: [PATCH 2/5] add randomness --- tools/train.py | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/tools/train.py b/tools/train.py index ac45698412..07fb45f1ea 100644 --- a/tools/train.py +++ b/tools/train.py @@ -19,8 +19,8 @@ def parse_args(): type=str, const='auto', help='If specify checkpint path, resume from it, while if not ' - 'specify, try to auto resume from the latest checkpoint ' - 'in the work directory.') + 'specify, try to auto resume from the latest checkpoint ' + 'in the work directory.') parser.add_argument( '--amp', action='store_true', @@ -33,17 +33,26 @@ def parse_args(): '--auto-scale-lr', action='store_true', help='whether to auto scale the learning rate according to the ' - 'actual batch size and the original batch size.') + 'actual batch size and the original batch size.') + parser.add_argument('--seed', type=int, default=None, help='random seed') + parser.add_argument( + '--diff-rank-seed', + 
action='store_true', + help='Whether or not set different seeds for different ranks') + parser.add_argument( + '--deterministic', + action='store_true', + help='whether to set deterministic options for CUDNN backend.') parser.add_argument( '--cfg-options', nargs='+', action=DictAction, help='override some settings in the used config, the key-value pair ' - 'in xxx=yyy format will be merged into config file. If the value to ' - 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' - 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' - 'Note that the quotation marks are necessary and that no white space ' - 'is allowed.') + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') parser.add_argument( '--launcher', choices=['none', 'pytorch', 'slurm', 'mpi'], @@ -96,6 +105,12 @@ def merge_args(cfg, args): if args.auto_scale_lr: cfg.auto_scale_lr.enable = True + # set random seeds + cfg.randomness = dict( + seed=args.seed, + diff_rank_seed=args.diff_rank_seed, + deterministic=args.deterministic) + if args.cfg_options is not None: cfg.merge_from_dict(args.cfg_options) From 7a559da8f7da4a00a17d5ff48c7fd79fc11b46b9 Mon Sep 17 00:00:00 2001 From: "daiwenxun.vendor" Date: Wed, 9 Nov 2022 20:38:21 +0800 Subject: [PATCH 3/5] document the randomness options and lowercase the --diff-rank-seed help text --- docs/en/user_guides/4_train_test.md | 3 +++ tools/train.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/en/user_guides/4_train_test.md b/docs/en/user_guides/4_train_test.md index 4eb5db3418..6f63886a28 100644 --- a/docs/en/user_guides/4_train_test.md +++ b/docs/en/user_guides/4_train_test.md @@ -28,6 +28,9 @@ CUDA_VISIBLE_DEVICES=-1 python tools/train.py ${CONFIG_FILE} [ARGS] | `--amp` | Enable automatic-mixed-precision training. 
| | `--no-validate` | **Not suggested**. Disable checkpoint evaluation during training. | | `--auto-scale-lr` | Auto scale the learning rate according to the actual batch size and the original batch size. | +| `--seed` | Random seed. | +| `--diff-rank-seed` | Whether or not set different seeds for different ranks. | +| `--deterministic` | Whether to set deterministic options for CUDNN backend. | | `--cfg-options CFG_OPTIONS` | Override some settings in the used config, the key-value pair in xxx=yyy format will be merged into the config file. If the value to be overwritten is a list, it should be of the form of either `key="[a,b]"` or `key=a,b`. The argument also allows nested list/tuple values, e.g. `key="[(a,b),(c,d)]"`. Note that the quotation marks are necessary and that no white space is allowed. | | `--launcher {none,pytorch,slurm,mpi}` | Options for job launcher. Defaults to `none`. | diff --git a/tools/train.py b/tools/train.py index 07fb45f1ea..d1a88b8609 100644 --- a/tools/train.py +++ b/tools/train.py @@ -38,7 +38,7 @@ def parse_args(): parser.add_argument( '--diff-rank-seed', action='store_true', - help='Whether or not set different seeds for different ranks') + help='whether or not set different seeds for different ranks') parser.add_argument( '--deterministic', action='store_true', From 938b0264a03f736c2948f3177cb00d081a07cab8 Mon Sep 17 00:00:00 2001 From: Dai-Wenxun Date: Wed, 9 Nov 2022 20:39:09 +0800 Subject: [PATCH 4/5] fix lint --- docs/en/user_guides/4_train_test.md | 6 +++--- tools/train.py | 16 ++++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/en/user_guides/4_train_test.md b/docs/en/user_guides/4_train_test.md index 6f63886a28..a67448fde3 100644 --- a/docs/en/user_guides/4_train_test.md +++ b/docs/en/user_guides/4_train_test.md @@ -28,9 +28,9 @@ CUDA_VISIBLE_DEVICES=-1 python tools/train.py ${CONFIG_FILE} [ARGS] | `--amp` | Enable automatic-mixed-precision training. | | `--no-validate` | **Not suggested**. 
Disable checkpoint evaluation during training. | | `--auto-scale-lr` | Auto scale the learning rate according to the actual batch size and the original batch size. | -| `--seed` | Random seed. | -| `--diff-rank-seed` | Whether or not set different seeds for different ranks. | -| `--deterministic` | Whether to set deterministic options for CUDNN backend. | +| `--seed` | Random seed. | +| `--diff-rank-seed` | Whether or not set different seeds for different ranks. | +| `--deterministic` | Whether to set deterministic options for CUDNN backend. | | `--cfg-options CFG_OPTIONS` | Override some settings in the used config, the key-value pair in xxx=yyy format will be merged into the config file. If the value to be overwritten is a list, it should be of the form of either `key="[a,b]"` or `key=a,b`. The argument also allows nested list/tuple values, e.g. `key="[(a,b),(c,d)]"`. Note that the quotation marks are necessary and that no white space is allowed. | | `--launcher {none,pytorch,slurm,mpi}` | Options for job launcher. Defaults to `none`. 
| diff --git a/tools/train.py b/tools/train.py index d1a88b8609..644a730ecb 100644 --- a/tools/train.py +++ b/tools/train.py @@ -19,8 +19,8 @@ def parse_args(): type=str, const='auto', help='If specify checkpint path, resume from it, while if not ' - 'specify, try to auto resume from the latest checkpoint ' - 'in the work directory.') + 'specify, try to auto resume from the latest checkpoint ' + 'in the work directory.') parser.add_argument( '--amp', action='store_true', @@ -33,7 +33,7 @@ def parse_args(): '--auto-scale-lr', action='store_true', help='whether to auto scale the learning rate according to the ' - 'actual batch size and the original batch size.') + 'actual batch size and the original batch size.') parser.add_argument('--seed', type=int, default=None, help='random seed') parser.add_argument( '--diff-rank-seed', @@ -48,11 +48,11 @@ def parse_args(): nargs='+', action=DictAction, help='override some settings in the used config, the key-value pair ' - 'in xxx=yyy format will be merged into config file. If the value to ' - 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' - 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' - 'Note that the quotation marks are necessary and that no white space ' - 'is allowed.') + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') parser.add_argument( '--launcher', choices=['none', 'pytorch', 'slurm', 'mpi'], From 89ff56e25d25fc9af4ce77d1e5dd0dddd962ff46 Mon Sep 17 00:00:00 2001 From: "daiwenxun.vendor" Date: Thu, 10 Nov 2022 16:54:21 +0800 Subject: [PATCH 5/5] only set cfg.randomness from CLI args when the config does not define it --- tools/train.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/train.py b/tools/train.py index 644a730ecb..e424a7a634 100644 --- a/tools/train.py +++ b/tools/train.py @@ -106,10 +106,11 @@ def merge_args(cfg, args): cfg.auto_scale_lr.enable = True # set random seeds - cfg.randomness = dict( - seed=args.seed, - diff_rank_seed=args.diff_rank_seed, - deterministic=args.deterministic) + if cfg.get('randomness', None) is None: + cfg.randomness = dict( + seed=args.seed, + diff_rank_seed=args.diff_rank_seed, + deterministic=args.deterministic) if args.cfg_options is not None: cfg.merge_from_dict(args.cfg_options)