update usage of deprecated distributed_backend #5009

Merged · 4 commits · Dec 9, 2020
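For context, the whole PR is a mechanical rename: the deprecated `distributed_backend` argument of `Trainer` (and the matching CLI flag) becomes `accelerator`. A minimal before/after sketch, assuming a 2-GPU machine (constructing the Trainer with `gpus=2` raises on CPU-only hosts):

```python
import pytorch_lightning as pl

# Old, deprecated spelling:
#   trainer = pl.Trainer(gpus=2, distributed_backend='ddp')

# New spelling adopted throughout this PR:
trainer = pl.Trainer(gpus=2, accelerator='ddp')
```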
12 changes: 6 additions & 6 deletions benchmarks/test_sharded_parity.py
@@ -105,29 +105,29 @@ def test_ddp_string_sharded_plugin_correctness_amp_multi_gpu():
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
@pytest.mark.skipif(not os.getenv("PL_RUNNING_SPECIAL_TESTS", '0') == '1',
reason="test should be run outside of pytest")
@DDPLauncher.run("--distributed_backend ddp --gpus 2 --precision 32")
@DDPLauncher.run("--accelerator ddp --gpus 2 --precision 32")
def test_ddp_sharded_plugin_correctness_multi_gpu_ddp(tmpdir, args=None):
plugin_parity_test(
gpus=args.gpus,
precision=args.precision,
-accelerator=args.distributed_backend,
+accelerator=args.accelerator,
plugin=DDPShardedPlugin(),
-model_cls=SeedTrainLoaderModel
+model_cls=SeedTrainLoaderModel,
)


@pytest.mark.skipif(not FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
@pytest.mark.skipif(not os.getenv("PL_RUNNING_SPECIAL_TESTS", '0') == '1',
reason="test should be run outside of pytest")
@DDPLauncher.run("--distributed_backend ddp --gpus 2 --precision 16")
@DDPLauncher.run("--accelerator ddp --gpus 2 --precision 16")
def test_ddp_sharded_plugin_correctness_amp_multi_gpu_ddp(tmpdir, args=None):
plugin_parity_test(
gpus=args.gpus,
precision=args.precision,
-accelerator=args.distributed_backend,
+accelerator=args.accelerator,
plugin=DDPShardedPlugin(),
-model_cls=SeedTrainLoaderModel
+model_cls=SeedTrainLoaderModel,
)


2 changes: 1 addition & 1 deletion pl_examples/domain_templates/imagenet.py
@@ -210,7 +210,7 @@ def main(args: Namespace) -> None:
if args.seed is not None:
pl.seed_everything(args.seed)

-if args.distributed_backend == 'ddp':
+if args.accelerator == 'ddp':
# When using a single GPU per process and per
# DistributedDataParallel, we need to divide the batch size
# ourselves based on the total number of GPUs we have
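To spell out the reasoning in the comment above: under `accelerator='ddp'` each process drives one GPU, so the global batch size (and worker count) gets split across the visible GPUs. A rough, hedged sketch of that arithmetic; the helper name and the numbers are illustrative, not taken from imagenet.py:

```python
import torch

def per_process_settings(batch_size: int, workers: int):
    # One DDP process per GPU, so divide the global values evenly.
    ngpus = max(torch.cuda.device_count(), 1)
    return batch_size // ngpus, workers // ngpus

print(per_process_settings(256, 8))  # -> (128, 4) on a 2-GPU machine
```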
2 changes: 1 addition & 1 deletion pl_examples/domain_templates/reinforce_learn_Qnet.py
@@ -341,7 +341,7 @@ def main(args) -> None:

trainer = pl.Trainer(
gpus=1,
-distributed_backend='dp',
+accelerator='dp',
val_check_interval=100
)

2 changes: 1 addition & 1 deletion pl_examples/domain_templates/semantic_segmentation.py
@@ -214,7 +214,7 @@ def main(hparams: Namespace):
logger=logger,
max_epochs=hparams.epochs,
accumulate_grad_batches=hparams.grad_batches,
-distributed_backend=hparams.distributed_backend,
+accelerator=hparams.accelerator,
precision=16 if hparams.use_amp else 32,
)

12 changes: 6 additions & 6 deletions pytorch_lightning/accelerators/accelerator_connector.py
@@ -87,7 +87,7 @@ def on_trainer_init(
self.trainer.tpu_id = self.trainer.tpu_cores[0] if isinstance(self.trainer.tpu_cores, list) else None

if num_processes != 1 and distributed_backend != "ddp_cpu":
rank_zero_warn("num_processes is only used for distributed_backend=\"ddp_cpu\". Ignoring it.")
rank_zero_warn("num_processes is only used for `accelerator='ddp_cpu'`. Ignoring it.")
self.trainer.num_processes = num_processes

# override with environment flag
@@ -276,7 +276,7 @@ def select_accelerator(self):
accelerator_backend = accelerators.CPUAccelerator(self.trainer, cluster_env)
else:
raise MisconfigurationException(
-f'Trainer(distributed_backend={self.trainer.distributed_backend} is not a supported backend'
+f'Trainer(accelerator={self.trainer.distributed_backend} is not a supported backend'
)

return accelerator_backend
@@ -299,8 +299,8 @@ def set_distributed_mode(self):
elif self.trainer.num_gpus > 1:
rank_zero_warn(
'You requested multiple GPUs but did not specify a backend, e.g.'
-' Trainer(distributed_backend="dp"|"ddp"|"ddp2").'
-' Setting distributed_backend="ddp_spawn" for you.'
+' `Trainer(accelerator="dp"|"ddp"|"ddp2")`.'
+' Setting `accelerator="ddp_spawn"` for you.'
)
self.trainer.distributed_backend = "ddp_spawn"

@@ -342,7 +342,7 @@ def set_distributed_mode(self):
if self.trainer.num_nodes > 1 and not (self.trainer.use_ddp2 or self.trainer.use_ddp):
raise MisconfigurationException(
'DataParallel does not support num_nodes > 1. Switching to DistributedDataParallel for you. '
-'To silence this warning set distributed_backend=ddp or distributed_backend=ddp2'
+'To silence this warning set `accelerator="ddp"` or `accelerator="ddp2"`'
)

rank_zero_info(f'GPU available: {torch.cuda.is_available()}, used: {self.trainer.on_gpu}')
@@ -366,7 +366,7 @@ def check_horovod(self):
"""Raises a `MisconfigurationException` if the Trainer is not configured correctly for Horovod."""
if not HOROVOD_AVAILABLE:
raise MisconfigurationException(
-'Requested `distributed_backend="horovod"`, but Horovod is not installed.'
+'Requested `accelerator="horovod"`, but Horovod is not installed.'
'Install with \n $HOROVOD_WITH_PYTORCH=1 pip install horovod[pytorch]'
)

2 changes: 1 addition & 1 deletion pytorch_lightning/accelerators/ddp_accelerator.py
@@ -174,7 +174,7 @@ def _check_can_spawn_children(self):
if self._has_spawned_children:
raise RuntimeError(
"You tried to run `.fit` or `.test` multiple times in the same script."
" This is not supported in DDP mode, switch to `distributed_backend='ddp_spawn'` instead."
" This is not supported in DDP mode, switch to `accelerator='ddp_spawn'` instead."
)

def set_world_ranks(self, process_idx):
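The RuntimeError above steers users who call `.fit()` or `.test()` more than once in a single script toward the spawn backend. A minimal, hedged sketch of that pattern; the model is a placeholder and the calls are left commented out:

```python
import pytorch_lightning as pl

# 'ddp_spawn' starts worker processes per call, so repeated .fit()/.test()
# calls within one script are allowed, unlike script-launched 'ddp'.
trainer = pl.Trainer(gpus=2, accelerator='ddp_spawn', max_epochs=1)
# trainer.fit(model)   # `model` would be a LightningModule
# trainer.test(model)  # a second call is fine with the spawn backend
```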
6 changes: 3 additions & 3 deletions pytorch_lightning/trainer/data_loading.py
@@ -69,12 +69,12 @@ def _worker_check(self, dataloader: DataLoader, name: str) -> None:
if dataloader.num_workers > 0 and using_spawn:
rank_zero_warn('Dataloader(num_workers>0) and ddp_spawn do not mix well!'
' Your performance might suffer dramatically.'
-' Please consider setting distributed_backend=ddp to use num_workers > 0'
+' Please consider setting accelerator=ddp to use num_workers > 0'
' (this is a bottleneck of Python .spawn() and PyTorch')

elif dataloader.num_workers == 0 and using_spawn:
-rank_zero_warn('You are using `distributed_backend=ddp_spawn` with num_workers=0.'
-' For much faster performance, switch to `distributed_backend=ddp`'
+rank_zero_warn('You are using `accelerator=ddp_spawn` with num_workers=0.'
+' For much faster performance, switch to `accelerator=ddp`'
' and set `num_workers>0`')

elif dataloader.num_workers <= 2 and multiprocessing.cpu_count() > 2 and not using_spawn:
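To make the recommendation in the warnings above concrete, here is a hedged sketch of the suggested pairing of script-launched `accelerator='ddp'` with `num_workers > 0`; the tensors and sizes are stand-ins, not part of this PR:

```python
import torch
from torch.utils.data import DataLoader, TensorDataset
import pytorch_lightning as pl

# Multiple dataloader workers pair well with 'ddp' (not 'ddp_spawn').
dataset = TensorDataset(torch.randn(64, 32), torch.randint(0, 2, (64,)))
train_loader = DataLoader(dataset, batch_size=8, num_workers=4)

trainer = pl.Trainer(gpus=2, accelerator='ddp', max_epochs=1)
# trainer.fit(model, train_loader)  # `model` would be a LightningModule
```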
2 changes: 1 addition & 1 deletion tests/backends/ddp_model.py
@@ -33,7 +33,7 @@ def main():
parser.add_argument('--tmpdir')
parser.add_argument('--workdir')
parser.set_defaults(gpus=2)
-parser.set_defaults(distributed_backend="ddp")
+parser.set_defaults(accelerator="ddp")
args = parser.parse_args()

model = EvalModelTemplate()
40 changes: 20 additions & 20 deletions tests/backends/test_accelerator_connector.py
@@ -47,8 +47,8 @@ def on_fit_start(self, trainer, pl_module):
model = BoringModel()
trainer = Trainer(
fast_dev_run=True,
-distributed_backend='ddp_cpu',
-callbacks=[CB()]
+accelerator='ddp_cpu',
+callbacks=[CB()],
)

with pytest.raises(SystemExit):
@@ -68,9 +68,9 @@ def on_fit_start(self, trainer, pl_module):
model = BoringModel()
trainer = Trainer(
fast_dev_run=True,
-distributed_backend='ddp',
+accelerator='ddp',
gpus=1,
-callbacks=[CB()]
+callbacks=[CB()],
)

with pytest.raises(SystemExit):
@@ -90,9 +90,9 @@ def on_fit_start(self, trainer, pl_module):
model = BoringModel()
trainer = Trainer(
fast_dev_run=True,
-distributed_backend='ddp_spawn',
+accelerator='ddp_spawn',
gpus=1,
-callbacks=[CB()]
+callbacks=[CB()],
)

with pytest.raises(SystemExit):
@@ -120,9 +120,9 @@ def on_fit_start(self, trainer, pl_module):
model = BoringModel()
trainer = Trainer(
fast_dev_run=True,
-distributed_backend='ddp',
+accelerator='ddp',
gpus=2,
-callbacks=[CB()]
+callbacks=[CB()],
)

with pytest.raises(SystemExit):
@@ -152,9 +152,9 @@ def on_fit_start(self, trainer, pl_module):
model = BoringModel()
trainer = Trainer(
fast_dev_run=True,
-distributed_backend='ddp2',
+accelerator='ddp2',
gpus=2,
-callbacks=[CB()]
+callbacks=[CB()],
)

with pytest.raises(SystemExit):
@@ -181,9 +181,9 @@ def on_fit_start(self, trainer, pl_module):
model = BoringModel()
trainer = Trainer(
fast_dev_run=True,
-distributed_backend='ddp',
+accelerator='ddp',
gpus=2,
-callbacks=[CB()]
+callbacks=[CB()],
)

with pytest.raises(SystemExit):
@@ -210,9 +210,9 @@ def on_fit_start(self, trainer, pl_module):
model = BoringModel()
trainer = Trainer(
fast_dev_run=True,
-distributed_backend='ddp2',
+accelerator='ddp2',
gpus=2,
-callbacks=[CB()]
+callbacks=[CB()],
)

with pytest.raises(SystemExit):
@@ -239,9 +239,9 @@ def on_fit_start(self, trainer, pl_module):
model = BoringModel()
trainer = Trainer(
fast_dev_run=True,
-distributed_backend='ddp_cpu',
+accelerator='ddp_cpu',
num_processes=1,
-callbacks=[CB()]
+callbacks=[CB()],
)

with pytest.raises(SystemExit):
@@ -267,9 +267,9 @@ def on_fit_start(self, trainer, pl_module):
model = BoringModel()
trainer = Trainer(
fast_dev_run=True,
-distributed_backend='ddp_cpu',
+accelerator='ddp_cpu',
num_processes=1,
-callbacks=[CB()]
+callbacks=[CB()],
)

with pytest.raises(SystemExit):
@@ -304,9 +304,9 @@ def on_fit_start(self, trainer, pl_module):
trainer = Trainer(
plugins=[CustomCluster()],
fast_dev_run=True,
-distributed_backend='ddp_cpu',
+accelerator='ddp_cpu',
num_processes=1,
-callbacks=[CB()]
+callbacks=[CB()],
)

with pytest.raises(SystemExit):
6 changes: 3 additions & 3 deletions tests/backends/test_ddp.py
@@ -22,7 +22,7 @@


@pytest.mark.parametrize('cli_args', [
-pytest.param('--max_epochs 1 --gpus 2 --distributed_backend ddp'),
+pytest.param('--max_epochs 1 --gpus 2 --accelerator ddp'),
])
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
def test_multi_gpu_model_ddp_fit_only(tmpdir, cli_args):
@@ -38,7 +38,7 @@ def test_multi_gpu_model_ddp_fit_only(tmpdir, cli_args):


@pytest.mark.parametrize('cli_args', [
-pytest.param('--max_epochs 1 --gpus 2 --distributed_backend ddp'),
+pytest.param('--max_epochs 1 --gpus 2 --accelerator ddp'),
])
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
def test_multi_gpu_model_ddp_test_only(tmpdir, cli_args):
@@ -54,7 +54,7 @@ def test_multi_gpu_model_ddp_test_only(tmpdir, cli_args):


@pytest.mark.parametrize('cli_args', [
-pytest.param('--max_epochs 1 --gpus 2 --distributed_backend ddp'),
+pytest.param('--max_epochs 1 --gpus 2 --accelerator ddp'),
])
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
def test_multi_gpu_model_ddp_fit_test(tmpdir, cli_args):
8 changes: 4 additions & 4 deletions tests/backends/test_ddp_spawn.py
@@ -34,7 +34,7 @@ def test_multi_gpu_early_stop_ddp_spawn(tmpdir):
limit_train_batches=10,
limit_val_batches=10,
gpus=[0, 1],
-distributed_backend='ddp_spawn',
+accelerator='ddp_spawn',
)

model = EvalModelTemplate()
@@ -51,8 +51,8 @@ def test_multi_gpu_model_ddp_spawn(tmpdir):
limit_train_batches=10,
limit_val_batches=10,
gpus=[0, 1],
-distributed_backend='ddp_spawn',
-progress_bar_refresh_rate=0
+accelerator='ddp_spawn',
+progress_bar_refresh_rate=0,
)

model = EvalModelTemplate()
@@ -79,7 +79,7 @@ def test_ddp_all_dataloaders_passed_to_fit(tmpdir):
limit_train_batches=0.2,
limit_val_batches=0.2,
gpus=[0, 1],
-distributed_backend='ddp_spawn'
+accelerator='ddp_spawn',
)
result = trainer.fit(model, **fit_options)
assert result == 1, "DDP doesn't work with dataloaders passed to fit()."
8 changes: 4 additions & 4 deletions tests/backends/test_dp.py
@@ -37,7 +37,7 @@ def test_multi_gpu_early_stop_dp(tmpdir):
limit_train_batches=10,
limit_val_batches=10,
gpus=[0, 1],
-distributed_backend='dp',
+accelerator='dp',
)

model = EvalModelTemplate()
@@ -54,8 +54,8 @@ def test_multi_gpu_model_dp(tmpdir):
limit_train_batches=10,
limit_val_batches=10,
gpus=[0, 1],
-distributed_backend='dp',
-progress_bar_refresh_rate=0
+accelerator='dp',
+progress_bar_refresh_rate=0,
)

model = EvalModelTemplate()
@@ -80,7 +80,7 @@ def test_dp_test(tmpdir):
limit_train_batches=10,
limit_val_batches=10,
gpus=[0, 1],
-distributed_backend='dp',
+accelerator='dp',
)
trainer.fit(model)
assert 'ckpt' in trainer.checkpoint_callback.best_model_path
2 changes: 1 addition & 1 deletion tests/core/test_datamodules.py
@@ -356,7 +356,7 @@ def test_full_loop_dp(tmpdir):
default_root_dir=tmpdir,
max_epochs=3,
weights_summary=None,
-distributed_backend='dp',
+accelerator='dp',
gpus=2,
deterministic=True,
)
2 changes: 1 addition & 1 deletion tests/loggers/test_all.py
@@ -316,7 +316,7 @@ def _test_logger_created_on_rank_zero_only(tmpdir, logger_class):
trainer = Trainer(
logger=logger,
default_root_dir=tmpdir,
-distributed_backend='ddp_cpu',
+accelerator='ddp_cpu',
num_processes=2,
max_steps=1,
checkpoint_callback=True,
2 changes: 1 addition & 1 deletion tests/models/data/horovod/train_default_model.py
@@ -79,7 +79,7 @@ def run_test_from_config(trainer_options):
trainer.checkpoint_connector.hpc_load(ckpt_path, on_gpu=args.on_gpu)

if args.on_gpu:
-trainer = Trainer(gpus=1, distributed_backend='horovod', max_epochs=1)
+trainer = Trainer(gpus=1, accelerator='horovod', max_epochs=1)
# Test the root_gpu property
assert trainer.root_gpu == hvd.local_rank()
