Skip to content
This repository has been archived by the owner on Jan 7, 2025. It is now read-only.

Bugfix - Torch and CUDA_VISIBLE_DEVICES #1130

Merged
merged 1 commit into from
Oct 4, 2016
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 25 additions & 3 deletions digits/model/tasks/torch_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,28 @@
TORCH_MODEL_FILE = 'model.lua'
TORCH_SNAPSHOT_PREFIX = 'snapshot'


def subprocess_visible_devices(gpus):
    """
    Calculates CUDA_VISIBLE_DEVICES for a subprocess

    The indices in `gpus` are relative to the devices visible to the
    current process. If this process was itself started with
    CUDA_VISIBLE_DEVICES set, each index is translated back to the
    corresponding entry of that variable so the subprocess is handed
    the identifiers of the underlying devices.

    Arguments:
    gpus -- a list of device indices (ints or int-like strings) as seen
        by the current process

    Returns a comma-separated string suitable for CUDA_VISIBLE_DEVICES.

    Raises ValueError if `gpus` is not a list, if one of its entries
    cannot be converted to an int, or if an index does not refer to a
    device visible to the current process.
    """
    if not isinstance(gpus, list):
        raise ValueError('gpus should be a list')
    gpus = [int(g) for g in gpus]

    old_cvd = os.environ.get('CUDA_VISIBLE_DEVICES', None)
    if old_cvd is None:
        # No remapping in effect: visible indices are the real indices
        real_gpus = gpus
    else:
        # Position in this list == index visible to the current process
        visible_to_real = []
        for token in old_cvd.split(','):
            token = token.strip()
            if not token:
                # An empty variable means that no device is visible;
                # skip empty entries instead of failing on int('')
                continue
            try:
                visible_to_real.append(int(token))
            except ValueError:
                # Not an integer index (e.g. a GPU UUID such as
                # "GPU-8932f937-..."): pass it through unchanged
                visible_to_real.append(token)

        real_gpus = []
        for visible_gpu in gpus:
            # Explicit bounds check: a negative index must not silently
            # wrap around to the end of the list
            if not 0 <= visible_gpu < len(visible_to_real):
                raise ValueError(
                    'GPU index %d is not visible to this process '
                    '(CUDA_VISIBLE_DEVICES=%r)' % (visible_gpu, old_cvd))
            real_gpus.append(visible_to_real[visible_gpu])
    return ','.join(str(g) for g in real_gpus)


@subclass
class TorchTrainTask(TrainTask):
"""
Expand Down Expand Up @@ -239,7 +261,7 @@ def task_arguments(self, resources, env):
# don't make other GPUs visible though since Torch will load
# CUDA libraries and allocate memory on all visible GPUs by
# default.
env['CUDA_VISIBLE_DEVICES'] = ','.join(identifiers)
env['CUDA_VISIBLE_DEVICES'] = subprocess_visible_devices(identifiers)
# switch to GPU mode
args.append('--type=cuda')
else:
Expand Down Expand Up @@ -570,7 +592,7 @@ def infer_one_image(self, image, snapshot_epoch=None, layers=None, gpu=None):
if gpu is not None:
args.append('--type=cuda')
# make only the selected GPU visible
env['CUDA_VISIBLE_DEVICES'] = "%d" % gpu
env['CUDA_VISIBLE_DEVICES'] = subprocess_visible_devices([gpu])
else:
args.append('--type=float')

Expand Down Expand Up @@ -860,7 +882,7 @@ def infer_many_images(self, images, snapshot_epoch=None, gpu=None):
if gpu is not None:
args.append('--type=cuda')
# make only the selected GPU visible
env['CUDA_VISIBLE_DEVICES'] = "%d" % gpu
env['CUDA_VISIBLE_DEVICES'] = subprocess_visible_devices([gpu])
else:
args.append('--type=float')

Expand Down