diff --git a/torchbenchmark/models/LearningToPaint/baseline/utils/util.py b/torchbenchmark/models/LearningToPaint/baseline/utils/util.py index adbdfe90fa..46e4c4627f 100644 --- a/torchbenchmark/models/LearningToPaint/baseline/utils/util.py +++ b/torchbenchmark/models/LearningToPaint/baseline/utils/util.py @@ -37,7 +37,7 @@ def prBlack(prt): def to_numpy(var): - return var.cpu().data.numpy() if USE_CUDA else var.data.numpy() + return var.cpu().data.numpy() def to_tensor(ndarray, device): diff --git a/torchbenchmark/models/nvidia_deeprecommender/nvinfer.py b/torchbenchmark/models/nvidia_deeprecommender/nvinfer.py index 74e7333c72..b9f9527bfb 100644 --- a/torchbenchmark/models/nvidia_deeprecommender/nvinfer.py +++ b/torchbenchmark/models/nvidia_deeprecommender/nvinfer.py @@ -57,7 +57,7 @@ def getCommandLineArgs() : return args -def getBenchmarkArgs(forceCuda): +def getBenchmarkArgs(forceCuda, device='cuda'): class Args: pass @@ -76,10 +76,11 @@ class Args: args.batch_size = 1 args.jit = False args.forcecuda = forceCuda - args.forcecpu = not forceCuda + args.forcecpu = False if forceCuda else device == 'cpu' args.nooutput = True args.silent = True args.profile = False + args.device = device return args @@ -93,20 +94,22 @@ def processArgState(args) : quit() args.use_cuda = torch.cuda.is_available() # global flag + args.use_xpu = hasattr(torch, 'xpu') and torch.xpu.is_available() if not args.silent: - if args.use_cuda: + if args.use_cuda or args.use_xpu: print('GPU is available.') else: print('GPU is not available.') - if args.use_cuda and args.forcecpu: + if args.forcecpu: args.use_cuda = False - + args.use_xpu = False + if not args.silent: - if args.use_cuda: + if args.use_cuda or args.use_xpu: print('Running On GPU') else: - print('Running On CUDA') + print('Running On CPU') if args.profile: print('Profiler Enabled') @@ -134,11 +137,13 @@ def __init__(self, device = 'cpu', jit=False, batch_size=256, usecommandlineargs forcecuda = False elif device == "cuda": forcecuda = True + elif device ==
"xpu": + forcecuda = False else: # unknown device string, quit init return - self.args = getBenchmarkArgs(forcecuda) + self.args = getBenchmarkArgs(forcecuda, device) args = processArgState(self.args) @@ -199,6 +204,7 @@ def __init__(self, device = 'cpu', jit=False, batch_size=256, usecommandlineargs if self.args.use_cuda: self.rencoder = self.rencoder.cuda() + elif self.args.use_xpu: self.rencoder = self.rencoder.xpu() if self.toytest == False: self.inv_userIdMap = {v: k for k, v in self.data_layer.userIdMap.items()} @@ -214,7 +220,7 @@ def eval(self, niter=1): continue for i, ((out, src), majorInd) in enumerate(self.eval_data_layer.iterate_one_epoch_eval(for_inf=True)): - inputs = Variable(src.cuda().to_dense() if self.args.use_cuda else src.to_dense()) + inputs = Variable(src.to(self.args.device).to_dense()) targets_np = out.to_dense().numpy()[0, :] out = self.rencoder(inputs) @@ -237,7 +243,7 @@ def TimedInferenceRun(self) : e_start_time = time.time() if self.args.profile: - with profiler.profile(record_shapes=True, use_cuda=True) as prof: + with profiler.profile(record_shapes=True, use_cuda=self.args.use_cuda, **({'use_xpu': True} if self.args.use_xpu else {})) as prof: with profiler.record_function("Inference"): self.eval() else: diff --git a/torchbenchmark/models/nvidia_deeprecommender/nvtrain.py b/torchbenchmark/models/nvidia_deeprecommender/nvtrain.py index 7a73956f36..902d0977f9 100644 --- a/torchbenchmark/models/nvidia_deeprecommender/nvtrain.py +++ b/torchbenchmark/models/nvidia_deeprecommender/nvtrain.py @@ -120,8 +120,9 @@ def processTrainArgState(args) : quit() args.use_cuda = torch.cuda.is_available() # global flag + args.use_xpu = args.device == 'xpu' if not args.silent: - if args.use_cuda: + if args.use_cuda or args.use_xpu: print('GPU is available.') else: print('GPU is not available.') @@ -130,8 +131,8 @@ def processTrainArgState(args) : args.use_cuda = False if not args.silent: - if args.use_cuda: - print('Running On CUDA') + if args.use_cuda or args.use_xpu: + print('Running On
GPU') else: print('Running On CPU') @@ -164,13 +165,13 @@ def log_var_and_grad_summaries(logger, layers, global_step, prefix, log_histogra logger.histo_summary(tag="Gradients/{}_{}".format(prefix, ind), values=w.grad.data.cpu().numpy(), step=global_step) -def DoTrainEval(encoder, evaluation_data_layer, use_cuda): +def DoTrainEval(encoder, evaluation_data_layer, device): encoder.eval() denom = 0.0 total_epoch_loss = 0.0 for i, (eval, src) in enumerate(evaluation_data_layer.iterate_one_epoch_eval()): - inputs = Variable(src.cuda().to_dense() if use_cuda else src.to_dense()) - targets = Variable(eval.cuda().to_dense() if use_cuda else eval.to_dense()) + inputs = Variable(src.to(device).to_dense()) + targets = Variable(eval.to(device).to_dense()) outputs = encoder(inputs) loss, num_ratings = model.MSEloss(outputs, targets) total_epoch_loss += loss.item() @@ -203,12 +204,15 @@ def TrainInit(self, device="cpu", jit=False, batch_size=256, processCommandLine forcecuda = False elif device == "cuda": forcecuda = True + elif device == "xpu": + forcecuda = False else: # unknown device string, quit init return self.args.forcecuda = forcecuda - self.args.forcecpu = not forcecuda + self.args.forcecpu = not forcecuda and device == 'cpu' + self.args.device = device self.args = processTrainArgState(self.args) @@ -279,9 +283,9 @@ def TrainInit(self, device="cpu", jit=False, batch_size=256, processCommandLine self.rencoder = nn.DataParallel(self.rencoder, device_ids=gpu_ids) - self.rencoder = self.rencoder.cuda() - self.toyinputs = self.toyinputs.to(device) + self.toyinputs = self.toyinputs.to(device) + self.rencoder = self.rencoder.to(device) if self.args.optimizer == "adam": self.optimizer = optim.Adam(self.rencoder.parameters(), @@ -326,7 +330,7 @@ def DoTrain(self): for i, mb in enumerate(self.data_layer.iterate_one_epoch()): - inputs = Variable(mb.cuda().to_dense() if self.args.use_cuda else mb.to_dense()) + inputs = Variable(mb.to(self.args.device).to_dense()) 
self.optimizer.zero_grad() @@ -404,7 +408,7 @@ def train(self, niter=1) : self.logger.scalar_summary("Training_RMSE_per_epoch", sqrt(self.total_epoch_loss/self.denom), self.epoch) self.logger.scalar_summary("Epoch_time", e_end_time - e_start_time, self.epoch) if self.epoch % self.args.save_every == 0 or self.epoch == self.args.num_epochs - 1: - eval_loss = DoTrainEval(self.rencoder, self.eval_data_layer, self.args.use_cuda) + eval_loss = DoTrainEval(self.rencoder, self.eval_data_layer, self.args.device) print('Epoch {} EVALUATION LOSS: {}'.format(self.epoch, eval_loss)) self.logger.scalar_summary("EVALUATION_RMSE", eval_loss, self.epoch) @@ -417,13 +421,13 @@ def train(self, niter=1) : # save to onnx dummy_input = Variable(torch.randn(self.params['batch_size'], self.data_layer.vector_dim).type(torch.float)) - torch.onnx.export(self.rencoder.float(), dummy_input.cuda() if self.args.use_cuda else dummy_input, + torch.onnx.export(self.rencoder.float(), dummy_input.to(self.args.device), self.model_checkpoint + ".onnx", verbose=True) print("ONNX model saved to {}!".format(self.model_checkpoint + ".onnx")) def TimedTrainingRun(self): if self.args.profile: - with profiler.profile(record_shapes=True, use_cuda=self.args.use_cuda) as prof: + with profiler.profile(record_shapes=True, use_cuda=self.args.use_cuda, **({'use_xpu': True} if self.args.use_xpu else {})) as prof: with profiler.record_function("training_epoch"): self.train(self.args.num_epochs) else: diff --git a/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/__init__.py b/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/__init__.py index d584c3df62..5b1bc3e5b1 100644 --- a/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/__init__.py +++ b/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/__init__.py @@ -35,16 +35,18 @@ def __init__(self, test, device, batch_size=None, extra_args=[]): results_arg = f"--results_dir {results_dir}" data_root = os.path.join(DATA_PATH, "pytorch_CycleGAN_and_pix2pix_inputs") device_arg = "" + device_type_arg =
f"--device_type {self.device}" if self.device == "cpu": device_arg = "--gpu_ids -1" - elif self.device == "cuda": + else: device_arg = "--gpu_ids 0" + if self.test == "train": train_args = f"--tb_device {self.device} --dataroot {data_root}/datasets/horse2zebra --name horse2zebra --model cycle_gan --display_id 0 --n_epochs 3 " + \ - f"--n_epochs_decay 3 {device_arg} {checkpoints_arg}" + f"--n_epochs_decay 3 {device_type_arg} {device_arg} {checkpoints_arg}" self.training_loop = prepare_training_loop(train_args.split(' ')) args = f"--dataroot {data_root}/datasets/horse2zebra/testA --name horse2zebra_pretrained --model test " + \ - f"--no_dropout {device_arg} {checkpoints_arg} {results_arg}" + f"--no_dropout {device_type_arg} {device_arg} {checkpoints_arg} {results_arg}" self.model, self.input = get_model(args, self.device) def get_module(self): diff --git a/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/models/base_model.py b/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/models/base_model.py index 0e98c7f30f..e752acf1ea 100644 --- a/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/models/base_model.py +++ b/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/models/base_model.py @@ -31,8 +31,9 @@ def __init__(self, opt): """ self.opt = opt self.gpu_ids = opt.gpu_ids + self.device_type = opt.device_type self.isTrain = opt.isTrain - self.device = torch.device('cuda:{}'.format(self.gpu_ids[0])) if self.gpu_ids else torch.device('cpu') # get device name: CPU or GPU + self.device = torch.device('{}:{}'.format(self.device_type, self.gpu_ids[0])) if self.gpu_ids else torch.device('cpu') # get device name: CPU or GPU self.save_dir = os.path.join(opt.checkpoints_dir, opt.name) # save all the checkpoints to save_dir if opt.preprocess != 'scale_width': # with [scale_width], input images might have different sizes, which hurts the performance of cudnn.benchmark. 
torch.backends.cudnn.benchmark = True diff --git a/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/models/cycle_gan_model.py b/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/models/cycle_gan_model.py index 9628788b31..0ea9b4dd60 100644 --- a/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/models/cycle_gan_model.py +++ b/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/models/cycle_gan_model.py @@ -71,15 +71,15 @@ def __init__(self, opt): # The naming is different from those used in the paper. # Code (vs. paper): G_A (G), G_B (F), D_A (D_Y), D_B (D_X) self.netG_A = networks.define_G(opt.input_nc, opt.output_nc, opt.ngf, opt.netG, opt.norm, - not opt.no_dropout, opt.init_type, opt.init_gain, self.gpu_ids) + not opt.no_dropout, opt.init_type, opt.init_gain, self.gpu_ids, self.device_type) self.netG_B = networks.define_G(opt.output_nc, opt.input_nc, opt.ngf, opt.netG, opt.norm, - not opt.no_dropout, opt.init_type, opt.init_gain, self.gpu_ids) + not opt.no_dropout, opt.init_type, opt.init_gain, self.gpu_ids, self.device_type) if self.isTrain: # define discriminators self.netD_A = networks.define_D(opt.output_nc, opt.ndf, opt.netD, - opt.n_layers_D, opt.norm, opt.init_type, opt.init_gain, self.gpu_ids) + opt.n_layers_D, opt.norm, opt.init_type, opt.init_gain, self.gpu_ids, self.device_type) self.netD_B = networks.define_D(opt.input_nc, opt.ndf, opt.netD, - opt.n_layers_D, opt.norm, opt.init_type, opt.init_gain, self.gpu_ids) + opt.n_layers_D, opt.norm, opt.init_type, opt.init_gain, self.gpu_ids, self.device_type) if self.isTrain: if opt.lambda_identity > 0.0: # only works when input and output images have the same number of channels diff --git a/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/models/networks.py b/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/models/networks.py index ea69828612..7a3eda60ac 100644 --- a/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/models/networks.py +++ 
b/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/models/networks.py @@ -97,25 +97,28 @@ def init_func(m): # define the initialization function net.apply(init_func) # apply the initialization function -def init_net(net, init_type='normal', init_gain=0.02, gpu_ids=[]): +def init_net(net, init_type='normal', init_gain=0.02, gpu_ids=[], device='cuda'): """Initialize a network: 1. register CPU/GPU device (with multi-GPU support); 2. initialize the network weights Parameters: net (network) -- the network to be initialized init_type (str) -- the name of an initialization method: normal | xavier | kaiming | orthogonal gain (float) -- scaling factor for normal, xavier and orthogonal. gpu_ids (int list) -- which GPUs the network runs on: e.g., 0,1,2 + device (str) -- device type: cpu/cuda/xpu Return an initialized network. """ if len(gpu_ids) > 0: - assert(torch.cuda.is_available()) - net.to(gpu_ids[0]) + assert(device != 'cpu') + assert(hasattr(torch, device)) + assert(getattr(torch, device).is_available()) + net.to('{}:{}'.format(device, gpu_ids[0])) net = torch.nn.DataParallel(net, gpu_ids) # multi-GPUs init_weights(net, init_type, init_gain=init_gain) return net -def define_G(input_nc, output_nc, ngf, netG, norm='batch', use_dropout=False, init_type='normal', init_gain=0.02, gpu_ids=[]): +def define_G(input_nc, output_nc, ngf, netG, norm='batch', use_dropout=False, init_type='normal', init_gain=0.02, gpu_ids=[], device='cuda'): """Create a generator Parameters: @@ -128,6 +131,7 @@ def define_G(input_nc, output_nc, ngf, netG, norm='batch', use_dropout=False, in init_type (str) -- the name of our initialization method. init_gain (float) -- scaling factor for normal, xavier and orthogonal. 
gpu_ids (int list) -- which GPUs the network runs on: e.g., 0,1,2 + device (str) -- device type: cpu/cuda/xpu Returns a generator @@ -155,21 +159,22 @@ def define_G(input_nc, output_nc, ngf, netG, norm='batch', use_dropout=False, in net = UnetGenerator(input_nc, output_nc, 8, ngf, norm_layer=norm_layer, use_dropout=use_dropout) else: raise NotImplementedError('Generator model name [%s] is not recognized' % netG) - return init_net(net, init_type, init_gain, gpu_ids) + return init_net(net, init_type, init_gain, gpu_ids, device) -def define_D(input_nc, ndf, netD, n_layers_D=3, norm='batch', init_type='normal', init_gain=0.02, gpu_ids=[]): +def define_D(input_nc, ndf, netD, n_layers_D=3, norm='batch', init_type='normal', init_gain=0.02, gpu_ids=[], device='cuda'): """Create a discriminator Parameters: - input_nc (int) -- the number of channels in input images - ndf (int) -- the number of filters in the first conv layer - netD (str) -- the architecture's name: basic | n_layers | pixel - n_layers_D (int) -- the number of conv layers in the discriminator; effective when netD=='n_layers' - norm (str) -- the type of normalization layers used in the network. - init_type (str) -- the name of the initialization method. - init_gain (float) -- scaling factor for normal, xavier and orthogonal. - gpu_ids (int list) -- which GPUs the network runs on: e.g., 0,1,2 + input_nc (int) -- the number of channels in input images + ndf (int) -- the number of filters in the first conv layer + netD (str) -- the architecture's name: basic | n_layers | pixel + n_layers_D (int) -- the number of conv layers in the discriminator; effective when netD=='n_layers' + norm (str) -- the type of normalization layers used in the network. + init_type (str) -- the name of the initialization method. + init_gain (float) -- scaling factor for normal, xavier and orthogonal. 
+ gpu_ids (int list) -- which GPUs the network runs on: e.g., 0,1,2 + device (str) -- device type: cpu/cuda/xpu Returns a discriminator @@ -199,7 +204,7 @@ def define_D(input_nc, ndf, netD, n_layers_D=3, norm='batch', init_type='normal' net = PixelDiscriminator(input_nc, ndf, norm_layer=norm_layer) else: raise NotImplementedError('Discriminator model name [%s] is not recognized' % netD) - return init_net(net, init_type, init_gain, gpu_ids) + return init_net(net, init_type, init_gain, gpu_ids, device) ############################################################################## @@ -281,7 +286,7 @@ def cal_gradient_penalty(netD, real_data, fake_data, device, type='mixed', const netD (network) -- discriminator network real_data (tensor array) -- real images fake_data (tensor array) -- generated images from the generator - device (str) -- GPU / CPU: from torch.device('cuda:{}'.format(self.gpu_ids[0])) if self.gpu_ids else torch.device('cpu') + device (str) -- cpu / cuda / xpu type (str) -- if we mix real and fake data or not [real | fake | mixed]. 
constant (float) -- the constant used in formula ( ||gradient||_2 - constant)^2 lambda_gp (float) -- weight for this loss diff --git a/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/models/pix2pix_model.py b/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/models/pix2pix_model.py index 939eb887ee..38fedadc9c 100644 --- a/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/models/pix2pix_model.py +++ b/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/models/pix2pix_model.py @@ -54,11 +54,11 @@ def __init__(self, opt): self.model_names = ['G'] # define networks (both generator and discriminator) self.netG = networks.define_G(opt.input_nc, opt.output_nc, opt.ngf, opt.netG, opt.norm, - not opt.no_dropout, opt.init_type, opt.init_gain, self.gpu_ids) + not opt.no_dropout, opt.init_type, opt.init_gain, self.gpu_ids, self.device_type) if self.isTrain: # define a discriminator; conditional GANs need to take both input and output images; Therefore, #channels for D is input_nc + output_nc self.netD = networks.define_D(opt.input_nc + opt.output_nc, opt.ndf, opt.netD, - opt.n_layers_D, opt.norm, opt.init_type, opt.init_gain, self.gpu_ids) + opt.n_layers_D, opt.norm, opt.init_type, opt.init_gain, self.gpu_ids, self.device_type) if self.isTrain: # define loss functions diff --git a/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/models/template_model.py b/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/models/template_model.py index 68cdaf6a9a..cc827d9326 100644 --- a/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/models/template_model.py +++ b/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/models/template_model.py @@ -57,7 +57,7 @@ def __init__(self, opt): # you can use opt.isTrain to specify different behaviors for training and test. For example, some networks will not be used during test, and you don't need to load them. self.model_names = ['G'] # define networks; you can use opt.isTrain to specify different behaviors for training and test. 
- self.netG = networks.define_G(opt.input_nc, opt.output_nc, opt.ngf, opt.netG, gpu_ids=self.gpu_ids) + self.netG = networks.define_G(opt.input_nc, opt.output_nc, opt.ngf, opt.netG, gpu_ids=self.gpu_ids, device=self.device_type) if self.isTrain: # only defined during training time # define your loss functions. You can use losses provided by torch.nn such as torch.nn.L1Loss. # We also provide a GANLoss class "networks.GANLoss". self.criterionGAN = networks.GANLoss().to(self.device) diff --git a/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/models/test_model.py b/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/models/test_model.py index fe15f40176..5510b982c2 100644 --- a/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/models/test_model.py +++ b/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/models/test_model.py @@ -43,7 +43,7 @@ def __init__(self, opt): # specify the models you want to save to the disk. The training/test scripts will call and self.model_names = ['G' + opt.model_suffix] # only generator is needed. 
self.netG = networks.define_G(opt.input_nc, opt.output_nc, opt.ngf, opt.netG, - opt.norm, not opt.no_dropout, opt.init_type, opt.init_gain, self.gpu_ids) + opt.norm, not opt.no_dropout, opt.init_type, opt.init_gain, self.gpu_ids, self.device_type) # assigns the model to self.netG_[suffix] so that it can be loaded # please see diff --git a/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/options/base_options.py b/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/options/base_options.py index 22a3ee95d1..8833c3995a 100644 --- a/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/options/base_options.py +++ b/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/options/base_options.py @@ -23,6 +23,7 @@ def initialize(self, parser): parser.add_argument('--dataroot', required=True, help='path to images (should have subfolders trainA, trainB, valA, valB, etc)') parser.add_argument('--name', type=str, default='experiment_name', help='name of the experiment. It decides where to store samples and models') parser.add_argument('--gpu_ids', type=str, default='0', help='gpu ids: e.g. 0 0,1,2, 0,2. use -1 for CPU') + parser.add_argument('--device_type', type=str, default='cuda', help='device type: e.g. cpu, cuda, xpu') parser.add_argument('--checkpoints_dir', type=str, default='./checkpoints', help='models are saved here') # model parameters parser.add_argument('--model', type=str, default='cycle_gan', help='chooses which model to use.
[cycle_gan | pix2pix | test | colorization]') @@ -128,7 +129,9 @@ def parse(self, args=None): if id >= 0: opt.gpu_ids.append(id) if len(opt.gpu_ids) > 0: - torch.cuda.set_device(opt.gpu_ids[0]) + assert(hasattr(torch, opt.device_type)) + assert(hasattr(getattr(torch, opt.device_type), 'set_device')) + getattr(torch, opt.device_type).set_device(opt.gpu_ids[0]) self.opt = opt return self.opt diff --git a/torchbenchmark/models/tacotron2/__init__.py b/torchbenchmark/models/tacotron2/__init__.py index cf2088ca2e..a2101e15c2 100644 --- a/torchbenchmark/models/tacotron2/__init__.py +++ b/torchbenchmark/models/tacotron2/__init__.py @@ -28,7 +28,7 @@ def __init__(self, test, device, batch_size=None, extra_args=[]): raise NotImplementedError("Tacotron2 doesn't support CPU because load_model assumes CUDA.") self.hparams = self.create_hparams(batch_size=self.batch_size) - self.model = load_model(self.hparams).to(device=device) + self.model = load_model(self.hparams, device) self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.hparams.learning_rate, weight_decay=self.hparams.weight_decay) diff --git a/torchbenchmark/models/tacotron2/train_tacotron2.py b/torchbenchmark/models/tacotron2/train_tacotron2.py index 373b082ce7..3a37c67cd4 100644 --- a/torchbenchmark/models/tacotron2/train_tacotron2.py +++ b/torchbenchmark/models/tacotron2/train_tacotron2.py @@ -70,8 +70,8 @@ def prepare_directories_and_logger(output_directory, log_directory, rank): return logger -def load_model(hparams): - model = Tacotron2(hparams).cuda() +def load_model(hparams, device='cuda'): + model = Tacotron2(hparams).to(device) if hparams.fp16_run: model.decoder.attention_layer.score_mask_value = finfo('float16').min diff --git a/torchbenchmark/models/yolov3/yolo_utils/torch_utils.py b/torchbenchmark/models/yolov3/yolo_utils/torch_utils.py index 5e09407fc8..24ba8b0aa0 100644 --- a/torchbenchmark/models/yolov3/yolo_utils/torch_utils.py +++ b/torchbenchmark/models/yolov3/yolo_utils/torch_utils.py 
@@ -24,16 +24,21 @@ def init_seeds(seed=0): def select_device(device='', apex=False, batch_size=None): - # device = 'cpu' or '0' or '0,1,2,3' + # device = 'cpu', 'xpu' or '0' or '0,1,2,3' cpu_request = device.lower() == 'cpu' - if device and not cpu_request: # if device requested other than 'cpu' + xpu_request = device.lower() == 'xpu' + if device and not cpu_request and not xpu_request: # if device requested other than 'cpu' or 'xpu' os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device # check availablity - cuda = False if cpu_request else torch.cuda.is_available() + cuda = False if cpu_request or xpu_request else torch.cuda.is_available() if cuda: return torch.device(f"cuda:{torch.cuda.current_device()}") + if xpu_request: + print('Using XPU') + return torch.device(f"xpu:{torch.xpu.current_device()}") + print('Using CPU') return torch.device('cpu')