Try to compile the model in order to get faster training times (https://sebastianraschka.com/blog/2023/pytorch-faster.html).
I tried to compile our model with the following setup:
(lightning_py310) fehlneas@hitssv565:/local_data/fehlneas/SPACE/Spherinator$ vim main.py
""" Uses the command line client to start the training.
"""
import numpy as np
import torch
import time
from lightning.pytorch.cli import LightningCLI
# Raise a FloatingPointError for any kind of floating-point errors
if __debug__:
np.seterr(all='raise')
print('debug on')
else:
print('debug off')
# Set the default precision of torch operations to float32
torch.set_float32_matmul_precision('high')
class TorchCompileCLI(LightningCLI):
def fit(self, model, **kwargs):
compiled_model = torch.compile(model)
self.trainer.fit(compiled_model, **kwargs)
if __name__ == "__main__":
start = time.time()
cli = TorchCompileCLI(save_config_kwargs={"overwrite": True})
#cli = LightningCLI(save_config_kwargs={"overwrite": True})
end = time.time()
elapsed = end - start
print(f"Time elapsed {elapsed/60:.2f} min")
(lightning_py310) fehlneas@hitssv565:/local_data/fehlneas/SPACE/Spherinator$ python main.py fit -c experiments/illustris-power.yaml > everything.log 2>&1
Global seed set to 42
/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/lightning/pytorch/cli.py:625: UserWarning: RotationalVariationalAutoencoderPower.configure_optimizers will be overridden by TorchCompileCLI.configure_optimizers.
_warn(
/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/lightning/fabric/connector.py:554: UserWarning: 16 is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
rank_zero_warn(
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/logger_connector.py:67: UserWarning: Starting from v1.9.0, tensorboardX has been removed as a dependency of the lightning.pytorch package, due to potential conflicts with other packages in the ML ecosystem. For this reason, logger=True will use CSVLogger as the default logger, unless the tensorboard or tensorboardX packages are found. Please pip install lightning[extra] or one of them to enable TensorBoard support by default
warning_cache.warn(
debug on
/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/lightning/pytorch/trainer/configuration_validator.py:69: UserWarning: You passed in a val_dataloader but have no validation_step. Skipping val loop.
rank_zero_warn("You passed in a val_dataloader but have no validation_step. Skipping val loop.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
| Name | Type | Params | In sizes | Out sizes
0 | conv0 | Conv2d | 448 | [1, 3, 128, 128] | [1, 16, 128, 128]
1 | pool0 | MaxPool2d | 0 | [1, 16, 128, 128] | [1, 16, 64, 64]
2 | conv1 | Conv2d | 4.6 K | [1, 16, 64, 64] | [1, 32, 64, 64]
3 | pool1 | MaxPool2d | 0 | [1, 32, 64, 64] | [1, 32, 32, 32]
4 | conv2 | Conv2d | 18.5 K | [1, 32, 32, 32] | [1, 64, 32, 32]
5 | pool2 | MaxPool2d | 0 | [1, 64, 32, 32] | [1, 64, 16, 16]
6 | conv3 | Conv2d | 73.9 K | [1, 64, 16, 16] | [1, 128, 16, 16]
7 | pool3 | MaxPool2d | 0 | [1, 128, 16, 16] | [1, 128, 8, 8]
8 | conv4 | Conv2d | 295 K | [1, 128, 8, 8] | [1, 256, 8, 8]
9 | pool4 | MaxPool2d | 0 | [1, 256, 8, 8] | [1, 256, 4, 4]
10 | fc1 | Linear | 1.0 M | [1, 4096] | [1, 256]
11 | fc_mean | Linear | 771 | [1, 256] | [1, 3]
12 | fc_var | Linear | 257 | [1, 256] | [1, 1]
13 | fc2 | Linear | 1.0 K | [1, 3] | [1, 256]
14 | fc3 | Linear | 1.1 M | [1, 256] | [1, 4096]
15 | deconv1 | ConvTranspose2d | 524 K | [1, 256, 4, 4] | [1, 128, 8, 8]
16 | deconv2 | ConvTranspose2d | 262 K | [1, 128, 8, 8] | [1, 128, 16, 16]
17 | deconv3 | ConvTranspose2d | 131 K | [1, 128, 16, 16] | [1, 64, 32, 32]
18 | deconv4 | ConvTranspose2d | 32.8 K | [1, 64, 32, 32] | [1, 32, 64, 64]
19 | deconv5 | ConvTranspose2d | 4.6 K | [1, 32, 64, 64] | [1, 16, 127, 127]
20 | deconv6 | ConvTranspose2d | 195 | [1, 16, 127, 127] | [1, 3, 128, 128]
3.5 M Trainable params
0 Non-trainable params
3.5 M Total params
13.806 Total estimated model params size (MB)
Epoch 0: 0%| | 0/79 [00:00<?, ?it/s]
[2023-11-06 20:56:02,370] [0/1] torch._inductor.scheduler: [ERROR] Generating code for node buf1 with estimated runtime 0.0
[2023-11-06 20:56:02,418] [0/1] torch._inductor.scheduler: [ERROR] Generating code for node buf4 with estimated runtime 0.0
[2023-11-06 20:56:02,553] [0/1] torch._inductor.scheduler: [ERROR] Generating code for node buf10 with estimated runtime 0.0
[2023-11-06 20:56:02,683] [0/1] torch._inductor.scheduler: [ERROR] Generating code for node buf16 with estimated runtime 0.0
[2023-11-06 20:56:02,799] [0/1] torch._inductor.scheduler: [ERROR] Generating code for node buf22 with estimated runtime 0.0
[2023-11-06 20:56:02,913] [0/1] torch._inductor.scheduler: [ERROR] Generating code for node buf28 with estimated runtime 0.0
[2023-11-06 20:56:02,997] [0/1] torch._inductor.scheduler: [ERROR] Generating code for node buf30 with estimated runtime 0.0
[2023-11-06 20:56:03,101] [0/1] torch._inductor.scheduler: [ERROR] Generating code for node buf33 with estimated runtime 0.0
[2023-11-06 20:56:03,182] [0/1] torch._inductor.scheduler: [ERROR] Generating code for node buf37 with estimated runtime 0.0
[2023-11-06 20:56:03,251] [0/1] torch._inductor.scheduler: [ERROR] Generating code for node buf38 with estimated runtime 0.0
[2023-11-06 20:56:03,340] [0/1] torch._inductor.scheduler: [ERROR] Generating code for node buf43 with estimated runtime 0.0
[2023-11-06 20:56:08,033] [0/1] torch._inductor.scheduler: [ERROR] Generating code for node buf4 with estimated runtime 0.0
[2023-11-06 20:56:08,053] [0/1] torch._inductor.scheduler: [ERROR] Generating code for node buf5 with estimated runtime 0.0
[2023-11-06 20:56:08,073] [0/1] torch._inductor.scheduler: [ERROR] Generating code for node buf7 with estimated runtime 0.0
[2023-11-06 20:56:08,099] [0/1] torch._inductor.scheduler: [ERROR] Generating code for node buf11 with estimated runtime 0.0
[2023-11-06 20:56:08,116] [0/1] torch._inductor.scheduler: [ERROR] Generating code for node buf14 with estimated runtime 0.0
[2023-11-06 20:56:08,125] [0/1] torch._inductor.scheduler: [ERROR] Generating code for node buf16 with estimated runtime 0.0
[2023-11-06 20:56:08,419] [0/1] torch._inductor.scheduler: [ERROR] Generating code for node buf23 with estimated runtime 0.0
[2023-11-06 20:56:08,676] [0/1] torch._inductor.scheduler: [ERROR] Generating code for node buf24 with estimated runtime 0.0
[2023-11-06 20:56:08,776] [0/1] torch._inductor.scheduler: [ERROR] Generating code for node buf31 with estimated runtime 0.0
[2023-11-06 20:56:09,038] [0/1] torch._inductor.scheduler: [ERROR] Generating code for node buf32 with estimated runtime 0.0
[2023-11-06 20:56:09,137] [0/1] torch._inductor.scheduler: [ERROR] Generating code for node buf39 with estimated runtime 0.0
[2023-11-06 20:56:09,378] [0/1] torch._inductor.scheduler: [ERROR] Generating code for node buf40 with estimated runtime 0.0
[2023-11-06 20:56:09,528] [0/1] torch._inductor.scheduler: [ERROR] Generating code for node buf47 with estimated runtime 0.0
[2023-11-06 20:56:09,800] [0/1] torch._inductor.scheduler: [ERROR] Generating code for node buf48 with estimated runtime 0.0
[2023-11-06 20:56:11,781] [21/1] torch._inductor.scheduler: [ERROR] Generating code for node buf0 with estimated runtime 0.0
[2023-11-06 20:56:11,818] [21/1] torch._inductor.scheduler: [ERROR] Generating code for node buf1 with estimated runtime 0.0
[2023-11-06 20:56:12,134] [23/1] torch._inductor.scheduler: [ERROR] Generating code for node buf0 with estimated runtime 0.0
[2023-11-06 20:56:12,297] [25/1] torch._dynamo.variables.higher_order_ops: [WARNING] speculate_subgraph: while introspecting the user-defined autograd.Function, we were unable to trace function trampoline_autograd_bwd into a single graph. This means that Dynamo was unable to prove safety for this API and will fall back to eager-mode PyTorch, which could lead to a slowdown.
[2023-11-06 20:56:12,297] [25/1] torch._dynamo.variables.higher_order_ops: [ERROR] 'inline in skipfiles: _Dirichlet.backward | wrapper /local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/torch/autograd/function.py, skipped according skipfiles.SKIP_DIRS'
[2023-11-06 20:56:12,359] [26/1] torch._dynamo.variables.higher_order_ops: [WARNING] speculate_subgraph: while introspecting the user-defined autograd.Function, we were unable to trace function trampoline_autograd_bwd into a single graph. This means that Dynamo was unable to prove safety for this API and will fall back to eager-mode PyTorch, which could lead to a slowdown.
[2023-11-06 20:56:12,359] [26/1] torch._dynamo.variables.higher_order_ops: [ERROR] 'inline in skipfiles: _Dirichlet.backward | wrapper /local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/torch/autograd/function.py, skipped according skipfiles.SKIP_DIRS'
[2023-11-06 20:56:12,952] [29/1] torch._inductor.scheduler: [ERROR] Generating code for node buf0 with estimated runtime 0.0
[2023-11-06 20:56:12,982] [29/1] torch._inductor.scheduler: [ERROR] Generating code for node buf1 with estimated runtime 0.0
[2023-11-06 20:56:13,728] [30/1] torch._inductor.scheduler: [ERROR] Generating code for node buf0 with estimated runtime 0.0
[2023-11-06 20:56:13,768] [30/1] torch._inductor.scheduler: [ERROR] Generating code for node buf1 with estimated runtime 0.0
[2023-11-06 20:56:14,267] [30/1] torch._inductor.scheduler: [ERROR] Generating code for node buf0 with estimated runtime 0.0
[2023-11-06 20:56:14,280] [30/1] torch._inductor.scheduler: [ERROR] Generating code for node buf1 with estimated runtime 0.0
[2023-11-06 20:56:14,926] [31/1] torch._inductor.scheduler: [ERROR] Generating code for node buf0 with estimated runtime 0.0
[2023-11-06 20:56:14,939] [31/1] torch._inductor.scheduler: [ERROR] Generating code for node buf1 with estimated runtime 0.0
[2023-11-06 20:56:14,961] [31/1] torch._inductor.scheduler: [ERROR] Generating code for node buf2 with estimated runtime 0.0
[2023-11-06 20:56:17,578] [32/1] torch._inductor.scheduler: [ERROR] Generating code for node buf0 with estimated runtime 0.0
[2023-11-06 20:56:17,605] [32/1] torch._inductor.scheduler: [ERROR] Generating code for node buf3 with estimated runtime 0.0
[2023-11-06 20:56:17,649] [32/1] torch._inductor.scheduler: [ERROR] Generating code for node buf7 with estimated runtime 0.0
[2023-11-06 20:56:17,682] [32/1] torch._inductor.scheduler: [ERROR] Generating code for node buf11 with estimated runtime 0.0
[2023-11-06 20:56:17,725] [32/1] torch._inductor.scheduler: [ERROR] Generating code for node buf15 with estimated runtime 0.0
[2023-11-06 20:56:17,765] [32/1] torch._inductor.scheduler: [ERROR] Generating code for node buf19 with estimated runtime 0.0
[2023-11-06 20:56:17,805] [32/1] torch._inductor.scheduler: [ERROR] Generating code for node buf23 with estimated runtime 0.0
[2023-11-06 20:56:17,855] [32/1] torch._inductor.scheduler: [ERROR] Generating code for node buf27 with estimated runtime 0.0
[2023-11-06 20:56:17,917] [32/1] torch._inductor.scheduler: [ERROR] Generating code for node buf31 with estimated runtime 0.0
[2023-11-06 20:56:20,334] [32/1] torch._inductor.scheduler: [ERROR] Generating code for node buf0 with estimated runtime 0.0
[2023-11-06 20:56:20,464] [32/1] torch._inductor.scheduler: [ERROR] Generating code for node buf7 with estimated runtime 0.0
[2023-11-06 20:56:20,478] [32/1] torch._inductor.scheduler: [ERROR] Generating code for node buf8 with estimated runtime 0.0
[2023-11-06 20:56:20,680] [32/1] torch._inductor.scheduler: [ERROR] Generating code for node buf15 with estimated runtime 0.0
[2023-11-06 20:56:20,695] [32/1] torch._inductor.scheduler: [ERROR] Generating code for node buf16 with estimated runtime 0.0
[2023-11-06 20:56:20,847] [32/1] torch._inductor.scheduler: [ERROR] Generating code for node buf23 with estimated runtime 0.0
[2023-11-06 20:56:20,862] [32/1] torch._inductor.scheduler: [ERROR] Generating code for node buf24 with estimated runtime 0.0
[2023-11-06 20:56:20,954] [32/1] torch._inductor.scheduler: [ERROR] Generating code for node buf31 with estimated runtime 0.0
[2023-11-06 20:56:20,968] [32/1] torch._inductor.scheduler: [ERROR] Generating code for node buf32 with estimated runtime 0.0
[2023-11-06 20:56:21,061] [32/1] torch._inductor.scheduler: [ERROR] Generating code for node buf39 with estimated runtime 0.0
[2023-11-06 20:56:21,099] [32/1] torch._inductor.scheduler: [ERROR] Generating code for node buf46 with estimated runtime 0.0
[2023-11-06 20:56:21,123] [32/1] torch._inductor.scheduler: [ERROR] Generating code for node buf49 with estimated runtime 0.0
[2023-11-06 20:56:21,137] [32/1] torch._inductor.scheduler: [ERROR] Generating code for node buf51 with estimated runtime 0.0
[2023-11-06 20:56:21,162] [32/1] torch._inductor.scheduler: [ERROR] Generating code for node buf54 with estimated runtime 0.0
[2023-11-06 20:56:21,171] [32/1] torch._inductor.scheduler: [ERROR] Generating code for node buf55 with estimated runtime 0.0
[2023-11-06 20:56:22,789] [49/0] torch._inductor.scheduler: [ERROR] Generating code for node buf0 with estimated runtime 0.0
[2023-11-06 20:56:22,815] [49/0] torch._inductor.scheduler: [ERROR] Generating code for node buf5 with estimated runtime 0.0
[2023-11-06 20:56:23,132] [49/0] torch._inductor.scheduler: [ERROR] Generating code for node buf0 with estimated runtime 0.0
[2023-11-06 20:56:23,140] [49/0] torch._inductor.scheduler: [ERROR] Generating code for node buf5 with estimated runtime 0.0
[2023-11-06 20:56:23,150] [49/0] torch._inductor.scheduler: [ERROR] Generating code for node buf10 with estimated runtime 0.0
[2023-11-06 20:56:42,146] torch._dynamo.convert_frame: [WARNING] torch._dynamo hit config.cache_size_limit (8)
[2023-11-06 20:56:42,146] torch._dynamo.convert_frame: [WARNING] function: 'rotate' (/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/torchvision/transforms/functional.py:1065)
[2023-11-06 20:56:42,146] torch._dynamo.convert_frame: [WARNING] to diagnose recompilation issues, set env variable TORCHDYNAMO_REPORT_GUARD_FAILURES=1 and also see https://pytorch.org/docs/master/compile/troubleshooting.html.
[2023-11-06 20:56:50,383] torch._dynamo.convert_frame: [WARNING] torch._dynamo hit config.cache_size_limit (8)
[2023-11-06 20:56:50,383] torch._dynamo.convert_frame: [WARNING] function: 'rotate' (/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/torchvision/transforms/_functional_tensor.py:652)
[2023-11-06 20:56:50,383] torch._dynamo.convert_frame: [WARNING] to diagnose recompilation issues, set env variable TORCHDYNAMO_REPORT_GUARD_FAILURES=1 and also see https://pytorch.org/docs/master/compile/troubleshooting.html.
[2023-11-06 20:56:52,825] [94/0] torch._inductor.utils: [WARNING] DeviceCopy in input program
Traceback (most recent call last):
File "/local_data/fehlneas/SPACE/Spherinator/main.py", line 27, in
cli = TorchCompileCLI(save_config_kwargs={"overwrite": True})
File "/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/lightning/pytorch/cli.py", line 359, in init
self._run_subcommand(self.subcommand)
File "/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/lightning/pytorch/cli.py", line 650, in _run_subcommand
fn(**fn_kwargs)
File "/local_data/fehlneas/SPACE/Spherinator/main.py", line 22, in fit
self.trainer.fit(compiled_model, **kwargs)
File "/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 532, in fit
call._call_and_handle_interrupt(
File "/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py", line 43, in _call_and_handle_interrupt
return trainer_fn(*args, **kwargs)
File "/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 571, in _fit_impl
self._run(model, ckpt_path=ckpt_path)
File "/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 980, in _run
results = self._run_stage()
File "/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 1023, in _run_stage
self.fit_loop.run()
File "/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/lightning/pytorch/loops/fit_loop.py", line 202, in run
self.advance()
File "/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/lightning/pytorch/loops/fit_loop.py", line 355, in advance
self.epoch_loop.run(self._data_fetcher)
File "/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/lightning/pytorch/loops/training_epoch_loop.py", line 133, in run
self.advance(data_fetcher)
File "/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/lightning/pytorch/loops/training_epoch_loop.py", line 219, in advance
batch_output = self.automatic_optimization.run(trainer.optimizers[0], kwargs)
File "/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/lightning/pytorch/loops/optimization/automatic.py", line 188, in run
self._optimizer_step(kwargs.get("batch_idx", 0), closure)
File "/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/lightning/pytorch/loops/optimization/automatic.py", line 266, in _optimizer_step
call._call_lightning_module_hook(
File "/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py", line 146, in _call_lightning_module_hook
output = fn(*args, **kwargs)
File "/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/lightning/pytorch/core/module.py", line 1276, in optimizer_step
optimizer.step(closure=optimizer_closure)
File "/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/lightning/pytorch/core/optimizer.py", line 161, in step
step_output = self._strategy.optimizer_step(self._optimizer, closure, **kwargs)
File "/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/lightning/pytorch/strategies/strategy.py", line 231, in optimizer_step
return self.precision_plugin.optimizer_step(optimizer, model=model, closure=closure, **kwargs)
File "/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/lightning/pytorch/plugins/precision/amp.py", line 76, in optimizer_step
closure_result = closure()
File "/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/lightning/pytorch/loops/optimization/automatic.py", line 142, in call
self._result = self.closure(*args, **kwargs)
File "/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/lightning/pytorch/loops/optimization/automatic.py", line 137, in closure
self._backward_fn(step_output.closure_loss)
File "/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/lightning/pytorch/loops/optimization/automatic.py", line 237, in backward_fn
call._call_strategy_hook(self.trainer, "backward", loss, optimizer)
File "/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py", line 294, in _call_strategy_hook
output = fn(*args, **kwargs)
File "/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/lightning/pytorch/strategies/strategy.py", line 203, in backward
closure_loss = self.precision_plugin.pre_backward(closure_loss, self.lightning_module)
File "/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/lightning/pytorch/plugins/precision/amp.py", line 61, in pre_backward
tensor = self.scaler.scale(tensor)
File "/local_data/fehlneas/miniforge3/envs/lightning_py310/lib/python3.10/site-packages/torch/cuda/amp/grad_scaler.py", line 201, in scale
assert outputs.is_cuda or outputs.device.type == "xla"
AssertionError
Epoch 0: 0%| | 0/79 [01:12<?, ?it/s]
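The cache_size_limit warnings above indicate that torchvision's rotate is being recompiled for every new rotation angle until Dynamo's recompilation cache (8 entries in this PyTorch version) is exhausted, after which that function silently falls back to eager mode. Two possible mitigations, as an untested sketch (the function name rotate_eager is a placeholder):

import torch
import torch._dynamo
import torchvision.transforms.functional as TF

# Option 1: allow more graph variants before Dynamo falls back to eager.
torch._dynamo.config.cache_size_limit = 64

# Option 2: keep the angle-dependent rotation out of the compiled graph.
@torch._dynamo.disable
def rotate_eager(img: torch.Tensor, angle: float) -> torch.Tensor:
    return TF.rotate(img, angle)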
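The final AssertionError comes from GradScaler.scale, which requires the loss it scales to live on a CUDA (or XLA) device. Combined with the "DeviceCopy in input program" warning, this suggests the compiled training step hands Lightning a loss tensor that ended up on the CPU. The failing assertion can be reproduced in isolation (a minimal sketch, assuming a CUDA machine; this is not the Spherinator model):

import torch

scaler = torch.cuda.amp.GradScaler()
cpu_loss = torch.tensor(1.0, requires_grad=True)  # a loss left on the CPU

try:
    scaler.scale(cpu_loss)  # same assert as in the traceback above
except AssertionError:
    print("GradScaler.scale() only accepts CUDA (or XLA) tensors")

So a next step might be to ensure the training_step returns a loss that stays on the GPU, or to rerun with precision 32-true to take the AMP scaler out of the picture while debugging torch.compile.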