diff --git a/pytorch_optimizer/optimizer/prodigy.py b/pytorch_optimizer/optimizer/prodigy.py
index ccef49c5..ba0bb7b0 100644
--- a/pytorch_optimizer/optimizer/prodigy.py
+++ b/pytorch_optimizer/optimizer/prodigy.py
@@ -111,7 +111,7 @@ def step(self, closure: CLOSURE = None) -> LOSS:
             if 'd_numerator' not in group:
                 group['d_numerator'] = torch.tensor([0.0], device=device)
             elif group['d_numerator'].device != device:
-                group['d_numerator'] = group['d_numerator'].to(device)
+                group['d_numerator'] = group['d_numerator'].to(device)  # pragma: no cover

             d_numerator = group['d_numerator']
             d_numerator.mul_(beta3)
diff --git a/tests/test_optimizers.py b/tests/test_optimizers.py
index aaf9024e..b862661f 100644
--- a/tests/test_optimizers.py
+++ b/tests/test_optimizers.py
@@ -76,8 +76,7 @@ def _closure() -> float:
     for _ in range(iterations):
         optimizer.zero_grad()

-        y_pred = model(x_data)
-        loss = loss_fn(y_pred, y_data)
+        loss = loss_fn(model(x_data), y_data)

         if init_loss == np.inf:
             init_loss = loss