From affeaf69c6a026e20b57a0f8672f45afbe76e411 Mon Sep 17 00:00:00 2001
From: tchaton
Date: Fri, 8 Jan 2021 13:08:01 +0100
Subject: [PATCH 1/4] resolve bug

---
 .../logger_connector/logger_connector.py      | 32 +++++++++----------
 .../test_train_loop_logging_1_0.py            |  6 +++-
 2 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py
index 54bf2f9a90cea..4741222383843 100644
--- a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py
+++ b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py
@@ -11,8 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from copy import deepcopy
 import os
+from copy import deepcopy
 from pprint import pprint
 from typing import Iterable, Union
 
@@ -158,7 +158,7 @@ def cache_training_step_metrics(self, opt_closure_result):
         self.logged_metrics.update(logged_metrics_tmp)
         self.cached_results.legacy_batch_log_metrics.update(logged_metrics_tmp)
 
-    def log_metrics(self, metrics, grad_norm_dic, step=None, log_train_step_metrics=False):
+    def log_metrics(self, metrics, grad_norm_dic, step=None):
         """Logs the metric dict passed in.
         If `step` parameter is None and `step` key is presented is metrics,
         uses metrics["step"] as a step
@@ -186,11 +186,8 @@ def log_metrics(self, metrics, grad_norm_dic, step=None, log_train_step_metrics=
 
         elif step is None:
             # added metrics by Lightning for convenience
-            if log_train_step_metrics:
-                step = self.trainer.total_batch_idx
-            else:
-                scalar_metrics['epoch'] = self.trainer.current_epoch
-                step = self.trainer.global_step
+            scalar_metrics['epoch'] = self.trainer.current_epoch
+            step = self.trainer.global_step
 
         # log actual metrics
         if self.trainer.logger is not None:
@@ -593,13 +590,14 @@ def __gather_result_across_time_and_optimizers(self, epoch_output):
         return gathered_epoch_outputs
 
     def log_train_step_metrics(self, batch_output):
-        _, batch_log_metrics = self.cached_results.update_logger_connector()
-        # when metrics should be logged
-        if self.should_update_logs or self.trainer.fast_dev_run is True:
-            # logs user requested information to logger
-            grad_norm_dic = batch_output.grad_norm_dic
-            if grad_norm_dic is None:
-                grad_norm_dic = {}
-            if len(batch_log_metrics) > 0 or len(grad_norm_dic) > 0:
-                self.log_metrics(batch_log_metrics, grad_norm_dic, log_train_step_metrics=True)
-                self.callback_metrics.update(batch_log_metrics)
+        if not self.trainer.train_loop.should_accumulate():
+            _, batch_log_metrics = self.cached_results.update_logger_connector()
+            # when metrics should be logged
+            if self.should_update_logs or self.trainer.fast_dev_run is True:
+                # logs user requested information to logger
+                grad_norm_dic = batch_output.grad_norm_dic
+                if grad_norm_dic is None:
+                    grad_norm_dic = {}
+                if len(batch_log_metrics) > 0 or len(grad_norm_dic) > 0:
+                    self.log_metrics(batch_log_metrics, grad_norm_dic)
+                    self.callback_metrics.update(batch_log_metrics)
diff --git a/tests/trainer/logging_tests/test_train_loop_logging_1_0.py b/tests/trainer/logging_tests/test_train_loop_logging_1_0.py
index 51b9c2ac69496..617cd6fa3cbd1 100644
--- a/tests/trainer/logging_tests/test_train_loop_logging_1_0.py
+++ b/tests/trainer/logging_tests/test_train_loop_logging_1_0.py
@@ -24,12 +24,16 @@
 import numpy as np
 import pytest
 import torch
-from torch.utils.data import Dataset
+from torch.nn import functional as F
+from torch.utils.data import DataLoader, Dataset, random_split
+from torchvision import transforms
+from torchvision.datasets.mnist import MNIST
 
 import pytorch_lightning as pl
 from pytorch_lightning import callbacks, Trainer
 from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
 from pytorch_lightning.core.lightning import LightningModule
+from pytorch_lightning.loggers import WandbLogger
 from tests.base.boring_model import BoringModel, RandomDictDataset, RandomDictStringDataset
 from tests.base.deterministic_model import DeterministicModel
 

From 2bc7f4555b7eded08a87467c0eea08f06ab7b20c Mon Sep 17 00:00:00 2001
From: tchaton
Date: Fri, 8 Jan 2021 14:02:50 +0100
Subject: [PATCH 2/4] resolve tests

---
 tests/loggers/test_all.py         |  4 ++--
 tests/loggers/test_tensorboard.py | 20 ++++++++++++--------
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/tests/loggers/test_all.py b/tests/loggers/test_all.py
index 795b1a91e688e..4bf15ff8d99a1 100644
--- a/tests/loggers/test_all.py
+++ b/tests/loggers/test_all.py
@@ -126,7 +126,7 @@ def log_metrics(self, metrics, step):
     if logger_class == TensorBoardLogger:
         expected = [
             (0, ['hp_metric']),
-            (0, ['train_some_val']),
+            (0, ['epoch', 'train_some_val']),
             (0, ['early_stop_on', 'epoch', 'val_acc']),
             (0, ['hp_metric']),
             (1, ['epoch', 'test_acc', 'test_loss'])
@@ -134,7 +134,7 @@ def log_metrics(self, metrics, step):
         assert log_metric_names == expected
     else:
         expected = [
-            (0, ['train_some_val']),
+            (0, ['epoch', 'train_some_val']),
             (0, ['early_stop_on', 'epoch', 'val_acc']),
             (1, ['epoch', 'test_acc', 'test_loss'])
         ]
diff --git a/tests/loggers/test_tensorboard.py b/tests/loggers/test_tensorboard.py
index fa5c711357ba3..b6c75cfd248a8 100644
--- a/tests/loggers/test_tensorboard.py
+++ b/tests/loggers/test_tensorboard.py
@@ -213,19 +213,23 @@ def test_tensorboard_with_accummulated_gradients(mock_log_metrics, expected, tmpdir):
     """
     Tests to ensure that tensorboard log properly when accumulated_gradients > 1
     """
     class TestModel(BoringModel):
-        _count = 0
-        _indexes = []
+
+        def __init__(self):
+            super().__init__()
+            self._count = 0
+            self._indexes = []
 
         def training_step(self, batch, batch_idx):
+            print(self._indexes)
             output = self.layer(batch)
             loss = self.loss(batch, output)
             self.log('count', self._count, on_step=True, on_epoch=True)
             self.log('loss', loss, on_step=True, on_epoch=True)
 
-            if self.trainer.logger_connector.should_update_logs:
-                self._indexes.append(self._count)
+            if not self.trainer.train_loop.should_accumulate():
+                if self.trainer.logger_connector.should_update_logs or self.trainer.fast_dev_run is True:
+                    self._indexes.append(self.trainer.global_step)
 
-            self._count += 1
             return loss
 
         def validation_step(self, batch, batch_idx):
@@ -245,14 +249,13 @@ def configure_optimizers(self):
 
     logger_0 = TensorBoardLogger(tmpdir, default_hp_metric=False)
 
-    accumulate_grad_batches = 2
     trainer = Trainer(
         default_root_dir=tmpdir,
         limit_train_batches=12,
-        limit_val_batches=12,
+        limit_val_batches=0,
         max_epochs=3,
         gpus=0,
-        accumulate_grad_batches=accumulate_grad_batches,
+        accumulate_grad_batches=2,
         logger=[logger_0],
         log_every_n_steps=3,
     )
     mock_count_epochs = [m[2]["step"] for m in mock_log_metrics.mock_calls if "count_epoch" in m[2]["metrics"]]
     assert mock_count_epochs == expected
+
     mock_count_steps = [m[2]["step"] for m in mock_log_metrics.mock_calls if "count_step" in m[2]["metrics"]]
     assert model._indexes == mock_count_steps
From b1521b04f0e7c2be7a96308d1501fc04cfbf2886 Mon Sep 17 00:00:00 2001
From: tchaton
Date: Fri, 8 Jan 2021 14:44:32 +0100
Subject: [PATCH 3/4] update

---
 .../logger_connector/logger_connector.py | 23 ++++++++++++-----------
 tests/loggers/test_tensorboard.py        |  1 -
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py
index 4741222383843..6cf020aa65fa1 100644
--- a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py
+++ b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py
@@ -590,14 +590,15 @@ def __gather_result_across_time_and_optimizers(self, epoch_output):
         return gathered_epoch_outputs
 
     def log_train_step_metrics(self, batch_output):
-        if not self.trainer.train_loop.should_accumulate():
-            _, batch_log_metrics = self.cached_results.update_logger_connector()
-            # when metrics should be logged
-            if self.should_update_logs or self.trainer.fast_dev_run is True:
-                # logs user requested information to logger
-                grad_norm_dic = batch_output.grad_norm_dic
-                if grad_norm_dic is None:
-                    grad_norm_dic = {}
-                if len(batch_log_metrics) > 0 or len(grad_norm_dic) > 0:
-                    self.log_metrics(batch_log_metrics, grad_norm_dic)
-                    self.callback_metrics.update(batch_log_metrics)
+        if self.trainer.train_loop.should_accumulate() and self.trainer.train_loop.automatic_optimization:
+            return
+        _, batch_log_metrics = self.cached_results.update_logger_connector()
+        # when metrics should be logged
+        if self.should_update_logs or self.trainer.fast_dev_run is True:
+            # logs user requested information to logger
+            grad_norm_dic = batch_output.grad_norm_dic
+            if grad_norm_dic is None:
+                grad_norm_dic = {}
+            if len(batch_log_metrics) > 0 or len(grad_norm_dic) > 0:
+                self.log_metrics(batch_log_metrics, grad_norm_dic)
+                self.callback_metrics.update(batch_log_metrics)
diff --git a/tests/loggers/test_tensorboard.py b/tests/loggers/test_tensorboard.py
index b6c75cfd248a8..3ba2f6b640cc9 100644
--- a/tests/loggers/test_tensorboard.py
+++ b/tests/loggers/test_tensorboard.py
@@ -220,7 +220,6 @@ def __init__(self):
             self._indexes = []
 
         def training_step(self, batch, batch_idx):
-            print(self._indexes)
             output = self.layer(batch)
             loss = self.loss(batch, output)
             self.log('count', self._count, on_step=True, on_epoch=True)

From c0ac3bdb04f1ea949579909b29ed4416154b31a0 Mon Sep 17 00:00:00 2001
From: chaton
Date: Fri, 8 Jan 2021 17:52:16 +0100
Subject: [PATCH 4/4] Update tests/loggers/test_tensorboard.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Carlos Mocholí
---
 tests/loggers/test_tensorboard.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/loggers/test_tensorboard.py b/tests/loggers/test_tensorboard.py
index 3ba2f6b640cc9..148ad550e74c7 100644
--- a/tests/loggers/test_tensorboard.py
+++ b/tests/loggers/test_tensorboard.py
@@ -226,7 +226,7 @@ def training_step(self, batch, batch_idx):
             self.log('loss', loss, on_step=True, on_epoch=True)
 
             if not self.trainer.train_loop.should_accumulate():
-                if self.trainer.logger_connector.should_update_logs or self.trainer.fast_dev_run is True:
+                if self.trainer.logger_connector.should_update_logs:
                     self._indexes.append(self.trainer.global_step)
 
             return loss