diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e09d665c3f..d79b55c808 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -31,7 +31,7 @@ repos: - pyvista>=0.34.2 - scikit-image>=0.22.0 - torch>=2.3 - - torchmetrics>=0.10 + - torchmetrics>=1.1.1 - torchvision>=0.18 exclude: (build|data|dist|logo|logs|output)/ - repo: https://github.com/pre-commit/mirrors-prettier diff --git a/pyproject.toml b/pyproject.toml index a4e125f5a3..5fc58c5740 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,8 +74,8 @@ dependencies = [ "timm>=0.4.12", # torch 1.13+ required by torchvision "torch>=1.13", - # torchmetrics 0.10+ required for binary/multiclass/multilabel classification metrics - "torchmetrics>=0.10", + # torchmetrics 1.1.1+ required for average argument to MeanAveragePrecision + "torchmetrics>=1.1.1", # torchvision 0.14+ required for torchvision.models.swin_v2_b "torchvision>=0.14", ] diff --git a/requirements/min-reqs.old b/requirements/min-reqs.old index a4a57e1096..f4c07ae21d 100644 --- a/requirements/min-reqs.old +++ b/requirements/min-reqs.old @@ -18,7 +18,7 @@ segmentation-models-pytorch==0.2.0 shapely==1.8.0 timm==0.4.12 torch==1.13.0 -torchmetrics==0.10.0 +torchmetrics==1.1.1 torchvision==0.14.0 # datasets diff --git a/torchgeo/trainers/segmentation.py b/torchgeo/trainers/segmentation.py index f8e519fa49..c56a06d109 100644 --- a/torchgeo/trainers/segmentation.py +++ b/torchgeo/trainers/segmentation.py @@ -12,7 +12,14 @@ from matplotlib.figure import Figure from torch import Tensor from torchmetrics import MetricCollection -from torchmetrics.classification import MulticlassAccuracy, MulticlassJaccardIndex +from torchmetrics.classification import ( + Accuracy, + FBetaScore, + JaccardIndex, + Precision, + Recall, +) +from torchmetrics.wrappers import ClasswiseWrapper from torchvision.models._api import WeightsEnum from ..datasets import RGBBandsMissingError, unbind_samples @@ -31,6 +38,7 @@ def __init__( weights: WeightsEnum | str | bool | None = None, in_channels: int = 3, num_classes: int = 1000, + labels: list[str] | None = None, num_filters: int = 3, loss: str = 'ce', class_weights: Tensor | None = None, @@ -55,6 +63,7 @@ def __init__( are not supported yet. in_channels: Number of input channels to model. num_classes: Number of prediction classes (including the background). + labels: List of class labels. num_filters: Number of filters. Only applicable when model='fcn'. loss: Name of the loss function, currently supports 'ce', 'jaccard' or 'focal' loss. @@ -190,27 +199,70 @@ def configure_metrics(self) -> None: .. note:: * 'Micro' averaging suits overall performance evaluation but may not reflect minority class accuracy. - * 'Macro' averaging, not used here, gives equal weight to each class, useful + * 'Macro' averaging gives equal weight to each class, useful for balanced performance assessment across imbalanced classes. """ num_classes: int = self.hparams['num_classes'] ignore_index: int | None = self.hparams['ignore_index'] - metrics = MetricCollection( - [ - MulticlassAccuracy( - num_classes=num_classes, - ignore_index=ignore_index, - multidim_average='global', - average='micro', - ), - MulticlassJaccardIndex( - num_classes=num_classes, ignore_index=ignore_index, average='micro' - ), - ] - ) - self.train_metrics = metrics.clone(prefix='train_') - self.val_metrics = metrics.clone(prefix='val_') - self.test_metrics = metrics.clone(prefix='test_') + labels: list[str] | None = self.hparams['labels'] + + metric_classes = { + 'Accuracy': Accuracy, + 'F1Score': FBetaScore, + 'JaccardIndex': JaccardIndex, + 'Precision': Precision, + 'Recall': Recall, + } + + metrics_dict = {} + + # Loop through the types of averaging + for average in ['micro', 'macro']: + for metric_name, metric_class in metric_classes.items(): + name = ( + f'Overall{metric_name}' + if average == 'micro' + else f'Average{metric_name}' + ) + params = { + 'task': 'multiclass', + 'num_classes': num_classes, + 'average': average, + 'ignore_index': ignore_index, + } + if metric_name in ['Accuracy', 'F1Score', 'Precision', 'Recall']: + params['multidim_average'] = 'global' + if metric_name == 'F1Score': + params['beta'] = 1.0 + metrics_dict[name] = metric_class(**params) + + # Loop through the classwise metrics + for metric_name, metric_class in metric_classes.items(): + if metric_name != 'JaccardIndex': + metrics_dict[metric_name] = ClasswiseWrapper( + metric_class( + task='multiclass', + num_classes=num_classes, + average='none', + multidim_average='global', + ignore_index=ignore_index, + ), + labels=labels, + ) + else: + metrics_dict[metric_name] = ClasswiseWrapper( + metric_class( + task='multiclass', + num_classes=num_classes, + average='none', + ignore_index=ignore_index, + ), + labels=labels, + ) + + self.train_metrics = MetricCollection(metrics_dict, prefix='train_') + self.val_metrics = self.train_metrics.clone(prefix='val_') + self.test_metrics = self.train_metrics.clone(prefix='test_') def training_step( self, batch: Any, batch_idx: int, dataloader_idx: int = 0 @@ -230,9 +282,14 @@ def training_step( batch_size = x.shape[0] y_hat = self(x) loss: Tensor = self.criterion(y_hat, y) - self.log('train_loss', loss, batch_size=batch_size) + self.log('train_loss', loss, batch_size=batch_size, on_step=True, on_epoch=True) self.train_metrics(y_hat, y) - self.log_dict(self.train_metrics, batch_size=batch_size) + self.log_dict( + {f'{k}': v for k, v in self.train_metrics.compute().items()}, + batch_size=batch_size, + on_step=True, + on_epoch=True, + ) return loss def validation_step( @@ -250,9 +307,14 @@ def validation_step( batch_size = x.shape[0] y_hat = self(x) loss = self.criterion(y_hat, y) - self.log('val_loss', loss, batch_size=batch_size) + self.log('val_loss', loss, batch_size=batch_size, on_step=False, on_epoch=True) self.val_metrics(y_hat, y) - self.log_dict(self.val_metrics, batch_size=batch_size) + self.log_dict( + {f'{k}': v for k, v in self.val_metrics.compute().items()}, + batch_size=batch_size, + on_step=False, + on_epoch=True, + ) if ( batch_idx < 10 @@ -294,9 +356,14 @@ def test_step(self, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> None batch_size = x.shape[0] y_hat = self(x) loss = self.criterion(y_hat, y) - self.log('test_loss', loss, batch_size=batch_size) + self.log('test_loss', loss, batch_size=batch_size, on_step=False, on_epoch=True) self.test_metrics(y_hat, y) - self.log_dict(self.test_metrics, batch_size=batch_size) + self.log_dict( + {f'{k}': v for k, v in self.test_metrics.compute().items()}, + batch_size=batch_size, + on_step=False, + on_epoch=True, + ) def predict_step( self, batch: Any, batch_idx: int, dataloader_idx: int = 0