diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 85fbc9f8..00000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "python.formatting.provider": "black", - "python.linting.enabled": true, - "python.linting.flake8Enabled": true, - "python.linting.pylintEnabled": false, -} \ No newline at end of file diff --git a/cv_lib/cv_lib/event_handlers/logging_handlers.py b/cv_lib/cv_lib/event_handlers/logging_handlers.py index 29ea6e76..a1045de8 100644 --- a/cv_lib/cv_lib/event_handlers/logging_handlers.py +++ b/cv_lib/cv_lib/event_handlers/logging_handlers.py @@ -5,6 +5,8 @@ import logging.config from toolz import curry +import numpy as np +np.set_printoptions(precision=3) @curry def log_training_output(engine, log_interval=100): @@ -23,7 +25,7 @@ def log_lr(optimizer, engine): logger.info(f"lr - {lr}") -_DEFAULT_METRICS = {"accuracy": "Avg accuracy :", "nll": "Avg loss :"} +_DEFAULT_METRICS = {"pixacc": "Avg accuracy :", "nll": "Avg loss :"} @curry @@ -38,6 +40,16 @@ def log_metrics(log_msg, engine, metrics_dict=_DEFAULT_METRICS): + metrics_msg ) +@curry +def log_class_metrics(log_msg, engine, metrics_dict): + logger = logging.getLogger(__name__) + metrics = engine.state.metrics + metrics_msg = "\n".join(f"{metrics_dict[k]} {metrics[k].numpy()}" for k in metrics_dict) + logger.info( + f"{log_msg} - Epoch {engine.state.epoch} [{engine.state.max_epochs}]\n" + + metrics_msg + ) + class Evaluator: def __init__(self, evaluation_engine, data_loader): diff --git a/cv_lib/cv_lib/segmentation/dutchf3/metrics/__init__.py b/cv_lib/cv_lib/segmentation/dutchf3/metrics/__init__.py deleted file mode 100644 index 3c4d5540..00000000 --- a/cv_lib/cv_lib/segmentation/dutchf3/metrics/__init__.py +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import numpy as np -import warnings - -import numpy as np -import torch -from ignite.metrics import Metric - - -def _fast_hist(label_true, label_pred, n_class): - mask = (label_true >= 0) & (label_true < n_class) - hist = np.bincount( - n_class * label_true[mask].astype(int) + label_pred[mask], - minlength=n_class ** 2, - ).reshape(n_class, n_class) - return hist - - -def _torch_hist(label_true, label_pred, n_class): - """Calculates the confusion matrix for the labels - - Args: - label_true ([type]): [description] - label_pred ([type]): [description] - n_class ([type]): [description] - - Returns: - [type]: [description] - """ - # TODO Add exceptions - assert len(label_true.shape) == 1, "Labels need to be 1D" - assert len(label_pred.shape) == 1, "Predictions need to be 1D" - mask = (label_true >= 0) & (label_true < n_class) - hist = torch.bincount( - n_class * label_true[mask] + label_pred[mask], minlength=n_class ** 2 - ).reshape(n_class, n_class) - return hist - - -class ConfusionMatrix(Metric): - def __init__(self, num_classes, device, output_transform=lambda x: x): - self._num_classes = num_classes - self._device = device - super(ConfusionMatrix, self).__init__(output_transform=output_transform) - - def reset(self): - self._confusion_matrix = torch.zeros( - (self._num_classes, self._num_classes), dtype=torch.long - ).to(self._device) - - def update(self, output): - y_pred, y = output - # TODO: Make assertion exception - assert y.shape == y_pred.max(1)[1].squeeze().shape, "Shape not the same" - self._confusion_matrix += _torch_hist( - torch.flatten(y), - torch.flatten(y_pred.max(1)[1].squeeze()), # Get the maximum index - self._num_classes, - ) - - def compute(self): - return self._confusion_matrix.cpu().numpy() - - -class MeanIoU(ConfusionMatrix): - def compute(self): - hist = self._confusion_matrix.cpu().numpy() - iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist)) - mean_iu = np.nanmean(iu) - return mean_iu - - -class PixelwiseAccuracy(ConfusionMatrix): - def compute(self): - hist = self._confusion_matrix.cpu().numpy() - acc = np.diag(hist).sum() / hist.sum() - return acc - - -class FrequencyWeightedIoU(ConfusionMatrix): - def compute(self): - hist = self._confusion_matrix.cpu().numpy() - iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist)) - freq = ( - hist.sum(axis=1) / hist.sum() - ) # fraction of the pixels that come from each class - fwiou = (freq[freq > 0] * iu[freq > 0]).sum() - return fwiou - - -class MeanClassAccuracy(ConfusionMatrix): - def compute(self): - hist = self._confusion_matrix.cpu().numpy() - acc = np.diag(hist).sum() / hist.sum() - acc_cls = np.diag(hist) / hist.sum(axis=1) - mean_acc_cls = np.nanmean(acc_cls) - return mean_acc_cls diff --git a/cv_lib/cv_lib/segmentation/dutchf3/metrics/apex.py b/cv_lib/cv_lib/segmentation/dutchf3/metrics/apex.py deleted file mode 100644 index 400bacca..00000000 --- a/cv_lib/cv_lib/segmentation/dutchf3/metrics/apex.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import numpy as np -import torch -import torch.distributed as dist -from cv_lib.segmentation.dutchf3 import metrics -from ignite.exceptions import NotComputableError -from ignite.metrics.metric import Metric - - -@torch.no_grad() -def reduce_tensor(tensor, world_size): - rt = tensor.clone() - dist.all_reduce(rt, op=dist.reduce_op.SUM) - rt /= world_size - return rt - - -@torch.no_grad() -def sum_reduce_tensor(tensor): - rt = tensor.clone() - dist.all_reduce(rt, op=dist.reduce_op.SUM) - return rt - - -@torch.no_grad() -def gather_tensor(tensor, world_size): - gather_t = [torch.ones_like(tensor).cuda() for _ in range(dist.get_world_size())] - dist.all_gather(gather_t, tensor) - return gather_t - - -class AverageMetric(Metric): - def __init__(self, world_size, batch_size, output_transform=lambda x: x): - super(AverageMetric, self).__init__(output_transform=output_transform) - self._world_size = world_size - self._batch_size = batch_size - self._metric_name = "Metric" - - def reset(self): - self._sum = 0 - self._num_examples = 0 - - @torch.no_grad() - def update(self, output): - reduced_metric = reduce_tensor(output, self._world_size) - self._sum += reduced_metric * self._batch_size - self._num_examples += self._batch_size - - @torch.no_grad() - def compute(self): - if self._num_examples == 0: - raise NotComputableError( - f"{self._metric_name} must have at least one example before it can be computed." - ) - return self._sum / self._num_examples - - -class LossMetric(AverageMetric): - def __init__(self, loss_fn, world_size, batch_size, output_transform=lambda x: x): - super(LossMetric, self).__init__( - world_size, batch_size, output_transform=output_transform - ) - self._loss_fn = loss_fn - self._metric_name = "Loss" - - def update(self, output): - pred, y = output - loss = self._loss_fn(pred, y) - super().update(loss) - - -class ConfusionMatrix(metrics.ConfusionMatrix): - def compute(self): - reduced_metric = sum_reduce_tensor(self._confusion_matrix) - return reduced_metric.cpu().numpy() - - -class PixelwiseAccuracy(ConfusionMatrix): - def compute(self): - hist = super(PixelwiseAccuracy, self).compute() - acc = np.diag(hist).sum() / hist.sum() - return acc - - -class MeanIoU(ConfusionMatrix): - def compute(self): - hist = super(MeanIoU, self).compute() - iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist)) - mean_iu = np.nanmean(iu) - return mean_iu - - -class FrequencyWeightedIoU(ConfusionMatrix): - def compute(self): - hist = super(FrequencyWeightedIoU, self).compute() - iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist)) - freq = ( - hist.sum(axis=1) / hist.sum() - ) # fraction of the pixels that come from each class - fwiou = (freq[freq > 0] * iu[freq > 0]).sum() - return fwiou - - -class MeanClassAccuracy(ConfusionMatrix): - def compute(self): - hist = super(MeanClassAccuracy, self).compute() - acc = np.diag(hist).sum() / hist.sum() - acc_cls = np.diag(hist) / hist.sum(axis=1) - mean_acc_cls = np.nanmean(acc_cls) - return mean_acc_cls diff --git a/cv_lib/cv_lib/segmentation/dutchf3/utils.py b/cv_lib/cv_lib/segmentation/dutchf3/utils.py index 1d509b95..adad1e97 100644 --- a/cv_lib/cv_lib/segmentation/dutchf3/utils.py +++ b/cv_lib/cv_lib/segmentation/dutchf3/utils.py @@ -3,7 +3,6 @@ import numpy as np import torch -import torchvision.utils as vutils from git import Repo from datetime import datetime import os @@ -23,7 +22,6 @@ def np_to_tb(array): array = np.expand_dims(array, axis=0) array = torch.from_numpy(array) - # array = 
vutils.make_grid(array, normalize=True, scale_each=True)
     return array
diff --git a/cv_lib/cv_lib/segmentation/metrics.py b/cv_lib/cv_lib/segmentation/metrics.py
new file mode 100644
index 00000000..02230fa9
--- /dev/null
+++ b/cv_lib/cv_lib/segmentation/metrics.py
@@ -0,0 +1,106 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+import torch
+import ignite
+
+
+def pixelwise_accuracy(num_classes, output_transform=lambda x: x, device=None):
+    """Calculates pixelwise accuracy
+
+    Args:
+        num_classes (int): number of classes
+        output_transform (callable, optional): a callable that is used to transform the
+            output into the form expected by the metric.
+
+    Returns:
+        MetricsLambda
+
+    """
+    cm = ignite.metrics.ConfusionMatrix(
+        num_classes=num_classes, output_transform=output_transform, device=device
+    )
+    # Increase floating point precision and move to CPU
+    cm = cm.type(torch.DoubleTensor)
+
+    pix_cls = ignite.metrics.confusion_matrix.cmAccuracy(cm)
+
+    return pix_cls
+
+
+def class_accuracy(num_classes, output_transform=lambda x: x, device=None):
+    """Calculates class accuracy
+
+    Args:
+        num_classes (int): number of classes
+        output_transform (callable, optional): a callable that is used to transform the
+            output into the form expected by the metric.
+
+    Returns:
+        MetricsLambda
+
+    """
+    cm = ignite.metrics.ConfusionMatrix(
+        num_classes=num_classes, output_transform=output_transform, device=device
+    )
+    # Increase floating point precision and move to CPU
+    cm = cm.type(torch.DoubleTensor)
+
+    acc_cls = cm.diag() / (cm.sum(dim=1) + 1e-15)
+
+    return acc_cls
+
+
+def mean_class_accuracy(num_classes, output_transform=lambda x: x, device=None):
+    """Calculates mean class accuracy
+
+    Args:
+        num_classes (int): number of classes
+        output_transform (callable, optional): a callable that is used to transform the
+            output into the form expected by the metric.
+
+    Returns:
+        MetricsLambda
+
+    """
+    return class_accuracy(
+        num_classes=num_classes, output_transform=output_transform, device=device
+    ).mean()
+
+
+def class_iou(
+    num_classes, output_transform=lambda x: x, device=None, ignore_index=None
+):
+    """Calculates per-class intersection-over-union
+
+    Args:
+        num_classes (int): number of classes
+        output_transform (callable, optional): a callable that is used to transform the
+            output into the form expected by the metric.
+
+    Returns:
+        MetricsLambda
+
+    """
+    cm = ignite.metrics.ConfusionMatrix(
+        num_classes=num_classes, output_transform=output_transform, device=device
+    )
+    return ignite.metrics.IoU(cm, ignore_index=ignore_index)
+
+
+def mean_iou(num_classes, output_transform=lambda x: x, device=None, ignore_index=None):
+    """Calculates mean intersection-over-union
+
+    Args:
+        num_classes (int): number of classes
+        output_transform (callable, optional): a callable that is used to transform the
+            output into the form expected by the metric.
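+
+    Example (an illustrative usage sketch, not part of the original module;
+        assumes an ignite ``Engine`` named ``engine`` already exists):
+        >>> metric = mean_iou(num_classes=6)
+        >>> metric.attach(engine, "mIoU")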
+ + Returns: + MetricsLambda + + """ + cm = ignite.metrics.ConfusionMatrix( + num_classes=num_classes, output_transform=output_transform, device=device + ) + return ignite.metrics.mIoU(cm, ignore_index=ignore_index) diff --git a/cv_lib/tests/test_metrics.py b/cv_lib/tests/test_metrics.py new file mode 100644 index 00000000..83cf57cf --- /dev/null +++ b/cv_lib/tests/test_metrics.py @@ -0,0 +1,140 @@ +import torch +import numpy as np +from pytest import approx + +from ignite.metrics import ConfusionMatrix, MetricsLambda + +from cv_lib.segmentation.metrics import class_accuracy, mean_class_accuracy + + +# source repo: +# https://github.com/pytorch/ignite/blob/master/tests/ignite/metrics/test_confusion_matrix.py +def _get_y_true_y_pred(): + # Generate an image with labels 0 (background), 1, 2 + # 3 classes: + y_true = np.zeros((30, 30), dtype=np.int) + y_true[1:11, 1:11] = 1 + y_true[15:25, 15:25] = 2 + + y_pred = np.zeros((30, 30), dtype=np.int) + y_pred[20:30, 1:11] = 1 + y_pred[20:30, 20:30] = 2 + return y_true, y_pred + + +# source repo: +# https://github.com/pytorch/ignite/blob/master/tests/ignite/metrics/test_confusion_matrix.py +def _compute_th_y_true_y_logits(y_true, y_pred): + # Create torch.tensor from numpy + th_y_true = torch.from_numpy(y_true).unsqueeze(0) + # Create logits torch.tensor: + num_classes = max(np.max(y_true), np.max(y_pred)) + 1 + y_probas = np.ones((num_classes,) + y_true.shape) * -10 + for i in range(num_classes): + y_probas[i, (y_pred == i)] = 720 + th_y_logits = torch.from_numpy(y_probas).unsqueeze(0) + return th_y_true, th_y_logits + + +# Dependency metrics do not get updated automatically, so need to retrieve and +# update confusion matrix manually +def _get_cm(metriclambda): + metrics = list(metriclambda.args) + while metrics: + metric = metrics[0] + if isinstance(metric, ConfusionMatrix): + return metric + elif isinstance(metric, MetricsLambda): + metrics.extend(metric.args) + del metrics[0] + + +def test_class_accuracy(): + y_true, y_pred = _get_y_true_y_pred() + + ## Perfect prediction + th_y_true, th_y_logits = _compute_th_y_true_y_logits(y_true, y_true) + # Update metric + output = (th_y_logits, th_y_true) + acc_metric = class_accuracy(num_classes=3) + acc_metric.update(output) + + # Retrieve and update confusion matrix + metric_cm = _get_cm(acc_metric) + # assert confusion matrix exists and is all zeroes + assert metric_cm is not None + assert ( + torch.min(metric_cm.confusion_matrix) == 0.0 + and torch.max(metric_cm.confusion_matrix) == 0.0 + ) + metric_cm.update(output) + + # Expected result + true_res = [1.0, 1.0, 1.0] + res = acc_metric.compute().numpy() + assert np.all(res == true_res), "Result {} vs. expected values {}".format( + res, true_res + ) + + ## Imperfect prediction + th_y_true, th_y_logits = _compute_th_y_true_y_logits(y_true, y_pred) + # Update metric + output = (th_y_logits, th_y_true) + acc_metric = class_accuracy(num_classes=3) + acc_metric.update(output) + + # Retrieve and update confusion matrix + metric_cm = _get_cm(acc_metric) + assert metric_cm is not None + assert ( + torch.min(metric_cm.confusion_matrix) == 0.0 + and torch.max(metric_cm.confusion_matrix) == 0.0 + ) + metric_cm.update(output) + + # Expected result + true_res = [0.75, 0.0, 0.25] + res = acc_metric.compute().numpy() + assert np.all(res == true_res), "Result {} vs. 
expected values {}".format( + res, true_res + ) + + +def test_mean_class_accuracy(): + y_true, y_pred = _get_y_true_y_pred() + + ## Perfect prediction + th_y_true, th_y_logits = _compute_th_y_true_y_logits(y_true, y_true) + # Update metric + output = (th_y_logits, th_y_true) + acc_metric = mean_class_accuracy(num_classes=3) + acc_metric.update(output) + + # Retrieve and update confusion matrix + metric_cm = _get_cm(acc_metric) + metric_cm.update(output) + + # Expected result + true_res = 1.0 + res = acc_metric.compute().numpy() + assert res == approx(true_res), "Result {} vs. expected value {}".format( + res, true_res + ) + + ## Imperfect prediction + th_y_true, th_y_logits = _compute_th_y_true_y_logits(y_true, y_pred) + # Update metric + output = (th_y_logits, th_y_true) + acc_metric = mean_class_accuracy(num_classes=3) + acc_metric.update(output) + + # Retrieve and update confusion matrix + metric_cm = _get_cm(acc_metric) + metric_cm.update(output) + + # Expected result + true_res = 1 / 3 + res = acc_metric.compute().numpy() + assert res == approx(true_res), "Result {} vs. expected value {}".format( + res, true_res + ) diff --git a/environment/anaconda/local/environment.yml b/environment/anaconda/local/environment.yml index 15973ae0..77099297 100644 --- a/environment/anaconda/local/environment.yml +++ b/environment/anaconda/local/environment.yml @@ -16,7 +16,7 @@ dependencies: - tqdm - pip: - segyio - - pytorch-ignite + - pytorch-ignite==0.3.0.dev20191105 # pre-release until stable available - fire - toolz - tabulate==0.8.2 diff --git a/examples/interpretation/notebooks/F3_block_training_and_evaluation_local.ipynb b/examples/interpretation/notebooks/F3_block_training_and_evaluation_local.ipynb new file mode 100644 index 00000000..e6cfd60b --- /dev/null +++ b/examples/interpretation/notebooks/F3_block_training_and_evaluation_local.ipynb @@ -0,0 +1,729 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Model training and evaluation on F3 Netherlands dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Seismic interpretation, also referred to as facies classification, is a task of determining types of rock in the earth’s subsurface, given seismic data. Seismic interpretation is used as a standard approach for determining precise locations of oil deposits for drilling, therefore reducing risks and potential losses. In recent years, there has been a great interest in using fully-supervised deep learning models for seismic interpretation. \n", + "\n", + "In this notebook, we demonstrate how to train a deep neural network for facies prediction using F3 Netherlands dataset. The F3 block is located in the North Sea off the shores of Netherlands. The dataset contains 6 classes (facies or lithostratigraphic units), all of which are of varying thickness (class imbalance). Processed data is available in numpy format as a `401 x 701 x 255` array. The processed F3 data is made available by [Alaudah et al. 2019](https://github.com/olivesgatech/facies_classification_benchmark). 
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Environment setup\n", + "\n", + "To set up the conda environment, please follow the instructions in the top-level [README.md](../../../README.md) file.\n", + "\n", + "__Note__: To register the conda environment in Jupyter, run:\n", + "`python -m ipykernel install --user --name envname`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Library imports" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's load required libraries." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import logging.config\n", + "from os import path\n", + "import random\n", + "import matplotlib.pyplot as plt\n", + "plt.rcParams.update({\"font.size\": 16})\n", + "\n", + "import yacs.config\n", + "\n", + "import cv2\n", + "import numpy as np\n", + "import torch\n", + "from albumentations import Compose, HorizontalFlip, Normalize, PadIfNeeded, Resize\n", + "from ignite.contrib.handlers import CosineAnnealingScheduler\n", + "from ignite.handlers import ModelCheckpoint\n", + "from ignite.engine import Events\n", + "from ignite.metrics import Loss\n", + "from ignite.utils import convert_tensor\n", + "from toolz import compose\n", + "from torch.utils import data\n", + "\n", + "from cv_lib.event_handlers import SnapshotHandler, logging_handlers\n", + "from cv_lib.segmentation import models, tgs_salt\n", + "from cv_lib.segmentation.dutchf3.engine import create_supervised_trainer\n", + "\n", + "from cv_lib.segmentation.dutchf3.utils import (\n", + " current_datetime,\n", + " generate_path,\n", + " git_branch,\n", + " git_hash,\n", + " np_to_tb,\n", + ")\n", + "\n", + "from deepseismic_interpretation.dutchf3.data import (\n", + " get_patch_loader,\n", + " decode_segmap,\n", + " get_test_loader,\n", + ")\n", + "\n", + "from utilities import (\n", + " plot_aline,\n", + " plot_f3block_interactive,\n", + " prepare_batch,\n", + " _patch_label_2d,\n", + " _compose_processing_pipeline,\n", + " _output_processing_pipeline,\n", + " _write_section_file,\n", + " runningScore,\n", + ")\n", + "\n", + "# set device\n", + "device = \"cpu\"\n", + "if torch.cuda.is_available():\n", + " device = \"cuda\"\n", + "\n", + "# number of images to score\n", + "N_EVALUATE = 20\n", + "# experiment configuration file\n", + "CONFIG_FILE = \"./configs/patch_deconvnet_skip.yaml\"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data download and preparation\n", + "\n", + "To download and prepare the F3 data set, please follow the instructions [here](../../../interpretation/dutchf3_patch/README.md). Once you've downloaded and prepared the data set, you'll find your files in the following directory tree:\n", + "\n", + "```\n", + "data\n", + "├── splits\n", + "├── test_once\n", + "│ ├── test1_labels.npy\n", + "│ ├── test1_seismic.npy\n", + "│ ├── test2_labels.npy\n", + "│ └── test2_seismic.npy\n", + "└── train\n", + " ├── train_labels.npy\n", + " └── train_seismic.npy\n", + "```\n", + "\n", + "We recommend saving the data under `/mnt/dutchf3` since this notebook will use that location as the data root. Otherwise, modify the `DATASET.ROOT` field in the configuration file, described next. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Experiment configuration file" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We use configuration files to specify experiment configuration, such as hyperparameters used in training and evaluation, as well as other experiment settings. We provide several configuration files for this notebook, under `./configs`, mainly differing in the DNN architecture used for defining the model.\n", + "\n", + "Modify the `CONFIG_FILE` variable above if you would like to run the experiment using a different configuration file." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open(CONFIG_FILE, \"rt\") as f_read:\n", + " config = yacs.config.load_cfg(f_read)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## F3 data set " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's visualize a few sections of the F3 data set. The processed F3 data set is stored as a 3D numpy array. Let's view slices of the data along inline and crossline directions. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load training data and labels\n", + "train_seismic = np.load(path.join(config.DATASET.ROOT, \"train/train_seismic.npy\"))\n", + "train_labels = np.load(path.join(config.DATASET.ROOT, \"train/train_labels.npy\"))\n", + "\n", + "print(f\"Number of inline slices: {train_seismic.shape[0]}\")\n", + "print(f\"Number of crossline slices: {train_seismic.shape[1]}\")\n", + "print(f\"Depth dimension : {train_seismic.shape[2]}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot_f3block_interactive(train_labels)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's plot an __inline__ slice." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "idx = 100\n", + "x_in = train_seismic[idx, :, :].swapaxes(0, 1)\n", + "x_inl = train_labels[idx, :, :].swapaxes(0, 1)\n", + "\n", + "plot_aline(x_in, x_inl, xlabel=\"inline\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's plot a __crossline__ slice." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "x_cr = train_seismic[:, idx, :].swapaxes(0, 1)\n", + "x_crl = train_labels[:, idx, :].swapaxes(0, 1)\n", + "\n", + "plot_aline(x_cr, x_crl, xlabel=\"crossline\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Model training" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Set up logging\n", + "logging.config.fileConfig(config.LOG_CONFIG)\n", + "logger = logging.getLogger(__name__)\n", + "logger.debug(config.WORKERS)\n", + "torch.backends.cudnn.benchmark = config.CUDNN.BENCHMARK\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set up data augmentation\n", + "\n", + "Let's define our data augmentation pipeline, which includes basic transformations, such as _data normalization, resizing, and padding_ if necessary. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Setup Augmentations\n", + "basic_aug = Compose(\n", + " [\n", + " Normalize(\n", + " mean=(config.TRAIN.MEAN,), std=(config.TRAIN.STD,), max_pixel_value=1\n", + " ),\n", + " Resize(\n", + " config.TRAIN.AUGMENTATIONS.RESIZE.HEIGHT,\n", + " config.TRAIN.AUGMENTATIONS.RESIZE.WIDTH,\n", + " always_apply=True,\n", + " ),\n", + " PadIfNeeded(\n", + " min_height=config.TRAIN.AUGMENTATIONS.PAD.HEIGHT,\n", + " min_width=config.TRAIN.AUGMENTATIONS.PAD.WIDTH,\n", + " border_mode=cv2.BORDER_CONSTANT,\n", + " always_apply=True,\n", + " mask_value=255,\n", + " ),\n", + " ]\n", + ")\n", + "\n", + "if config.TRAIN.AUGMENTATION:\n", + " train_aug = Compose([basic_aug, HorizontalFlip(p=0.5)])\n", + "else:\n", + " train_aug = basic_aug" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load the data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For training the model, we will use a patch-based approach. Rather than using entire sections (crosslines or inlines) of the data, we extract a large number of small patches from the sections, and use the patches as our data. This allows us to generate larger set of images for training, but is also a more feasible approach for large seismic volumes. \n", + "\n", + "We are using a custom patch data loader from our __`deepseismic_interpretation`__ library for generating and loading patches from seismic section data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "TrainPatchLoader = get_patch_loader(config)\n", + "\n", + "train_set = TrainPatchLoader(\n", + " config.DATASET.ROOT,\n", + " split=\"train\",\n", + " is_transform=True,\n", + " stride=config.TRAIN.STRIDE,\n", + " patch_size=config.TRAIN.PATCH_SIZE,\n", + " augmentations=train_aug,\n", + ")\n", + "\n", + "\n", + "train_loader = data.DataLoader(\n", + " train_set,\n", + " batch_size=config.TRAIN.BATCH_SIZE_PER_GPU,\n", + " num_workers=config.WORKERS,\n", + " shuffle=True,\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set up model training" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, let's define a model to train, an optimization algorithm, and a loss function. \n", + "\n", + "Note that the model is loaded from our __`cv_lib`__ library, using the name of the model as specified in the configuration file. To load a different model, either change the `MODEL.NAME` field in the configuration file, or create a new one corresponding to the model you wish to train." 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load a model\n",
+    "model = getattr(models, config.MODEL.NAME).get_seg_model(config)\n",
+    "\n",
+    "# Send to GPU if available\n",
+    "model = model.to(device)\n",
+    "\n",
+    "# SGD optimizer\n",
+    "optimizer = torch.optim.SGD(\n",
+    "    model.parameters(),\n",
+    "    lr=config.TRAIN.MAX_LR,\n",
+    "    momentum=config.TRAIN.MOMENTUM,\n",
+    "    weight_decay=config.TRAIN.WEIGHT_DECAY,\n",
+    ")\n",
+    "\n",
+    "# learning rate scheduler\n",
+    "scheduler_step = config.TRAIN.END_EPOCH // config.TRAIN.SNAPSHOTS\n",
+    "snapshot_duration = scheduler_step * len(train_loader)\n",
+    "scheduler = CosineAnnealingScheduler(\n",
+    "    optimizer, \"lr\", config.TRAIN.MAX_LR, config.TRAIN.MIN_LR, snapshot_duration\n",
+    ")\n",
+    "\n",
+    "# weights are inversely proportional to the frequency of the classes in the training set\n",
+    "class_weights = torch.tensor(\n",
+    "    config.DATASET.CLASS_WEIGHTS, device=device, requires_grad=False\n",
+    ")\n",
+    "\n",
+    "# loss function\n",
+    "criterion = torch.nn.CrossEntropyLoss(\n",
+    "    weight=class_weights, ignore_index=255, reduction=\"mean\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Training the model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We use the [ignite](https://pytorch.org/ignite/index.html) framework to create training and validation loops in our codebase. Ignite provides an easy way to create compact training/validation loops without too much boilerplate code.\n",
+    "\n",
+    "In this notebook, we demonstrate the use of ignite on the training loop only. We create a training engine `trainer` that loops multiple times over the training dataset and updates model parameters. In addition, we attach various event handlers to the trainer, using an event system that allows us to interact with the engine at each step of the run, such as when the trainer is started/completed, when an epoch is started/completed, and so on. \n",
+    "\n",
+    "In the cell below, we use event handlers to add the following functionality to the training loop:\n",
+    "- log training output\n",
+    "- log and schedule the learning rate, and\n",
+    "- periodically save the model to disk."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create training engine\n",
+    "trainer = create_supervised_trainer(\n",
+    "    model, optimizer, criterion, prepare_batch, device=device\n",
+    ")\n",
+    "\n",
+    "# add learning rate scheduler\n",
+    "trainer.add_event_handler(Events.ITERATION_STARTED, scheduler)\n",
+    "\n",
+    "# add logging of training output\n",
+    "trainer.add_event_handler(\n",
+    "    Events.ITERATION_COMPLETED,\n",
+    "    logging_handlers.log_training_output(log_interval=config.PRINT_FREQ),\n",
+    ")\n",
+    "\n",
+    "# add logging of learning rate\n",
+    "trainer.add_event_handler(Events.EPOCH_STARTED, logging_handlers.log_lr(optimizer))\n",
+    "\n",
+    "# add model checkpointing\n",
+    "output_dir = path.join(config.OUTPUT_DIR, config.TRAIN.MODEL_DIR)\n",
+    "checkpoint_handler = ModelCheckpoint(\n",
+    "    output_dir, \"model\", save_interval=2, n_saved=3, create_dir=True, require_empty=False\n",
+    ")\n",
+    "trainer.add_event_handler(\n",
+    "    Events.EPOCH_COMPLETED, checkpoint_handler, {config.MODEL.NAME: model}\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Start the training engine run."
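,
+    "\n",
+    "Before starting the run, you can also attach ad-hoc handlers with ignite's decorator syntax. A minimal illustrative sketch (optional, and independent of the handlers registered above):\n",
+    "\n",
+    "```python\n",
+    "@trainer.on(Events.EPOCH_COMPLETED)\n",
+    "def log_epoch(engine):\n",
+    "    # `Events` was imported from ignite.engine at the top of this notebook\n",
+    "    print(f\"Epoch {engine.state.epoch} completed\")\n",
+    "```"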
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "trainer.run(train_loader, max_epochs=config.TRAIN.END_EPOCH)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will next evaluate the performance of the model by looking at how well it predicts facies labels on samples from the test set.\n", + "\n", + "We will use the following evaluation metrics:\n", + "\n", + "- Pixel Accuracy (PA)\n", + "- Class Accuracy (CA)\n", + "- Mean Class Accuracy (MCA)\n", + "- Frequency Weighted intersection-over-union (FW IoU)\n", + "- Mean IoU (MIoU)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's first load the model saved previously." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = getattr(models, config.MODEL.NAME).get_seg_model(config)\n", + "model.load_state_dict(torch.load(config.TEST.MODEL_PATH), strict=False)\n", + "model = model.to(device)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next we load the test data and define the augmentations on it. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Augmentation\n", + "section_aug = Compose(\n", + " [Normalize(mean=(config.TRAIN.MEAN,), std=(config.TRAIN.STD,), max_pixel_value=1)]\n", + ")\n", + "\n", + "patch_aug = Compose(\n", + " [\n", + " Resize(\n", + " config.TRAIN.AUGMENTATIONS.RESIZE.HEIGHT,\n", + " config.TRAIN.AUGMENTATIONS.RESIZE.WIDTH,\n", + " always_apply=True,\n", + " ),\n", + " PadIfNeeded(\n", + " min_height=config.TRAIN.AUGMENTATIONS.PAD.HEIGHT,\n", + " min_width=config.TRAIN.AUGMENTATIONS.PAD.WIDTH,\n", + " border_mode=cv2.BORDER_CONSTANT,\n", + " always_apply=True,\n", + " mask_value=255,\n", + " ),\n", + " ]\n", + ")\n", + "\n", + "# Process test data\n", + "pre_processing = _compose_processing_pipeline(config.TRAIN.DEPTH, aug=patch_aug)\n", + "output_processing = _output_processing_pipeline(config)\n", + "\n", + "# Select the test split\n", + "split = \"test1\" if \"both\" in config.TEST.SPLIT else config.TEST.SPLIT\n", + "\n", + "labels = np.load(path.join(config.DATASET.ROOT, \"test_once\", split + \"_labels.npy\"))\n", + "section_file = path.join(config.DATASET.ROOT, \"splits\", \"section_\" + split + \".txt\")\n", + "_write_section_file(labels, section_file, config)\n", + "\n", + "# Load test data\n", + "TestSectionLoader = get_test_loader(config)\n", + "test_set = TestSectionLoader(\n", + " config.DATASET.ROOT, split=split, is_transform=True, augmentations=section_aug\n", + ")\n", + "\n", + "test_loader = data.DataLoader(\n", + " test_set, batch_size=1, num_workers=config.WORKERS, shuffle=False\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Predict segmentation mask on the test data\n", + "\n", + "For demonstration purposes and efficiency, we will only use a subset of the test data to predict segmentation mask on. More precisely, we will score `N_EVALUATE` images. If you would like to evaluate more images, set this variable to the desired number of images." 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "CLASS_NAMES = [\n",
+    "    \"upper_ns\",\n",
+    "    \"middle_ns\",\n",
+    "    \"lower_ns\",\n",
+    "    \"rijnland_chalk\",\n",
+    "    \"scruff\",\n",
+    "    \"zechstein\",\n",
+    "]\n",
+    "\n",
+    "n_classes = len(CLASS_NAMES)\n",
+    "\n",
+    "# keep only N_EVALUATE sections to score\n",
+    "test_subset = random.sample(list(test_loader), N_EVALUATE)\n",
+    "\n",
+    "results = list()\n",
+    "running_metrics_split = runningScore(n_classes)\n",
+    "\n",
+    "# testing mode\n",
+    "with torch.no_grad():\n",
+    "    model.eval()\n",
+    "    # loop over testing data\n",
+    "    for i, (images, labels) in enumerate(test_subset):\n",
+    "        logger.info(f\"split: {split}, section: {i}\")\n",
+    "        outputs = _patch_label_2d(\n",
+    "            model,\n",
+    "            images,\n",
+    "            pre_processing,\n",
+    "            output_processing,\n",
+    "            config.TRAIN.PATCH_SIZE,\n",
+    "            config.TEST.TEST_STRIDE,\n",
+    "            config.VALIDATION.BATCH_SIZE_PER_GPU,\n",
+    "            device,\n",
+    "            n_classes,\n",
+    "        )\n",
+    "\n",
+    "        pred = outputs.detach().max(1)[1].numpy()\n",
+    "        gt = labels.numpy()\n",
+    "        \n",
+    "        # update evaluation metrics\n",
+    "        running_metrics_split.update(gt, pred)\n",
+    "        \n",
+    "        # keep ground truth and result for plotting\n",
+    "        results.append((np.squeeze(gt), np.squeeze(pred)))\n",
+    "        "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Let's view the obtained metrics on this subset of test images. Note that we trained our model for only a small number of epochs, for demonstration purposes, so the performance results here are not meant to be representative. \n",
+    "\n",
+    "Performance will exceed the results shown here when the models are trained properly. For the full report on benchmarking performance results, please refer to the [README.md](../../../README.md) file."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# get scores\n",
+    "score, _ = running_metrics_split.get_scores()\n",
+    "\n",
+    "# Log split results\n",
+    "print(f'Pixel Acc: {score[\"Pixel Acc: \"]:.3f}')\n",
+    "for cdx, class_name in enumerate(CLASS_NAMES):\n",
+    "    print(f'  {class_name}_accuracy {score[\"Class Accuracy: \"][cdx]:.3f}')\n",
+    "\n",
+    "print(f'Mean Class Acc: {score[\"Mean Class Acc: \"]:.3f}')\n",
+    "print(f'Freq Weighted IoU: {score[\"Freq Weighted IoU: \"]:.3f}')\n",
+    "print(f'Mean IoU: {score[\"Mean IoU: \"]:0.3f}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Visualize predictions"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Let's visualize the predictions on entire test sections. Note that the crosslines and inlines have different dimensions; however, we were able to use them jointly for our network training and evaluation, since we were using smaller patches from the sections, whose size we can control via a hyperparameter in the experiment configuration file. 
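\n",
+    "\n",
+    "The integer label masks plot fine with matplotlib's default colormap. To use the dataset's own color scheme instead, the `decode_segmap` helper imported at the top of this notebook can be applied first. A hedged sketch (the signature is assumed here; check it in `deepseismic_interpretation.dutchf3.data` before relying on it):\n",
+    "\n",
+    "```python\n",
+    "rgb = decode_segmap(results[0][1], n_classes)  # assumed: label mask -> RGB array\n",
+    "plt.imshow(rgb)\n",
+    "```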
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "fig = plt.figure(figsize=(15,50))\n", + "\n", + "nplot = min(N_EVALUATE, 10)\n", + "for idx in range(nplot):\n", + " # plot actual\n", + " plt.subplot(nplot, 2, 2*(idx+1)-1)\n", + " plt.imshow(results[idx][0])\n", + " # plot predicted\n", + " plt.subplot(nplot, 2, 2*(idx+1))\n", + " plt.imshow(results[idx][1])\n", + "\n", + "f_axes = fig.axes\n", + "_ = f_axes[0].set_title('Actual')\n", + "_ = f_axes[1].set_title('Predicted') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "seismic-interpretation", + "language": "python", + "name": "seismic-interpretation" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/experiments/interpretation/dutchf3_patch/distributed/train.py b/experiments/interpretation/dutchf3_patch/distributed/train.py index ae0c17f7..3eab5dfb 100644 --- a/experiments/interpretation/dutchf3_patch/distributed/train.py +++ b/experiments/interpretation/dutchf3_patch/distributed/train.py @@ -1,6 +1,6 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -# /* spell-checker: disable */ + """Train models on Dutch F3 salt dataset Trains models using PyTorch DistributedDataParallel @@ -16,13 +16,7 @@ import fire import numpy as np import torch -from albumentations import ( - Compose, - HorizontalFlip, - Normalize, - Resize, - PadIfNeeded, -) +from albumentations import Compose, HorizontalFlip, Normalize, Resize, PadIfNeeded from cv_lib.event_handlers import ( SnapshotHandler, logging_handlers, @@ -35,15 +29,21 @@ ) from cv_lib.segmentation import models from cv_lib.segmentation import extract_metric_from -from deepseismic_interpretation.dutchf3.data import ( - get_patch_loader, - decode_segmap, -) +from deepseismic_interpretation.dutchf3.data import get_patch_loader, decode_segmap from cv_lib.segmentation.dutchf3.engine import ( create_supervised_evaluator, create_supervised_trainer, ) -from cv_lib.segmentation.dutchf3.metrics import apex + +from ignite.metrics import Loss +from cv_lib.segmentation.metrics import ( + pixelwise_accuracy, + class_accuracy, + mean_class_accuracy, + class_iou, + mean_iou, +) + from cv_lib.segmentation.dutchf3.utils import ( current_datetime, generate_path, @@ -82,12 +82,13 @@ def run(*options, cfg=None, local_rank=0): Notes: Options can be passed in via the options argument and loaded from the cfg file - Options loaded from default.py will be overridden by options loaded from cfg file - Options passed in through options argument will override option loaded from cfg file - + Options from default.py will be overridden by options loaded from cfg file + Options passed in via options argument will override option loaded from cfg file + Args: - *options (str,int ,optional): Options used to overide what is loaded from the config. - To see what options are available consult default.py + *options (str,int ,optional): Options used to overide what is loaded from the + config. To see what options are available consult + default.py cfg (str, optional): Location of config file to load. Defaults to None. 
""" update_config(config, options=options, config_file=cfg) @@ -99,11 +100,11 @@ def run(*options, cfg=None, local_rank=0): distributed = world_size > 1 if distributed: - # FOR DISTRIBUTED: Set the device according to local_rank. + # FOR DISTRIBUTED: Set the device according to local_rank. torch.cuda.set_device(local_rank) - # FOR DISTRIBUTED: Initialize the backend. torch.distributed.launch will provide - # environment variables, and requires that you use init_method=`env://`. + # FOR DISTRIBUTED: Initialize the backend. torch.distributed.launch will + # provide environment variables, and requires that you use init_method=`env://`. torch.distributed.init_process_group(backend="nccl", init_method="env://") scheduler_step = config.TRAIN.END_EPOCH // config.TRAIN.SNAPSHOTS @@ -117,7 +118,7 @@ def run(*options, cfg=None, local_rank=0): basic_aug = Compose( [ Normalize( - mean=(config.TRAIN.MEAN,), std=(config.TRAIN.STD,), max_pixel_value=1, + mean=(config.TRAIN.MEAN,), std=(config.TRAIN.STD,), max_pixel_value=1 ), Resize( config.TRAIN.AUGMENTATIONS.RESIZE.HEIGHT, @@ -134,7 +135,7 @@ def run(*options, cfg=None, local_rank=0): ] ) if config.TRAIN.AUGMENTATION: - train_aug = Compose([basic_aug, HorizontalFlip(p=0.5),]) + train_aug = Compose([basic_aug, HorizontalFlip(p=0.5)]) val_aug = basic_aug else: train_aug = val_aug = basic_aug @@ -198,7 +199,8 @@ def run(*options, cfg=None, local_rank=0): weight_decay=config.TRAIN.WEIGHT_DECAY, ) - # weights are inversely proportional to the frequency of the classes in the training set + # weights are inversely proportional to the frequency of the classes in + # the training set class_weights = torch.tensor( config.DATASET.CLASS_WEIGHTS, device=device, requires_grad=False ) @@ -229,7 +231,7 @@ def run(*options, cfg=None, local_rank=0): ) scheduler = ConcatScheduler( - schedulers=[warmup_scheduler, cosine_scheduler], durations=[warmup_duration], + schedulers=[warmup_scheduler, cosine_scheduler], durations=[warmup_duration] ) trainer = create_supervised_trainer( @@ -237,39 +239,37 @@ def run(*options, cfg=None, local_rank=0): ) trainer.add_event_handler(Events.ITERATION_STARTED, scheduler) - # Set to update the epoch parameter of our distributed data sampler so that we get different shuffles + # Set to update the epoch parameter of our distributed data sampler so that we get + # different shuffles trainer.add_event_handler(Events.EPOCH_STARTED, update_sampler_epoch(train_loader)) if silence_other_ranks & local_rank != 0: logging.getLogger("ignite.engine.engine.Engine").setLevel(logging.WARNING) def _select_pred_and_mask(model_out_dict): - return ( - model_out_dict["y_pred"].squeeze(), - model_out_dict["mask"].squeeze(), - ) + return (model_out_dict["y_pred"].squeeze(), model_out_dict["mask"].squeeze()) evaluator = create_supervised_evaluator( model, prepare_batch, metrics={ - "IoU": apex.MeanIoU( - n_classes, device, output_transform=_select_pred_and_mask + "nll": Loss( + criterion, output_transform=_select_pred_and_mask, device=device ), - "nll": apex.LossMetric( - criterion, - world_size, - config.VALIDATION.BATCH_SIZE_PER_GPU, - output_transform=_select_pred_and_mask, + "pixa": pixelwise_accuracy( + n_classes, output_transform=_select_pred_and_mask, device=device ), - "mca": apex.MeanClassAccuracy( - n_classes, device, output_transform=_select_pred_and_mask + "cacc": class_accuracy( + n_classes, output_transform=_select_pred_and_mask, device=device ), - "fiou": apex.FrequencyWeightedIoU( - n_classes, device, output_transform=_select_pred_and_mask + "mca": 
mean_class_accuracy( + n_classes, output_transform=_select_pred_and_mask, device=device ), - "pixa": apex.PixelwiseAccuracy( - n_classes, device, output_transform=_select_pred_and_mask + "ciou": class_iou( + n_classes, output_transform=_select_pred_and_mask, device=device + ), + "mIoU": mean_iou( + n_classes, output_transform=_select_pred_and_mask, device=device ), }, device=device, @@ -312,11 +312,10 @@ def _select_pred_and_mask(model_out_dict): logging_handlers.log_metrics( "Validation results", metrics_dict={ - "IoU": "IoU :", "nll": "Avg loss :", + "mIoU": " Avg IoU :", "pixa": "Pixelwise Accuracy :", "mca": "Mean Class Accuracy :", - "fiou": "Freq Weighted IoU :", }, ), ) @@ -327,10 +326,9 @@ def _select_pred_and_mask(model_out_dict): trainer, "epoch", metrics_dict={ - "IoU": "Validation/IoU", + "mIoU": "Validation/IoU", "nll": "Validation/Loss", "mca": "Validation/MCA", - "fiou": "Validation/FIoU", }, ), ) @@ -354,10 +352,7 @@ def _tensor_to_numpy(pred_tensor): evaluator.add_event_handler( Events.EPOCH_COMPLETED, create_image_writer( - summary_writer, - "Validation/Mask", - "mask", - transform_func=transform_func, + summary_writer, "Validation/Mask", "mask", transform_func=transform_func ), ) evaluator.add_event_handler( @@ -376,7 +371,7 @@ def snapshot_function(): checkpoint_handler = SnapshotHandler( path.join(output_dir, config.TRAIN.MODEL_DIR), config.MODEL.NAME, - extract_metric_from("fiou"), + extract_metric_from("mIoU"), snapshot_function, ) evaluator.add_event_handler( diff --git a/experiments/interpretation/dutchf3_patch/local/train.py b/experiments/interpretation/dutchf3_patch/local/train.py index 4b1fb17f..7c43bc91 100644 --- a/experiments/interpretation/dutchf3_patch/local/train.py +++ b/experiments/interpretation/dutchf3_patch/local/train.py @@ -1,5 +1,5 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
# /* spell-checker: disable */ import logging @@ -10,8 +10,7 @@ import fire import numpy as np import torch -from albumentations import (Compose, HorizontalFlip, Normalize, PadIfNeeded, - Resize) +from albumentations import Compose, HorizontalFlip, Normalize, PadIfNeeded, Resize from ignite.contrib.handlers import CosineAnnealingScheduler from ignite.engine import Events from ignite.metrics import Loss @@ -20,25 +19,41 @@ from torch.utils import data from deepseismic_interpretation.dutchf3.data import get_patch_loader, decode_segmap -from cv_lib.event_handlers import (SnapshotHandler, logging_handlers, - tensorboard_handlers) +from cv_lib.event_handlers import ( + SnapshotHandler, + logging_handlers, + tensorboard_handlers, +) from cv_lib.event_handlers.logging_handlers import Evaluator -from cv_lib.event_handlers.tensorboard_handlers import (create_image_writer, - create_summary_writer) -from cv_lib.segmentation import models -from cv_lib.segmentation.dutchf3.engine import (create_supervised_evaluator, - create_supervised_trainer) -from cv_lib.segmentation.dutchf3.metrics import (FrequencyWeightedIoU, - MeanClassAccuracy, MeanIoU, - PixelwiseAccuracy) -from cv_lib.segmentation.dutchf3.utils import (current_datetime, generate_path, - git_branch, git_hash, np_to_tb) +from cv_lib.event_handlers.tensorboard_handlers import ( + create_image_writer, + create_summary_writer, +) +from cv_lib.segmentation import models, extract_metric_from +from cv_lib.segmentation.dutchf3.engine import ( + create_supervised_evaluator, + create_supervised_trainer, +) + +from cv_lib.segmentation.metrics import ( + pixelwise_accuracy, + class_accuracy, + mean_class_accuracy, + class_iou, + mean_iou, +) + +from cv_lib.segmentation.dutchf3.utils import ( + current_datetime, + generate_path, + git_branch, + git_hash, + np_to_tb, +) from default import _C as config from default import update_config -from cv_lib.segmentation import extract_metric_from - def prepare_batch(batch, device=None, non_blocking=False): x, y = batch @@ -53,12 +68,13 @@ def run(*options, cfg=None): Notes: Options can be passed in via the options argument and loaded from the cfg file - Options loaded from default.py will be overridden by options loaded from cfg file - Options passed in through options argument will override option loaded from cfg file + Options from default.py will be overridden by options loaded from cfg file + Options passed in via options argument will override option loaded from cfg file Args: - *options (str,int ,optional): Options used to overide what is loaded from the config. - To see what options are available consult default.py + *options (str,int ,optional): Options used to overide what is loaded from the + config. To see what options are available consult + default.py cfg (str, optional): Location of config file to load. Defaults to None. 
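
        Example (illustrative only; the config name is a placeholder):
            $ python train.py --cfg configs/unet.yaml TRAIN.END_EPOCH 10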
""" @@ -78,9 +94,7 @@ def run(*options, cfg=None): basic_aug = Compose( [ Normalize( - mean=(config.TRAIN.MEAN,), - std=(config.TRAIN.STD,), - max_pixel_value=1, + mean=(config.TRAIN.MEAN,), std=(config.TRAIN.STD,), max_pixel_value=1 ), Resize( config.TRAIN.AUGMENTATIONS.RESIZE.HEIGHT, @@ -97,12 +111,7 @@ def run(*options, cfg=None): ] ) if config.TRAIN.AUGMENTATION: - train_aug = Compose( - [ - basic_aug, - HorizontalFlip(p=0.5), - ] - ) + train_aug = Compose([basic_aug, HorizontalFlip(p=0.5)]) val_aug = basic_aug else: train_aug = val_aug = basic_aug @@ -148,7 +157,6 @@ def run(*options, cfg=None): device = "cuda" model = model.to(device) # Send to GPU - optimizer = torch.optim.SGD( model.parameters(), lr=config.TRAIN.MAX_LR, @@ -163,19 +171,18 @@ def run(*options, cfg=None): config.MODEL.NAME, current_datetime(), ) + summary_writer = create_summary_writer( log_dir=path.join(output_dir, config.LOG_DIR) ) + snapshot_duration = scheduler_step * len(train_loader) scheduler = CosineAnnealingScheduler( - optimizer, - "lr", - config.TRAIN.MAX_LR, - config.TRAIN.MIN_LR, - snapshot_duration, + optimizer, "lr", config.TRAIN.MAX_LR, config.TRAIN.MIN_LR, snapshot_duration ) - # weights are inversely proportional to the frequency of the classes in the training set + # weights are inversely proportional to the frequency of the classes in the + # training set class_weights = torch.tensor( config.DATASET.CLASS_WEIGHTS, device=device, requires_grad=False ) @@ -194,9 +201,7 @@ def run(*options, cfg=None): Events.ITERATION_COMPLETED, logging_handlers.log_training_output(log_interval=config.PRINT_FREQ), ) - trainer.add_event_handler( - Events.EPOCH_STARTED, logging_handlers.log_lr(optimizer) - ) + trainer.add_event_handler(Events.EPOCH_STARTED, logging_handlers.log_lr(optimizer)) trainer.add_event_handler( Events.EPOCH_STARTED, tensorboard_handlers.log_lr(summary_writer, optimizer, "epoch"), @@ -207,50 +212,42 @@ def run(*options, cfg=None): ) def _select_pred_and_mask(model_out_dict): - return ( - model_out_dict["y_pred"].squeeze(), - model_out_dict["mask"].squeeze(), - ) + return (model_out_dict["y_pred"].squeeze(), model_out_dict["mask"].squeeze()) evaluator = create_supervised_evaluator( model, prepare_batch, metrics={ - "IoU": MeanIoU( - n_classes, device, output_transform=_select_pred_and_mask - ), "nll": Loss(criterion, output_transform=_select_pred_and_mask), - "mca": MeanClassAccuracy( - n_classes, device, output_transform=_select_pred_and_mask + "pixacc": pixelwise_accuracy( + n_classes, output_transform=_select_pred_and_mask, device=device ), - "fiou": FrequencyWeightedIoU( - n_classes, device, output_transform=_select_pred_and_mask - ), - "pixa": PixelwiseAccuracy( - n_classes, device, output_transform=_select_pred_and_mask + "cacc": class_accuracy(n_classes, output_transform=_select_pred_and_mask), + "mca": mean_class_accuracy( + n_classes, output_transform=_select_pred_and_mask ), + "ciou": class_iou(n_classes, output_transform=_select_pred_and_mask), + "mIoU": mean_iou(n_classes, output_transform=_select_pred_and_mask), }, device=device, ) # Set the validation run to start on the epoch completion of the training run - trainer.add_event_handler( - Events.EPOCH_COMPLETED, Evaluator(evaluator, val_loader) - ) + trainer.add_event_handler(Events.EPOCH_COMPLETED, Evaluator(evaluator, val_loader)) evaluator.add_event_handler( Events.EPOCH_COMPLETED, logging_handlers.log_metrics( "Validation results", metrics_dict={ - "IoU": "IoU :", "nll": "Avg loss :", - "pixa": "Pixelwise Accuracy :", - "mca": 
"Mean Class Accuracy :", - "fiou": "Freq Weighted IoU :", + "pixacc": "Pixelwise Accuracy :", + "mca": "Avg Class Accuracy :", + "mIoU": "Avg Class IoU :", }, ), ) + evaluator.add_event_handler( Events.EPOCH_COMPLETED, tensorboard_handlers.log_metrics( @@ -258,10 +255,10 @@ def _select_pred_and_mask(model_out_dict): trainer, "epoch", metrics_dict={ - "IoU": "Validation/IoU", + "mIoU": "Validation/mIoU", "nll": "Validation/Loss", "mca": "Validation/MCA", - "fiou": "Validation/FIoU", + "pixacc": "Validation/Pixel_Acc", }, ), ) @@ -285,19 +282,13 @@ def _tensor_to_numpy(pred_tensor): evaluator.add_event_handler( Events.EPOCH_COMPLETED, create_image_writer( - summary_writer, - "Validation/Mask", - "mask", - transform_func=transform_func, + summary_writer, "Validation/Mask", "mask", transform_func=transform_func ), ) evaluator.add_event_handler( Events.EPOCH_COMPLETED, create_image_writer( - summary_writer, - "Validation/Pred", - "y_pred", - transform_func=transform_pred, + summary_writer, "Validation/Pred", "y_pred", transform_func=transform_pred ), ) @@ -307,7 +298,7 @@ def snapshot_function(): checkpoint_handler = SnapshotHandler( path.join(output_dir, config.TRAIN.MODEL_DIR), config.MODEL.NAME, - extract_metric_from("fiou"), + extract_metric_from("mIoU"), snapshot_function, ) evaluator.add_event_handler( diff --git a/experiments/interpretation/dutchf3_section/local/train.py b/experiments/interpretation/dutchf3_section/local/train.py index 04db2b12..6d99c463 100644 --- a/experiments/interpretation/dutchf3_section/local/train.py +++ b/experiments/interpretation/dutchf3_section/local/train.py @@ -1,4 +1,4 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. +# Copyright (c) Microsoft Corporation. # # Licensed under the MIT License. # # /* spell-checker: disable */ @@ -10,10 +10,7 @@ import numpy as np import torch from albumentations import Compose, HorizontalFlip, Normalize -from deepseismic_interpretation.dutchf3.data import ( - decode_segmap, - get_section_loader, -) +from deepseismic_interpretation.dutchf3.data import decode_segmap, get_section_loader from cv_lib.event_handlers import ( SnapshotHandler, logging_handlers, @@ -24,16 +21,17 @@ create_image_writer, create_summary_writer, ) -from cv_lib.segmentation import models +from cv_lib.segmentation import models, extract_metric_from from cv_lib.segmentation.dutchf3.engine import ( create_supervised_evaluator, create_supervised_trainer, ) -from cv_lib.segmentation.dutchf3.metrics import ( - FrequencyWeightedIoU, - MeanClassAccuracy, - MeanIoU, - PixelwiseAccuracy, +from cv_lib.segmentation.metrics import ( + pixelwise_accuracy, + class_accuracy, + mean_class_accuracy, + class_iou, + mean_iou, ) from cv_lib.segmentation.dutchf3.utils import ( current_datetime, @@ -46,13 +44,11 @@ from default import update_config from ignite.contrib.handlers import CosineAnnealingScheduler from ignite.engine import Events -from ignite.metrics import Loss from ignite.utils import convert_tensor +from ignite.metrics import Loss from toolz import compose from torch.utils import data -from cv_lib.segmentation import extract_metric_from - def prepare_batch(batch, device="cuda", non_blocking=False): x, y = batch @@ -67,12 +63,13 @@ def run(*options, cfg=None): Notes: Options can be passed in via the options argument and loaded from the cfg file - Options loaded from default.py will be overridden by options loaded from cfg file - Options passed in through options argument will override option loaded from cfg file + Options from default.py will be 
overridden by options loaded from cfg file + Options passed in via options argument will override option loaded from cfg file Args: - *options (str,int ,optional): Options used to overide what is loaded from the config. - To see what options are available consult default.py + *options (str,int ,optional): Options used to overide what is loaded from the + config. To see what options are available consult + default.py cfg (str, optional): Location of config file to load. Defaults to None. """ @@ -92,9 +89,7 @@ def run(*options, cfg=None): basic_aug = Compose( [ Normalize( - mean=(config.TRAIN.MEAN,), - std=(config.TRAIN.STD,), - max_pixel_value=1, + mean=(config.TRAIN.MEAN,), std=(config.TRAIN.STD,), max_pixel_value=1 ) ] ) @@ -127,9 +122,7 @@ def __init__(self, data_source): def __iter__(self): char = ["i" if np.random.randint(2) == 1 else "x"] self.indices = [ - idx - for (idx, name) in enumerate(self.data_source) - if char[0] in name + idx for (idx, name) in enumerate(self.data_source) if char[0] in name ] return (self.indices[i] for i in torch.randperm(len(self.indices))) @@ -184,14 +177,11 @@ def __len__(self): snapshot_duration = scheduler_step * len(train_loader) scheduler = CosineAnnealingScheduler( - optimizer, - "lr", - config.TRAIN.MAX_LR, - config.TRAIN.MIN_LR, - snapshot_duration, + optimizer, "lr", config.TRAIN.MAX_LR, config.TRAIN.MIN_LR, snapshot_duration ) - # weights are inversely proportional to the frequency of the classes in the training set + # weights are inversely proportional to the frequency of the classes in + # the training set class_weights = torch.tensor( config.DATASET.CLASS_WEIGHTS, device=device, requires_grad=False ) @@ -211,9 +201,7 @@ def __len__(self): logging_handlers.log_training_output(log_interval=config.PRINT_FREQ), ) - trainer.add_event_handler( - Events.EPOCH_STARTED, logging_handlers.log_lr(optimizer) - ) + trainer.add_event_handler(Events.EPOCH_STARTED, logging_handlers.log_lr(optimizer)) trainer.add_event_handler( Events.EPOCH_STARTED, @@ -226,51 +214,58 @@ def __len__(self): ) def _select_pred_and_mask(model_out_dict): - return ( - model_out_dict["y_pred"].squeeze(), - model_out_dict["mask"].squeeze(), - ) + return (model_out_dict["y_pred"].squeeze(), model_out_dict["mask"].squeeze()) evaluator = create_supervised_evaluator( model, prepare_batch, metrics={ - "IoU": MeanIoU( - n_classes, device, output_transform=_select_pred_and_mask + "nll": Loss( + criterion, output_transform=_select_pred_and_mask, device=device + ), + "pixacc": pixelwise_accuracy( + n_classes, output_transform=_select_pred_and_mask, device=device ), - "nll": Loss(criterion, output_transform=_select_pred_and_mask), - "mca": MeanClassAccuracy( - n_classes, device, output_transform=_select_pred_and_mask + "cacc": class_accuracy( + n_classes, output_transform=_select_pred_and_mask, device=device ), - "fiou": FrequencyWeightedIoU( - n_classes, device, output_transform=_select_pred_and_mask + "mca": mean_class_accuracy( + n_classes, output_transform=_select_pred_and_mask, device=device ), - "pixa": PixelwiseAccuracy( - n_classes, device, output_transform=_select_pred_and_mask + "ciou": class_iou( + n_classes, output_transform=_select_pred_and_mask, device=device + ), + "mIoU": mean_iou( + n_classes, output_transform=_select_pred_and_mask, device=device ), }, device=device, ) # Set the validation run to start on the epoch completion of the training run - trainer.add_event_handler( - Events.EPOCH_COMPLETED, Evaluator(evaluator, val_loader) - ) + 

     evaluator.add_event_handler(
         Events.EPOCH_COMPLETED,
         logging_handlers.log_metrics(
             "Validation results",
             metrics_dict={
-                "IoU": "IoU :",
                 "nll": "Avg loss :",
-                "pixa": "Pixelwise Accuracy :",
-                "mca": "Mean Class Accuracy :",
-                "fiou": "Freq Weighted IoU :",
+                "pixacc": "Pixelwise Accuracy :",
+                "mca": "Avg Class Accuracy :",
+                "mIoU": "Avg Class IoU :",
             },
         ),
     )

+    evaluator.add_event_handler(
+        Events.EPOCH_COMPLETED,
+        logging_handlers.log_class_metrics(
+            "Per class validation results",
+            metrics_dict={"ciou": "Class IoU :", "cacc": "Class Accuracy :"},
+        ),
+    )
+
     evaluator.add_event_handler(
         Events.EPOCH_COMPLETED,
         tensorboard_handlers.log_metrics(
@@ -278,10 +273,10 @@ def _select_pred_and_mask(model_out_dict):
             trainer,
             "epoch",
             metrics_dict={
-                "IoU": "Validation/IoU",
+                "mIoU": "Validation/mIoU",
                 "nll": "Validation/Loss",
                 "mca": "Validation/MCA",
-                "fiou": "Validation/FIoU",
+                "pixacc": "Validation/Pixel_Acc",
             },
         ),
     )
@@ -306,20 +301,14 @@ def _tensor_to_numpy(pred_tensor):
     evaluator.add_event_handler(
         Events.EPOCH_COMPLETED,
         create_image_writer(
-            summary_writer,
-            "Validation/Mask",
-            "mask",
-            transform_func=transform_func,
+            summary_writer, "Validation/Mask", "mask", transform_func=transform_func
         ),
     )
     evaluator.add_event_handler(
         Events.EPOCH_COMPLETED,
         create_image_writer(
-            summary_writer,
-            "Validation/Pred",
-            "y_pred",
-            transform_func=transform_pred,
+            summary_writer, "Validation/Pred", "y_pred", transform_func=transform_pred
         ),
     )
@@ -329,7 +318,7 @@ def snapshot_function():
     checkpoint_handler = SnapshotHandler(
         path.join(output_dir, config.TRAIN.MODEL_DIR),
         config.MODEL.NAME,
-        extract_metric_from("fiou"),
+        extract_metric_from("mIoU"),
         snapshot_function,
     )
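
The SnapshotHandler above now ranks checkpoints by the evaluator's "mIoU" metric through extract_metric_from. Its implementation is not shown in this diff; a curried accessor along these lines would match how it is called here. This is an assumption, written in the toolz.curry style used elsewhere in cv_lib, not the repo's actual code.

from toolz import curry

@curry
def extract_metric_from(metric_name, engine):
    # extract_metric_from("mIoU") yields a function of the engine that the
    # snapshot handler can call to score a completed evaluation run
    return engine.state.metrics[metric_name]
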
diff --git a/experiments/interpretation/dutchf3_voxel/train.py b/experiments/interpretation/dutchf3_voxel/train.py
index 7ea63254..71ae42f3 100644
--- a/experiments/interpretation/dutchf3_voxel/train.py
+++ b/experiments/interpretation/dutchf3_voxel/train.py
@@ -12,74 +12,55 @@
 from torch.utils import data
 from ignite.engine import Events
 from ignite.handlers import ModelCheckpoint
-from ignite.metrics import Accuracy, Loss
-# TODO: get mertircs from Ignite
-# from ignite.metrics import MIoU, MeanClassAccuracy, FrequencyWeightedIoU, PixelwiseAccuracy
+from ignite.metrics import Loss
 from ignite.utils import convert_tensor
-from ignite.engine.engine import Engine
-from toolz import compose, curry
 from tqdm import tqdm

-from deepseismic_interpretation.dutchf3.data import (
-    get_voxel_loader,
-    decode_segmap,
-)
+from deepseismic_interpretation.dutchf3.data import get_voxel_loader
+from deepseismic_interpretation.models.texture_net import TextureNet
+
 from cv_lib.event_handlers import (
     SnapshotHandler,
     logging_handlers,
     tensorboard_handlers,
 )
 from cv_lib.event_handlers.logging_handlers import Evaluator
-from cv_lib.event_handlers.tensorboard_handlers import (
-    create_image_writer,
-    create_summary_writer,
+from cv_lib.event_handlers.tensorboard_handlers import create_summary_writer
+
+# TODO: replace with Ignite metrics
+from cv_lib.segmentation.metrics import (
+    pixelwise_accuracy,
+    class_accuracy,
+    mean_class_accuracy,
+    class_iou,
+    mean_iou,
 )
-from cv_lib.segmentation import models
+from cv_lib.segmentation import extract_metric_from

 # from cv_lib.segmentation.dutchf3.engine import (
 #     create_supervised_evaluator,
 #     create_supervised_trainer,
 # )
 # Use ignite generic versions for now
-from ignite.engine import (
-    create_supervised_trainer,
-    create_supervised_evaluator,
-)
-
-# TODO: replace with Ignite metrics
-from cv_lib.segmentation.dutchf3.metrics import (
-    FrequencyWeightedIoU,
-    MeanClassAccuracy,
-    MeanIoU,
-    PixelwiseAccuracy,
-)
-
-from cv_lib.segmentation.dutchf3.utils import (
-    current_datetime,
-    generate_path,
-    git_branch,
-    git_hash,
-    np_to_tb,
-)
-
-from interpretation.deepseismic_interpretation.models.texture_net import (
-    TextureNet,
-)
+from ignite.engine import create_supervised_trainer, create_supervised_evaluator

 from default import _C as config
 from default import update_config


-def _prepare_batch(
-    batch, device=None, non_blocking=False, t_type=torch.FloatTensor
-):
+def _prepare_batch(batch, device=None, non_blocking=False, t_type=torch.FloatTensor):
     x, y = batch
     new_x = convert_tensor(
         torch.squeeze(x, 1), device=device, non_blocking=non_blocking
     )
-    new_y = convert_tensor(torch.unsqueeze(y, 2), device=device, non_blocking=non_blocking)
+    new_y = convert_tensor(
+        torch.unsqueeze(y, 2), device=device, non_blocking=non_blocking
+    )
     if device == "cuda":
-        return new_x.type(t_type).cuda(), torch.unsqueeze(new_y, 3).type(torch.LongTensor).cuda()
+        return (
+            new_x.type(t_type).cuda(),
+            torch.unsqueeze(new_y, 3).type(torch.LongTensor).cuda(),
+        )
     else:
         return new_x.type(t_type), torch.unsqueeze(new_y, 3).type(torch.LongTensor)
@@ -89,12 +70,13 @@ def run(*options, cfg=None):

     Notes:
         Options can be passed in via the options argument and loaded from the cfg file
-        Options loaded from default.py will be overridden by options loaded from cfg file
-        Options passed in through options argument will override option loaded from cfg file
+        Options from default.py will be overridden by options loaded from cfg file
+        Options passed in via options argument will override options loaded from cfg file

     Args:
-        *options (str,int ,optional): Options used to overide what is loaded from the config.
-                                      To see what options are available consult default.py
+        *options (str, int, optional): Options used to override what is loaded from the
+                                       config. To see what options are available consult
+                                       default.py
         cfg (str, optional): Location of config file to load. Defaults to None.
     """
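
A quick shape walkthrough of the reformatted _prepare_batch above, using hypothetical voxel dimensions; the real sizes depend on config.WINDOW_SIZE, so every shape here is an assumption for illustration.

import torch

x = torch.rand(32, 1, 65, 65, 65)         # (batch, channel, depth, height, width), assumed
y = torch.randint(0, 2, (32, 1))          # one class label per voxel window, assumed

new_x = torch.squeeze(x, 1)               # -> (32, 65, 65, 65): drop the singleton channel
new_y = torch.unsqueeze(y, 2)             # -> (32, 1, 1)
new_y = torch.unsqueeze(new_y, 3).long()  # -> (32, 1, 1, 1), LongTensor as the loss expects
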
""" @@ -117,7 +99,7 @@ def run(*options, cfg=None): config.DATASET.FILENAME, split="train", window_size=config.WINDOW_SIZE, - len=config.TRAIN.BATCH_SIZE_PER_GPU*config.TRAIN.BATCH_PER_EPOCH, + len=config.TRAIN.BATCH_SIZE_PER_GPU * config.TRAIN.BATCH_PER_EPOCH, batch_size=config.TRAIN.BATCH_SIZE_PER_GPU, ) val_set = TrainVoxelLoader( @@ -125,28 +107,29 @@ def run(*options, cfg=None): config.DATASET.FILENAME, split="val", window_size=config.WINDOW_SIZE, - len=config.TRAIN.BATCH_SIZE_PER_GPU*config.TRAIN.BATCH_PER_EPOCH, + len=config.TRAIN.BATCH_SIZE_PER_GPU * config.TRAIN.BATCH_PER_EPOCH, batch_size=config.TRAIN.BATCH_SIZE_PER_GPU, ) n_classes = train_set.n_classes - # set dataset length to batch size to be consistent with 5000 iterations each of size - # 32 in the original Waldeland implementation + # set dataset length to batch size to be consistent with 5000 iterations + # each of size 32 in the original Waldeland implementation train_loader = data.DataLoader( train_set, batch_size=config.TRAIN.BATCH_SIZE_PER_GPU, - num_workers=config.WORKERS, - shuffle=False + num_workers=config.WORKERS, + shuffle=False, ) val_loader = data.DataLoader( val_set, batch_size=config.VALIDATION.BATCH_SIZE_PER_GPU, num_workers=config.WORKERS, - shuffle=False + shuffle=False, ) - # this is how we import model for CV - here we're importing a seismic segmentation model + # this is how we import model for CV - here we're importing a seismic + # segmentation model model = TextureNet(n_classes=config.DATASET.NUM_CLASSES) optimizer = torch.optim.Adam( @@ -157,72 +140,74 @@ def run(*options, cfg=None): ) device = "cpu" - log_interval = 10 + if torch.cuda.is_available(): device = "cuda" model = model.cuda() - loss = torch.nn.CrossEntropyLoss() + loss = torch.nn.CrossEntropyLoss() trainer = create_supervised_trainer( - model, - optimizer, - loss, - prepare_batch=_prepare_batch, - device=device, + model, optimizer, loss, prepare_batch=_prepare_batch, device=device ) desc = "ITERATION - loss: {:.2f}" - pbar = tqdm( - initial=0, leave=False, total=len(train_loader), desc=desc.format(0) - ) + pbar = tqdm(initial=0, leave=False, total=len(train_loader), desc=desc.format(0)) # add model checkpointing output_dir = path.join(config.OUTPUT_DIR, config.TRAIN.MODEL_DIR) checkpoint_handler = ModelCheckpoint( - output_dir, "model", save_interval=1, n_saved=3, create_dir=True, require_empty=False) + output_dir, + "model", + save_interval=1, + n_saved=3, + create_dir=True, + require_empty=False, + ) criterion = torch.nn.CrossEntropyLoss(reduction="mean") - + # save model at each epoch trainer.add_event_handler( Events.EPOCH_COMPLETED, checkpoint_handler, {config.MODEL.NAME: model} ) - + def _select_pred_and_mask(model_out): # receive a tuple of (x, y_pred), y - # so actually in line 51 of cv_lib/cv_lib/segmentation/dutch_f3/metrics/__init__.py - # we do the following line, so here we just select the model - #_, y_pred = torch.max(model_out[0].squeeze(), 1, keepdim=True) + # so actually in line 51 of + # cv_lib/cv_lib/segmentation/dutch_f3/metrics/__init__.py + # we do the following line, so here we just select the model + # _, y_pred = torch.max(model_out[0].squeeze(), 1, keepdim=True) y_pred = model_out[0].squeeze() y = model_out[1].squeeze() - return (y_pred.squeeze(), y,) - + return (y_pred.squeeze(), y) + evaluator = create_supervised_evaluator( - model, + model, metrics={ - "IoU": MeanIoU( - n_classes, device, output_transform=_select_pred_and_mask + "nll": Loss(criterion, device=device), + "pixa": pixelwise_accuracy( + 
     evaluator = create_supervised_evaluator(
-        model,
+        model,
         metrics={
-            "IoU": MeanIoU(
-                n_classes, device, output_transform=_select_pred_and_mask
+            "nll": Loss(criterion, device=device),
+            "pixa": pixelwise_accuracy(
+                n_classes, output_transform=_select_pred_and_mask, device=device
+            ),
+            "cacc": class_accuracy(
+                n_classes, output_transform=_select_pred_and_mask, device=device
             ),
-            "nll": Loss(criterion),
-            "mca": MeanClassAccuracy(
-                n_classes, device, output_transform=_select_pred_and_mask
+            "mca": mean_class_accuracy(
+                n_classes, output_transform=_select_pred_and_mask, device=device
             ),
-            "fiou": FrequencyWeightedIoU(
-                n_classes, device, output_transform=_select_pred_and_mask
+            "ciou": class_iou(
+                n_classes, output_transform=_select_pred_and_mask, device=device
             ),
-            "pixa": PixelwiseAccuracy(
-                n_classes, device, output_transform=_select_pred_and_mask
+            "mIoU": mean_iou(
+                n_classes, output_transform=_select_pred_and_mask, device=device
             ),
         },
         device=device,
         prepare_batch=_prepare_batch,
-    )
+    )

     # Set the validation run to start on the epoch completion of the training run
-    trainer.add_event_handler(
-        Events.EPOCH_COMPLETED, Evaluator(evaluator, val_loader)
-    )
+    trainer.add_event_handler(Events.EPOCH_COMPLETED, Evaluator(evaluator, val_loader))

     summary_writer = create_summary_writer(
         log_dir=path.join(output_dir, config.LOG_DIR)
@@ -233,11 +218,10 @@ def _select_pred_and_mask(model_out):
         logging_handlers.log_metrics(
             "Validation results",
             metrics_dict={
-                "IoU": "IoU :",
+                "mIoU": "Avg IoU :",
                 "nll": "Avg loss :",
                 "pixa": "Pixelwise Accuracy :",
                 "mca": "Mean Class Accuracy :",
-                "fiou": "Freq Weighted IoU :",
             },
         ),
     )
@@ -248,26 +232,26 @@ def _select_pred_and_mask(model_out):
             trainer,
             "epoch",
             metrics_dict={
-                "IoU": "Validation/IoU",
+                "mIoU": "Validation/IoU",
                 "nll": "Validation/Loss",
                 "mca": "Validation/MCA",
-                "fiou": "Validation/FIoU",
             },
         ),
     )
-    
+
     summary_writer = create_summary_writer(
         log_dir=path.join(output_dir, config.LOG_DIR)
-    )
-    
+    )
+
+    snapshot_duration = 1

-    snapshot_duration=1
     def snapshot_function():
         return (trainer.state.iteration % snapshot_duration) == 0

     checkpoint_handler = SnapshotHandler(
         path.join(output_dir, config.TRAIN.MODEL_DIR),
         config.MODEL.NAME,
+        extract_metric_from("mIoU"),
         snapshot_function,
     )
     evaluator.add_event_handler(
@@ -275,9 +259,11 @@ def snapshot_function():
     )

     logger.info("Starting training")
-    trainer.run(train_loader, max_epochs=config.TRAIN.END_EPOCH//config.TRAIN.BATCH_PER_EPOCH)
+    trainer.run(
+        train_loader, max_epochs=config.TRAIN.END_EPOCH // config.TRAIN.BATCH_PER_EPOCH
+    )
     pbar.close()

+
 if __name__ == "__main__":
     fire.Fire(run)
-
diff --git a/experiments/interpretation/penobscot/local/configs/seresnet_unet.yaml b/experiments/interpretation/penobscot/local/configs/seresnet_unet.yaml
index 36b315a7..cb61b6b6 100644
--- a/experiments/interpretation/penobscot/local/configs/seresnet_unet.yaml
+++ b/experiments/interpretation/penobscot/local/configs/seresnet_unet.yaml
@@ -54,11 +54,11 @@ VALIDATION:

 TEST:
   COMPLETE_PATCHES_ONLY: False
-  MODEL_PATH: "/data/home/mat/repos/DeepSeismic/experiments/segmentation/penobscot/local/output/penobscot/437970c875226e7e39c8109c0de8d21c5e5d6e3b/seg_hrnet/Sep25_144942/models/seg_hrnet_running_model_28.pth"
+  MODEL_PATH: "/data/home/vapaunic/repos/DeepSeismic/experiments/interpretation/penobscot/local/output/vapaunic/metrics/4120aa99152b6e4f92f8134b783ac63c8131e1ed/resnet_unet/Nov05_105704/models/resnet_unet_running_model_1.pth"
   AUGMENTATIONS:
     RESIZE:
-      HEIGHT: 200
-      WIDTH: 200
+      HEIGHT: 256
+      WIDTH: 256
     PAD:
       HEIGHT: 256
       WIDTH: 256
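
Two small consequences of the voxel training loop above, worked through with illustrative numbers; the real values come from the config and are not shown in this diff, so treat both constants as assumptions.

END_EPOCH = 5000          # assumed: this script treats it as a total iteration budget
BATCH_PER_EPOCH = 5000    # assumed

max_epochs = END_EPOCH // BATCH_PER_EPOCH   # -> 1 synthetic epoch passed to trainer.run

# and with snapshot_duration fixed at 1 above, snapshot_function() is true on
# every iteration, so the SnapshotHandler is eligible to fire each time it runs:
assert (42 % 1) == 0
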
diff --git a/experiments/interpretation/penobscot/local/test.py b/experiments/interpretation/penobscot/local/test.py
index 91899519..9eafa2c5 100644
--- a/experiments/interpretation/penobscot/local/test.py
+++ b/experiments/interpretation/penobscot/local/test.py
@@ -12,26 +12,19 @@
 import numpy as np
 import torch
 import torchvision
-from albumentations import (
-    Compose,
-    Normalize,
-    PadIfNeeded,
-    Resize,
-)
-from cv_lib.event_handlers import (
-    logging_handlers,
-    tensorboard_handlers,
-)
+from albumentations import Compose, Normalize, PadIfNeeded, Resize
+from cv_lib.event_handlers import logging_handlers, tensorboard_handlers
 from cv_lib.event_handlers.tensorboard_handlers import (
     create_image_writer,
     create_summary_writer,
 )
 from cv_lib.segmentation import models
-from cv_lib.segmentation.dutchf3.metrics import (
-    FrequencyWeightedIoU,
-    MeanClassAccuracy,
-    MeanIoU,
-    PixelwiseAccuracy,
+from cv_lib.segmentation.metrics import (
+    pixelwise_accuracy,
+    class_accuracy,
+    mean_class_accuracy,
+    class_iou,
+    mean_iou,
 )
 from cv_lib.segmentation.dutchf3.utils import (
     current_datetime,
@@ -40,13 +33,9 @@
     git_hash,
     np_to_tb,
 )
-from cv_lib.segmentation.penobscot.engine import (
-    create_supervised_evaluator,
-)
+from cv_lib.segmentation.penobscot.engine import create_supervised_evaluator
 from deepseismic_interpretation.dutchf3.data import decode_segmap
-from deepseismic_interpretation.penobscot.data import (
-    get_patch_dataset,
-)
+from deepseismic_interpretation.penobscot.data import get_patch_dataset
 from deepseismic_interpretation.penobscot.metrics import InlineMeanIoU
 from default import _C as config
 from default import update_config
@@ -70,12 +59,10 @@ def _prepare_batch(batch, device=None, non_blocking=False):

 def _padding_from(config):
     padding_height = (
-        config.TEST.AUGMENTATIONS.PAD.HEIGHT
-        - config.TEST.AUGMENTATIONS.RESIZE.HEIGHT
+        config.TEST.AUGMENTATIONS.PAD.HEIGHT - config.TEST.AUGMENTATIONS.RESIZE.HEIGHT
     )
     padding_width = (
-        config.TEST.AUGMENTATIONS.PAD.WIDTH
-        - config.TEST.AUGMENTATIONS.RESIZE.WIDTH
+        config.TEST.AUGMENTATIONS.PAD.WIDTH - config.TEST.AUGMENTATIONS.RESIZE.WIDTH
     )
     assert (
         padding_height == padding_width
@@ -84,19 +71,17 @@ def _padding_from(config):


 def _scale_from(config):
-    scale_height = (
-        config.TEST.AUGMENTATIONS.PAD.HEIGHT / config.TRAIN.PATCH_SIZE
-    )
+    scale_height = config.TEST.AUGMENTATIONS.PAD.HEIGHT / config.TRAIN.PATCH_SIZE
     scale_width = config.TEST.AUGMENTATIONS.PAD.WIDTH / config.TRAIN.PATCH_SIZE
     assert (
-        config.TEST.AUGMENTATIONS.PAD.HEIGHT % config.TRAIN.PATCH_SIZE != 0
-    ), "The scaling between the patch height and resized height needs to be whole number"
+        config.TEST.AUGMENTATIONS.PAD.HEIGHT % config.TRAIN.PATCH_SIZE == 0
+    ), "The scaling between the patch height and resized height must be a whole number"
     assert (
-        config.TEST.AUGMENTATIONS.PAD.WIDTH % config.TRAIN.PATCH_SIZE != 0
-    ), "The scaling between the patch width and resized height needs to be whole number"
+        config.TEST.AUGMENTATIONS.PAD.WIDTH % config.TRAIN.PATCH_SIZE == 0
+    ), "The scaling between the patch width and resized width must be a whole number"
     assert (
         scale_height == scale_width
-    ), "The scaling for the height and width need to be the same"
+    ), "The scaling for the height and width must be the same"

     return int(scale_height)

@@ -113,9 +98,7 @@ def _scale_from(config):
 )


-def _log_tensor_to_tensorboard(
-    images_tensor, identifier, summary_writer, evaluator
-):
+def _log_tensor_to_tensorboard(images_tensor, identifier, summary_writer, evaluator):
     image_grid = torchvision.utils.make_grid(
         images_tensor, normalize=False, scale_each=False, nrow=2
     )
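
Worked example for _padding_from and _scale_from above, using the updated seresnet_unet.yaml values; config.TRAIN.PATCH_SIZE is not shown in this diff, so the patch size below is assumed for illustration.

RESIZE_HEIGHT, PAD_HEIGHT = 256, 256   # from the yaml change above
PATCH_SIZE = 128                       # assumed

padding = PAD_HEIGHT - RESIZE_HEIGHT   # 0: nothing to strip from the predictions
assert PAD_HEIGHT % PATCH_SIZE == 0    # the corrected assertion: scale must divide evenly
scale = PAD_HEIGHT // PATCH_SIZE       # 2: a patch is upscaled by a factor of two
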
@@ -132,12 +115,13 @@ def run(*options, cfg=None):

     Notes:
         Options can be passed in via the options argument and loaded from the cfg file
-        Options loaded from default.py will be overridden by options loaded from cfg file
-        Options passed in through options argument will override option loaded from cfg file
+        Options from default.py will be overridden by options loaded from cfg file
+        Options passed in via options argument will override options loaded from cfg file

     Args:
-        *options (str,int ,optional): Options used to overide what is loaded from the config.
-                                      To see what options are available consult default.py
+        *options (str, int, optional): Options used to override what is loaded from the
+                                       config. To see what options are available consult
+                                       default.py
         cfg (str, optional): Location of config file to load. Defaults to None.
     """
@@ -145,7 +129,6 @@ def run(*options, cfg=None):
     logging.config.fileConfig(config.LOG_CONFIG)
     logger = logging.getLogger(__name__)
     logger.debug(config.WORKERS)
-    scheduler_step = config.TRAIN.END_EPOCH // config.TRAIN.SNAPSHOTS
     torch.backends.cudnn.benchmark = config.CUDNN.BENCHMARK
     torch.manual_seed(config.SEED)
@@ -226,7 +209,8 @@ def run(*options, cfg=None):
         log_dir=path.join(output_dir, config.LOG_DIR)
     )

-    # weights are inversely proportional to the frequency of the classes in the training set
+    # weights are inversely proportional to the frequency of the classes in
+    # the training set
     class_weights = torch.tensor(
         config.DATASET.CLASS_WEIGHTS, device=device, requires_grad=False
     )
@@ -236,10 +220,7 @@ def run(*options, cfg=None):
     )

     def _select_pred_and_mask(model_out_dict):
-        return (
-            model_out_dict["y_pred"].squeeze(),
-            model_out_dict["mask"].squeeze(),
-        )
+        return (model_out_dict["y_pred"].squeeze(), model_out_dict["mask"].squeeze())

     def _select_all(model_out_dict):
         return (
@@ -263,20 +244,25 @@ def _select_all(model_out_dict):
         model,
         _prepare_batch,
         metrics={
-            "IoU": MeanIoU(
-                n_classes, device, output_transform=_select_pred_and_mask
+            "nll": Loss(
+                criterion, output_transform=_select_pred_and_mask, device=device
             ),
-            "nll": Loss(criterion, output_transform=_select_pred_and_mask),
-            "mca": MeanClassAccuracy(
-                n_classes, device, output_transform=_select_pred_and_mask
+            "inIoU": inline_mean_iou,
+            "pixa": pixelwise_accuracy(
+                n_classes, output_transform=_select_pred_and_mask, device=device
             ),
-            "fiou": FrequencyWeightedIoU(
-                n_classes, device, output_transform=_select_pred_and_mask
+            "cacc": class_accuracy(
+                n_classes, output_transform=_select_pred_and_mask, device=device
             ),
-            "pixa": PixelwiseAccuracy(
-                n_classes, device, output_transform=_select_pred_and_mask
+            "mca": mean_class_accuracy(
+                n_classes, output_transform=_select_pred_and_mask, device=device
+            ),
+            "ciou": class_iou(
+                n_classes, output_transform=_select_pred_and_mask, device=device
+            ),
+            "mIoU": mean_iou(
+                n_classes, output_transform=_select_pred_and_mask, device=device
             ),
-            "inIoU": inline_mean_iou,
         },
         device=device,
     )
@@ -286,11 +272,10 @@ def _select_all(model_out_dict):
         logging_handlers.log_metrics(
             "Test results",
             metrics_dict={
-                "IoU": "IoU :",
                 "nll": "Avg loss :",
+                "mIoU": "Avg IoU :",
                 "pixa": "Pixelwise Accuracy :",
                 "mca": "Mean Class Accuracy :",
-                "fiou": "Freq Weighted IoU :",
                 "inIoU": "Mean Inline IoU :",
             },
         ),
     )
@@ -302,10 +287,9 @@ def _select_all(model_out_dict):
             evaluator,
             "epoch",
             metrics_dict={
-                "IoU": "Test/IoU",
+                "mIoU": "Test/IoU",
                 "nll": "Test/Loss",
                 "mca": "Test/MCA",
-                "fiou": "Test/FIoU",
                 "inIoU": "Test/MeanInlineIoU",
             },
         ),
     )
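
The evaluator above registers inline_mean_iou under "inIoU". Based on the InlineMeanIoU constructor documented later in this diff (penobscot/metrics.py), its construction plausibly looks like the sketch below; the exact argument mapping is an assumption, since the instantiation itself sits outside the changed hunks.

inline_mean_iou = InlineMeanIoU(
    image_height=config.TEST.AUGMENTATIONS.PAD.HEIGHT,  # assumed mapping
    image_width=config.TEST.AUGMENTATIONS.PAD.WIDTH,    # assumed mapping
    patch_size=config.TRAIN.PATCH_SIZE,
    num_classes=n_classes,
    padding=_padding_from(config),
    scale=_scale_from(config),
    output_transform=_select_all,
)
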
@@ -338,15 +322,12 @@
     evaluator.add_event_handler(
         Events.EPOCH_COMPLETED,
         create_image_writer(
-            summary_writer,
-            "Test/Pred",
-            "y_pred",
-            transform_func=transform_pred,
+            summary_writer, "Test/Pred", "y_pred", transform_func=transform_pred
         ),
     )

     logger.info("Starting training")
-    evaluator.run(take(10, test_loader), max_epochs=1)
+    evaluator.run(test_loader, max_epochs=1)

     # Log top N and bottom N inlines in terms of IoU to tensorboard
     inline_ious = inline_mean_iou.iou_per_inline()
diff --git a/experiments/interpretation/penobscot/local/train.py b/experiments/interpretation/penobscot/local/train.py
index 82168244..58dffef9 100644
--- a/experiments/interpretation/penobscot/local/train.py
+++ b/experiments/interpretation/penobscot/local/train.py
@@ -10,13 +10,7 @@
 import fire
 import numpy as np
 import torch
-from albumentations import (
-    Compose,
-    HorizontalFlip,
-    Normalize,
-    PadIfNeeded,
-    Resize,
-)
+from albumentations import Compose, HorizontalFlip, Normalize, PadIfNeeded, Resize
 from ignite.contrib.handlers import CosineAnnealingScheduler
 from ignite.engine import Events
 from ignite.metrics import Loss
@@ -25,9 +19,7 @@
 from torch.utils import data

 from deepseismic_interpretation.dutchf3.data import decode_segmap
-from deepseismic_interpretation.penobscot.data import (
-    get_patch_dataset,
-)
+from deepseismic_interpretation.penobscot.data import get_patch_dataset
 from cv_lib.event_handlers import (
     SnapshotHandler,
     logging_handlers,
@@ -38,16 +30,17 @@
     create_image_writer,
     create_summary_writer,
 )
-from cv_lib.segmentation import models
+from cv_lib.segmentation import models, extract_metric_from
 from cv_lib.segmentation.penobscot.engine import (
     create_supervised_evaluator,
     create_supervised_trainer,
 )
-from cv_lib.segmentation.dutchf3.metrics import (
-    FrequencyWeightedIoU,
-    MeanClassAccuracy,
-    MeanIoU,
-    PixelwiseAccuracy,
+from cv_lib.segmentation.metrics import (
+    pixelwise_accuracy,
+    class_accuracy,
+    mean_class_accuracy,
+    class_iou,
+    mean_iou,
 )
 from cv_lib.segmentation.dutchf3.utils import (
     current_datetime,
@@ -60,8 +53,6 @@
 from default import _C as config
 from default import update_config

-from cv_lib.segmentation import extract_metric_from
-
 mask_value = 255
 _SEG_COLOURS = np.asarray(
     [
@@ -91,12 +82,13 @@ def run(*options, cfg=None):

     Notes:
         Options can be passed in via the options argument and loaded from the cfg file
-        Options loaded from default.py will be overridden by options loaded from cfg file
-        Options passed in through options argument will override option loaded from cfg file
+        Options loaded from default.py will be overridden by those loaded from cfg file
+        Options passed in via options argument will override those loaded from cfg file

     Args:
-        *options (str,int ,optional): Options used to overide what is loaded from the config.
-                                      To see what options are available consult default.py
+        *options (str, int, optional): Options used to override what is loaded from the
+                                       config. To see what options are available consult
+                                       default.py
         cfg (str, optional): Location of config file to load. Defaults to None.
     """
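
iou_per_inline() above presumably returns a mapping of inline id to IoU; picking the best and worst N inlines for tensorboard can then be a simple sort. Sketch only: N and the dict shape are assumptions, not the file's actual logging code.

N = 5
ranked = sorted(inline_ious.items(), key=lambda kv: kv[1])   # ascending by IoU
worst_n = ranked[:N]    # hardest inlines
best_n = ranked[-N:]    # easiest inlines
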
""" @@ -112,6 +104,10 @@ def run(*options, cfg=None): torch.cuda.manual_seed_all(config.SEED) np.random.seed(seed=config.SEED) + device = "cpu" + if torch.cuda.is_available(): + device = "cuda" + # Setup Augmentations basic_aug = Compose( [ @@ -189,9 +185,6 @@ def run(*options, cfg=None): model = getattr(models, config.MODEL.NAME).get_seg_model(config) - device = "cpu" - if torch.cuda.is_available(): - device = "cuda" model = model.to(device) # Send to GPU optimizer = torch.optim.SGD( @@ -213,14 +206,11 @@ def run(*options, cfg=None): ) snapshot_duration = scheduler_step * len(train_loader) scheduler = CosineAnnealingScheduler( - optimizer, - "lr", - config.TRAIN.MAX_LR, - config.TRAIN.MIN_LR, - snapshot_duration, + optimizer, "lr", config.TRAIN.MAX_LR, config.TRAIN.MIN_LR, snapshot_duration ) - # weights are inversely proportional to the frequency of the classes in the training set + # weights are inversely proportional to the frequency of the classes in + # the training set class_weights = torch.tensor( config.DATASET.CLASS_WEIGHTS, device=device, requires_grad=False ) @@ -239,9 +229,7 @@ def run(*options, cfg=None): Events.ITERATION_COMPLETED, logging_handlers.log_training_output(log_interval=config.PRINT_FREQ), ) - trainer.add_event_handler( - Events.EPOCH_STARTED, logging_handlers.log_lr(optimizer) - ) + trainer.add_event_handler(Events.EPOCH_STARTED, logging_handlers.log_lr(optimizer)) trainer.add_event_handler( Events.EPOCH_STARTED, tensorboard_handlers.log_lr(summary_writer, optimizer, "epoch"), @@ -252,47 +240,38 @@ def run(*options, cfg=None): ) def _select_pred_and_mask(model_out_dict): - return ( - model_out_dict["y_pred"].squeeze(), - model_out_dict["mask"].squeeze(), - ) + return (model_out_dict["y_pred"].squeeze(), model_out_dict["mask"].squeeze()) evaluator = create_supervised_evaluator( model, _prepare_batch, metrics={ - "IoU": MeanIoU( - n_classes, device, output_transform=_select_pred_and_mask + "pixacc": pixelwise_accuracy( + n_classes, output_transform=_select_pred_and_mask ), "nll": Loss(criterion, output_transform=_select_pred_and_mask), - "mca": MeanClassAccuracy( - n_classes, device, output_transform=_select_pred_and_mask - ), - "fiou": FrequencyWeightedIoU( - n_classes, device, output_transform=_select_pred_and_mask - ), - "pixa": PixelwiseAccuracy( - n_classes, device, output_transform=_select_pred_and_mask + "cacc": class_accuracy(n_classes, output_transform=_select_pred_and_mask), + "mca": mean_class_accuracy( + n_classes, output_transform=_select_pred_and_mask ), + "ciou": class_iou(n_classes, output_transform=_select_pred_and_mask), + "mIoU": mean_iou(n_classes, output_transform=_select_pred_and_mask), }, device=device, ) # Set the validation run to start on the epoch completion of the training run - trainer.add_event_handler( - Events.EPOCH_COMPLETED, Evaluator(evaluator, val_loader) - ) + trainer.add_event_handler(Events.EPOCH_COMPLETED, Evaluator(evaluator, val_loader)) evaluator.add_event_handler( Events.EPOCH_COMPLETED, logging_handlers.log_metrics( "Validation results", metrics_dict={ - "IoU": "IoU :", "nll": "Avg loss :", - "pixa": "Pixelwise Accuracy :", - "mca": "Mean Class Accuracy :", - "fiou": "Freq Weighted IoU :", + "pixacc": "Pixelwise Accuracy :", + "mca": "Avg Class Accuracy :", + "mIoU": "Avg Class IoU :", }, ), ) @@ -303,10 +282,10 @@ def _select_pred_and_mask(model_out_dict): trainer, "epoch", metrics_dict={ - "IoU": "Validation/IoU", + "mIoU": "Validation/mIoU", "nll": "Validation/Loss", "mca": "Validation/MCA", - "fiou": 
"Validation/FIoU", + "pixacc": "Validation/Pixel_Acc", }, ), ) @@ -332,19 +311,13 @@ def _tensor_to_numpy(pred_tensor): evaluator.add_event_handler( Events.EPOCH_COMPLETED, create_image_writer( - summary_writer, - "Validation/Mask", - "mask", - transform_func=transform_func, + summary_writer, "Validation/Mask", "mask", transform_func=transform_func ), ) evaluator.add_event_handler( Events.EPOCH_COMPLETED, create_image_writer( - summary_writer, - "Validation/Pred", - "y_pred", - transform_func=transform_pred, + summary_writer, "Validation/Pred", "y_pred", transform_func=transform_pred ), ) @@ -354,7 +327,7 @@ def snapshot_function(): checkpoint_handler = SnapshotHandler( path.join(output_dir, config.TRAIN.MODEL_DIR), config.MODEL.NAME, - extract_metric_from("IoU"), + extract_metric_from("mIoU"), snapshot_function, ) evaluator.add_event_handler( diff --git a/interpretation/deepseismic_interpretation/penobscot/metrics.py b/interpretation/deepseismic_interpretation/penobscot/metrics.py index 1be9a5a5..cb6e9ca9 100644 --- a/interpretation/deepseismic_interpretation/penobscot/metrics.py +++ b/interpretation/deepseismic_interpretation/penobscot/metrics.py @@ -2,12 +2,32 @@ # Licensed under the MIT License. from collections import defaultdict -from cv_lib.segmentation.dutchf3.metrics import _torch_hist from ignite.metrics import Metric import torch import numpy as np +def _torch_hist(label_true, label_pred, n_class): + """Calculates the confusion matrix for the labels + + Args: + label_true ([type]): [description] + label_pred ([type]): [description] + n_class ([type]): [description] + + Returns: + [type]: [description] + """ + # TODO Add exceptions + assert len(label_true.shape) == 1, "Labels need to be 1D" + assert len(label_pred.shape) == 1, "Predictions need to be 1D" + mask = (label_true >= 0) & (label_true < n_class) + hist = torch.bincount( + n_class * label_true[mask] + label_pred[mask], minlength=n_class ** 2 + ).reshape(n_class, n_class) + return hist + + def _default_tensor(image_height, image_width, pad_value=255): return torch.full((image_height, image_width), pad_value, dtype=torch.long) @@ -18,8 +38,8 @@ class InlineMeanIoU(Metric): """Compute Mean IoU for Inline Notes: - This metric collects all the patches and recomposes the predictions and masks into inlines - These are then used to calculate the mean IoU + This metric collects all the patches and recomposes the predictions and masks + into inlines. These are then used to calculate the mean IoU. """ def __init__( @@ -40,11 +60,16 @@ def __init__( image_width (int): width of inline patch_size (int): patch size num_classes (int): number of classes in dataset - padding (int, optional): the amount of padding to height and width, e.g 200 padded to 256 - padding=56. Defaults to 0 - scale (int, optional): the scale factor applied to the patch, e.g 100 scaled to 200 - scale=2. Defaults to 1 + padding (int, optional): the amount of padding to height and width, + e.g 200 padded to 256 - padding=56. Defaults to 0 + scale (int, optional): the scale factor applied to the patch, + e.g 100 scaled to 200 - scale=2. Defaults to 1 pad_value (int): the constant value used for padding Defaults to 255 - output_transform (callable, optional): a callable that is used to transform the ignite.engine.Engine's `process_function`'s output into the - form expected by the metric. This can be useful if, for example, you have a multi-output model and you want to compute the metric with respect to one of the outputs. 
@@ -18,8 +38,8 @@ class InlineMeanIoU(Metric):
     """Compute Mean IoU for Inline

     Notes:
-        This metric collects all the patches and recomposes the predictions and masks into inlines
-        These are then used to calculate the mean IoU
+        This metric collects all the patches and recomposes the predictions and masks
+        into inlines. These are then used to calculate the mean IoU.
     """

     def __init__(
@@ -40,11 +60,16 @@ def __init__(
             image_width (int): width of inline
             patch_size (int): patch size
             num_classes (int): number of classes in dataset
-            padding (int, optional): the amount of padding to height and width, e.g 200 padded to 256 - padding=56. Defaults to 0
-            scale (int, optional): the scale factor applied to the patch, e.g 100 scaled to 200 - scale=2. Defaults to 1
+            padding (int, optional): the amount of padding to height and width,
+                e.g 200 padded to 256 - padding=56. Defaults to 0
+            scale (int, optional): the scale factor applied to the patch,
+                e.g 100 scaled to 200 - scale=2. Defaults to 1
             pad_value (int): the constant value used for padding Defaults to 255
-            output_transform (callable, optional): a callable that is used to transform the ignite.engine.Engine's `process_function`'s output into the
-                form expected by the metric. This can be useful if, for example, you have a multi-output model and you want to compute the metric with respect to one of the outputs.
+            output_transform (callable, optional): a callable that is used to transform
+                the ignite.engine.Engine's `process_function`'s output into the form
+                expected by the metric. This can be useful if, for example, you have
+                a multi-output model and you want to compute the metric with respect to
+                one of the outputs.
         """
         self._image_height = image_height
         self._image_width = image_width
@@ -78,7 +103,8 @@ def update(self, output):
         assert y.shape == max_prediction.shape, "Shape not the same"

         for pred, mask, id, patch_loc in zip(max_prediction, y, ids, patch_locations):
-            # ! With overlapping patches this does not aggregate the results it simply overwrites them
+            # ! With overlapping patches this does not aggregate the results,
+            # ! it simply overwrites them
             # If patch is padded ignore padding
             pad = int(self._padding // 2)
             pred = pred[pad : pred.shape[0] - pad, pad : pred.shape[1] - pad]
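
A numeric check of the padding strip in update() above, using the docstring's own example of a 200-pixel patch padded to 256 (padding=56, i.e. 28 pixels per side):

import torch

padding = 56
pad = int(padding // 2)       # 28
pred = torch.zeros(256, 256)  # stand-in for a padded prediction
pred = pred[pad : pred.shape[0] - pad, pad : pred.shape[1] - pad]
assert pred.shape == (200, 200)   # back to the pre-padding size
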