From a85e302d994be9a4e1bd070f8129b69d1ad58d50 Mon Sep 17 00:00:00 2001
From: Wenjie Du <wenjay.du@gmail.com>
Date: Thu, 21 Dec 2023 16:53:06 +0800
Subject: [PATCH 1/3] feat: add _check_inputs() for all error metric
 calculation functions;

---
 pypots/utils/metrics/error.py | 213 ++++++++++++++++++++++------------
 1 file changed, 139 insertions(+), 74 deletions(-)

diff --git a/pypots/utils/metrics/error.py b/pypots/utils/metrics/error.py
index 9136e232..8404ebe7 100644
--- a/pypots/utils/metrics/error.py
+++ b/pypots/utils/metrics/error.py
@@ -13,10 +13,46 @@
 from ..logging import logger
 
 
-def calc_mae(
+def _check_inputs(
     predictions: Union[np.ndarray, torch.Tensor, list],
     targets: Union[np.ndarray, torch.Tensor, list],
     masks: Optional[Union[np.ndarray, torch.Tensor, list]] = None,
+) -> None:
+    # check shape
+    assert isinstance(predictions, type(targets)), (
+        f"types of `predictions` and `targets` must match, but got"
+        f"`predictions`: {type(predictions)}, `target`: {type(targets)}"
+    )
+    prediction_shape = predictions.shape
+    target_shape = targets.shape
+    assert (
+        prediction_shape == target_shape
+    ), f"shape of `predictions` and `targets` must match, but got {prediction_shape} and {target_shape}"
+    # check NaN
+    assert not np.isnan(
+        predictions
+    ).any(), "`predictions` mustn't contain NaN values, but detected NaN in it"
+    assert not np.isnan(
+        targets
+    ).any(), "`targets` mustn't contain NaN values, but detected NaN in it"
+
+    if masks is not None:
+        # check shape
+        mask_shape = masks.shape
+        assert mask_shape == target_shape, (
+            f"shape of `masks` must match `predictions` and `targets` shape, "
+            f"but got `mask`: {mask_shape} that is different from {prediction_shape}"
+        )
+        # check NaN
+        assert not np.isnan(
+            masks
+        ).any(), "`masks` mustn't contain NaN values, but detected NaN in it"
+
+
+def calc_mae(
+    predictions: Union[np.ndarray, torch.Tensor],
+    targets: Union[np.ndarray, torch.Tensor],
+    masks: Optional[Union[np.ndarray, torch.Tensor]] = None,
 ) -> Union[float, torch.Tensor]:
     """Calculate the Mean Absolute Error between ``predictions`` and ``targets``.
     ``masks`` can be used for filtering. For values==0 in ``masks``,
@@ -55,23 +91,11 @@ def calc_mae(
     so the result is 1/2=0.5.
 
     """
-    assert isinstance(predictions, type(targets)), (
-        f"types of inputs and target must match, but got"
-        f"type(inputs)={type(predictions)}, type(target)={type(targets)}"
-    )
-    prediction_shape = predictions.shape
-    target_shape = targets.shape
-    assert (
-        prediction_shape == target_shape
-    ), f"shape of predictions and targets must match, but got {prediction_shape} and {target_shape} "
+    # check shapes and values of inputs
+    _check_inputs(predictions, targets, masks)
 
     lib = np if isinstance(predictions, np.ndarray) else torch
     if masks is not None:
-        mask_shape = masks.shape
-        assert (
-            mask_shape == target_shape
-        ), f"shape of masks must match predictions' shape, but got {mask_shape} and {prediction_shape} "
-
         return lib.sum(lib.abs(predictions - targets) * masks) / (
             lib.sum(masks) + 1e-12
         )
@@ -80,9 +104,9 @@ def calc_mae(
 
 
 def calc_mse(
-    predictions: Union[np.ndarray, torch.Tensor, list],
-    targets: Union[np.ndarray, torch.Tensor, list],
-    masks: Optional[Union[np.ndarray, torch.Tensor, list]] = None,
+    predictions: Union[np.ndarray, torch.Tensor],
+    targets: Union[np.ndarray, torch.Tensor],
+    masks: Optional[Union[np.ndarray, torch.Tensor]] = None,
 ) -> Union[float, torch.Tensor]:
     """Calculate the Mean Square Error between ``predictions`` and ``targets``.
     ``masks`` can be used for filtering. For values==0 in ``masks``,
@@ -121,23 +145,11 @@ def calc_mse(
     so the result is 1/2=0.5.
 
     """
-
-    assert isinstance(predictions, type(targets)), (
-        f"types of inputs and target must match, but got"
-        f"type(inputs)={type(predictions)}, type(target)={type(targets)}"
-    )
-    prediction_shape = predictions.shape
-    target_shape = targets.shape
-    assert (
-        prediction_shape == target_shape
-    ), f"shape of predictions and targets must match, but got {prediction_shape} and {target_shape} "
+    # check shapes and values of inputs
+    _check_inputs(predictions, targets, masks)
 
     lib = np if isinstance(predictions, np.ndarray) else torch
     if masks is not None:
-        mask_shape = masks.shape
-        assert (
-            mask_shape == target_shape
-        ), f"shape of masks must match predictions' shape, but got {mask_shape} and {prediction_shape} "
         return lib.sum(lib.square(predictions - targets) * masks) / (
             lib.sum(masks) + 1e-12
         )
@@ -146,9 +158,9 @@ def calc_mse(
 
 
 def calc_rmse(
-    predictions: Union[np.ndarray, torch.Tensor, list],
-    targets: Union[np.ndarray, torch.Tensor, list],
-    masks: Optional[Union[np.ndarray, torch.Tensor, list]] = None,
+    predictions: Union[np.ndarray, torch.Tensor],
+    targets: Union[np.ndarray, torch.Tensor],
+    masks: Optional[Union[np.ndarray, torch.Tensor]] = None,
 ) -> Union[float, torch.Tensor]:
     """Calculate the Root Mean Square Error between ``predictions`` and ``targets``.
     ``masks`` can be used for filtering. For values==0 in ``masks``,
@@ -188,18 +200,14 @@ def calc_rmse(
     so the result is :math:`\\sqrt{1/2}=0.5`.
 
     """
-    assert isinstance(predictions, type(targets)), (
-        f"types of inputs and target must match, but got"
-        f"type(inputs)={type(predictions)}, type(target)={type(targets)}"
-    )
     lib = np if isinstance(predictions, np.ndarray) else torch
     return lib.sqrt(calc_mse(predictions, targets, masks))
 
 
 def calc_mre(
-    predictions: Union[np.ndarray, torch.Tensor, list],
-    targets: Union[np.ndarray, torch.Tensor, list],
-    masks: Optional[Union[np.ndarray, torch.Tensor, list]] = None,
+    predictions: Union[np.ndarray, torch.Tensor],
+    targets: Union[np.ndarray, torch.Tensor],
+    masks: Optional[Union[np.ndarray, torch.Tensor]] = None,
 ) -> Union[float, torch.Tensor]:
     """Calculate the Mean Relative Error between ``predictions`` and ``targets``.
     ``masks`` can be used for filtering. For values==0 in ``masks``,
@@ -239,22 +247,11 @@ def calc_mre(
     so the result is :math:`\\sqrt{1/2}=0.5`.
 
     """
-    assert isinstance(predictions, type(targets)), (
-        f"types of inputs and target must match, but got"
-        f"type(inputs)={type(predictions)}, type(target)={type(targets)}"
-    )
-    prediction_shape = predictions.shape
-    target_shape = targets.shape
-    assert (
-        prediction_shape == target_shape
-    ), f"shape of predictions and targets must match, but got {prediction_shape} and {target_shape} "
+    # check shapes and values of inputs
+    _check_inputs(predictions, targets, masks)
 
     lib = np if isinstance(predictions, np.ndarray) else torch
     if masks is not None:
-        mask_shape = masks.shape
-        assert (
-            mask_shape == target_shape
-        ), f"shape of masks must match predictions' shape, but got {mask_shape} and {prediction_shape} "
         return lib.sum(lib.abs(predictions - targets) * masks) / (
             lib.sum(lib.abs(targets * masks)) + 1e-12
         )
@@ -273,51 +270,119 @@ def calc_quantile_loss(predictions, targets, q: float, eval_points) -> float:
     return quantile_loss
 
 
-def calc_quantile_crps(predictions, targets, eval_points, mean_scaler=0, scaler=1):
-    """Continuous rank probability score for distributional predictions."""
+def calc_quantile_crps(
+    predictions: Union[np.ndarray, torch.Tensor],
+    targets: Union[np.ndarray, torch.Tensor],
+    masks: Union[np.ndarray, torch.Tensor],
+    scaler_mean=0,
+    scaler_stddev=1,
+) -> float:
+    """Continuous ranked probability score (CRPS) for distributional predictions.
+
+    Parameters
+    ----------
+    predictions :
+        The prediction data to be evaluated.
+
+    targets :
+        The target data for helping evaluate the predictions.
+
+    masks :
+        The masks for filtering specific values in ``predictions`` and ``targets`` out of the evaluation.
+        Only values at positions where ``masks`` == 1 will be used for evaluation.
+
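+    scaler_mean :
+        Mean value of the scaler used to scale the data. It is added back to the rescaled values before scoring.
+
+    scaler_stddev :
+        Standard deviation value of the scaler used to scale the data. Values are multiplied by it before scoring.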
+
+    Returns
+    -------
+    CRPS :
+        Value of the continuous ranked probability score.
+
+    """
+    # check shapes and values of inputs
+    _check_inputs(predictions, targets, masks)
+
     if isinstance(predictions, np.ndarray):
         predictions = torch.from_numpy(predictions)
     if isinstance(targets, np.ndarray):
         targets = torch.from_numpy(targets)
-    if isinstance(eval_points, np.ndarray):
-        eval_points = torch.from_numpy(eval_points)
+    if isinstance(masks, np.ndarray):
+        masks = torch.from_numpy(masks)
 
-    targets = targets * scaler + mean_scaler
-    predictions = predictions * scaler + mean_scaler
+    targets = targets * scaler_stddev + scaler_mean
+    predictions = predictions * scaler_stddev + scaler_mean
 
     quantiles = np.arange(0.05, 1.0, 0.05)
-    denominator = torch.sum(torch.abs(targets * eval_points))
-    CRPS = 0
+    denominator = torch.sum(torch.abs(targets * masks))
+    CRPS = torch.tensor(0.0)
     for i in range(len(quantiles)):
         q_pred = []
         for j in range(len(predictions)):
             q_pred.append(torch.quantile(predictions[j : j + 1], quantiles[i], dim=1))
         q_pred = torch.cat(q_pred, 0)
-        q_loss = calc_quantile_loss(targets, q_pred, quantiles[i], eval_points)
+        q_loss = calc_quantile_loss(targets, q_pred, quantiles[i], masks)
         CRPS += q_loss / denominator
     return CRPS.item() / len(quantiles)
 
 
-def calc_quantile_crps_sum(predictions, targets, eval_points, mean_scaler=0, scaler=1):
-    """Continuous rank probability score for distributional predictions."""
+def calc_quantile_crps_sum(
+    predictions: Union[np.ndarray, torch.Tensor],
+    targets: Union[np.ndarray, torch.Tensor],
+    masks: Union[np.ndarray, torch.Tensor],
+    scaler_mean=0,
+    scaler_stddev=1,
+) -> float:
+    """Continuous ranked probability score computed on values summed over the last dimension.
+
+    Parameters
+    ----------
+    predictions :
+        The prediction data to be evaluated.
+
+    targets :
+        The target data for helping evaluate the predictions.
+
+    masks :
+        The masks for filtering specific values in ``predictions`` and ``targets`` out of the evaluation.
+        Only values at positions where ``masks`` == 1 will be used for evaluation.
+
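+    scaler_mean :
+        Mean value of the scaler used to scale the data. It is added back to the rescaled values before scoring.
+
+    scaler_stddev :
+        Standard deviation value of the scaler used to scale the data. Values are multiplied by it before scoring.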
+
+    Returns
+    -------
+    CRPS :
+        Value of the continuous ranked probability score computed on the summed values.
+
+    """
+    # check shapes and values of inputs
+    _check_inputs(predictions, targets, masks)
+
     if isinstance(predictions, np.ndarray):
         predictions = torch.from_numpy(predictions)
     if isinstance(targets, np.ndarray):
         targets = torch.from_numpy(targets)
-    if isinstance(eval_points, np.ndarray):
-        eval_points = torch.from_numpy(eval_points)
+    if isinstance(masks, np.ndarray):
+        masks = torch.from_numpy(masks)
 
-    eval_points = eval_points.mean(-1)
-    targets = targets * scaler + mean_scaler
+    masks = masks.mean(-1)
+    targets = targets * scaler_stddev + scaler_mean
     targets = targets.sum(-1)
-    predictions = predictions * scaler + mean_scaler
+    predictions = predictions * scaler_stddev + scaler_mean
 
     quantiles = np.arange(0.05, 1.0, 0.05)
-    denominator = torch.sum(torch.abs(targets * eval_points))
-    CRPS = 0
+    denominator = torch.sum(torch.abs(targets * masks))
+    CRPS = torch.tensor(0.0)
     for i in range(len(quantiles)):
         q_pred = torch.quantile(predictions.sum(-1), quantiles[i], dim=1)
-        q_loss = calc_quantile_loss(targets, q_pred, quantiles[i], eval_points)
+        q_loss = calc_quantile_loss(targets, q_pred, quantiles[i], masks)
         CRPS += q_loss / denominator
     return CRPS.item() / len(quantiles)
 

From d2a475a0e1c100b73df7565abe3357d3781806ee Mon Sep 17 00:00:00 2001
From: Wenjie Du <wenjay.du@gmail.com>
Date: Thu, 21 Dec 2023 17:40:42 +0800
Subject: [PATCH 2/3] fix: use corresponding lib to detect NaN, otherwise loss
 calculation during torch training will raise error...

RuntimeError: Can't call numpy() on Tensor that requires grad. Use tensor.detach().numpy() instead.
---
 pypots/utils/metrics/error.py | 33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/pypots/utils/metrics/error.py b/pypots/utils/metrics/error.py
index 8404ebe7..004c2e7d 100644
--- a/pypots/utils/metrics/error.py
+++ b/pypots/utils/metrics/error.py
@@ -17,26 +17,33 @@ def _check_inputs(
     predictions: Union[np.ndarray, torch.Tensor, list],
     targets: Union[np.ndarray, torch.Tensor, list],
     masks: Optional[Union[np.ndarray, torch.Tensor, list]] = None,
-) -> None:
-    # check shape
+):
+    # check type
     assert isinstance(predictions, type(targets)), (
         f"types of `predictions` and `targets` must match, but got"
         f"`predictions`: {type(predictions)}, `target`: {type(targets)}"
     )
+    lib = np if isinstance(predictions, np.ndarray) else torch
+    # check shape
     prediction_shape = predictions.shape
     target_shape = targets.shape
     assert (
         prediction_shape == target_shape
     ), f"shape of `predictions` and `targets` must match, but got {prediction_shape} and {target_shape}"
     # check NaN
-    assert not np.isnan(
+    assert not lib.isnan(
         predictions
     ).any(), "`predictions` mustn't contain NaN values, but detected NaN in it"
-    assert not np.isnan(
+    assert not lib.isnan(
         targets
     ).any(), "`targets` mustn't contain NaN values, but detected NaN in it"
 
     if masks is not None:
+        # check type
+        assert isinstance(masks, type(targets)), (
+            f"types of `masks`, `predictions`, and `targets` must match, but got"
+            f"`masks`: {type(masks)}, `targets`: {type(targets)}"
+        )
         # check shape
         mask_shape = masks.shape
         assert mask_shape == target_shape, (
@@ -44,10 +51,12 @@ def _check_inputs(
             f"but got `mask`: {mask_shape} that is different from {prediction_shape}"
         )
         # check NaN
-        assert not np.isnan(
+        assert not lib.isnan(
             masks
         ).any(), "`masks` mustn't contain NaN values, but detected NaN in it"
 
+    return lib
+
 
 def calc_mae(
     predictions: Union[np.ndarray, torch.Tensor],
@@ -92,9 +101,8 @@ def calc_mae(
 
     """
     # check shapes and values of inputs
-    _check_inputs(predictions, targets, masks)
+    lib = _check_inputs(predictions, targets, masks)
 
-    lib = np if isinstance(predictions, np.ndarray) else torch
     if masks is not None:
         return lib.sum(lib.abs(predictions - targets) * masks) / (
             lib.sum(masks) + 1e-12
@@ -146,9 +154,8 @@ def calc_mse(
 
     """
     # check shapes and values of inputs
-    _check_inputs(predictions, targets, masks)
+    lib = _check_inputs(predictions, targets, masks)
 
-    lib = np if isinstance(predictions, np.ndarray) else torch
     if masks is not None:
         return lib.sum(lib.square(predictions - targets) * masks) / (
             lib.sum(masks) + 1e-12
@@ -200,6 +207,7 @@ def calc_rmse(
     so the result is :math:`\\sqrt{1/2}=0.5`.
 
     """
+    # don't have to check types and NaN here, since calc_mse() will do it
     lib = np if isinstance(predictions, np.ndarray) else torch
     return lib.sqrt(calc_mse(predictions, targets, masks))
 
@@ -248,9 +256,8 @@ def calc_mre(
 
     """
     # check shapes and values of inputs
-    _check_inputs(predictions, targets, masks)
+    lib = _check_inputs(predictions, targets, masks)
 
-    lib = np if isinstance(predictions, np.ndarray) else torch
     if masks is not None:
         return lib.sum(lib.abs(predictions - targets) * masks) / (
             lib.sum(lib.abs(targets * masks)) + 1e-12
@@ -304,7 +311,7 @@ def calc_quantile_crps(
 
     """
     # check shapes and values of inputs
-    _check_inputs(predictions, targets, masks)
+    _ = _check_inputs(predictions, targets, masks)
 
     if isinstance(predictions, np.ndarray):
         predictions = torch.from_numpy(predictions)
@@ -363,7 +370,7 @@ def calc_quantile_crps_sum(
 
     """
     # check shapes and values of inputs
-    _check_inputs(predictions, targets, masks)
+    _ = _check_inputs(predictions, targets, masks)
 
     if isinstance(predictions, np.ndarray):
         predictions = torch.from_numpy(predictions)

From b6858eb8afb7b46af2c4c0aca1a228fef80b242f Mon Sep 17 00:00:00 2001
From: Wenjie Du <wenjay.du@gmail.com>
Date: Thu, 21 Dec 2023 18:08:41 +0800
Subject: [PATCH 3/3] fix: skip predictions/targets shape check for CRPS calc funcs

---
 pypots/utils/metrics/error.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/pypots/utils/metrics/error.py b/pypots/utils/metrics/error.py
index 004c2e7d..16d7c923 100644
--- a/pypots/utils/metrics/error.py
+++ b/pypots/utils/metrics/error.py
@@ -17,6 +17,7 @@ def _check_inputs(
     predictions: Union[np.ndarray, torch.Tensor, list],
     targets: Union[np.ndarray, torch.Tensor, list],
     masks: Optional[Union[np.ndarray, torch.Tensor, list]] = None,
+    check_shape: bool = True,
 ):
     # check type
     assert isinstance(predictions, type(targets)), (
@@ -27,9 +28,10 @@ def _check_inputs(
     # check shape
     prediction_shape = predictions.shape
     target_shape = targets.shape
-    assert (
-        prediction_shape == target_shape
-    ), f"shape of `predictions` and `targets` must match, but got {prediction_shape} and {target_shape}"
+    if check_shape:
+        assert (
+            prediction_shape == target_shape
+        ), f"shape of `predictions` and `targets` must match, but got {prediction_shape} and {target_shape}"
     # check NaN
     assert not lib.isnan(
         predictions
@@ -44,11 +46,11 @@ def _check_inputs(
             f"types of `masks`, `predictions`, and `targets` must match, but got"
             f"`masks`: {type(masks)}, `targets`: {type(targets)}"
         )
-        # check shape
+        # check shape, masks shape must match targets
         mask_shape = masks.shape
         assert mask_shape == target_shape, (
-            f"shape of `masks` must match `predictions` and `targets` shape, "
-            f"but got `mask`: {mask_shape} that is different from {prediction_shape}"
+            f"shape of `masks` must match `targets` shape, "
+            f"but got `mask`: {mask_shape} that is different from `targets`: {target_shape}"
         )
         # check NaN
         assert not lib.isnan(
@@ -311,7 +313,7 @@ def calc_quantile_crps(
 
     """
     # check shapes and values of inputs
-    _ = _check_inputs(predictions, targets, masks)
+    _ = _check_inputs(predictions, targets, masks, check_shape=False)
 
     if isinstance(predictions, np.ndarray):
         predictions = torch.from_numpy(predictions)
@@ -370,7 +372,7 @@ def calc_quantile_crps_sum(
 
     """
     # check shapes and values of inputs
-    _ = _check_inputs(predictions, targets, masks)
+    _ = _check_inputs(predictions, targets, masks, check_shape=False)
 
     if isinstance(predictions, np.ndarray):
         predictions = torch.from_numpy(predictions)