From 567e43871e48e034a69706fbfa8466b36055504b Mon Sep 17 00:00:00 2001 From: Artem Vazhentsev Date: Fri, 6 Sep 2024 11:14:20 +0300 Subject: [PATCH 1/3] added parameter to PRR to compute 50% of the curve --- src/lm_polygraph/ue_metrics/pred_rej_area.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/lm_polygraph/ue_metrics/pred_rej_area.py b/src/lm_polygraph/ue_metrics/pred_rej_area.py index 6ce77555..7e74bb20 100644 --- a/src/lm_polygraph/ue_metrics/pred_rej_area.py +++ b/src/lm_polygraph/ue_metrics/pred_rej_area.py @@ -13,7 +13,7 @@ class PredictionRejectionArea(UEMetric): def __str__(self): return "prr" - def __call__(self, estimator: List[float], target: List[float]) -> float: + def __call__(self, estimator: List[float], target: List[float], max_rejection: float = 1.0) -> float: """ Measures the area under the Prediction-Rejection curve between `estimator` and `target`. @@ -22,6 +22,8 @@ def __call__(self, estimator: List[float], target: List[float]) -> float: Higher values indicate more uncertainty. target (List[int]): a batch of ground-truth uncertainty estimations. Higher values indicate less uncertainty. + max_rejection (float): a maximum proportion of instances that will be rejected. + 1.0 indicates entire set, 0.5 - half of the set Returns: float: area under the Prediction-Rejection curve. Higher values indicate better uncertainty estimations. @@ -30,12 +32,13 @@ def __call__(self, estimator: List[float], target: List[float]) -> float: # ue: greater is more uncertain ue = np.array(estimator) num_obs = len(ue) + num_rej = int(max_rejection * num_obs) # Sort in ascending order: the least uncertain come first ue_argsort = np.argsort(ue) # want sorted_metrics to be increasing => smaller scores is better sorted_metrics = np.array(target)[ue_argsort] # Since we want all plots to coincide when all the data is discarded - cumsum = np.cumsum(sorted_metrics) - scores = (cumsum / np.arange(1, num_obs + 1))[::-1] - prr_score = np.sum(scores) / num_obs + cumsum = np.cumsum(sorted_metrics)[-num_rej:] + scores = (cumsum / np.arange((num_obs - num_rej) + 1, num_obs + 1))[::-1] + prr_score = np.sum(scores) / num_rej return prr_score From 5e13f8ee076c9fb72f3c40dc51be3e8ae54443da Mon Sep 17 00:00:00 2001 From: Artem Vazhentsev Date: Fri, 6 Sep 2024 15:44:15 +0300 Subject: [PATCH 2/3] black --- src/lm_polygraph/ue_metrics/pred_rej_area.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/lm_polygraph/ue_metrics/pred_rej_area.py b/src/lm_polygraph/ue_metrics/pred_rej_area.py index 7e74bb20..ae91ed88 100644 --- a/src/lm_polygraph/ue_metrics/pred_rej_area.py +++ b/src/lm_polygraph/ue_metrics/pred_rej_area.py @@ -13,7 +13,9 @@ class PredictionRejectionArea(UEMetric): def __str__(self): return "prr" - def __call__(self, estimator: List[float], target: List[float], max_rejection: float = 1.0) -> float: + def __call__( + self, estimator: List[float], target: List[float], max_rejection: float = 1.0 + ) -> float: """ Measures the area under the Prediction-Rejection curve between `estimator` and `target`. @@ -23,7 +25,7 @@ def __call__(self, estimator: List[float], target: List[float], max_rejection: f target (List[int]): a batch of ground-truth uncertainty estimations. Higher values indicate less uncertainty. max_rejection (float): a maximum proportion of instances that will be rejected. - 1.0 indicates entire set, 0.5 - half of the set + 1.0 indicates entire set, 0.5 - half of the set Returns: float: area under the Prediction-Rejection curve. Higher values indicate better uncertainty estimations. From 5ce3175c2a1f50501fed025e7e859b74edd34fd8 Mon Sep 17 00:00:00 2001 From: Artem Vazhentsev Date: Fri, 6 Sep 2024 16:40:13 +0300 Subject: [PATCH 3/3] move to init --- src/lm_polygraph/ue_metrics/pred_rej_area.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/lm_polygraph/ue_metrics/pred_rej_area.py b/src/lm_polygraph/ue_metrics/pred_rej_area.py index ae91ed88..dc20164a 100644 --- a/src/lm_polygraph/ue_metrics/pred_rej_area.py +++ b/src/lm_polygraph/ue_metrics/pred_rej_area.py @@ -10,12 +10,19 @@ class PredictionRejectionArea(UEMetric): Calculates area under Prediction-Rejection curve. """ + def __init__(self, max_rejection: float = 1.0): + """ + Parameters: + max_rejection (float): a maximum proportion of instances that will be rejected. + 1.0 indicates entire set, 0.5 - half of the set + """ + super().__init__() + self.max_rejection = max_rejection + def __str__(self): return "prr" - def __call__( - self, estimator: List[float], target: List[float], max_rejection: float = 1.0 - ) -> float: + def __call__(self, estimator: List[float], target: List[float]) -> float: """ Measures the area under the Prediction-Rejection curve between `estimator` and `target`. @@ -24,8 +31,6 @@ def __call__( Higher values indicate more uncertainty. target (List[int]): a batch of ground-truth uncertainty estimations. Higher values indicate less uncertainty. - max_rejection (float): a maximum proportion of instances that will be rejected. - 1.0 indicates entire set, 0.5 - half of the set Returns: float: area under the Prediction-Rejection curve. Higher values indicate better uncertainty estimations. @@ -34,7 +39,7 @@ def __call__( # ue: greater is more uncertain ue = np.array(estimator) num_obs = len(ue) - num_rej = int(max_rejection * num_obs) + num_rej = int(self.max_rejection * num_obs) # Sort in ascending order: the least uncertain come first ue_argsort = np.argsort(ue) # want sorted_metrics to be increasing => smaller scores is better