From 22fd9b2f2a139c8f6766df612dca1493379ef70d Mon Sep 17 00:00:00 2001 From: "Evgeny A. Stepanov" Date: Tue, 9 Jan 2024 17:07:20 +0100 Subject: [PATCH 1/7] README update --- README.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/README.md b/README.md index b5d078e..73fb08f 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,26 @@ where predictors (classifiers) may have different label spaces. Consequently, the library makes distinction between classes predicted with a low score (`0.0`) and not predicted classes (`nan`). +## Vectorization + +eFusor provides a `vectorize` function to do the vectorization +making distinction between predicted and not predicted classes. +The function expects a `list` of class labels +and a `dict` of prediction scores. + +```python +from efusor import vectorize + +labels = ["A", "B", "C", "D"] +scores = {"A": 0.75, "B": 0.25, "C": 0.00} + +vector = vectorize(labels, scores) +# array([0.75, 0.25, 0. , nan]) +``` + +The function supports scores input as a vector, matrix or a tensor. +That is a list of dicts or list of lists of dicts. + ## Fusion Methods ### Basic Fusion Methods From e72f75bc88fed3dca5fcd4698904cf3b90ac1784 Mon Sep 17 00:00:00 2001 From: "Evgeny A. Stepanov" Date: Tue, 9 Jan 2024 19:15:09 +0100 Subject: [PATCH 2/7] added nan softmax --- src/efusor/utils.py | 11 +++++++++++ tests/test_utils.py | 12 +++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/efusor/utils.py b/src/efusor/utils.py index a75dfe1..587c0ab 100644 --- a/src/efusor/utils.py +++ b/src/efusor/utils.py @@ -32,3 +32,14 @@ def batch(*vector: np.ndarray) -> np.ndarray: :rtype: np.ndarray """ return np.stack(vector) + + +def softmax(vector: np.ndarray) -> np.ndarray: + """ + numerically stable softmax with nan support + :param vector: predictions scores (not probability) + :type vector: np.ndarray + :return: softmax + :rtype: np.ndarray + """ + return np.exp(vector - np.nanmax(vector))/np.nansum(np.exp(vector - np.nanmax(vector))) diff --git a/tests/test_utils.py b/tests/test_utils.py index f8fcb44..f6f1ddf 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -4,7 +4,7 @@ import numpy as np -from efusor.utils import batch, vectorize +from efusor.utils import batch, vectorize, softmax def test_batch_tensor(scores: list) -> None: @@ -89,3 +89,13 @@ def test_vectorize_vector(scores: list) -> None: for j, vector in enumerate(matrix): assert np.array_equal(vectorize(labels, vector), np.array(scores[i][j]), equal_nan=True) + + +def test_softmax() -> None: + """ test softmax """ + # test normal + assert softmax(np.array([0.25, -0.25, 0.0])).sum() == 1.0 + # test large number + assert softmax(np.array([999, 100, 0.0])).sum() == 1.0 + # test nan support + assert np.nansum(softmax(np.array([0.75, 0.25, 0.0, np.nan]))) == 1.0 From d4c3d75c57b29cac7d7360f17863e45457ef0b99 Mon Sep 17 00:00:00 2001 From: "Evgeny A. Stepanov" Date: Tue, 9 Jan 2024 19:17:07 +0100 Subject: [PATCH 3/7] version & ChangeLog --- CHANGELOG.md | 6 ++++++ setup.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fade58d..c5d8449 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # ChangeLog +## 0.1.3 + +2024.01.09 + +- added softmax with nan support to utils + ## 0.1.2 2024.01.07 diff --git a/setup.py b/setup.py index c460bfd..9e16a7f 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ def read(path): setup( name='efusor', url='https://github.com/esrel/efusor', - version='0.1.2', + version='0.1.3', author='Evgeny A. Stepanov', author_email='stepanov.evgeny.a@gmail.com', description='Extended Decision Fusion', From 3728904316b2abf23e78f248ab7ad2bfb6caafb6 Mon Sep 17 00:00:00 2001 From: "Evgeny A. Stepanov" Date: Tue, 9 Jan 2024 19:22:56 +0100 Subject: [PATCH 4/7] modified min-max --- src/efusor/scaler.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/efusor/scaler.py b/src/efusor/scaler.py index 71fd4ad..5436448 100644 --- a/src/efusor/scaler.py +++ b/src/efusor/scaler.py @@ -20,7 +20,10 @@ def scale(vector: np.ndarray) -> np.ndarray: :return: vector :rtype: np.ndarray """ - if not vector.any() or vector.min() == vector.max(): + if np.isnan(vector).all(): return vector - return (vector - np.min(vector)) / (np.max(vector) - np.min(vector)) + if np.nanmin(vector) == np.nanmax(vector): + return vector + + return (vector - np.nanmin(vector)) / (np.nanmax(vector) - np.nanmin(vector)) From 0175e4f90c0073c8f048363e44303b9f606c5e4a Mon Sep 17 00:00:00 2001 From: "Evgeny A. Stepanov" Date: Thu, 11 Jan 2024 10:47:01 +0100 Subject: [PATCH 5/7] readme fixes --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 73fb08f..6ccd6e3 100644 --- a/README.md +++ b/README.md @@ -56,8 +56,8 @@ vector = vectorize(labels, scores) # array([0.75, 0.25, 0. , nan]) ``` -The function supports scores input as a vector, matrix or a tensor. -That is a list of dicts or list of lists of dicts. +The function supports scores input as a vector, a matrix or a tensor. +That is a dict, a list of dicts or a list of lists of dicts. ## Fusion Methods From baac88229a0ef78cd7bd0c5853cb3ace563b9a38 Mon Sep 17 00:00:00 2001 From: "Evgeny A. Stepanov" Date: Thu, 11 Jan 2024 10:49:25 +0100 Subject: [PATCH 6/7] change log fixes --- CHANGELOG.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c5d8449..40cb1d4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,9 +22,7 @@ ## 0.1.0 -2024.01.04 - -Initial release. +2024.01.04: initial release - `basic`, `voter` and `borda` (simple and tournament-style) decision fusion methods. - support function for vector scaling (`scale`). From f065c1b3ae130b0ec86f4cdbbd14f815faa20ec6 Mon Sep 17 00:00:00 2001 From: "Evgeny A. Stepanov" Date: Fri, 12 Jan 2024 17:14:01 +0100 Subject: [PATCH 7/7] v0.1.4. with scaling & cutoff --- CHANGELOG.md | 11 +++++++++-- setup.py | 2 +- src/efusor/fusor.py | 13 +++++++++++++ tests/test_fusor.py | 41 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 64 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 40cb1d4..527b0d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,17 +1,24 @@ # ChangeLog +## 0.1.4 + +2024.01.12 + +- added `cutoff` threshold to `fuse` +- added `scaled` kwarg to `fuse` to apply `softmax` to fused vectors + ## 0.1.3 2024.01.09 -- added softmax with nan support to utils +- added `softmax` with `nan` support to `utils.py` ## 0.1.2 2024.01.07 - added vectorization from dict of scores -- added priority fusion +- added `priority` fusion ## 0.1.1 diff --git a/setup.py b/setup.py index 9e16a7f..badba25 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ def read(path): setup( name='efusor', url='https://github.com/esrel/efusor', - version='0.1.3', + version='0.1.4', author='Evgeny A. Stepanov', author_email='stepanov.evgeny.a@gmail.com', description='Extended Decision Fusion', diff --git a/src/efusor/fusor.py b/src/efusor/fusor.py index 53a2391..0a9ea74 100644 --- a/src/efusor/fusor.py +++ b/src/efusor/fusor.py @@ -12,11 +12,15 @@ from efusor.basic import apply from efusor.borda import borda from efusor.priority import prioritize +from efusor.utils import softmax def fuse(tensor: list | np.ndarray, method: str = "hard_voting", weights: list | np.ndarray = None, + *, + cutoff: float = None, + scaled: bool = False, digits: int = None ) -> list: """ @@ -27,12 +31,20 @@ def fuse(tensor: list | np.ndarray, :type method: str, optional :param weights: predictor weights; defaults to None :type weights: np.ndarray, optional + :param cutoff: prediction cut-off threshold; defaults to None + :type cutoff: float, optional + :param scaled: if to re-scale final scores (softmax); defaults to False + :type scaled: bool, optional :param digits: rounding precision; defaults to None :type digits: int, optional :return: fused scores :rtype: np.ndarray """ tensor = np.array(tensor) if isinstance(tensor, list) else tensor + + if cutoff: + tensor[tensor < cutoff] = np.nan + weights = np.array(weights) if isinstance(weights, list) else weights if method in {"hard_voting", "soft_voting", "majority_voting"}: @@ -48,6 +60,7 @@ def fuse(tensor: list | np.ndarray, else: raise ValueError(f"unsupported fusion method: {method}") + result = np.apply_along_axis(softmax, -1, result) if scaled else result result = np.round(result, decimals=digits) if digits else result return result.tolist() diff --git a/tests/test_fusor.py b/tests/test_fusor.py index 81ed6ca..850c7a2 100644 --- a/tests/test_fusor.py +++ b/tests/test_fusor.py @@ -63,3 +63,44 @@ def test_fuse_scalar(scores: list, weights: list) -> None: for scalar in vector: with pytest.raises(IndexError): fuse(np.array(scalar), method="hard_voting", weights=np.array(weights)) + + +def test_fusor_cutoff(scores: list) -> None: + """ + test cutoff + :param scores: prediction scores + :type scores: list + """ + cutoff_max = [[0.7, 0.3, 0.5], + [0.4, 0.4, 0.6], + [0.4, 0.7, np.nan], + [0.3, 0.3, 1.0], + [np.nan, np.nan, np.nan]] + + result = fuse(np.array(scores), method="max", cutoff=0.1) + assert np.array_equal(np.array(result), np.array(cutoff_max), equal_nan=True) + + +def test_fusor_scaled(scores: list) -> None: + """ + test softmax + :param scores: prediction scores + :type scores: list + """ + scaled_max = [[0.4, 0.27, 0.33], + [0.31, 0.31, 0.38], + [0.33, 0.45, 0.22], + [0.25, 0.25, 0.5], + [0.33, 0.33, 0.33]] + + cutoff_max = [[0.4, 0.27, 0.33], + [0.31, 0.31, 0.38], + [0.43, 0.57, np.nan], + [0.25, 0.25, 0.5], + [np.nan, np.nan, np.nan]] + + result = fuse(np.array(scores), method="max", scaled=True, digits=2) + assert np.array_equal(np.array(result), np.array(scaled_max), equal_nan=True) + + result = fuse(np.array(scores), method="max", cutoff=0.1, scaled=True, digits=2) + assert np.array_equal(np.array(result), np.array(cutoff_max), equal_nan=True)