From 22fd9b2f2a139c8f6766df612dca1493379ef70d Mon Sep 17 00:00:00 2001
From: "Evgeny A. Stepanov" <esrel@hotmail.com>
Date: Tue, 9 Jan 2024 17:07:20 +0100
Subject: [PATCH 1/7] README update

---
 README.md | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/README.md b/README.md
index b5d078e..73fb08f 100644
--- a/README.md
+++ b/README.md
@@ -39,6 +39,26 @@ where predictors (classifiers) may have different label spaces.
 Consequently, the library makes distinction between classes predicted with a low score (`0.0`)
 and not predicted classes (`nan`).
 
+## Vectorization
+
+eFusor provides a `vectorize` function that performs the vectorization,
+distinguishing between predicted and not predicted classes.
+The function expects a `list` of class labels
+and a `dict` of prediction scores.
+
+```python
+from efusor import vectorize
+
+labels = ["A", "B", "C", "D"]
+scores = {"A": 0.75, "B": 0.25, "C": 0.00}
+
+vector = vectorize(labels, scores)
+# array([0.75, 0.25, 0.  ,  nan])
+```
+
+The function supports scores input as a vector, matrix or a tensor.
+That is a list of dicts or list of lists of dicts.
+
 ## Fusion Methods
 
 ### Basic Fusion Methods

From e72f75bc88fed3dca5fcd4698904cf3b90ac1784 Mon Sep 17 00:00:00 2001
From: "Evgeny A. Stepanov" <esrel@hotmail.com>
Date: Tue, 9 Jan 2024 19:15:09 +0100
Subject: [PATCH 2/7] added nan softmax

---
 src/efusor/utils.py | 11 +++++++++++
 tests/test_utils.py | 12 +++++++++++-
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/src/efusor/utils.py b/src/efusor/utils.py
index a75dfe1..587c0ab 100644
--- a/src/efusor/utils.py
+++ b/src/efusor/utils.py
@@ -32,3 +32,19 @@ def batch(*vector: np.ndarray) -> np.ndarray:
     :rtype: np.ndarray
     """
     return np.stack(vector)
+
+
+def softmax(vector: np.ndarray) -> np.ndarray:
+    """
+    numerically stable softmax with nan support
+    nan entries are ignored by the normalization and stay nan in the output
+    :param vector: prediction scores (not probabilities)
+    :type vector: np.ndarray
+    :return: softmax of the non-nan entries; an all-nan vector is returned as is
+    :rtype: np.ndarray
+    """
+    if np.isnan(vector).all():  # nothing to normalize; np.nanmax would warn here
+        return vector
+
+    exps = np.exp(vector - np.nanmax(vector))  # shift by the max: largest exponent is 0
+    return exps / np.nansum(exps)
diff --git a/tests/test_utils.py b/tests/test_utils.py
index f8fcb44..f6f1ddf 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -4,7 +4,7 @@
 
 import numpy as np
 
-from efusor.utils import batch, vectorize
+from efusor.utils import batch, vectorize, softmax
 
 
 def test_batch_tensor(scores: list) -> None:
@@ -89,3 +89,13 @@ def test_vectorize_vector(scores: list) -> None:
         for j, vector in enumerate(matrix):
             assert np.array_equal(vectorize(labels, vector),
                                   np.array(scores[i][j]), equal_nan=True)
+
+
+def test_softmax() -> None:
+    """ test softmax: the non-nan outputs sum to 1 """
+    # test normal (isclose: exact float equality on a sum is platform-dependent)
+    assert np.isclose(softmax(np.array([0.25, -0.25, 0.0])).sum(), 1.0)
+    # test large number
+    assert np.isclose(softmax(np.array([999, 100, 0.0])).sum(), 1.0)
+    # test nan support
+    assert np.isclose(np.nansum(softmax(np.array([0.75, 0.25, 0.0, np.nan]))), 1.0)

From d4c3d75c57b29cac7d7360f17863e45457ef0b99 Mon Sep 17 00:00:00 2001
From: "Evgeny A. Stepanov" <esrel@hotmail.com>
Date: Tue, 9 Jan 2024 19:17:07 +0100
Subject: [PATCH 3/7] version & ChangeLog

---
 CHANGELOG.md | 6 ++++++
 setup.py     | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fade58d..c5d8449 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # ChangeLog
 
+## 0.1.3
+
+2024.01.09
+
+- added softmax with nan support to utils
+
 ## 0.1.2
 
 2024.01.07
diff --git a/setup.py b/setup.py
index c460bfd..9e16a7f 100644
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@ def read(path):
 setup(
     name='efusor',
     url='https://github.com/esrel/efusor',
-    version='0.1.2',
+    version='0.1.3',
     author='Evgeny A. Stepanov',
     author_email='stepanov.evgeny.a@gmail.com',
     description='Extended Decision Fusion',

From 3728904316b2abf23e78f248ab7ad2bfb6caafb6 Mon Sep 17 00:00:00 2001
From: "Evgeny A. Stepanov" <esrel@hotmail.com>
Date: Tue, 9 Jan 2024 19:22:56 +0100
Subject: [PATCH 4/7] modified min-max

---
 src/efusor/scaler.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/efusor/scaler.py b/src/efusor/scaler.py
index 71fd4ad..5436448 100644
--- a/src/efusor/scaler.py
+++ b/src/efusor/scaler.py
@@ -20,7 +20,10 @@ def scale(vector: np.ndarray) -> np.ndarray:
     :return: vector
     :rtype: np.ndarray
     """
-    if not vector.any() or vector.min() == vector.max():
+    if np.isnan(vector).all():
         return vector
 
-    return (vector - np.min(vector)) / (np.max(vector) - np.min(vector))
+    if np.nanmin(vector) == np.nanmax(vector):
+        return vector
+
+    return (vector - np.nanmin(vector)) / (np.nanmax(vector) - np.nanmin(vector))

From 0175e4f90c0073c8f048363e44303b9f606c5e4a Mon Sep 17 00:00:00 2001
From: "Evgeny A. Stepanov" <esrel@hotmail.com>
Date: Thu, 11 Jan 2024 10:47:01 +0100
Subject: [PATCH 5/7] readme fixes

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 73fb08f..6ccd6e3 100644
--- a/README.md
+++ b/README.md
@@ -56,8 +56,8 @@ vector = vectorize(labels, scores)
 # array([0.75, 0.25, 0.  ,  nan])
 ```
 
-The function supports scores input as a vector, matrix or a tensor.
-That is a list of dicts or list of lists of dicts.
+The function supports scores input as a vector, a matrix or a tensor.
+That is a dict, a list of dicts or a list of lists of dicts.
 
 ## Fusion Methods
 

From baac88229a0ef78cd7bd0c5853cb3ace563b9a38 Mon Sep 17 00:00:00 2001
From: "Evgeny A. Stepanov" <esrel@hotmail.com>
Date: Thu, 11 Jan 2024 10:49:25 +0100
Subject: [PATCH 6/7] change log fixes

---
 CHANGELOG.md | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c5d8449..40cb1d4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,9 +22,7 @@
 
 ## 0.1.0
 
-2024.01.04
-
-Initial release.
+2024.01.04: initial release
 
 - `basic`, `voter` and `borda` (simple and tournament-style) decision fusion methods.
 - support function for vector scaling (`scale`).

From f065c1b3ae130b0ec86f4cdbbd14f815faa20ec6 Mon Sep 17 00:00:00 2001
From: "Evgeny A. Stepanov" <esrel@hotmail.com>
Date: Fri, 12 Jan 2024 17:14:01 +0100
Subject: [PATCH 7/7] v0.1.4. with scaling & cutoff

---
 CHANGELOG.md        | 11 +++++++++--
 setup.py            |  2 +-
 src/efusor/fusor.py | 13 +++++++++++++
 tests/test_fusor.py | 41 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 64 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 40cb1d4..527b0d6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,17 +1,24 @@
 # ChangeLog
 
+## 0.1.4
+
+2024.01.12
+
+- added `cutoff` threshold to `fuse`
+- added `scaled` kwarg to `fuse` to apply `softmax` to fused vectors
+
 ## 0.1.3
 
 2024.01.09
 
-- added softmax with nan support to utils
+- added `softmax` with `nan` support to `utils.py`
 
 ## 0.1.2
 
 2024.01.07
 
 - added vectorization from dict of scores
-- added priority fusion
+- added `priority` fusion
 
 ## 0.1.1
 
diff --git a/setup.py b/setup.py
index 9e16a7f..badba25 100644
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@ def read(path):
 setup(
     name='efusor',
     url='https://github.com/esrel/efusor',
-    version='0.1.3',
+    version='0.1.4',
     author='Evgeny A. Stepanov',
     author_email='stepanov.evgeny.a@gmail.com',
     description='Extended Decision Fusion',
diff --git a/src/efusor/fusor.py b/src/efusor/fusor.py
index 53a2391..0a9ea74 100644
--- a/src/efusor/fusor.py
+++ b/src/efusor/fusor.py
@@ -12,11 +12,15 @@
 from efusor.basic import apply
 from efusor.borda import borda
 from efusor.priority import prioritize
+from efusor.utils import softmax
 
 
 def fuse(tensor: list | np.ndarray,
          method: str = "hard_voting",
          weights: list | np.ndarray = None,
+         *,
+         cutoff: float = None,
+         scaled: bool = False,
          digits: int = None
          ) -> list:
     """
@@ -27,12 +31,20 @@ def fuse(tensor: list | np.ndarray,
     :type method: str, optional
     :param weights: predictor weights; defaults to None
     :type weights: np.ndarray, optional
+    :param cutoff: cut-off threshold, lower scores become nan; defaults to None
+    :type cutoff: float, optional
+    :param scaled: whether to re-scale final scores (softmax); defaults to False
+    :type scaled: bool, optional
     :param digits: rounding precision; defaults to None
     :type digits: int, optional
     :return: fused scores
     :rtype: np.ndarray
     """
     tensor = np.array(tensor) if isinstance(tensor, list) else tensor
+
+    if cutoff is not None:  # `if cutoff:` would silently skip a 0.0 threshold
+        tensor = np.where(tensor < cutoff, np.nan, tensor)  # copy: caller's array stays intact
+
     weights = np.array(weights) if isinstance(weights, list) else weights
 
     if method in {"hard_voting", "soft_voting", "majority_voting"}:
@@ -48,6 +60,7 @@ def fuse(tensor: list | np.ndarray,
     else:
         raise ValueError(f"unsupported fusion method: {method}")
 
+    result = np.apply_along_axis(softmax, -1, result) if scaled else result
     result = np.round(result,  decimals=digits) if digits else result
 
     return result.tolist()
diff --git a/tests/test_fusor.py b/tests/test_fusor.py
index 81ed6ca..850c7a2 100644
--- a/tests/test_fusor.py
+++ b/tests/test_fusor.py
@@ -63,3 +63,44 @@ def test_fuse_scalar(scores: list, weights: list) -> None:
             for scalar in vector:
                 with pytest.raises(IndexError):
                     fuse(np.array(scalar), method="hard_voting", weights=np.array(weights))
+
+
+def test_fusor_cutoff(scores: list) -> None:
+    """
+    test cutoff: scores below the threshold are replaced with nan before "max" fusion
+    :param scores: prediction scores
+    :type scores: list
+    """
+    cutoff_max = [[0.7, 0.3, 0.5],
+                  [0.4, 0.4, 0.6],
+                  [0.4, 0.7, np.nan],
+                  [0.3, 0.3, 1.0],
+                  [np.nan, np.nan, np.nan]]
+
+    result = fuse(np.array(scores), method="max", cutoff=0.1)
+    assert np.array_equal(np.array(result), np.array(cutoff_max), equal_nan=True)
+
+
+def test_fusor_scaled(scores: list) -> None:
+    """
+    test softmax re-scaling of "max"-fused scores, without and with cutoff
+    :param scores: prediction scores
+    :type scores: list
+    """
+    scaled_max = [[0.4, 0.27, 0.33],  # expected for scaled=True, digits=2
+                  [0.31, 0.31, 0.38],
+                  [0.33, 0.45, 0.22],
+                  [0.25, 0.25, 0.5],
+                  [0.33, 0.33, 0.33]]
+
+    cutoff_max = [[0.4, 0.27, 0.33],  # expected for cutoff=0.1, scaled=True, digits=2
+                  [0.31, 0.31, 0.38],
+                  [0.43, 0.57, np.nan],
+                  [0.25, 0.25, 0.5],
+                  [np.nan, np.nan, np.nan]]
+
+    result = fuse(np.array(scores), method="max", scaled=True, digits=2)
+    assert np.array_equal(np.array(result), np.array(scaled_max), equal_nan=True)
+
+    result = fuse(np.array(scores), method="max", cutoff=0.1, scaled=True, digits=2)
+    assert np.array_equal(np.array(result), np.array(cutoff_max), equal_nan=True)