Skip to content

Commit

Permalink
Merge pull request #3 from esrel/dev
Browse files Browse the repository at this point in the history
nan softmax, etc.
  • Loading branch information
esrel authored Jan 17, 2024
2 parents 0d0ccac + f065c1b commit 7541e31
Show file tree
Hide file tree
Showing 8 changed files with 117 additions and 8 deletions.
19 changes: 15 additions & 4 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,24 @@
# ChangeLog

## 0.1.4

2024.01.12

- added `cutoff` threshold to `fuse`
- added `scaled` kwarg to `fuse` to apply `softmax` to fused vectors

## 0.1.3

2024.01.09

- added `softmax` with `nan` support to `utils.py`

## 0.1.2

2024.01.07

- added vectorization from dict of scores
- added priority fusion
- added `priority` fusion

## 0.1.1

Expand All @@ -16,9 +29,7 @@

## 0.1.0

2024.01.04

Initial release.
2024.01.04: initial release

- `basic`, `voter` and `borda` (simple and tournament-style) decision fusion methods.
- support function for vector scaling (`scale`).
Expand Down
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,26 @@ where predictors (classifiers) may have different label spaces.
Consequently, the library makes a distinction between classes predicted with a low score (`0.0`)
and classes that were not predicted (`nan`).

## Vectorization

eFusor provides a `vectorize` function that converts prediction scores into a vector,
distinguishing between predicted and not predicted classes.
The function expects a `list` of class labels
and a `dict` of prediction scores.

```python
from efusor import vectorize

labels = ["A", "B", "C", "D"]
scores = {"A": 0.75, "B": 0.25, "C": 0.00}

vector = vectorize(labels, scores)
# array([0.75, 0.25, 0. , nan])
```

The function accepts scores as a vector, a matrix or a tensor;
that is, as a dict, a list of dicts or a list of lists of dicts, respectively.

## Fusion Methods

### Basic Fusion Methods
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def read(path):
setup(
name='efusor',
url='https://github.com/esrel/efusor',
version='0.1.2',
version='0.1.4',
author='Evgeny A. Stepanov',
author_email='stepanov.evgeny.a@gmail.com',
description='Extended Decision Fusion',
Expand Down
13 changes: 13 additions & 0 deletions src/efusor/fusor.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,15 @@
from efusor.basic import apply
from efusor.borda import borda
from efusor.priority import prioritize
from efusor.utils import softmax


def fuse(tensor: list | np.ndarray,
method: str = "hard_voting",
weights: list | np.ndarray = None,
*,
cutoff: float = None,
scaled: bool = False,
digits: int = None
) -> list:
"""
Expand All @@ -27,12 +31,20 @@ def fuse(tensor: list | np.ndarray,
:type method: str, optional
:param weights: predictor weights; defaults to None
:type weights: np.ndarray, optional
:param cutoff: prediction cut-off threshold; defaults to None
:type cutoff: float, optional
:param scaled: if to re-scale final scores (softmax); defaults to False
:type scaled: bool, optional
:param digits: rounding precision; defaults to None
:type digits: int, optional
:return: fused scores
:rtype: np.ndarray
"""
tensor = np.array(tensor) if isinstance(tensor, list) else tensor

if cutoff:
tensor[tensor < cutoff] = np.nan

weights = np.array(weights) if isinstance(weights, list) else weights

if method in {"hard_voting", "soft_voting", "majority_voting"}:
Expand All @@ -48,6 +60,7 @@ def fuse(tensor: list | np.ndarray,
else:
raise ValueError(f"unsupported fusion method: {method}")

result = np.apply_along_axis(softmax, -1, result) if scaled else result
result = np.round(result, decimals=digits) if digits else result

return result.tolist()
7 changes: 5 additions & 2 deletions src/efusor/scaler.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@ def scale(vector: np.ndarray) -> np.ndarray:
:return: vector
:rtype: np.ndarray
"""
if not vector.any() or vector.min() == vector.max():
if np.isnan(vector).all():
return vector

return (vector - np.min(vector)) / (np.max(vector) - np.min(vector))
if np.nanmin(vector) == np.nanmax(vector):
return vector

return (vector - np.nanmin(vector)) / (np.nanmax(vector) - np.nanmin(vector))
11 changes: 11 additions & 0 deletions src/efusor/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,14 @@ def batch(*vector: np.ndarray) -> np.ndarray:
:rtype: np.ndarray
"""
return np.stack(vector)


def softmax(vector: np.ndarray) -> np.ndarray:
    """
    numerically stable softmax with nan support

    ``nan`` entries are ignored when computing the maximum and the
    normalization sum, and stay ``nan`` in the output.
    An empty or all-``nan`` vector is returned as an all-``nan`` vector
    of the same shape.

    :param vector: predictions scores (not probability)
    :type vector: np.ndarray
    :return: softmax
    :rtype: np.ndarray
    """
    vector = np.asarray(vector, dtype=float)

    # nothing to normalize: np.nanmax would warn on an all-nan slice
    # and raise on an empty one, so short-circuit both cases
    if np.isnan(vector).all():
        return np.full(vector.shape, np.nan)

    # subtract the max before exponentiation to avoid overflow;
    # compute the exponentials once and reuse them for the normalization
    exps = np.exp(vector - np.nanmax(vector))
    return exps / np.nansum(exps)
41 changes: 41 additions & 0 deletions tests/test_fusor.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,44 @@ def test_fuse_scalar(scores: list, weights: list) -> None:
for scalar in vector:
with pytest.raises(IndexError):
fuse(np.array(scalar), method="hard_voting", weights=np.array(weights))


def test_fusor_cutoff(scores: list) -> None:
    """
    check ``max`` fusion with a ``cutoff`` threshold of 0.1
    :param scores: prediction scores
    :type scores: list
    """
    nan = np.nan
    # expected fused scores for the ``scores`` fixture
    expected = np.array([
        [0.7, 0.3, 0.5],
        [0.4, 0.4, 0.6],
        [0.4, 0.7, nan],
        [0.3, 0.3, 1.0],
        [nan, nan, nan],
    ])

    fused = np.array(fuse(np.array(scores), method="max", cutoff=0.1))

    # nan marks "not predicted", so nan positions must match too
    assert np.array_equal(fused, expected, equal_nan=True)


def test_fusor_scaled(scores: list) -> None:
    """
    check ``max`` fusion with ``scaled=True``, with and without ``cutoff``
    :param scores: prediction scores
    :type scores: list
    """
    nan = np.nan

    # expected scores without a cut-off threshold
    expected_scaled = np.array([
        [0.4, 0.27, 0.33],
        [0.31, 0.31, 0.38],
        [0.33, 0.45, 0.22],
        [0.25, 0.25, 0.5],
        [0.33, 0.33, 0.33],
    ])

    # expected scores with ``cutoff=0.1``
    expected_cutoff = np.array([
        [0.4, 0.27, 0.33],
        [0.31, 0.31, 0.38],
        [0.43, 0.57, nan],
        [0.25, 0.25, 0.5],
        [nan, nan, nan],
    ])

    # (extra keyword arguments, expected output), checked in this order
    cases = (
        ({}, expected_scaled),
        ({"cutoff": 0.1}, expected_cutoff),
    )

    for extra, expected in cases:
        fused = fuse(np.array(scores), method="max", scaled=True, digits=2, **extra)
        assert np.array_equal(np.array(fused), expected, equal_nan=True)
12 changes: 11 additions & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import numpy as np

from efusor.utils import batch, vectorize
from efusor.utils import batch, vectorize, softmax


def test_batch_tensor(scores: list) -> None:
Expand Down Expand Up @@ -89,3 +89,13 @@ def test_vectorize_vector(scores: list) -> None:
for j, vector in enumerate(matrix):
assert np.array_equal(vectorize(labels, vector),
np.array(scores[i][j]), equal_nan=True)


def test_softmax() -> None:
    """ test softmax: outputs sum to 1 (within float tolerance), nan is kept """
    # test normal
    # np.isclose instead of ``== 1.0``: a float sum may be off by a rounding error
    assert np.isclose(softmax(np.array([0.25, -0.25, 0.0])).sum(), 1.0)
    # test large number (must not overflow)
    assert np.isclose(softmax(np.array([999, 100, 0.0])).sum(), 1.0)
    # test nan support: nan is skipped by the normalization and stays nan
    result = softmax(np.array([0.75, 0.25, 0.0, np.nan]))
    assert np.isclose(np.nansum(result), 1.0)
    assert np.isnan(result[-1])

0 comments on commit 7541e31

Please sign in to comment.