nan softmax, etc. #3

Merged 8 commits on Jan 17, 2024
19 changes: 15 additions & 4 deletions CHANGELOG.md
@@ -1,11 +1,24 @@
# ChangeLog

## 0.1.4

2024.01.12

- added `cutoff` threshold to `fuse`
- added `scaled` kwarg to `fuse` to apply `softmax` to fused vectors

## 0.1.3

2024.01.09

- added `softmax` with `nan` support to `utils.py`

## 0.1.2

2024.01.07

- added vectorization from dict of scores
- added priority fusion
- added `priority` fusion

## 0.1.1

@@ -16,9 +29,7 @@

## 0.1.0

2024.01.04

Initial release.
2024.01.04: initial release

- `basic`, `voter` and `borda` (simple and tournament-style) decision fusion methods.
- support function for vector scaling (`scale`).
20 changes: 20 additions & 0 deletions README.md
@@ -39,6 +39,26 @@ where predictors (classifiers) may have different label spaces.
Consequently, the library makes a distinction between classes predicted with a low score (`0.0`)
and classes that were not predicted (`nan`).

## Vectorization

eFusor provides a `vectorize` function that performs this vectorization,
making a distinction between predicted and non-predicted classes.
The function expects a `list` of class labels
and a `dict` of prediction scores.

```python
from efusor import vectorize

labels = ["A", "B", "C", "D"]
scores = {"A": 0.75, "B": 0.25, "C": 0.00}

vector = vectorize(labels, scores)
# array([0.75, 0.25, 0. , nan])
```

The function also accepts scores as a vector, a matrix or a tensor;
that is, a dict, a list of dicts, or a list of lists of dicts.
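
For illustration, a minimal sketch of the matrix case; the output shape shown in the comments is an expectation based on the description above rather than documented behaviour.

```python
from efusor import vectorize

labels = ["A", "B", "C", "D"]
matrix = [{"A": 0.75, "B": 0.25},             # predictor 1
          {"B": 0.50, "C": 0.50, "D": 0.00}]  # predictor 2

vectors = vectorize(labels, matrix)
# expected: one row per dict, nan for classes a predictor did not score, e.g.
# array([[0.75, 0.25,  nan,  nan],
#        [ nan, 0.5 , 0.5 , 0.  ]])
```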

## Fusion Methods

### Basic Fusion Methods
2 changes: 1 addition & 1 deletion setup.py
@@ -10,7 +10,7 @@ def read(path):
setup(
    name='efusor',
    url='https://github.com/esrel/efusor',
    version='0.1.2',
    version='0.1.4',
    author='Evgeny A. Stepanov',
    author_email='stepanov.evgeny.a@gmail.com',
    description='Extended Decision Fusion',
13 changes: 13 additions & 0 deletions src/efusor/fusor.py
@@ -12,11 +12,15 @@
from efusor.basic import apply
from efusor.borda import borda
from efusor.priority import prioritize
from efusor.utils import softmax


def fuse(tensor: list | np.ndarray,
         method: str = "hard_voting",
         weights: list | np.ndarray = None,
         *,
         cutoff: float = None,
         scaled: bool = False,
         digits: int = None
         ) -> list:
    """
@@ -27,12 +31,20 @@ def fuse(tensor: list | np.ndarray,
    :type method: str, optional
    :param weights: predictor weights; defaults to None
    :type weights: np.ndarray, optional
    :param cutoff: prediction cut-off threshold; defaults to None
    :type cutoff: float, optional
    :param scaled: whether to re-scale the final scores with softmax; defaults to False
    :type scaled: bool, optional
    :param digits: rounding precision; defaults to None
    :type digits: int, optional
    :return: fused scores
    :rtype: np.ndarray
    """
    tensor = np.array(tensor) if isinstance(tensor, list) else tensor

    if cutoff is not None:
        tensor[tensor < cutoff] = np.nan

    weights = np.array(weights) if isinstance(weights, list) else weights

    if method in {"hard_voting", "soft_voting", "majority_voting"}:
@@ -48,6 +60,7 @@
    else:
        raise ValueError(f"unsupported fusion method: {method}")

    result = np.apply_along_axis(softmax, -1, result) if scaled else result
    result = np.round(result, decimals=digits) if digits else result

    return result.tolist()
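
To illustrate the new keyword arguments, a hypothetical usage sketch; the tensor layout (samples × predictors × classes) and the behaviour of the `"max"` method are assumptions drawn from the tests in this PR, not documented API.

```python
from efusor.fusor import fuse  # module path as in this diff

# hypothetical scores: 1 sample x 2 predictors x 3 classes
tensor = [[[0.7, 0.3, 0.1],
           [0.2, 0.3, 0.5]]]

# scores below 0.25 become nan, the fused vector is softmax-scaled and rounded
fused = fuse(tensor, method="max", cutoff=0.25, scaled=True, digits=2)
print(fused)  # roughly [[0.4, 0.27, 0.33]] if "max" takes the per-class nanmax
```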
7 changes: 5 additions & 2 deletions src/efusor/scaler.py
@@ -20,7 +20,10 @@ def scale(vector: np.ndarray) -> np.ndarray:
     :return: vector
     :rtype: np.ndarray
     """
-    if not vector.any() or vector.min() == vector.max():
+    if np.isnan(vector).all():
         return vector
 
-    return (vector - np.min(vector)) / (np.max(vector) - np.min(vector))
+    if np.nanmin(vector) == np.nanmax(vector):
+        return vector
+
+    return (vector - np.nanmin(vector)) / (np.nanmax(vector) - np.nanmin(vector))
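
A brief usage sketch of the nan-aware behaviour (input values chosen for illustration):

```python
import numpy as np

from efusor.scaler import scale  # module path as in this diff

vector = np.array([0.2, 0.8, np.nan, 0.5])
print(scale(vector))  # [0.  1.  nan 0.5]; nan entries pass through min-max scaling

# all-nan (and constant) vectors are returned unchanged
print(scale(np.array([np.nan, np.nan])))  # [nan nan]
```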
11 changes: 11 additions & 0 deletions src/efusor/utils.py
@@ -32,3 +32,14 @@ def batch(*vector: np.ndarray) -> np.ndarray:
    :rtype: np.ndarray
    """
    return np.stack(vector)


def softmax(vector: np.ndarray) -> np.ndarray:
    """
    numerically stable softmax with nan support
    :param vector: prediction scores (not probabilities)
    :type vector: np.ndarray
    :return: softmax
    :rtype: np.ndarray
    """
    return np.exp(vector - np.nanmax(vector)) / np.nansum(np.exp(vector - np.nanmax(vector)))
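
For reference, a small sketch of the nan behaviour, mirroring the test added below; the printed values are approximate.

```python
import numpy as np

from efusor.utils import softmax

scores = np.array([0.75, 0.25, 0.0, np.nan])
probs = softmax(scores)
# nan positions stay nan; the remaining probabilities sum to 1.0
print(probs)             # approximately [0.48, 0.29, 0.23, nan]
print(np.nansum(probs))  # 1.0
```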
41 changes: 41 additions & 0 deletions tests/test_fusor.py
@@ -63,3 +63,44 @@ def test_fuse_scalar(scores: list, weights: list) -> None:
    for scalar in vector:
        with pytest.raises(IndexError):
            fuse(np.array(scalar), method="hard_voting", weights=np.array(weights))


def test_fusor_cutoff(scores: list) -> None:
    """
    test cutoff
    :param scores: prediction scores
    :type scores: list
    """
    cutoff_max = [[0.7, 0.3, 0.5],
                  [0.4, 0.4, 0.6],
                  [0.4, 0.7, np.nan],
                  [0.3, 0.3, 1.0],
                  [np.nan, np.nan, np.nan]]

    result = fuse(np.array(scores), method="max", cutoff=0.1)
    assert np.array_equal(np.array(result), np.array(cutoff_max), equal_nan=True)


def test_fusor_scaled(scores: list) -> None:
    """
    test softmax
    :param scores: prediction scores
    :type scores: list
    """
    scaled_max = [[0.4, 0.27, 0.33],
                  [0.31, 0.31, 0.38],
                  [0.33, 0.45, 0.22],
                  [0.25, 0.25, 0.5],
                  [0.33, 0.33, 0.33]]

    cutoff_max = [[0.4, 0.27, 0.33],
                  [0.31, 0.31, 0.38],
                  [0.43, 0.57, np.nan],
                  [0.25, 0.25, 0.5],
                  [np.nan, np.nan, np.nan]]

    result = fuse(np.array(scores), method="max", scaled=True, digits=2)
    assert np.array_equal(np.array(result), np.array(scaled_max), equal_nan=True)

    result = fuse(np.array(scores), method="max", cutoff=0.1, scaled=True, digits=2)
    assert np.array_equal(np.array(result), np.array(cutoff_max), equal_nan=True)
12 changes: 11 additions & 1 deletion tests/test_utils.py
@@ -4,7 +4,7 @@

import numpy as np

from efusor.utils import batch, vectorize
from efusor.utils import batch, vectorize, softmax


def test_batch_tensor(scores: list) -> None:
@@ -89,3 +89,13 @@ def test_vectorize_vector(scores: list) -> None:
        for j, vector in enumerate(matrix):
            assert np.array_equal(vectorize(labels, vector),
                                  np.array(scores[i][j]), equal_nan=True)


def test_softmax() -> None:
    """ test softmax """
    # test normal
    assert softmax(np.array([0.25, -0.25, 0.0])).sum() == 1.0
    # test large number
    assert softmax(np.array([999, 100, 0.0])).sum() == 1.0
    # test nan support
    assert np.nansum(softmax(np.array([0.75, 0.25, 0.0, np.nan]))) == 1.0