Update test comparison with sklearn library
thieu1995 committed Feb 23, 2024
1 parent a0deec4 commit 1f43bef
Showing 3 changed files with 276 additions and 3 deletions.
102 changes: 99 additions & 3 deletions tests/test_comparisons/test_sklearn_classification.py
@@ -4,9 +4,105 @@
# Github: https://github.com/thieu1995 %
# --------------------------------------------------%

import numpy as np
from permetrics import ClassificationMetric
import pytest
from sklearn.metrics import accuracy_score, f1_score, fbeta_score, precision_score, recall_score, log_loss

# Quick sanity check (runs at import time): sklearn's log_loss accepts string
# labels together with per-class probability rows.
print(log_loss(["spam", "ham", "ham", "spam"],
               [[.1, .9], [.9, .1], [.8, .2], [.35, .65]]))
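# Illustration (not part of the permetrics comparison): log loss is the mean
# negative log of the probability assigned to the true class. With classes
# sorted alphabetically as ["ham", "spam"], "spam" maps to column 1:
_labels = np.array([1, 0, 0, 1])
_probs = np.array([[.1, .9], [.9, .1], [.8, .2], [.35, .65]])
_manual = -np.mean(np.log(_probs[np.arange(4), _labels]))  # same value as the print above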

@pytest.fixture(scope="module")  # scope="module": built once for the whole test module
def data():
    y_true1 = np.array([0, 1, 0, 0, 1, 0, 0, 1, 1, 0])
    y_pred1 = np.array([0, 1, 0, 0, 0, 1, 0, 0, 0, 1])
    cm1 = ClassificationMetric(y_true=y_true1, y_pred=y_pred1)

    # One-hot encoded y_true with per-class probability predictions
    y_true2 = np.array([[0, 1, 0],   # Class 1
                        [1, 0, 0],   # Class 0
                        [0, 0, 1],   # Class 2
                        [0, 1, 0],   # Class 1
                        [0, 0, 1]])  # Class 2
    y_pred2 = np.array([[0.1, 0.8, 0.1],  # columns: P(Class 0), P(Class 1), P(Class 2)
                        [0.7, 0.2, 0.1],
                        [0.2, 0.3, 0.5],
                        [0.3, 0.6, 0.1],
                        [0.1, 0.2, 0.7]])
    cm2 = ClassificationMetric(y_true=y_true2, y_pred=y_pred2)

    # Integer class labels with per-class probability predictions
    y_true3 = np.array([0, 1, 2, 0, 2])
    y_pred3 = np.array([[0.1, 0.8, 0.1],  # columns: P(Class 0), P(Class 1), P(Class 2)
                        [0.7, 0.2, 0.1],
                        [0.2, 0.3, 0.5],
                        [0.3, 0.6, 0.1],
                        [0.1, 0.2, 0.7]])
    cm3 = ClassificationMetric(y_true=y_true3, y_pred=y_pred3)
    return (y_true1, y_pred1), (y_true2, y_pred2), (y_true3, y_pred3), cm1, cm2, cm3


def test_AS(data):
    (y_true1, y_pred1), (y_true2, y_pred2), (y_true3, y_pred3), cm1, cm2, cm3 = data
    # Micro-averaged precision reduces to plain accuracy for single-label problems,
    # so PS(average="micro") is comparable to sklearn's accuracy_score.
    res11 = cm1.PS(average="micro")
    res12 = accuracy_score(y_true1, y_pred1)
    assert res11 == res12

    # res21 = cm2.PS(average="micro")
    # res22 = accuracy_score(y_true2, y_pred2)  # ValueError: Classification metrics can't handle a mix of multiclass and continuous-multioutput targets
    # assert res21 == res22

    # res31 = cm3.PS(average="micro")
    # res32 = accuracy_score(y_true3, y_pred3)  # ValueError: Classification metrics can't handle a mix of multiclass and continuous-multioutput targets
    # assert res31 == res32
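    # A possible workaround (sketch, an assumption rather than the library's
    # documented usage): collapse the probability rows to hard labels first,
    # then sklearn can score them against the integer targets.
    res33 = accuracy_score(y_true3, np.argmax(y_pred3, axis=1))
    assert 0.0 <= res33 <= 1.0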


# avg_paras = [None, "macro", "micro", "weighted"]
# outs = (dict, float, float, float)
#
# for idx, avg in enumerate(avg_paras):
#     for cm in data:
#         res = cm.PS(average=avg)
#         assert isinstance(res, outs[idx])


def test_F1S(data):
    (y_true1, y_pred1), (y_true2, y_pred2), (y_true3, y_pred3), cm1, cm2, cm3 = data
    res11 = cm1.F1S(average="micro")
    res12 = f1_score(y_true1, y_pred1, average="micro")
    assert res11 == res12

    res11 = cm1.F1S(average="macro")
    res12 = f1_score(y_true1, y_pred1, average="macro")
    assert res11 == res12


def test_FBS(data):
    (y_true1, y_pred1), (y_true2, y_pred2), (y_true3, y_pred3), cm1, cm2, cm3 = data
    res11 = cm1.FBS(average="micro", beta=1.5)
    res12 = fbeta_score(y_true1, y_pred1, average="micro", beta=1.5)
    assert res11 == res12

    res11 = cm1.FBS(average="macro", beta=2.0)
    res12 = fbeta_score(y_true1, y_pred1, average="macro", beta=2.0)
    assert res11 == res12
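
# For reference, F-beta weights recall beta times as heavily as precision:
#     F_beta = (1 + beta^2) * precision * recall / (beta^2 * precision + recall)
# so beta > 1 favours recall and beta < 1 favours precision.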


def test_PS(data):
    (y_true1, y_pred1), (y_true2, y_pred2), (y_true3, y_pred3), cm1, cm2, cm3 = data
    res11 = cm1.PS(average="micro")
    res12 = precision_score(y_true1, y_pred1, average="micro")
    assert res11 == res12

    res11 = cm1.PS(average="macro")
    res12 = precision_score(y_true1, y_pred1, average="macro")
    assert res11 == res12


def test_RS(data):
    (y_true1, y_pred1), (y_true2, y_pred2), (y_true3, y_pred3), cm1, cm2, cm3 = data
    res11 = cm1.RS(average="micro")
    res12 = recall_score(y_true1, y_pred1, average="micro")
    assert res11 == res12

    res11 = cm1.RS(average="macro")
    res12 = recall_score(y_true1, y_pred1, average="macro")
    assert res11 == res12

109 changes: 109 additions & 0 deletions tests/test_comparisons/test_sklearn_clustering.py
@@ -0,0 +1,109 @@
#!/usr/bin/env python
# Created by "Thieu" at 16:47, 23/02/2024 ----------%
# Email: nguyenthieu2102@gmail.com %
# Github: https://github.com/thieu1995 %
# --------------------------------------------------%

import numpy as np
from permetrics import ClusteringMetric
from sklearn.metrics import mutual_info_score, normalized_mutual_info_score, \
    adjusted_rand_score, rand_score, \
    completeness_score, homogeneity_score, v_measure_score, \
    fowlkes_mallows_score, calinski_harabasz_score, davies_bouldin_score
import pytest

np.random.seed(42)


def is_close_enough(x1, x2, eps=1e-5):
    return abs(x1 - x2) <= eps
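
# Equivalent built-in check (for reference):
#     assert res1 == pytest.approx(res2, abs=1e-5)
# The explicit helper is kept so the tolerance convention stays visible.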


@pytest.fixture(scope="module")
def internal_data():
    # generate sample data
    X = np.random.uniform(-1, 10, size=(300, 6))
    y_pred = np.random.randint(0, 3, size=300)
    evaluator = ClusteringMetric(y_pred=y_pred, X=X, force_finite=True)
    return (X, y_pred), evaluator


@pytest.fixture(scope="module")
def external_data():
    # generate sample data
    y_true = np.random.randint(0, 3, size=300)
    y_pred = np.random.randint(0, 3, size=300)
    evaluator = ClusteringMetric(y_true=y_true, y_pred=y_pred, force_finite=True)
    return (y_true, y_pred), evaluator
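
# Note: "internal" metrics (e.g. CHI, DBI) score a clustering against the data
# X itself, while "external" metrics compare a predicted labeling with a
# reference labeling, hence the two fixtures return different tuples.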


def test_MIS(external_data):
    (y_true, y_pred), cm = external_data
    res1 = cm.MIS()
    res2 = mutual_info_score(y_true, y_pred)
    assert is_close_enough(res1, res2)


def test_NMIS(external_data):
    (y_true, y_pred), cm = external_data
    res1 = cm.NMIS()
    res2 = normalized_mutual_info_score(y_true, y_pred)
    assert is_close_enough(res1, res2)


def test_RaS(external_data):
    (y_true, y_pred), cm = external_data
    res1 = cm.RaS()
    res2 = rand_score(y_true, y_pred)
    assert is_close_enough(res1, res2)


def test_ARS(external_data):
    (y_true, y_pred), cm = external_data
    res1 = cm.ARS()
    res2 = adjusted_rand_score(y_true, y_pred)
    assert is_close_enough(res1, res2)


def test_CS(external_data):
    (y_true, y_pred), cm = external_data
    res1 = cm.CS()
    res2 = completeness_score(y_true, y_pred)
    assert is_close_enough(res1, res2)


def test_HS(external_data):
    (y_true, y_pred), cm = external_data
    res1 = cm.HS()
    res2 = homogeneity_score(y_true, y_pred)
    assert is_close_enough(res1, res2)


def test_VMS(external_data):
    (y_true, y_pred), cm = external_data
    res1 = cm.VMS()
    res2 = v_measure_score(y_true, y_pred)
    assert is_close_enough(res1, res2)


def test_FMS(external_data):
    (y_true, y_pred), cm = external_data
    res1 = cm.FMS()
    res2 = fowlkes_mallows_score(y_true, y_pred)
    assert is_close_enough(res1, res2)


def test_CHI(internal_data):
    (X, y_pred), cm = internal_data  # internal_data yields (X, y_pred), not labels pairs
    res1 = cm.CHI()
    res2 = calinski_harabasz_score(X, y_pred)
    assert is_close_enough(res1, res2)


def test_DBI(internal_data):
    (X, y_pred), cm = internal_data
    res1 = cm.DBI()
    res2 = davies_bouldin_score(X, y_pred)
    assert is_close_enough(res1, res2)
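

# A possible further comparison (sketch, left commented out; the permetrics
# method name SI() is an assumption to verify against the permetrics docs):
# from sklearn.metrics import silhouette_score
# def test_SI(internal_data):
#     (X, y_pred), cm = internal_data
#     res1 = cm.SI()
#     res2 = silhouette_score(X, y_pred)
#     assert is_close_enough(res1, res2)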
68 changes: 68 additions & 0 deletions tests/test_comparisons/test_sklearn_regression.py
@@ -0,0 +1,68 @@
#!/usr/bin/env python
# Created by "Thieu" at 18:21, 22/02/2024 ----------%
# Email: nguyenthieu2102@gmail.com %
# Github: https://github.com/thieu1995 %
# --------------------------------------------------%

import numpy as np
from permetrics import RegressionMetric
from sklearn.metrics import explained_variance_score, max_error, mean_absolute_error, \
    mean_squared_error, median_absolute_error, r2_score, mean_absolute_percentage_error
import pytest


@pytest.fixture(scope="module")  # scope="module": built once for the whole test module
def data():
    y_true = np.array([3, -0.5, 2, 7, 5, 3, 4, -3, 10])
    y_pred = np.array([2.5, 0.0, 2, 8, 5, 2, 3.5, -4, 9])
    rm = RegressionMetric(y_true=y_true, y_pred=y_pred)
    return y_true, y_pred, rm


def test_EVS(data):
    y_true, y_pred, rm = data
    res11 = rm.EVS()
    res12 = explained_variance_score(y_true, y_pred)
    assert res11 == res12


def test_ME(data):
    y_true, y_pred, rm = data
    res11 = rm.ME()
    res12 = max_error(y_true, y_pred)
    assert res11 == res12


def test_MAE(data):
    y_true, y_pred, rm = data
    res11 = rm.MAE()
    res12 = mean_absolute_error(y_true, y_pred)
    assert res11 == res12


def test_MSE(data):
    y_true, y_pred, rm = data
    res11 = rm.MSE()
    res12 = mean_squared_error(y_true, y_pred)
    assert res11 == res12


def test_MedAE(data):
    y_true, y_pred, rm = data
    res11 = rm.MedAE()
    res12 = median_absolute_error(y_true, y_pred)
    assert res11 == res12


def test_R2(data):
    y_true, y_pred, rm = data
    res11 = rm.R2()
    res12 = r2_score(y_true, y_pred)
    assert res11 == res12


def test_MAPE(data):
    y_true, y_pred, rm = data
    res11 = rm.MAPE()
    res12 = mean_absolute_percentage_error(y_true, y_pred)
    assert res11 == res12
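

# A sketch of one more comparison, left commented out. RMSE() on the permetrics
# side and the squared=False flag on the sklearn side are assumptions to verify;
# newer sklearn releases expose root_mean_squared_error instead.
# def test_RMSE(data):
#     y_true, y_pred, rm = data
#     res11 = rm.RMSE()
#     res12 = mean_squared_error(y_true, y_pred, squared=False)
#     assert res11 == res12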
