-
Notifications
You must be signed in to change notification settings - Fork 0
/
confusion.py
108 lines (85 loc) · 3.19 KB
/
confusion.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
"""Confusion Matrix for categorical data
2019 Colin Dietrich
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
class Matrix:
    """Calculate a confusion matrix for two equally long label sequences.

    Rows of the matrix are true classes and columns are predicted classes.
    The set of classes is the sorted unique values found in `y` and `p`
    together, so a class that only ever appears as a prediction still gets
    its own row/column, and numeric labels do not have to be 0..n-1.

    Parameters
    ----------
    y : array-like, true labels
    p : array-like, predicted labels, same length as y

    Attributes
    ----------
    categorical : bool, True when the first true label is a string
    y : true labels; integer class codes when `categorical` is True,
        otherwise the object that was passed in
    p : predicted labels, stored the same way as y
    df : Pandas DataFrame, aligned y and p data
    dfg : Pandas DataFrame, one row per (p, y) combination with count 'n'
    n_classes : int, number of distinct classes across y and p
    labels : Numpy array, sorted class labels naming the matrix rows/columns
    a : Numpy array, confusion matrix values; a[i, j] counts samples whose
        true class is labels[i] and predicted class is labels[j]
    df_cm : Pandas DataFrame, confusion matrix values with row/column labels
    cmap : str, colormap `plot` uses unless the caller passes its own

    Raises
    ------
    ValueError
        If y and p differ in length.
    """

    def __init__(self, y, p):
        # Explicit exception instead of `assert`, which is stripped under -O.
        if len(y) != len(p):
            raise ValueError(
                "y and p must have the same length, got {} and {}".format(
                    len(y), len(p)))

        y_values = np.asarray(y)
        p_values = np.asarray(p)
        n_samples = len(y_values)

        # Look at the first element positionally: `y[0]` breaks on empty
        # input and on a pandas Series whose index has no label 0.
        self.categorical = (y_values.size > 0
                            and isinstance(y_values.flat[0], str))

        # Sorted unique classes over BOTH sequences, plus each sample's
        # position in that sorted list.
        classes, codes = np.unique(np.concatenate([y_values, p_values]),
                                   return_inverse=True)

        if self.categorical:
            # String labels are stored as their integer class codes.
            y = codes[:n_samples]
            p = codes[n_samples:]
            keys = range(len(classes))
        else:
            keys = classes.tolist()

        # Maps a value as it appears in `df` to its row/column in `a`.
        self._index = {key: i for i, key in enumerate(keys)}

        self.y = y
        self.p = p
        self.df = pd.DataFrame({'y': self.y, 'p': self.p})

        self.n_classes = len(classes)
        self.labels = classes

        self.a = np.zeros((self.n_classes, self.n_classes))
        self.dfg = self.df.groupby(['p', 'y']).size().reset_index(name='n')
        for row in self.dfg.itertuples(index=False):
            self.assemble_cm(row)

        self.df_cm = pd.DataFrame(self.a, self.labels, self.labels)
        self.cmap = 'Greens'

    def assemble_cm(self, row):
        """Add one grouped (y, p) count to the confusion matrix `self.a`.

        Parameters
        ----------
        row : row of `self.dfg` (any object with attributes 'y', 'p', 'n')
            'y' = true value of input data sample
            'p' = predicted value of input data sample
            'n' = number of times this prediction combination occurred
        """
        i = self._index[row.y]
        j = self._index[row.p]
        self.a[i, j] += row.n

    def plot(self, font_scale=1.4, axis_labels=True, ticklabels=False,
             figsize=5, **kwargs):
        """Plot the confusion matrix as a seaborn heatmap and show it.

        Parameters
        ----------
        font_scale : float, font scale multiplier
        axis_labels : bool, show axis labels
        ticklabels : bool, show ticklabels
        figsize : float, size in inches to make square confusion matrix
        **kwargs : keyword arguments to pass to seaborn.heatmap method;
            'cmap' defaults to `self.cmap` when not given
        """
        # The same labels (or none) go on both axes.
        tick_labels = self.labels if ticklabels else False

        # Use the instance colormap unless the caller overrides it.
        kwargs.setdefault('cmap', self.cmap)

        fig, ax = plt.subplots(figsize=(figsize, figsize))
        # NOTE: sns.set changes seaborn/matplotlib settings globally, not
        # only for this figure.
        sns.set(font_scale=font_scale)
        sns.heatmap(self.df_cm,
                    xticklabels=tick_labels, yticklabels=tick_labels,
                    square=True, ax=ax, **kwargs)
        if axis_labels:
            ax.set_ylabel("True label")
            ax.set_xlabel("Predicted label")
        plt.show()