-
Notifications
You must be signed in to change notification settings - Fork 0
/
Metrics.py
155 lines (109 loc) · 3.41 KB
/
Metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import pandas as pd
import numpy as np
import multiprocessing as mp
# def new_function(
# arg1: type,
# arg2: type
# ):
# '''
# Lorem ipsum
#
# Parameters:
#
# * param (type): Lorem ipsum
#
# Output:
#
# * output (type): Lorem ipsum
# '''
#
# pass
def accuracy_score(
        targs: np.array,
        preds: np.array,
        wghts: np.array = None,
        ):
    '''
    Computes the (optionally weighted) share of predictions that match
    their targets

    Parameters:

    * targs (array-like): The real target outcomes
    * preds (array-like): The predictions made for those targets

    Optional:

    * wghts (array-like): One weight per sample. When omitted, every
      sample is given a weight of 1

    Output:

    * (float): The summed weight of the matching predictions divided by
      the summed weight of all samples
    '''

    # Fall back to uniform weights when the caller supplied none
    sample_weights = np.ones(len(targs)) if wghts is None else wghts

    # 1 where the prediction equals the target, 0 elsewhere
    matches = (np.array(targs) == np.array(preds)).astype(int)

    matched_weight = np.dot(matches, sample_weights)
    return matched_weight / sum(sample_weights)
def wt_unique(
        elements: np.array,
        weights: np.array
        ):
    '''
    Calculates the unique values in an array, along with the total weight
    of the samples holding each value.

    Parameters:

    * elements (array-like): the values to group, one per sample
    * weights (array-like): the sample weights, one per entry of
      `elements`

    Output:

    * (ndarray): the sorted unique values found in `elements`
    * (ndarray): for each unique value, the sum of the weights of the
      samples equal to it (same order as the first output)
    '''

    # Convert once so the comparison below is always an element-wise array
    # comparison, whatever container the caller passed in.
    elements = np.asarray(elements)
    weights = np.asarray(weights)

    uniques = np.unique(elements)
    e_weights = np.zeros(len(uniques))

    for i, value in enumerate(uniques):
        # Boolean mask of the samples equal to this value; multiplying by
        # it zeroes out the weight of every other sample.
        mask = elements == value
        e_weights[i] = np.sum(weights * mask)

    return uniques, e_weights
def maj_err(target, weights):
    '''
    Calculates the weighted majority error of an output data column

    Parameters:

    * target (ndarray): a vector of output classes
    * weights (ndarray): a vector of sample weights, one per entry of
      `target`

    Output:

    * (float): 1 minus the share of the total weight held by the
      heaviest class
    '''

    # Only the per-class weight totals are needed, not the class labels
    _, class_weights = wt_unique(target, weights)

    heaviest = class_weights.max()
    majority_share = heaviest / sum(weights)

    return 1 - majority_share
def gini(target, weights):
    '''
    Calculates the weighted gini error of an output data column

    Parameters:

    * target (ndarray): a vector of output classes
    * weights (ndarray): a vector of sample weights, one per entry of
      `target`

    Output:

    * (float): 1 minus the sum of the squared weight shares of the
      classes
    '''

    class_weights = wt_unique(target, weights)[1]
    total = sum(class_weights)

    # Accumulate the squared share of every class, in class order
    squared_shares = 0
    for class_weight in class_weights:
        share = class_weight / total
        squared_shares += share ** 2

    return 1 - squared_shares
def entropy(target, weights):
    '''
    Calculates the weighted entropy (in bits) of an output data column

    Parameters:

    * target (ndarray): a vector of output classes
    * weights (ndarray): a vector of sample weights, one per entry of
      `target`

    Output:

    * (float): the negated sum of p * log2(p) over the classes, where p
      is the share of the total weight held by a class. Classes holding
      no weight contribute nothing
    '''

    # Total weight carried by each distinct class of the target
    _, e_weights = wt_unique(target, weights)

    # Turn the per-class weights into probabilities by dividing them
    # by the total sample weight
    probs = e_weights / sum(weights)

    # Accumulate p * log2(p) over the classes
    ent = 0
    for p in probs:
        # A class whose samples all have zero weight gives p == 0. Its
        # term is taken as 0 (the limit of p * log2(p)); evaluating
        # 0 * log2(0) directly would give nan and poison the whole sum.
        if p == 0:
            continue
        ent += p * np.log2(p)

    # Flip the sign and return
    return -1 * ent