# example2.py
"""
Example of using CMRC with some of the common classification datasets.
"""
import numpy as np
import warnings
import time
from sklearn.model_selection import StratifiedKFold
from sklearn.impute import SimpleImputer
from sklearn import preprocessing
from minimax_risk_classifiers.CMRC import CMRC
#import the datasets
from datasets import *
# Benchmark datasets: `loaders` holds the loading functions and `dataName`
# the matching display names. The two lists are index-aligned, so entries
# must be added or removed in both at the same position.
loaders = [
    load_mammographic,
    load_haberman,
    load_indian_liver,
    load_diabetes,
    load_credit,
]
dataName = [
    "mammographic",
    "haberman",
    "indian_liver",
    "diabetes",
    "credit",
]
def runCMRC(phi, loss):
    """Cross-validate a CMRC classifier on every benchmark dataset.

    For each loader in the module-level ``loaders`` list the data is loaded,
    missing values are imputed with the column median, and a 10-fold
    stratified cross-validation is run. Within each fold the features are
    standardized using statistics of the training split only. The mean and
    standard deviation of the fold error rates, together with the average
    training time per fold, are printed for each dataset.

    Parameters
    ----------
    phi
        Forwarded unchanged as ``CMRC(phi=...)``.
    loss
        Forwarded unchanged as ``CMRC(loss=...)``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(res_mean, res_std)``: per-dataset mean and standard deviation of
        the cross-validation error, in the same order as ``dataName``.
    """
    random_seed = 0
    n_splits = 10
    res_mean = np.zeros(len(dataName))
    res_std = np.zeros(len(dataName))
    np.random.seed(random_seed)

    # Iterate through each of the datasets and fit the MRC classifier.
    for j, load in enumerate(loaders):

        # Loading the dataset
        X, Y = load(return_X_y=True)
        r = len(np.unique(Y))
        n, d = X.shape

        # Print the dataset name
        print(" ############## \n" + dataName[j] + " n= " + str(n) + " , d= " + str(d) + ", cardY= " + str(r))

        clf = CMRC(r=r, phi=phi, loss=loss)

        # Preprocess
        # NOTE(review): the imputer is fitted on the full dataset before the
        # train/test split, so test rows contribute to the medians.
        trans = SimpleImputer(strategy='median')
        X = trans.fit_transform(X, Y)

        # Generate the partitions of the stratified cross-validation.
        # random_state only has an effect when shuffle=True; recent
        # scikit-learn releases raise ValueError if it is given without it.
        cv = StratifiedKFold(n_splits=n_splits, shuffle=True,
                             random_state=random_seed)

        np.random.seed(random_seed)
        cvError = []
        auxTime = 0

        # Paired and stratified cross-validation
        for train_index, test_index in cv.split(X, Y):

            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = Y[train_index], Y[test_index]

            # Normalizing the data (scaler fitted on the training split only)
            std_scale = preprocessing.StandardScaler().fit(X_train, y_train)
            X_train = std_scale.transform(X_train)
            X_test = std_scale.transform(X_test)

            # Time only the call to fit; perf_counter is a monotonic
            # high-resolution clock intended for measuring durations.
            startTime = time.perf_counter()
            clf.fit(X_train, y_train)
            auxTime += time.perf_counter() - startTime

            y_pred = clf.predict(X_test)

            # Fraction of misclassified test samples in this fold
            cvError.append(np.average(y_pred != y_test))

        res_mean[j] = np.average(cvError)
        res_std[j] = np.std(cvError)

        print(" error= " + ":\t" + str(res_mean[j]) + "\t+/-\t" + str(res_std[j]) +
              "\navg_train_time= " + ":\t" + str(auxTime / n_splits) + ' secs' +
              "\n ############## \n\n\n")

    return res_mean, res_std
if __name__ == '__main__':

    # Silence all warnings so that only the experiment output is shown.
    warnings.simplefilter('ignore')

    print('******************** Example 2 (CMRC with the additional marginal constraints) ********************** \n\n')

    # Each entry pairs the banner printed before a run with the loss passed
    # to runCMRC; both runs use the threshold feature mapping.
    experiments = (
        ('\t\t 1. Using 0-1 loss and threshold feature mapping\n\n', '0-1'),
        ('\t\t 2. Using log loss and threshold feature mapping\n\n', 'log'),
    )
    for banner, loss_name in experiments:
        print(banner)
        runCMRC(phi='threshold', loss=loss_name)