-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathMIRCO_run.py
95 lines (79 loc) · 3.58 KB
/
MIRCO_run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# -*- coding: utf-8 -*-
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
import RuleCoverDatasets as RCDS
from MIRCO import MIRCO
# Test problems: callables taken from RuleCoverDatasets. The main loop calls
# each one with the dataset directory and uses its __name__ as the label.
problems = [
    RCDS.banknote,
    RCDS.ILPD,
    RCDS.ionosphere,
    RCDS.transfusion,
    RCDS.liver,
    RCDS.tictactoe,
    RCDS.wdbc,
    RCDS.mammography,
    RCDS.diabetes,
    RCDS.oilspill,
    RCDS.phoneme,
    RCDS.seeds,
    RCDS.wine,
    RCDS.glass,
    RCDS.ecoli,
]

# Text file the per-problem results are appended to.
fname = 'MIRCO_results.txt'
# Value passed as `criterion` to the decision tree and random forest.
crit = 'gini'
# Value passed as `random_state` to the classifiers and the CV splitters.
randomstate = 25
# Main experiment loop.
#
# For every problem: load the data, run a 10-fold stratified outer
# cross-validation, tune a decision tree (DT) and a random forest (RF) with
# an inner 4-fold grid search on each training split, wrap the tuned RF in
# MIRCO, and record test accuracy and rule counts for DT, RF and MIRCO.
# The collected per-fold lists are appended to the results file `fname`.

# Setting up the parameter grids (identical for every problem and fold)
DT_pgrid = {'max_depth': [5, 10, 20]}
RF_pgrid = {'max_depth': [5, 10, 20],
            'n_estimators': [10, 50, 100]}
# Only the results with the heuristic are reported
solver = 'heu'

for problem in problems:
    pname = problem.__name__.upper()
    print(pname)

    # All columns but the last are used as features, the last one as label.
    df = np.array(problem('datasets/'))
    X = df[:, 0:-1]
    y = df[:, -1]

    # Initializing Classifiers
    DTestimator = DecisionTreeClassifier(random_state=randomstate,
                                         criterion=crit)
    RFestimator = RandomForestClassifier(random_state=randomstate,
                                         criterion=crit)

    # Per-fold results for the current problem.
    scores = {'DT': [], 'RF': [], 'MIRCO': []}
    nofrules = {'DT': [], 'RF': [], 'MIRCO': []}
    fracofmissed = []

    skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=randomstate)
    for foldnum, (train_index, test_index) in enumerate(skf.split(X, y),
                                                        start=1):
        print('Fold number: ', foldnum)
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        inner_cv = StratifiedKFold(n_splits=4, shuffle=True,
                                   random_state=randomstate)

        for pgrid, est, name in zip((DT_pgrid, RF_pgrid),
                                    (DTestimator, RFestimator),
                                    ('DT', 'RF')):
            gcv = GridSearchCV(estimator=est,
                               param_grid=pgrid,
                               scoring='accuracy',
                               n_jobs=1,
                               cv=inner_cv,
                               verbose=0,
                               refit=True)
            gcv_fit = gcv.fit(X_train, y_train)

            # Evaluate with the best estimator
            best_estimator = gcv_fit.best_estimator_
            gcv_pred = best_estimator.predict(X_test)
            # accuracy_score expects (y_true, y_pred) in that order.
            scores[name].append(accuracy_score(y_test, gcv_pred))

            if name == 'DT':
                # One rule per leaf of the fitted tree.
                nofrules[name].append(best_estimator.tree_.n_leaves)
            else:  # RF
                MIRCO_estimator = MIRCO(best_estimator, solver)
                mirco_fit = MIRCO_estimator.fit(X_train, y_train)
                MIRCO_pred = mirco_fit.predict(X_test)
                scores['MIRCO'].append(accuracy_score(y_test, MIRCO_pred))
                nofrules['MIRCO'].append(mirco_fit.numOfRules)
                fracofmissed.append(mirco_fit.numOfMissed / len(y_test))
                # Rules in RF are already returned by MIRCO
                nofrules[name].append(mirco_fit.initNumOfRules)

    # Append this problem's results; earlier content of the file is kept.
    with open(fname, 'a') as f:
        print('--->', file=f)
        print(pname, file=f)
        print('Accuracy Scores:', file=f)
        print(scores, file=f)
        print('Number of Rules:', file=f)
        print(nofrules, file=f)
        print('Fractions of Missed Points by MIRCO:', file=f)
        print(fracofmissed, file=f)
        print('<---\n', file=f)