-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDecision_tree.py
122 lines (84 loc) · 2.32 KB
/
Decision_tree.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/env python
# coding: utf-8
# In[23]:
import pandas as pd
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D
# In[6]:
# Load the experiment workbook and split it into one Series per column.
# Columns 0-19 are bound to s1..s20 and column 20 to `status`; the
# feature/label cell that follows relies on exactly these names.
df = pd.read_excel(r'D:\Books\Project\Draft1_fabien\leak-exp-day2\Exact\release2a.xlsx')
(s1, s2, s3, s4, s5, s6, s7, s8, s9, s10,
 s11, s12, s13, s14, s15, s16, s17, s18, s19, s20,
 status) = (df.iloc[:, col] for col in range(21))
# In[7]:
# Create the model inputs.
# NOTE(review): despite the names, `n_features` holds the feature rows and
# `n_samples` holds the target column -- neither is a count.
n_samples = status
# One 20-value tuple per spreadsheet row, taking s1..s20 side by side.
n_features = [*zip(
    s1, s2, s3, s4, s5, s6, s7, s8, s9, s10,
    s11, s12, s13, s14, s15, s16, s17, s18, s19, s20,
)]
# In[8]:
# Decision tree regression on standardized features.
clf = tree.DecisionTreeRegressor()

# Split the data: half for training, half for testing, with a fixed seed so
# the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    n_features, n_samples, test_size=0.5, random_state=0
)

# Learn the scaling parameters from the training half only and reuse them for
# the test half. Re-fitting the scaler on the test data would scale the two
# halves with different means/variances.
sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)
X_test_std = sc.transform(X_test)

# Train on the same standardized representation that is used for prediction;
# fitting on raw values and predicting on scaled ones would apply the tree's
# split thresholds to numbers on a different scale.
clf.fit(X_train_std, y_train)

# Prediction on the held-out half.
y_pred = clf.predict(X_test_std)

# NOTE(review): accuracy_score is a classification metric (exact matches) and
# may reject continuous regressor output. Confirm that `status` holds discrete
# class labels, or switch to a classifier / a regression metric.
print('percentage Accuracy:', 100 * metrics.accuracy_score(y_test, y_pred))
# In[9]:
# False predictions: print "<number of mismatches> / <number compared>".
n_wrong = (y_test != y_pred).sum()
n_right = (y_test == y_pred).sum()
print(n_wrong, '/', n_right + n_wrong)
# In[22]:
# Graph plotting: predicted vs. test values against the test-sample position.
# k is the 1-based position of every test sample (1..len(X_test)); the later
# plotting cells reuse it as their shared coordinate.
k = list(range(1, len(X_test) + 1))

plt.figure(figsize=(20, 8))
# Only positions 0..150 are shown on the x axis.
plt.xlim(0, 150)
sns.lineplot(y=y_pred, x=k, label="y_pred", color='red')
sns.lineplot(y=y_test, x=k, label="y_test", color='blue')
plt.legend()
plt.show()
# In[29]:
# 3D scatter: predicted value on x, test value on y, and the 1-based sample
# position k on z.
fig = plt.figure(figsize=(20, 8))
ax = fig.add_subplot(1, 1, 1, projection='3d')
ax.scatter(xs=y_pred, ys=y_test, zs=k, c='r', marker='o')
ax.set(
    xlabel='Predicted Value',
    ylabel='Test Value',
    zlabel='Feature Index',
)
plt.show()
# In[57]:
# Zoomed comparison (x limited to 40..120): predictions drawn as a thick
# dotted red line, test values as a plain blue line.
plt.figure(figsize=(20, 8))
plt.plot(
    k, y_pred,
    color='r', linestyle='dotted', linewidth=6.0, label='y_pred',
)
plt.plot(k, y_test, color='b', label='y_test')
plt.xlim(40, 120)
plt.legend()
plt.show()
# In[ ]: