-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathVgregression.py
113 lines (88 loc) · 4.69 KB
/
Vgregression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
#importing the dataset
Dataset = pd.read_csv('vgsales.csv')
#Label Encoding
from sklearn.preprocessing import LabelEncoder
number = LabelEncoder()
Dataset['Platform'] = number.fit_transform(Dataset['Platform'].astype('str'))
Dataset['Genre'] = number.fit_transform(Dataset['Genre'].astype('str'))
Dataset['Publisher'] = number.fit_transform(Dataset['Publisher'].astype('str'))
#extracting the feature vector and the dependant variable vector
columns = ["Platform", "Genre", "Publisher", "NA_Sales", "EU_Sales"]
y = Dataset["Global_Sales"].values
X = Dataset[list(columns)].values
#importing the linear model library
from sklearn import linear_model
regr = linear_model.LinearRegression()
#importing the train test split library and splitting data into 80% for training 20% for testing
from sklearn.model_selection import train_test_split
X_train , X_test , y_train , y_test = train_test_split(X,y,test_size=0.2,random_state=0)
#scaling the data
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
# apply same transformation to test data
X_test = scaler.transform(X_test)
#fit the train data to the linear model
regr.fit(X_train, y_train)
#Printing Accuracy in our Linear model
Accuracy = regr.score(X_train, y_train)
print ("Accuracy in the training data with Linear Regression Model: ", Accuracy*100, "%")
accuracy = regr.score(X_test, y_test)
print ("Accuracy in the test data with Linear Regression model", accuracy*100, "%")
#Comparing the model predicted results vs the Test set
y_pred_Model1 = regr.predict(X_test)
y_pred_Model1
compare_Model1 = np.concatenate((y_pred_Model1.reshape(len(y_pred_Model1),1), y_test.reshape(len(y_test),1)),1)
compare_Model1
####*************************
#Using DecisionTreeRegressor :
from sklearn.tree import DecisionTreeRegressor
DTR = DecisionTreeRegressor()
DTR2 =DecisionTreeRegressor(min_samples_leaf=0.2)
DTR3 =DecisionTreeRegressor(min_samples_leaf=15)
DTR4 =DecisionTreeRegressor(min_samples_leaf=30)
DTR5 =DecisionTreeRegressor(min_samples_leaf=35)
DTR.fit(X_train, y_train)
DTR2.fit(X_train,y_train)
DTR3.fit(X_train,y_train)
DTR4.fit(X_train,y_train)
DTR5.fit(X_train,y_train)
#printing Accuracy in our DTR Model
Accuracy = DTR.score(X_train, y_train)
print ("Accuracy in the training data with Decision Tree Regression model before tuning : ", Accuracy*100, "%")
accuracy = DTR.score(X_test, y_test)
print ("Accuracy in the test data with Decision Tree Regression model before tuining : ", accuracy*100, "%")
##########################
Accuracy2 = DTR2.score(X_train, y_train)
print ("\nAccuracy in the training data with Decision Tree Regression model After tuning with min_samples_leaf=3 : ", Accuracy2*100, "%")
accuracy2 = DTR2.score(X_test, y_test)
print ("Accuracy in the test data with Decision Tree Regression model After tuning with min_samples_leaf=3 : ", accuracy2*100, "%")
##########################
Accuracy3 = DTR3.score(X_train, y_train)
print ("\nAccuracy in the training data with Decision Tree Regression model After tuning with min_samples_leaf=15 : ", Accuracy3*100, "%")
accuracy3 = DTR3.score(X_test, y_test)
print ("Accuracy in the test data with Decision Tree Regression model After tuning with min_samples_leaf=15 : ", accuracy3*100, "%")
##########################
Accuracy4 = DTR4.score(X_train, y_train)
print ("\nAccuracy in the training data with Decision Tree Regression model After tuning with min_samples_leaf=30 : ", Accuracy4*100, "%")
accuracy4 = DTR4.score(X_test, y_test)
print ("Accuracy in the test data with Decision Tree Regression model After tuning with min_samples_leaf=30 : ", accuracy4*100, "%")
##########################
Accuracy5 = DTR5.score(X_train, y_train)
print ("\nAccuracy in the training data with Decision Tree Regression model After tuning with min_samples_leaf=35 : ", Accuracy5*100, "%")
accuracy5 = DTR5.score(X_test, y_test)
print ("Accuracy in the test data with Decision Tree Regression model After tuning with min_samples_leaf=35 : ", accuracy5*100, "%")
##########################
#Comparing the model predicted results vs the Test set
y_pred_Model2 = DTR.predict(X_test)
y_pred_Model2
y_pred_Model2_Tuned = DTR5.predict(X_test)
y_pred_Model2_Tuned
compare_Model2 = np.concatenate((y_pred_Model2.reshape(len(y_pred_Model2),1), y_test.reshape(len(y_test),1)),1)
compare_Model2
compare_Model2_Tuned = np.concatenate((y_pred_Model2_Tuned.reshape(len(y_pred_Model2_Tuned),1), y_test.reshape(len(y_test),1)),1)
compare_Model2_Tuned