forked from eatamath/Research-Project--Biology
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathServerTesting.py
93 lines (76 loc) · 2.47 KB
/
ServerTesting.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
from module.prepare import *
from itertools import product
from sklearn.externals import joblib
# import pygraphviz
def LGBTuning(Xtrain,Ytrain):
    """Grid-search LightGBM binary-classifier hyper-parameters.

    Builds an ``lgb.LGBMClassifier`` with ``objective='binary'`` and runs a
    5-fold ``GridSearchCV`` scored by ROC AUC over the grid defined below.
    With the current grid only ``subsample_freq``, ``subsample``,
    ``reg_alpha`` and ``reg_lambda`` vary (3 values each, i.e. 81
    combinations); every other entry is pinned to a single value.

    Args:
        Xtrain: Feature matrix handed to ``GridSearchCV.fit``.
        Ytrain: Target labels handed to ``GridSearchCV.fit``.

    Returns:
        The fitted ``GridSearchCV`` instance.
    """
    # `lgb` and `seed` are expected to come from the star import of
    # module.prepare -- presumably the lightgbm package and a global random
    # seed; verify against that module.
    estimator = lgb.LGBMClassifier(
        objective='binary',
        silent=False,
        verbose=1,
        random_state=seed,
        n_jobs=4,
        # class_weight is intentionally left at its default.
    )

    param_grid = dict(
        # Step 1 (now fixed). Previously listed candidates:
        #   learning_rate [0.01, 0.05, 0.1], boosting_type ['gbdt', 'goss'],
        #   n_estimators [50, 200, 500], num_iterations [200, 400, 1000]
        learning_rate=[0.1],  # original note: 0.1
        boosting_type=['gbdt'],  # original note: goss>gbdt
        # NOTE(review): LightGBM documents n_estimators as an alias of
        # num_iterations; both are supplied here -- confirm which one the
        # installed version actually honours.
        n_estimators=[300],
        num_iterations=[2000],  # original note: 2000
        # Step 2 (now fixed). Previously listed candidates:
        #   num_leaves [800], max_bin [127, 255, 511]
        num_leaves=[680],  # original note: 680
        max_bin=[256],
        # Step 3. max_depth [7, 8, 9, 10] was listed but is disabled
        # (original note: "missed").
        colsample_bytree=[0.8],  # original note: 0.8
        subsample_freq=[1, 2, 3],
        subsample=[0.6, 0.8, 1],
        reg_alpha=[0, 0.1, 0.5],
        reg_lambda=[0, 0.1, 0.5],
    )

    print('default params\n', estimator.get_params())

    # refit is left at the GridSearchCV default (refit=False was considered
    # and is disabled). The search itself runs in a single process; the
    # estimator above is configured with n_jobs=4.
    search = GridSearchCV(
        estimator=estimator,
        param_grid=param_grid,
        scoring='roc_auc',
        verbose=3,
        cv=5,
        n_jobs=1,
    )
    search.fit(Xtrain, Ytrain)
    return search
#### configure
# Experiment settings. hyper_params, num_hyper_params and test_ratio are
# assigned here but not read again in the visible part of this script.
hyper_params = GetConfigure()
num_hyper_params = len(hyper_params)
generalize_ratio = 0.3
test_ratio = 0.3
cv = 1
mi_use = True
tuning_mode = True
if tuning_mode:
    # Tuning mode forces a single pass of the loop below.
    cv = 1
cv_results = []

#### main
data, T = ReadData()
for batch in range(cv):
    # Produce the initial train/test split: either mutual-information
    # feature selection on the sparse matrix, or a plain split of the
    # dense matrix.
    if mi_use:
        arr = ToMatrix(data, 'sparse')
        X_train, X_test, Y_train, Y_test = MutualInformationFeatureSelection2(
            arr, data, generalize_ratio)
    else:
        X, Y = ToMatrix(data, 'dense')
        X_train, X_test, Y_train, Y_test = SplitDataset(
            X, Y, generalize_ratio)

    # Both paths are followed by the same random-forest based reduction.
    X_train, X_test, Y_train, Y_test = RandomForestDimensionalityReduction(
        X_train, X_test, Y_train, Y_test)

    if tuning_mode:
        # The grid search does its own cross-validation, so the split is
        # merged back into a single training set first.
        Xtrain, Ytrain = merge_train_test(X_train, X_test, Y_train, Y_test)
        grid = LGBTuning(Xtrain, Ytrain)
        cv_results.append(grid)
        # The output path is fixed, so a later pass would overwrite an
        # earlier dump; with cv == 1 there is only one.
        joblib.dump(grid, './result-temp/2-19-lgb-grid-tune1.m')