-
Notifications
You must be signed in to change notification settings - Fork 178
/
catboost_simple.py
62 lines (45 loc) · 2.04 KB
/
catboost_simple.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
"""
Optuna example that optimizes a classifier configuration for cancer dataset using
Catboost.
In this example, we optimize the validation accuracy of cancer detection using
Catboost. We optimize both the choice of booster model and their hyperparameters.
"""
import numpy as np
import optuna
import catboost as cb
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
def objective(trial):
data, target = load_breast_cancer(return_X_y=True)
train_x, valid_x, train_y, valid_y = train_test_split(data, target, test_size=0.3)
param = {
"objective": trial.suggest_categorical("objective", ["Logloss", "CrossEntropy"]),
"colsample_bylevel": trial.suggest_float("colsample_bylevel", 0.01, 0.1),
"depth": trial.suggest_int("depth", 1, 12),
"boosting_type": trial.suggest_categorical("boosting_type", ["Ordered", "Plain"]),
"bootstrap_type": trial.suggest_categorical(
"bootstrap_type", ["Bayesian", "Bernoulli", "MVS"]
),
"used_ram_limit": "3gb",
}
if param["bootstrap_type"] == "Bayesian":
param["bagging_temperature"] = trial.suggest_float("bagging_temperature", 0, 10)
elif param["bootstrap_type"] == "Bernoulli":
param["subsample"] = trial.suggest_float("subsample", 0.1, 1)
gbm = cb.CatBoostClassifier(**param)
gbm.fit(train_x, train_y, eval_set=[(valid_x, valid_y)], verbose=0, early_stopping_rounds=100)
preds = gbm.predict(valid_x)
pred_labels = np.rint(preds)
accuracy = accuracy_score(valid_y, pred_labels)
return accuracy
if __name__ == "__main__":
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100, timeout=600)
print("Number of finished trials: {}".format(len(study.trials)))
print("Best trial:")
trial = study.best_trial
print(" Value: {}".format(trial.value))
print(" Params: ")
for key, value in trial.params.items():
print(" {}: {}".format(key, value))