Skip to content

Commit

Permalink
Additional xgboost refactorings
Browse files Browse the repository at this point in the history
  • Loading branch information
ThomasMeissnerDS committed Jan 17, 2025
1 parent e17ab0f commit e40211b
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 41 deletions.
28 changes: 28 additions & 0 deletions bluecast/ml_modelling/base_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import optuna
import pandas as pd
import xgboost as xgb
from sklearn.utils import class_weight

from bluecast.config.training_config import (
TrainingConfig,
Expand Down Expand Up @@ -166,6 +167,33 @@ def _load_experiment_tracker(self, experiment_tracker) -> None:
else:
self.experiment_tracker = experiment_tracker

def _create_d_matrices(self, x_train, y_train, x_test, y_test):
    """Build the XGBoost ``DMatrix`` pair for a train/test split.

    When sample weighting is enabled in the config and the task is a
    classification problem ("binary" or "multiclass"), balanced sample
    weights are computed from the training labels and attached to the
    training matrix; the test matrix is never weighted.

    :param x_train: Training features.
    :param y_train: Training labels/targets.
    :param x_test: Test features.
    :param y_test: Test labels/targets.
    :return: Tuple ``(d_train, d_test)`` of ``xgb.DMatrix`` objects.
    """
    # Categorical handling is driven by a single training-config flag.
    enable_cat = self.conf_training.cat_encoding_via_ml_algorithm

    train_kwargs = {"label": y_train, "enable_categorical": enable_cat}
    weighting_wanted = self.conf_params_xgboost.sample_weight and self.class_problem in (
        "binary",
        "multiclass",
    )
    if weighting_wanted:
        # Balanced weights counteract class imbalance in the training split.
        train_kwargs["weight"] = class_weight.compute_sample_weight(
            class_weight="balanced", y=y_train
        )

    d_train = xgb.DMatrix(x_train, **train_kwargs)
    d_test = xgb.DMatrix(x_test, label=y_test, enable_categorical=enable_cat)
    return d_train, d_test

def concat_prepare_full_train_datasets(
self,
*,
Expand Down
28 changes: 2 additions & 26 deletions bluecast/ml_modelling/xgboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def fit(
y_test=y_test,
)

d_train, d_test = self.create_d_matrices(x_train, y_train, x_test, y_test)
d_train, d_test = self._create_d_matrices(x_train, y_train, x_test, y_test)
eval_set = [(d_test, "test")]

steps = self.conf_params_xgboost.params.pop("steps", 300)
Expand Down Expand Up @@ -390,30 +390,6 @@ def objective(trial):
"sample_weight"
]

def create_d_matrices(self, x_train, y_train, x_test, y_test):
    """Create the train/test ``xgb.DMatrix`` objects for model fitting.

    If ``sample_weight`` is set in the xgboost config params, balanced
    sample weights derived from the training labels are attached to the
    training matrix only.

    :param x_train: Training features.
    :param y_train: Training labels.
    :param x_test: Test features.
    :param y_test: Test labels.
    :return: Tuple ``(d_train, d_test)``.
    """
    enable_cat = self.conf_training.cat_encoding_via_ml_algorithm

    train_kwargs = {"label": y_train, "enable_categorical": enable_cat}
    if self.conf_params_xgboost.sample_weight:
        # Compensate class imbalance via sklearn's balanced weighting.
        train_kwargs["weight"] = class_weight.compute_sample_weight(
            class_weight="balanced", y=y_train
        )

    d_train = xgb.DMatrix(x_train, **train_kwargs)
    d_test = xgb.DMatrix(x_test, label=y_test, enable_categorical=enable_cat)
    return d_train, d_test

def train_single_fold_model(
self, d_train, d_test, y_test, param, steps, pruning_callback
):
Expand Down Expand Up @@ -573,7 +549,7 @@ def fine_tune(
logging.info("Start grid search fine tuning of Xgboost model.")

def objective(trial):
d_train, d_test = self.create_d_matrices(x_train, y_train, x_test, y_test)
d_train, d_test = self._create_d_matrices(x_train, y_train, x_test, y_test)

pruning_callback = optuna.integration.XGBoostPruningCallback(
trial, f"test-{self.conf_xgboost.xgboost_eval_metric}"
Expand Down
17 changes: 2 additions & 15 deletions bluecast/ml_modelling/xgboost_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def fit(
y_test=y_test,
)

d_train, d_test = self.create_d_matrices(x_train, y_train, x_test, y_test)
d_train, d_test = self._create_d_matrices(x_train, y_train, x_test, y_test)
eval_set = [(d_test, "test")]

steps = self.conf_params_xgboost.params.pop("steps", 300)
Expand Down Expand Up @@ -389,19 +389,6 @@ def objective(trial):
logging.info(f"Best params: {self.conf_params_xgboost.params}")
print(f"Best params: {self.conf_params_xgboost.params}")

def create_d_matrices(self, x_train, y_train, x_test, y_test):
    """Create the train/test ``xgb.DMatrix`` objects for regression fitting.

    Regression targets carry no class weights, so both matrices are built
    identically apart from the data they wrap.

    :param x_train: Training features.
    :param y_train: Training targets.
    :param x_test: Test features.
    :param y_test: Test targets.
    :return: Tuple ``(d_train, d_test)``.
    """
    enable_cat = self.conf_training.cat_encoding_via_ml_algorithm
    d_train = xgb.DMatrix(x_train, label=y_train, enable_categorical=enable_cat)
    d_test = xgb.DMatrix(x_test, label=y_test, enable_categorical=enable_cat)
    return d_train, d_test

def train_single_fold_model(
self, d_train, d_test, y_test, param, steps, pruning_callback
):
Expand Down Expand Up @@ -549,7 +536,7 @@ def fine_tune(
logging.info("Start grid search fine tuning of Xgboost model.")

def objective(trial):
d_train, d_test = self.create_d_matrices(x_train, y_train, x_test, y_test)
d_train, d_test = self._create_d_matrices(x_train, y_train, x_test, y_test)

pruning_callback = optuna.integration.XGBoostPruningCallback(
trial, f"test-{self.conf_xgboost.xgboost_eval_metric}"
Expand Down

0 comments on commit e40211b

Please sign in to comment.