Skip to content

Commit

Permalink
Update Catboost tuning
Browse files Browse the repository at this point in the history
  • Loading branch information
ThomasMeissnerDS committed Jan 20, 2025
1 parent 0f6e1c1 commit a10289f
Show file tree
Hide file tree
Showing 8 changed files with 168 additions and 14 deletions.
10 changes: 8 additions & 2 deletions bluecast/blueprints/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
import pandas as pd

from bluecast.config.training_config import (
CatboostFinalParamConfig,
CatboostTuneParamsConfig,
TrainingConfig,
XgboostFinalParamConfig,
XgboostTuneParamsConfig,
Expand Down Expand Up @@ -97,8 +99,12 @@ def __init__(
Union[BoostaRootaWrapper, CustomPreprocessing]
] = None,
conf_training: Optional[TrainingConfig] = None,
conf_xgboost: Optional[XgboostTuneParamsConfig] = None,
conf_params_xgboost: Optional[XgboostFinalParamConfig] = None,
conf_xgboost: Optional[
Union[XgboostTuneParamsConfig, CatboostTuneParamsConfig]
] = None,
conf_params_xgboost: Optional[
Union[XgboostFinalParamConfig, CatboostFinalParamConfig]
] = None,
experiment_tracker: Optional[ExperimentTracker] = None,
single_fold_eval_metric_func: Optional[ClassificationEvalWrapper] = None,
):
Expand Down
10 changes: 8 additions & 2 deletions bluecast/blueprints/cast_cv.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

from bluecast.blueprints.cast import BlueCast
from bluecast.config.training_config import (
CatboostFinalParamConfig,
CatboostTuneParamsConfig,
TrainingConfig,
XgboostFinalParamConfig,
XgboostTuneParamsConfig,
Expand Down Expand Up @@ -58,8 +60,12 @@ def __init__(
cat_columns: Optional[List[Union[str, float, int]]] = None,
stratifier: Optional[Any] = None,
conf_training: Optional[TrainingConfig] = None,
conf_xgboost: Optional[XgboostTuneParamsConfig] = None,
conf_params_xgboost: Optional[XgboostFinalParamConfig] = None,
conf_xgboost: Optional[
Union[XgboostTuneParamsConfig, CatboostTuneParamsConfig]
] = None,
conf_params_xgboost: Optional[
Union[XgboostFinalParamConfig, CatboostFinalParamConfig]
] = None,
experiment_tracker: Optional[ExperimentTracker] = None,
custom_in_fold_preprocessor: Optional[CustomPreprocessing] = None,
custom_last_mile_computation: Optional[CustomPreprocessing] = None,
Expand Down
10 changes: 8 additions & 2 deletions bluecast/blueprints/cast_cv_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

from bluecast.blueprints.cast_regression import BlueCastRegression
from bluecast.config.training_config import (
CatboostRegressionFinalParamConfig,
CatboostTuneParamsRegressionConfig,
TrainingConfig,
XgboostRegressionFinalParamConfig,
XgboostTuneParamsRegressionConfig,
Expand Down Expand Up @@ -61,8 +63,12 @@ def __init__(
cat_columns: Optional[List[Union[str, float, int]]] = None,
stratifier: Optional[Any] = None,
conf_training: Optional[TrainingConfig] = None,
conf_xgboost: Optional[XgboostTuneParamsRegressionConfig] = None,
conf_params_xgboost: Optional[XgboostRegressionFinalParamConfig] = None,
conf_xgboost: Optional[
Union[XgboostTuneParamsRegressionConfig, CatboostTuneParamsRegressionConfig]
] = None,
conf_params_xgboost: Optional[
Union[XgboostRegressionFinalParamConfig, CatboostRegressionFinalParamConfig]
] = None,
experiment_tracker: Optional[ExperimentTracker] = None,
custom_in_fold_preprocessor: Optional[CustomPreprocessing] = None,
custom_last_mile_computation: Optional[CustomPreprocessing] = None,
Expand Down
10 changes: 8 additions & 2 deletions bluecast/blueprints/cast_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
from sklearn.metrics import mean_squared_error

from bluecast.config.training_config import (
CatboostRegressionFinalParamConfig,
CatboostTuneParamsRegressionConfig,
TrainingConfig,
XgboostRegressionFinalParamConfig,
XgboostTuneParamsRegressionConfig,
Expand Down Expand Up @@ -93,8 +95,12 @@ def __init__(
Union[BoostaRootaWrapper, CustomPreprocessing]
] = None,
conf_training: Optional[TrainingConfig] = None,
conf_xgboost: Optional[XgboostTuneParamsRegressionConfig] = None,
conf_params_xgboost: Optional[XgboostRegressionFinalParamConfig] = None,
conf_xgboost: Optional[
Union[XgboostTuneParamsRegressionConfig, CatboostTuneParamsRegressionConfig]
] = None,
conf_params_xgboost: Optional[
Union[XgboostRegressionFinalParamConfig, CatboostRegressionFinalParamConfig]
] = None,
experiment_tracker: Optional[ExperimentTracker] = None,
single_fold_eval_metric_func: Optional[RegressionEvalWrapper] = None,
):
Expand Down
5 changes: 3 additions & 2 deletions bluecast/config/training_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,7 +516,8 @@ def __init__(
):
if bootstrap_type is None:
bootstrap_type = [
"Bayesian"
"Bayesian",
"No",
] # Poisson not possible on CPU, "MVS" requires min samples
if grow_policy is None:
grow_policy = ["SymmetricTree"]
Expand Down Expand Up @@ -615,7 +616,7 @@ def __init__(
catboost_eval_metric_tune_direction: str = "minimize",
):
if bootstrap_type is None:
bootstrap_type = ["Bayesian", "Poisson", "MVS"]
bootstrap_type = ["Bayesian", "No"] # "Poisson", "MVS"
if grow_policy is None:
grow_policy = ["SymmetricTree"]

Expand Down
4 changes: 2 additions & 2 deletions bluecast/ml_modelling/catboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ def objective(trial):
log=True,
),
}
if params["bootstrap_type"] == "Bayesian":
if params["bootstrap_type"] in ["Bayesian", "No"]:
params["bagging_temperature"] = None
params["subsample"] = None

Expand Down Expand Up @@ -383,7 +383,7 @@ def objective(trial):
}
final_best_params = {**final_best_params, **train_on}

if final_best_params["bootstrap_type"] == "Bayesian":
if final_best_params["bootstrap_type"] in ["Bayesian", "No"]:
final_best_params.pop("subsample", None)
final_best_params.pop("bagging_temperature", None)

Expand Down
4 changes: 2 additions & 2 deletions bluecast/ml_modelling/catboost_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ def objective(trial):
),
}

if params["bootstrap_type"] == "Bayesian":
if params["bootstrap_type"] in ["Bayesian", "No"]:
params["bagging_temperature"] = None
params["subsample"] = None

Expand Down Expand Up @@ -359,7 +359,7 @@ def objective(trial):
# Merge device or other settings
final_best_params = {**final_best_params, **train_on}

if final_best_params["bootstrap_type"] == "Bayesian":
if final_best_params["bootstrap_type"] in ["Bayesian", "No"]:
final_best_params.pop("subsample", None)
final_best_params.pop("bagging_temperature", None)

Expand Down
129 changes: 129 additions & 0 deletions bluecast/tests/test_catboost_regression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
import numpy as np
import pandas as pd

from bluecast.blueprints.cast_regression import BlueCastRegression
from bluecast.config.training_config import (
CatboostTuneParamsRegressionConfig,
TrainingConfig,
)
from bluecast.ml_modelling.catboost_regression import CatboostModelRegression


def test_BlueCastRegression_without_hyperparam_tuning():
    """Fit and predict with a CatBoost regression model while autotuning is off.

    A ``CatboostModelRegression`` is injected into ``BlueCastRegression`` as
    the ``ml_model`` and the CatBoost tuning config is handed over through the
    ``conf_xgboost`` argument. ``autotune_model`` is set to ``False`` on the
    training config before fitting.
    """
    train_config = TrainingConfig()
    train_config.hyperparameter_tuning_rounds = 10
    train_config.hypertuning_cv_folds = 2
    train_config.autotune_model = False

    catboost_param_config = CatboostTuneParamsRegressionConfig()

    # Use the same problem type for the blueprint and the wrapped model; the
    # sibling tuning test configures both as "regression" as well.
    bluecast = BlueCastRegression(
        class_problem="regression",
        ml_model=CatboostModelRegression(
            class_problem="regression",
            conf_training=train_config,
            conf_catboost=catboost_param_config,
        ),
        conf_xgboost=catboost_param_config,
        conf_training=train_config,
    )

    # Small synthetic data set: six identical integer features, 20 training
    # rows with an alternating 0/1 target and 10 rows to predict on.
    x_train = pd.DataFrame(
        {
            "feature1": list(range(20)),
            "feature2": list(range(20)),
            "feature3": list(range(20)),
            "feature4": list(range(20)),
            "feature5": list(range(20)),
            "feature6": list(range(20)),
        }
    )
    y_train = pd.Series([0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1])
    x_test = pd.DataFrame(
        {
            "feature1": list(range(10)),
            "feature2": list(range(10)),
            "feature3": list(range(10)),
            "feature4": list(range(10)),
            "feature5": list(range(10)),
            "feature6": list(range(10)),
        }
    )

    x_train["target"] = y_train

    # Fit the pipeline on the frame that now carries the target column
    bluecast.fit(x_train, "target")

    # NOTE(review): this unpacking assumes predict() yields exactly two
    # arrays -- confirm against CatboostModelRegression.predict.
    predicted_probas, predicted_classes = bluecast.predict(x_test)

    # Assert the expected results
    assert isinstance(predicted_probas, np.ndarray)
    assert isinstance(predicted_classes, np.ndarray)
    assert (
        len(bluecast.experiment_tracker.experiment_id) == 0
    )  # due to custom model and fit method


def test_BlueCastRegression_with_hyperparam_tuning():
    """Fit and predict with a CatBoost regression model while autotuning is on.

    The training config enables ``autotune_model`` with 10 tuning rounds and
    2 CV folds; the CatBoost tuning config is passed to the blueprint through
    its ``conf_xgboost`` argument.
    """
    train_config = TrainingConfig()
    train_config.hyperparameter_tuning_rounds = 10
    train_config.hypertuning_cv_folds = 2
    train_config.autotune_model = True

    catboost_pram_config = CatboostTuneParamsRegressionConfig()

    # Synthetic data: six identical integer features per frame
    feature_names = (
        "feature1",
        "feature2",
        "feature3",
        "feature4",
        "feature5",
        "feature6",
    )
    x_train = pd.DataFrame({name: list(range(20)) for name in feature_names})
    x_test = pd.DataFrame({name: list(range(10)) for name in feature_names})

    # Alternating 0/1 target, one value per training row
    y_train = pd.Series([0, 1] * 10)
    x_train["target"] = y_train

    # Wrap the CatBoost regression model in the regression blueprint
    catboost_model = CatboostModelRegression(
        class_problem="regression",
        conf_training=train_config,
        conf_catboost=catboost_pram_config,
    )
    bluecast = BlueCastRegression(
        class_problem="regression",
        ml_model=catboost_model,
        conf_xgboost=catboost_pram_config,
        conf_training=train_config,
    )

    # Train on the frame including the target column, then predict
    bluecast.fit(x_train, "target")
    predicted_probas, predicted_classes = bluecast.predict(x_test)

    # Both returned objects must be numpy arrays
    assert isinstance(predicted_probas, np.ndarray)
    assert isinstance(predicted_classes, np.ndarray)

    print(bluecast.experiment_tracker.experiment_id)
    # due to custom model and fit method
    assert len(bluecast.experiment_tracker.experiment_id) == 0

0 comments on commit a10289f

Please sign in to comment.