Commit

Fix Catboost tuning
ThomasMeissnerDS committed Jan 20, 2025
1 parent 5b96c69 commit 0f6e1c1
Showing 4 changed files with 86 additions and 4 deletions.
6 changes: 4 additions & 2 deletions bluecast/config/training_config.py
@@ -515,7 +515,9 @@ def __init__(
         catboost_eval_metric_tune_direction: str = "minimize",
     ):
         if bootstrap_type is None:
-            bootstrap_type = ["Bayesian", "Poisson", "MVS", "No"]
+            bootstrap_type = [
+                "Bayesian"
+            ]  # Poisson not possible on CPU, "MVS" requires min samples
         if grow_policy is None:
             grow_policy = ["SymmetricTree"]

@@ -613,7 +615,7 @@ def __init__(
         catboost_eval_metric_tune_direction: str = "minimize",
     ):
         if bootstrap_type is None:
-            bootstrap_type = ["Bayesian", "Poisson", "MVS", "No"]
+            bootstrap_type = ["Bayesian", "Poisson", "MVS"]
         if grow_policy is None:
             grow_policy = ["SymmetricTree"]
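Context for the narrowed defaults: as the in-diff comment notes, CatBoost's Poisson bootstrap only runs on GPU and MVS needs a minimum number of samples, so leaving them in the default CPU search space can abort a tuning study on small datasets. A minimal sketch, assuming an Optuna objective like BlueCast's tuner uses (the helper name and bounds are illustrative, not part of the library), of keeping the search space consistent with the sampled bootstrap_type:

import optuna


def suggest_bootstrap_params(trial: optuna.Trial, on_gpu: bool) -> dict:
    # Illustrative helper, not BlueCast's API. Poisson is GPU-only in
    # CatBoost, so it is only offered as a candidate when training on GPU.
    candidates = ["Bayesian", "MVS", "Poisson"] if on_gpu else ["Bayesian", "MVS"]
    bootstrap_type = trial.suggest_categorical("bootstrap_type", candidates)
    params = {"bootstrap_type": bootstrap_type}
    if bootstrap_type == "Bayesian":
        # bagging_temperature is only meaningful for the Bayesian bootstrap.
        params["bagging_temperature"] = trial.suggest_float(
            "bagging_temperature", 0.0, 10.0
        )
    else:
        # subsample applies to the non-Bayesian bootstrap types.
        params["subsample"] = trial.suggest_float("subsample", 0.5, 1.0)
    return params

Sampling the dependent parameter only inside the matching branch avoids ever building an invalid combination, which is the same constraint the guards in the next file enforce after the fact.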
9 changes: 8 additions & 1 deletion bluecast/ml_modelling/catboost.py
@@ -171,7 +171,6 @@ def objective(trial):
                 "objective": self.conf_catboost.catboost_objective,
                 "eval_metric": self.conf_catboost.catboost_eval_metric,
                 "random_seed": self.conf_training.global_random_state,
-                # Some typical CatBoost hyperparameters:
                 "learning_rate": trial.suggest_float(
                     "learning_rate",
                     self.conf_catboost.learning_rate_min,
@@ -228,6 +227,10 @@ def objective(trial):
                     log=True,
                 ),
             }
+            if params["bootstrap_type"] == "Bayesian":
+                params["bagging_temperature"] = None
+                params["subsample"] = None
+
             params = {**params, **train_on}

             sample_weight_choice = trial.suggest_categorical(
@@ -380,6 +383,10 @@ def objective(trial):
             }
             final_best_params = {**final_best_params, **train_on}

+            if final_best_params["bootstrap_type"] == "Bayesian":
+                final_best_params.pop("subsample", None)
+                final_best_params.pop("bagging_temperature", None)
+
             final_best_params = update_params_with_best_params(
                 final_best_params, best_param
             )
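The two guards above are the core of the fix: a trial can draw bootstrap_type="Bayesian" while the parameter dict still carries subsample, a combination CatBoost rejects, so both tuned sampling keys are neutralized (set to None inside the objective, popped before the final refit) and CatBoost falls back to its defaults. A minimal standalone sketch of that sanitizing step; the helper name is illustrative, not BlueCast's API:

def drop_bayesian_incompatible(params: dict) -> dict:
    # When the Bayesian bootstrap is sampled, remove the tuned sampling
    # parameters: subsample is only valid for the Bernoulli/MVS/Poisson
    # bootstraps, and the commit drops bagging_temperature alongside it so
    # the final dict carries no leftover values from other branches.
    cleaned = dict(params)
    if cleaned.get("bootstrap_type") == "Bayesian":
        cleaned.pop("subsample", None)
        cleaned.pop("bagging_temperature", None)
    return cleaned


# Example: a Bayesian trial that still carries a subsample value.
tuned = {"bootstrap_type": "Bayesian", "subsample": 0.8, "depth": 6}
assert drop_bayesian_incompatible(tuned) == {"bootstrap_type": "Bayesian", "depth": 6}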
9 changes: 9 additions & 0 deletions bluecast/ml_modelling/catboost_regression.py
@@ -222,6 +222,11 @@ def objective(trial):
                     log=True,
                 ),
             }
+
+            if params["bootstrap_type"] == "Bayesian":
+                params["bagging_temperature"] = None
+                params["subsample"] = None
+
             params = {**params, **train_on}

             train_pool = Pool(x_train, label=y_train, cat_features=self.cat_columns)
@@ -354,6 +359,10 @@ def objective(trial):
             # Merge device or other settings
             final_best_params = {**final_best_params, **train_on}

+            if final_best_params["bootstrap_type"] == "Bayesian":
+                final_best_params.pop("subsample", None)
+                final_best_params.pop("bagging_temperature", None)
+
             # Optionally apply a custom function to finalize best params
             final_best_params = update_params_with_best_params(
                 final_best_params, catboost_best_param
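The regression path receives the identical guard in both places. For context, a hypothetical repro of the failure mode being guarded against (not taken from the repo; assumes a recent CatBoost release, where subsample is only accepted with the Bernoulli, MVS, or Poisson bootstraps):

from catboost import CatBoostError, CatBoostRegressor

try:
    model = CatBoostRegressor(
        bootstrap_type="Bayesian",
        subsample=0.8,  # valid only for Bernoulli/MVS/Poisson bootstraps
        iterations=10,
        verbose=0,
    )
    model.fit([[0.0], [1.0], [2.0], [3.0]], [0.0, 1.0, 2.0, 3.0])
except CatBoostError as err:
    # An unguarded Optuna trial could previously produce exactly this
    # parameter combination mid-study.
    print(f"CatBoost rejected the parameter combination: {err}")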
66 changes: 65 additions & 1 deletion bluecast/tests/test_catboost.py
@@ -17,7 +17,71 @@ def test_bluecast_without_hyperparam_tuning():
     # Create an instance of the BlueCast class with the custom model
     bluecast = BlueCast(
         class_problem="binary",
-        ml_model=CatboostModel(class_problem='binary', conf_training=train_config, conf_catboost=catboost_pram_config),
+        ml_model=CatboostModel(
+            class_problem="binary",
+            conf_training=train_config,
+            conf_catboost=catboost_pram_config,
+        ),
         conf_xgboost=catboost_pram_config,
         conf_training=train_config,
     )
+
+    # Create some sample data for testing
+    x_train = pd.DataFrame(
+        {
+            "feature1": [i for i in range(20)],
+            "feature2": [i for i in range(20)],
+            "feature3": [i for i in range(20)],
+            "feature4": [i for i in range(20)],
+            "feature5": [i for i in range(20)],
+            "feature6": [i for i in range(20)],
+        }
+    )
+    y_train = pd.Series([0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1])
+    x_test = pd.DataFrame(
+        {
+            "feature1": [i for i in range(10)],
+            "feature2": [i for i in range(10)],
+            "feature3": [i for i in range(10)],
+            "feature4": [i for i in range(10)],
+            "feature5": [i for i in range(10)],
+            "feature6": [i for i in range(10)],
+        }
+    )
+
+    x_train["target"] = y_train
+
+    # Fit the BlueCast model using the custom model
+    bluecast.fit(x_train, "target")
+
+    # Predict on the test data using the custom model
+    predicted_probas, predicted_classes = bluecast.predict(x_test)
+
+    # Assert the expected results
+    assert isinstance(predicted_probas, np.ndarray)
+    assert isinstance(predicted_classes, np.ndarray)
+    print(bluecast.experiment_tracker.experiment_id)
+    assert (
+        len(bluecast.experiment_tracker.experiment_id) == 0
+    )  # due to custom model and fit method
+
+
+def test_bluecast_with_hyperparam_tuning():
+    train_config = TrainingConfig()
+    train_config.hyperparameter_tuning_rounds = 10
+    train_config.hypertuning_cv_folds = 2
+    train_config.autotune_model = True
+
+    catboost_pram_config = CatboostTuneParamsConfig()
+
+    # Create an instance of the BlueCast class with the custom model
+    bluecast = BlueCast(
+        class_problem="binary",
+        ml_model=CatboostModel(
+            class_problem="binary",
+            conf_training=train_config,
+            conf_catboost=catboost_pram_config,
+        ),
+        conf_xgboost=catboost_pram_config,
+        conf_training=train_config,
+    )
