Commit

Add catboost implementation skeleton
ThomasMeissnerDS committed Jan 19, 2025
1 parent 1ca3623 commit e0ba4ad
Showing 4 changed files with 1,887 additions and 4 deletions.
241 changes: 237 additions & 4 deletions bluecast/config/training_config.py
@@ -173,10 +173,7 @@ def dict(self):
return vars(self)


# TODO: CREATE BASE CLASS WITH VALIDATIONS FOR ALL FIELD and to_dict func
# TODO: MOVE LIST defaults into constructor


# Xgboost
class XgboostTuneParamsConfig:
"""Define hyperparameter tuning search space.
@@ -463,3 +460,239 @@ class XgboostRegressionFinalParamConfig:
}
sample_weight: Optional[Dict[str, float]] = None
classification_threshold: float = 999

# Catboost


class CatboostTuneParamsConfig:
"""Define hyperparameter tuning search space for CatBoost (classification or multiclass).
:param depth_min: Minimum value for the depth of the trees. Defaults to 1.
:param depth_max: Maximum value for the depth of the trees. Defaults to 10.
:param l2_leaf_reg_min: Minimum value for L2 regularization term (l2_leaf_reg). Defaults to 1e-8.
:param l2_leaf_reg_max: Maximum value for L2 regularization term (l2_leaf_reg). Defaults to 100.
:param bagging_temperature_min: Minimum value for bagging temperature when bootstrap_type='Bayesian'. Defaults to 0.0.
:param bagging_temperature_max: Maximum value for bagging temperature when bootstrap_type='Bayesian'. Defaults to 10.0.
:param random_strength_min: Minimum value for the random strength. Defaults to 0.0.
:param random_strength_max: Maximum value for the random strength. Defaults to 10.0.
:param subsample_min: Minimum value of subsample ratio of the training instances. Defaults to 0.1.
:param subsample_max: Maximum value of subsample ratio of the training instances. Defaults to 1.0.
:param border_count_min: Minimum value for the number of splits for numerical features. Defaults to 32.
:param border_count_max: Maximum value for the number of splits for numerical features. Defaults to 255.
:param learning_rate_min: Minimum value for learning rate. Defaults to 1e-3.
:param learning_rate_max: Maximum value for learning rate. Defaults to 0.3.
:param iterations_min: Minimum number of boosting rounds (iterations). Defaults to 1000.
:param iterations_max: Maximum number of boosting rounds (iterations). Defaults to 1000.
:param verbosity_during_hyperparameter_tuning: Verbosity level during hyperparameter tuning. Defaults to 0.
:param verbosity_during_final_model_training: Verbosity level during final model training. Defaults to 0.
:param bootstrap_type: List of bootstrap types to consider. Defaults to ["Bayesian", "Poisson", "MVS", "No"].
:param grow_policy: List of grow policies. Defaults to ["SymmetricTree"].
:param catboost_objective: CatBoost objective. Defaults to "MultiClass".
:param catboost_eval_metric: CatBoost evaluation metric. Defaults to "MultiClass".
:param catboost_eval_metric_tune_direction: Direction in which to optimize the evaluation metric. Defaults to "minimize".
Must be one of ['minimize', 'maximize'].
"""

@check_types_init
def __init__(
self,
depth_min: int = 1,
depth_max: int = 10,
l2_leaf_reg_min: float = 1e-8,
l2_leaf_reg_max: float = 100.0,
bagging_temperature_min: float = 0.0,
bagging_temperature_max: float = 10.0,
random_strength_min: float = 0.0,
random_strength_max: float = 10.0,
subsample_min: float = 0.1,
subsample_max: float = 1.0,
border_count_min: int = 32,
border_count_max: int = 255,
learning_rate_min: float = 1e-3,
learning_rate_max: float = 0.3,
iterations_min: int = 1000,
iterations_max: int = 1000,
verbosity_during_hyperparameter_tuning: int = 0,
verbosity_during_final_model_training: int = 0,
bootstrap_type: Optional[List[str]] = None,
grow_policy: Optional[List[str]] = None,
catboost_objective: str = "MultiClass",
catboost_eval_metric: str = "MultiClass",
catboost_eval_metric_tune_direction: str = "minimize",
):
if bootstrap_type is None:
bootstrap_type = ["Bayesian", "Poisson", "MVS", "No"]
if grow_policy is None:
grow_policy = ["SymmetricTree"]

self.depth_min = depth_min
self.depth_max = depth_max
self.l2_leaf_reg_min = l2_leaf_reg_min
self.l2_leaf_reg_max = l2_leaf_reg_max
self.bagging_temperature_min = bagging_temperature_min
self.bagging_temperature_max = bagging_temperature_max
self.random_strength_min = random_strength_min
self.random_strength_max = random_strength_max
self.subsample_min = subsample_min
self.subsample_max = subsample_max
self.border_count_min = border_count_min
self.border_count_max = border_count_max
self.learning_rate_min = learning_rate_min
self.learning_rate_max = learning_rate_max
self.iterations_min = iterations_min
self.iterations_max = iterations_max
self.verbosity_during_hyperparameter_tuning = (
verbosity_during_hyperparameter_tuning
)
self.verbosity_during_final_model_training = (
verbosity_during_final_model_training
)
self.bootstrap_type = bootstrap_type
self.grow_policy = grow_policy
self.catboost_objective = catboost_objective
self.catboost_eval_metric = catboost_eval_metric
self.catboost_eval_metric_tune_direction = catboost_eval_metric_tune_direction


def dict(self):
"""
Return dictionary with all class attributes.
The implementation keeps backwards compatibility as this class mimics a Pydantic BaseModel.
"""
return vars(self)


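Like the existing Xgboost config classes, the CatBoost tuning config is meant to be instantiated with custom bounds when the default search space is too wide. A minimal usage sketch (how BlueCast wires this config into training is not part of this diff, so the snippet only shows construction and inspection):

from bluecast.config.training_config import CatboostTuneParamsConfig

# Narrow the search space relative to the defaults defined above.
catboost_tune_config = CatboostTuneParamsConfig(
    depth_min=4,
    depth_max=8,
    learning_rate_min=0.01,
    learning_rate_max=0.1,
    bootstrap_type=["Bayesian", "MVS"],
)

# dict() mimics Pydantic's BaseModel interface for backwards compatibility.
print(catboost_tune_config.dict())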

class CatboostTuneParamsRegressionConfig:
"""Define hyperparameter tuning search space for CatBoost (regression).
:param depth_min: Minimum value for the depth of the trees. Defaults to 1.
:param depth_max: Maximum value for the depth of the trees. Defaults to 10.
:param l2_leaf_reg_min: Minimum value for L2 regularization term (l2_leaf_reg). Defaults to 1e-8.
:param l2_leaf_reg_max: Maximum value for L2 regularization term (l2_leaf_reg). Defaults to 100.
:param bagging_temperature_min: Minimum value for bagging temperature when bootstrap_type='Bayesian'. Defaults to 0.0.
:param bagging_temperature_max: Maximum value for bagging temperature when bootstrap_type='Bayesian'. Defaults to 10.0.
:param random_strength_min: Minimum value for the random strength. Defaults to 0.0.
:param random_strength_max: Maximum value for the random strength. Defaults to 10.0.
:param subsample_min: Minimum value of subsample ratio of the training instances. Defaults to 0.1.
:param subsample_max: Maximum value of subsample ratio of the training instances. Defaults to 1.0.
:param border_count_min: Minimum value for the number of splits for numerical features. Defaults to 32.
:param border_count_max: Maximum value for the number of splits for numerical features. Defaults to 255.
:param learning_rate_min: Minimum value for learning rate. Defaults to 1e-3.
:param learning_rate_max: Maximum value for learning rate. Defaults to 0.3.
:param iterations_min: Minimum number of boosting rounds (iterations). Defaults to 1000.
:param iterations_max: Maximum number of boosting rounds (iterations). Defaults to 1000.
:param verbosity_during_hyperparameter_tuning: Verbosity level during hyperparameter tuning. Defaults to 0.
:param verbosity_during_final_model_training: Verbosity level during final model training. Defaults to 0.
:param bootstrap_type: List of bootstrap types to consider. Defaults to ["Bayesian", "Poisson", "MVS", "No"].
:param grow_policy: List of grow policies. Defaults to ["SymmetricTree"].
:param catboost_objective: CatBoost objective. Defaults to "RMSE".
:param catboost_eval_metric: CatBoost evaluation metric. Defaults to "RMSE".
:param catboost_eval_metric_tune_direction: Direction in which to optimize the evaluation metric. Defaults to "minimize".
Must be one of ['minimize', 'maximize'].
"""

@check_types_init
def __init__(
self,
depth_min: int = 1,
depth_max: int = 10,
l2_leaf_reg_min: float = 1e-8,
l2_leaf_reg_max: float = 100.0,
bagging_temperature_min: float = 0.0,
bagging_temperature_max: float = 10.0,
random_strength_min: float = 0.0,
random_strength_max: float = 10.0,
subsample_min: float = 0.1,
subsample_max: float = 1.0,
border_count_min: int = 32,
border_count_max: int = 255,
learning_rate_min: float = 1e-3,
learning_rate_max: float = 0.3,
iterations_min: int = 1000,
iterations_max: int = 1000,
verbosity_during_hyperparameter_tuning: int = 0,
verbosity_during_final_model_training: int = 0,
bootstrap_type: Optional[List[str]] = None,
grow_policy: Optional[List[str]] = None,
catboost_objective: str = "RMSE",
catboost_eval_metric: str = "RMSE",
catboost_eval_metric_tune_direction: str = "minimize",
):
if bootstrap_type is None:
bootstrap_type = ["Bayesian", "Poisson", "MVS", "No"]
if grow_policy is None:
grow_policy = ["SymmetricTree"]

self.depth_min = depth_min
self.depth_max = depth_max
self.l2_leaf_reg_min = l2_leaf_reg_min
self.l2_leaf_reg_max = l2_leaf_reg_max
self.bagging_temperature_min = bagging_temperature_min
self.bagging_temperature_max = bagging_temperature_max
self.random_strength_min = random_strength_min
self.random_strength_max = random_strength_max
self.subsample_min = subsample_min
self.subsample_max = subsample_max
self.border_count_min = border_count_min
self.border_count_max = border_count_max
self.learning_rate_min = learning_rate_min
self.learning_rate_max = learning_rate_max
self.iterations_min = iterations_min
self.iterations_max = iterations_max
self.verbosity_during_hyperparameter_tuning = (
verbosity_during_hyperparameter_tuning
)
self.verbosity_during_final_model_training = (
verbosity_during_final_model_training
)
self.bootstrap_type = bootstrap_type
self.grow_policy = grow_policy
self.catboost_objective = catboost_objective
self.catboost_eval_metric = catboost_eval_metric
self.catboost_eval_metric_tune_direction = catboost_eval_metric_tune_direction


def dict(self):
"""
Return dictionary with all class attributes.
The implementation keeps backwards compatibility as this class mimics a Pydantic BaseModel.
"""
return vars(self)


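The min/max pairs in both tuning configs describe ranges for a hyperparameter tuner to sample from. The tuning loop itself is not part of this file; assuming an Optuna-based tuner, a sampling helper might look roughly like the hypothetical sketch below (the function name and exact parameter mapping are illustrative only):

import optuna

from bluecast.config.training_config import CatboostTuneParamsConfig


def sample_catboost_params(trial: optuna.Trial, cfg: CatboostTuneParamsConfig) -> dict:
    # Draw one candidate parameter set from the configured search space.
    params = {
        "depth": trial.suggest_int("depth", cfg.depth_min, cfg.depth_max),
        "l2_leaf_reg": trial.suggest_float(
            "l2_leaf_reg", cfg.l2_leaf_reg_min, cfg.l2_leaf_reg_max, log=True
        ),
        "learning_rate": trial.suggest_float(
            "learning_rate", cfg.learning_rate_min, cfg.learning_rate_max, log=True
        ),
        "border_count": trial.suggest_int(
            "border_count", cfg.border_count_min, cfg.border_count_max
        ),
        "iterations": trial.suggest_int("iterations", cfg.iterations_min, cfg.iterations_max),
        "bootstrap_type": trial.suggest_categorical("bootstrap_type", cfg.bootstrap_type),
        "grow_policy": trial.suggest_categorical("grow_policy", cfg.grow_policy),
    }
    # CatBoost only accepts bagging_temperature with the Bayesian bootstrap
    # and subsample with Poisson/MVS, so these are sampled conditionally.
    if params["bootstrap_type"] == "Bayesian":
        params["bagging_temperature"] = trial.suggest_float(
            "bagging_temperature", cfg.bagging_temperature_min, cfg.bagging_temperature_max
        )
    elif params["bootstrap_type"] in ("Poisson", "MVS"):
        params["subsample"] = trial.suggest_float(
            "subsample", cfg.subsample_min, cfg.subsample_max
        )
    return params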

class CatboostFinalParamConfig:
"""Define final hyperparameters for CatBoost (classification or multiclass) using CatBoost defaults."""

params = {
"iterations": 1000, # default can vary in CatBoost, using 1000 for consistency
"depth": 6, # CatBoost default depth
"learning_rate": 0.03, # CatBoost default
"l2_leaf_reg": 3.0, # CatBoost default
"eval_metric": "MultiClass",
"loss_function": "MultiClass",
"random_seed": 0, # example default seed
"logging_level": "Silent",
# Other CatBoost parameters can be added here as needed.
}
sample_weight: Optional[Dict[str, float]] = None
classification_threshold: float = 0.5

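The params dictionary mirrors CatBoostClassifier keyword arguments, so building a model from the final config could look roughly like this sketch (it assumes the catboost package is installed; X_train, y_train and the threshold handling are placeholders, and the regression config below would map onto CatBoostRegressor in the same way):

from catboost import CatBoostClassifier

from bluecast.config.training_config import CatboostFinalParamConfig

final_config = CatboostFinalParamConfig()

# Every key in final_config.params is a valid CatBoostClassifier argument.
model = CatBoostClassifier(**final_config.params)
# model.fit(X_train, y_train)  # placeholder data

# For binary targets the threshold could turn probabilities into labels:
# preds = (model.predict_proba(X_test)[:, 1] >= final_config.classification_threshold).astype(int)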

class CatboostRegressionFinalParamConfig:
"""Define final hyperparameters for CatBoost (regression) using CatBoost defaults."""

params = {
"iterations": 1000, # default can vary in CatBoost, using 1000 for consistency
"depth": 6, # CatBoost default depth
"learning_rate": 0.03, # CatBoost default
"l2_leaf_reg": 3.0, # CatBoost default
"eval_metric": "RMSE",
"loss_function": "RMSE",
"random_seed": 0,
"logging_level": "Silent",
# Other CatBoost parameters can be added here as needed.
}
sample_weight: Optional[Dict[str, float]] = None
classification_threshold: float = (
999 # Not typically used in regression but kept for compatibility
)