Skip to content

Commit

Permalink
feat: set learning rate of Gradient Boosting models (#253)
Browse files Browse the repository at this point in the history
Closes #168.

### Summary of Changes

Add `learning_rate` parameter to `GradientBoosting` classifier and
regressor.

---------

Co-authored-by: Lars Reimann <mail@larsreimann.com>
  • Loading branch information
alex-senger and lars-reimann authored Apr 28, 2023
1 parent 8eea3dd commit 9ffaf55
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,27 @@


class GradientBoosting(Classifier):
"""Gradient boosting classification."""
"""Gradient boosting classification.
def __init__(self) -> None:
Parameters
----------
learning_rate : float
The larger the value, the more the model is influenced by each additional tree. If the learning rate is too
low, the model might underfit. If the learning rate is too high, the model might overfit.
Raises
------
ValueError
If `learning_rate` is non-positive.
"""

def __init__(self, learning_rate: float = 0.1) -> None:
self._wrapped_classifier: sk_GradientBoostingClassifier | None = None
self._feature_names: list[str] | None = None
self._target_name: str | None = None
if learning_rate <= 0:
raise ValueError("learning_rate must be positive.")
self._learning_rate = learning_rate

def fit(self, training_set: TaggedTable) -> GradientBoosting:
"""
Expand All @@ -41,10 +56,10 @@ def fit(self, training_set: TaggedTable) -> GradientBoosting:
LearningError
If the training data contains invalid values or if the training failed.
"""
wrapped_classifier = sk_GradientBoostingClassifier()
wrapped_classifier = sk_GradientBoostingClassifier(learning_rate=self._learning_rate)
fit(wrapped_classifier, training_set)

result = GradientBoosting()
result = GradientBoosting(learning_rate=self._learning_rate)
result._wrapped_classifier = wrapped_classifier
result._feature_names = training_set.features.column_names
result._target_name = training_set.target.name
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,27 @@


class GradientBoosting(Regressor):
"""Gradient boosting regression."""
"""Gradient boosting regression.
def __init__(self) -> None:
Parameters
----------
learning_rate : float
The larger the value, the more the model is influenced by each additional tree. If the learning rate is too
low, the model might underfit. If the learning rate is too high, the model might overfit.
Raises
------
ValueError
If `learning_rate` is non-positive.
"""

def __init__(self, learning_rate: float = 0.1) -> None:
self._wrapped_regressor: sk_GradientBoostingRegressor | None = None
self._feature_names: list[str] | None = None
self._target_name: str | None = None
if learning_rate <= 0:
raise ValueError("learning_rate must be positive.")
self._learning_rate = learning_rate

def fit(self, training_set: TaggedTable) -> GradientBoosting:
"""
Expand All @@ -41,10 +56,10 @@ def fit(self, training_set: TaggedTable) -> GradientBoosting:
LearningError
If the training data contains invalid values or if the training failed.
"""
wrapped_regressor = sk_GradientBoostingRegressor()
wrapped_regressor = sk_GradientBoostingRegressor(learning_rate=self._learning_rate)
fit(wrapped_regressor, training_set)

result = GradientBoosting()
result = GradientBoosting(learning_rate=self._learning_rate)
result._wrapped_regressor = wrapped_regressor
result._feature_names = training_set.features.column_names
result._target_name = training_set.target.name
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import pytest
from safeds.data.tabular.containers import Table
from safeds.ml.classical.classification import GradientBoosting


def test_should_throw_value_error_if_learning_rate_is_non_positive() -> None:
    """Check that the classifier's constructor rejects non-positive learning rates.

    Both a negative value and the boundary value zero are exercised, since
    "non-positive" includes zero.
    """
    for learning_rate in (-1, 0):
        # `match` is interpreted as a regular expression, so the trailing dot
        # is escaped to require the literal message.
        with pytest.raises(ValueError, match=r"learning_rate must be positive\."):
            GradientBoosting(learning_rate=learning_rate)


def test_should_pass_learning_rate_to_sklearn() -> None:
    """Check that the configured learning rate reaches the wrapped sklearn classifier."""
    learning_rate = 2
    training_set = Table.from_dict({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]})
    tagged_table = training_set.tag_columns("col1")

    classifier = GradientBoosting(learning_rate=learning_rate).fit(tagged_table)

    assert classifier._wrapped_classifier is not None
    # Compare against the value that was passed in rather than against
    # `classifier._learning_rate`: both sides would otherwise derive from the
    # same attribute and a wrongly stored value would go unnoticed.
    assert classifier._wrapped_classifier.learning_rate == learning_rate
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import pytest
from safeds.data.tabular.containers import Table
from safeds.ml.classical.regression import GradientBoosting


def test_should_throw_value_error_if_learning_rate_is_non_positive() -> None:
    """Check that the regressor's constructor rejects non-positive learning rates.

    Both a negative value and the boundary value zero are exercised, since
    "non-positive" includes zero.
    """
    for learning_rate in (-1, 0):
        # `match` is interpreted as a regular expression, so the trailing dot
        # is escaped to require the literal message.
        with pytest.raises(ValueError, match=r"learning_rate must be positive\."):
            GradientBoosting(learning_rate=learning_rate)


def test_should_pass_learning_rate_to_sklearn() -> None:
    """Check that the configured learning rate reaches the wrapped sklearn regressor."""
    learning_rate = 2
    training_set = Table.from_dict({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]})
    tagged_table = training_set.tag_columns("col1")

    regressor = GradientBoosting(learning_rate=learning_rate).fit(tagged_table)

    assert regressor._wrapped_regressor is not None
    # Compare against the value that was passed in rather than against
    # `regressor._learning_rate`: both sides would otherwise derive from the
    # same attribute and a wrongly stored value would go unnoticed.
    assert regressor._wrapped_regressor.learning_rate == learning_rate

0 comments on commit 9ffaf55

Please sign in to comment.