From 766f2ff2a6d68098be3e858ad12bf9e509e5f192 Mon Sep 17 00:00:00 2001
From: Alexander <47296670+Marsmaennchen221@users.noreply.github.com>
Date: Fri, 5 May 2023 14:43:31 +0200
Subject: [PATCH] feat: Added parameter `number_of_trees` to `GradientBoosting`
 (#268)

Closes #170.

### Summary of Changes

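Added parameter `number_of_trees` (default 100, must be greater than 0) to the `GradientBoosting` classifier and regressor; it is passed to scikit-learn as `n_estimators`.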

---------

Co-authored-by: alex-senger <91055000+alex-senger@users.noreply.github.com>
Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com>
Co-authored-by: Lars Reimann <mail@larsreimann.com>
---
 .../classification/_gradient_boosting.py      | 20 ++++++++++++++-----
 .../regression/_gradient_boosting.py          | 20 ++++++++++++++-----
 .../classification/test_gradient_boosting.py  | 17 +++++++++++++++-
 .../regression/test_gradient_boosting.py      | 17 +++++++++++++++-
 4 files changed, 62 insertions(+), 12 deletions(-)

diff --git a/src/safeds/ml/classical/classification/_gradient_boosting.py b/src/safeds/ml/classical/classification/_gradient_boosting.py
index d82c7d8d3..f8c5e6501 100644
--- a/src/safeds/ml/classical/classification/_gradient_boosting.py
+++ b/src/safeds/ml/classical/classification/_gradient_boosting.py
@@ -13,10 +13,14 @@
 
 
 class GradientBoosting(Classifier):
-    """Gradient boosting classification.
+    """
+    Gradient boosting classification.
 
     Parameters
     ----------
+    number_of_trees : int
+        The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large
+        number usually results in better performance.
     learning_rate : float
         The larger the value, the more the model is influenced by each additional tree. If the learning rate is too
         low, the model might underfit. If the learning rate is too high, the model might overfit.
@@ -24,15 +28,18 @@ class GradientBoosting(Classifier):
     Raises
     ------
     ValueError
-        If `learning_rate` is non-positive.
+        If `number_of_trees` or `learning_rate` is less than or equal to 0.
     """
 
-    def __init__(self, learning_rate: float = 0.1) -> None:
+    def __init__(self, number_of_trees: int = 100, learning_rate: float = 0.1) -> None:
         # Validation
+        if number_of_trees <= 0:
+            raise ValueError("The number of boosting stages to perform has to be greater than 0.")
         if learning_rate <= 0:
             raise ValueError("The learning rate has to be greater than 0.")
 
         # Hyperparameters
+        self._number_of_trees = number_of_trees
         self._learning_rate = learning_rate
 
         # Internal state
@@ -61,10 +68,13 @@ def fit(self, training_set: TaggedTable) -> GradientBoosting:
         LearningError
             If the training data contains invalid values or if the training failed.
         """
-        wrapped_classifier = sk_GradientBoostingClassifier(learning_rate=self._learning_rate)
+        wrapped_classifier = sk_GradientBoostingClassifier(
+            n_estimators=self._number_of_trees,
+            learning_rate=self._learning_rate,
+        )
         fit(wrapped_classifier, training_set)
 
-        result = GradientBoosting(learning_rate=self._learning_rate)
+        result = GradientBoosting(number_of_trees=self._number_of_trees, learning_rate=self._learning_rate)
         result._wrapped_classifier = wrapped_classifier
         result._feature_names = training_set.features.column_names
         result._target_name = training_set.target.name
diff --git a/src/safeds/ml/classical/regression/_gradient_boosting.py b/src/safeds/ml/classical/regression/_gradient_boosting.py
index 314a66468..1b8b92f60 100644
--- a/src/safeds/ml/classical/regression/_gradient_boosting.py
+++ b/src/safeds/ml/classical/regression/_gradient_boosting.py
@@ -13,10 +13,14 @@
 
 
 class GradientBoosting(Regressor):
-    """Gradient boosting regression.
+    """
+    Gradient boosting regression.
 
     Parameters
     ----------
+    number_of_trees : int
+        The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large
+        number usually results in better performance.
     learning_rate : float
         The larger the value, the more the model is influenced by each additional tree. If the learning rate is too
         low, the model might underfit. If the learning rate is too high, the model might overfit.
@@ -24,15 +28,18 @@ class GradientBoosting(Regressor):
     Raises
     ------
     ValueError
-        If `learning_rate` is non-positive.
+        If `number_of_trees` or `learning_rate` is less than or equal to 0.
     """
 
-    def __init__(self, learning_rate: float = 0.1) -> None:
+    def __init__(self, number_of_trees: int = 100, learning_rate: float = 0.1) -> None:
         # Validation
+        if number_of_trees <= 0:
+            raise ValueError("The number of boosting stages to perform has to be greater than 0.")
         if learning_rate <= 0:
             raise ValueError("The learning rate has to be greater than 0.")
 
         # Hyperparameters
+        self._number_of_trees = number_of_trees
         self._learning_rate = learning_rate
 
         # Internal state
@@ -61,10 +68,13 @@ def fit(self, training_set: TaggedTable) -> GradientBoosting:
         LearningError
             If the training data contains invalid values or if the training failed.
         """
-        wrapped_regressor = sk_GradientBoostingRegressor(learning_rate=self._learning_rate)
+        wrapped_regressor = sk_GradientBoostingRegressor(
+            n_estimators=self._number_of_trees,
+            learning_rate=self._learning_rate,
+        )
         fit(wrapped_regressor, training_set)
 
-        result = GradientBoosting(learning_rate=self._learning_rate)
+        result = GradientBoosting(number_of_trees=self._number_of_trees, learning_rate=self._learning_rate)
         result._wrapped_regressor = wrapped_regressor
         result._feature_names = training_set.features.column_names
         result._target_name = training_set.target.name
diff --git a/tests/safeds/ml/classical/classification/test_gradient_boosting.py b/tests/safeds/ml/classical/classification/test_gradient_boosting.py
index d0982b42f..2413a5b7b 100644
--- a/tests/safeds/ml/classical/classification/test_gradient_boosting.py
+++ b/tests/safeds/ml/classical/classification/test_gradient_boosting.py
@@ -20,5 +20,20 @@ def test_should_be_passed_to_sklearn(self, training_set: TaggedTable) -> None:
         assert fitted_model._wrapped_classifier.learning_rate == 2
 
     def test_should_raise_if_less_than_or_equal_to_0(self) -> None:
-        with pytest.raises(ValueError, match="has to be greater than 0"):
+        with pytest.raises(ValueError, match="The learning rate has to be greater than 0."):
             GradientBoosting(learning_rate=-1)
+
+
+class TestNumberOfTrees:
+    def test_should_be_passed_to_fitted_model(self, training_set: TaggedTable) -> None:
+        fitted_model = GradientBoosting(number_of_trees=2).fit(training_set)
+        assert fitted_model._number_of_trees == 2
+
+    def test_should_be_passed_to_sklearn(self, training_set: TaggedTable) -> None:
+        fitted_model = GradientBoosting(number_of_trees=2).fit(training_set)
+        assert fitted_model._wrapped_classifier is not None
+        assert fitted_model._wrapped_classifier.n_estimators == 2
+
+    def test_should_raise_if_less_than_1(self) -> None:
+        with pytest.raises(ValueError, match="The number of boosting stages to perform has to be greater than 0."):
+            GradientBoosting(number_of_trees=0)
diff --git a/tests/safeds/ml/classical/regression/test_gradient_boosting.py b/tests/safeds/ml/classical/regression/test_gradient_boosting.py
index b12970082..b35fb0aa5 100644
--- a/tests/safeds/ml/classical/regression/test_gradient_boosting.py
+++ b/tests/safeds/ml/classical/regression/test_gradient_boosting.py
@@ -20,5 +20,20 @@ def test_should_be_passed_to_sklearn(self, training_set: TaggedTable) -> None:
         assert fitted_model._wrapped_regressor.learning_rate == 2
 
     def test_should_raise_if_less_than_or_equal_to_0(self) -> None:
-        with pytest.raises(ValueError, match="has to be greater than 0"):
+        with pytest.raises(ValueError, match="The learning rate has to be greater than 0."):
             GradientBoosting(learning_rate=-1)
+
+
+class TestNumberOfTrees:
+    def test_should_be_passed_to_fitted_model(self, training_set: TaggedTable) -> None:
+        fitted_model = GradientBoosting(number_of_trees=2).fit(training_set)
+        assert fitted_model._number_of_trees == 2
+
+    def test_should_be_passed_to_sklearn(self, training_set: TaggedTable) -> None:
+        fitted_model = GradientBoosting(number_of_trees=2).fit(training_set)
+        assert fitted_model._wrapped_regressor is not None
+        assert fitted_model._wrapped_regressor.n_estimators == 2
+
+    def test_should_raise_if_less_than_1(self) -> None:
+        with pytest.raises(ValueError, match="The number of boosting stages to perform has to be greater than 0."):
+            GradientBoosting(number_of_trees=0)