
fix: make multi-processing in baseline models more consistent #909

Merged
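Both baseline modules get the same two-part change: a new `import multiprocessing as mp`, and every `ProcessPoolExecutor` is now constructed with an explicit `mp_context=mp.get_context("spawn")` instead of relying on the platform default start method (historically `fork` on Linux, `spawn` on Windows and macOS). The stdlib-only sketch below illustrates why the default matters; the motivation is inferred from the title and the diff, not quoted from the PR discussion:

```python
# Stdlib-only sketch, assuming the goal is cross-platform consistency:
# a "fork" worker inherits the parent's memory, while a "spawn" worker starts
# from a fresh interpreter and re-imports this module, so state mutated at
# runtime is not visible to it. Pinning one start method makes both behave
# the same everywhere.
import multiprocessing as mp

STATE = "set at import time"


def read_state() -> str:
    return STATE


if __name__ == "__main__":
    STATE = "mutated in __main__"  # a "spawn" worker never sees this
    for method in ("fork", "spawn"):
        try:
            ctx = mp.get_context(method)
        except ValueError:  # "fork" does not exist on Windows
            continue
        with ctx.Pool(1) as pool:
            print(method, "->", pool.apply(read_state))
    # Typical output on Linux:
    #   fork -> mutated in __main__
    #   spawn -> set at import time
```

With one start method pinned, `fit` and `predict` behave the same on every platform.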
18 changes: 14 additions & 4 deletions src/safeds/ml/classical/classification/_baseline_classifier.py
@@ -1,4 +1,5 @@
import copy
+import multiprocessing as mp
from concurrent.futures import ALL_COMPLETED, wait
from typing import Self

@@ -34,8 +35,11 @@ class BaselineClassifier:

Get a baseline by fitting data on multiple different models and comparing the best metrics.

-Parameters ---------- extended_search: If set to true, an extended set of models will be used to fit the
-classifier. This might result in significantly higher runtime.
+Parameters
+----------
+extended_search:
+If set to true, an extended set of models will be used to fit the classifier.
+This might result in significantly higher runtime.
"""

def __init__(self, extended_search: bool = False):
@@ -86,7 +90,10 @@ def fit(self, train_data: TabularDataset) -> Self:

copied_model = copy.deepcopy(self)

-with ProcessPoolExecutor(max_workers=len(self._list_of_model_types)) as executor:
+with ProcessPoolExecutor(
+max_workers=len(self._list_of_model_types),
+mp_context=mp.get_context("spawn"),
+) as executor:
futures = []
for model in self._list_of_model_types:
futures.append(executor.submit(_fit_single_model, model, train_data))
@@ -149,7 +156,10 @@ def predict(self, test_data: TabularDataset) -> dict[str, float]:
raise DatasetMissesDataError
_check_columns_are_numeric(test_data_as_table, test_data.features.add_columns(test_data.target).column_names)

-with ProcessPoolExecutor(max_workers=len(self._list_of_model_types)) as executor:
+with ProcessPoolExecutor(
+max_workers=len(self._list_of_model_types),
+mp_context=mp.get_context("spawn"),
+) as executor:
results = []
futures = []
for model in self._fitted_models:
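Both `fit` and `predict` in this file keep their submit-then-wait structure; only the executor construction changes. Below is a stripped-down, stdlib-only sketch of that pattern; the `_fit_single_model` body is a stand-in, not the Safe-DS helper:

```python
# Minimal sketch of the pattern the diff applies: an explicit "spawn" context
# passed to ProcessPoolExecutor, one submitted task per model, then a wait for
# all futures before collecting results.
import multiprocessing as mp
from concurrent.futures import ALL_COMPLETED, ProcessPoolExecutor, wait


def _fit_single_model(model_id: int) -> str:
    # Placeholder for fitting one model on the training data.
    return f"model-{model_id} fitted"


if __name__ == "__main__":  # "spawn" workers re-import this module, so guard the entry point
    model_ids = [1, 2, 3]
    with ProcessPoolExecutor(
        max_workers=len(model_ids),
        mp_context=mp.get_context("spawn"),
    ) as executor:
        futures = [executor.submit(_fit_single_model, i) for i in model_ids]
        wait(futures, return_when=ALL_COMPLETED)
    print([future.result() for future in futures])
```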
22 changes: 16 additions & 6 deletions src/safeds/ml/classical/regression/_baseline_regressor.py
@@ -1,4 +1,5 @@
import copy
+import multiprocessing as mp
from concurrent.futures import ALL_COMPLETED, wait
from typing import Self

@@ -38,11 +39,14 @@ class BaselineRegressor:

Get a baseline by fitting data on multiple different models and comparing the best metrics.

-Parameters ---------- extended_search: If set to true, an extended set of models will be used to fit the
-classifier. This might result in significantly higher runtime.
+Parameters
+----------
+extended_search:
+If set to true, an extended set of models will be used to fit the regressor.
+This might result in significantly higher runtime.
"""

-def __init__(self, include_slower_models: bool = False):
+def __init__(self, extended_search: bool = False):
self._is_fitted = False
self._list_of_model_types = [
AdaBoostRegressor(),
@@ -53,7 +57,7 @@ def __init__(self, include_slower_models: bool = False):
SupportVectorRegressor(),
]

-if include_slower_models:
+if extended_search:
self._list_of_model_types.extend(
[ElasticNetRegressor(), LassoRegressor(), GradientBoostingRegressor()],
) # pragma: no cover
@@ -95,7 +99,10 @@ def fit(self, train_data: TabularDataset) -> Self:

copied_model = copy.deepcopy(self)

-with ProcessPoolExecutor(max_workers=len(self._list_of_model_types)) as executor:
+with ProcessPoolExecutor(
+max_workers=len(self._list_of_model_types),
+mp_context=mp.get_context("spawn"),
+) as executor:
futures = []
for model in self._list_of_model_types:
futures.append(executor.submit(_fit_single_model, model, train_data))
@@ -159,7 +166,10 @@ def predict(self, test_data: TabularDataset) -> dict[str, float]:
_check_columns_are_numeric(test_data_as_table, test_data.features.add_columns(test_data.target).column_names)

# Start Processes
-with ProcessPoolExecutor(max_workers=len(self._list_of_model_types)) as executor:
+with ProcessPoolExecutor(
+max_workers=len(self._list_of_model_types),
+mp_context=mp.get_context("spawn"),
+) as executor:
results = []
futures = []
for model in self._fitted_models:
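The one caller-visible consequence of pinning `spawn` is that the code which triggers worker creation must not run at import time, because every worker re-imports the entry module. Below is a hypothetical usage sketch; the import paths and the `Table`/`to_tabular_dataset` construction are assumptions about the Safe-DS API, not taken from this PR:

```python
# Hypothetical caller-side sketch (assumed import paths and data-construction
# helpers; consult the Safe-DS docs). It shows the practical rule with the
# "spawn" start method: keep the fitting call under an import guard.
from safeds.data.tabular.containers import Table  # assumed import path
from safeds.ml.classical.regression import BaselineRegressor  # assumed import path


def main() -> None:
    table = Table(
        {
            "feature_a": [1.0, 2.0, 3.0, 4.0],
            "feature_b": [0.5, 1.5, 2.5, 3.5],
            "target": [1.0, 2.0, 3.0, 4.0],
        },
    )
    train_data = table.to_tabular_dataset("target")  # assumed helper name

    baseline = BaselineRegressor(extended_search=False)  # True enables the slower, extended model set
    fitted = baseline.fit(train_data)
    metrics = fitted.predict(train_data)  # dict[str, float] of metric name -> value
    print(metrics)


if __name__ == "__main__":  # required: "spawn" workers re-import this module
    main()
```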