Skip to content

Commit

Permalink
fix: make multi-processing in baseline models more consistent (#909)
Browse files Browse the repository at this point in the history
Closes #907 

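Set the multiprocessing context of the `ProcessPoolExecutor`s used in `fit` and `predict` of `BaselineClassifier` and `BaselineRegressor` to `spawn`. Also rename the `include_slower_models` parameter of `BaselineRegressor` to `extended_search`, matching `BaselineClassifier`.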

---------

Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com>
Co-authored-by: Lars Reimann <mail@larsreimann.com>
  • Loading branch information
3 people committed Jul 12, 2024
1 parent d3f81dc commit fa24560
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 10 deletions.
18 changes: 14 additions & 4 deletions src/safeds/ml/classical/classification/_baseline_classifier.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import copy
import multiprocessing as mp
from concurrent.futures import ALL_COMPLETED, wait
from typing import Self

Expand Down Expand Up @@ -34,8 +35,11 @@ class BaselineClassifier:
Get a baseline by fitting data on multiple different models and comparing the best metrics.
Parameters ---------- extended_search: If set to true, an extended set of models will be used to fit the
classifier. This might result in significantly higher runtime.
Parameters
----------
extended_search:
If set to true, an extended set of models will be used to fit the classifier.
This might result in significantly higher runtime.
"""

def __init__(self, extended_search: bool = False):
Expand Down Expand Up @@ -86,7 +90,10 @@ def fit(self, train_data: TabularDataset) -> Self:

copied_model = copy.deepcopy(self)

with ProcessPoolExecutor(max_workers=len(self._list_of_model_types)) as executor:
with ProcessPoolExecutor(
max_workers=len(self._list_of_model_types),
mp_context=mp.get_context("spawn"),
) as executor:
futures = []
for model in self._list_of_model_types:
futures.append(executor.submit(_fit_single_model, model, train_data))
Expand Down Expand Up @@ -149,7 +156,10 @@ def predict(self, test_data: TabularDataset) -> dict[str, float]:
raise DatasetMissesDataError
_check_columns_are_numeric(test_data_as_table, test_data.features.add_columns(test_data.target).column_names)

with ProcessPoolExecutor(max_workers=len(self._list_of_model_types)) as executor:
with ProcessPoolExecutor(
max_workers=len(self._list_of_model_types),
mp_context=mp.get_context("spawn"),
) as executor:
results = []
futures = []
for model in self._fitted_models:
Expand Down
22 changes: 16 additions & 6 deletions src/safeds/ml/classical/regression/_baseline_regressor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import copy
import multiprocessing as mp
from concurrent.futures import ALL_COMPLETED, wait
from typing import Self

Expand Down Expand Up @@ -38,11 +39,14 @@ class BaselineRegressor:
Get a baseline by fitting data on multiple different models and comparing the best metrics.
Parameters ---------- extended_search: If set to true, an extended set of models will be used to fit the
classifier. This might result in significantly higher runtime.
Parameters
----------
extended_search:
If set to true, an extended set of models will be used to fit the classifier.
This might result in significantly higher runtime.
"""

def __init__(self, include_slower_models: bool = False):
def __init__(self, extended_search: bool = False):
self._is_fitted = False
self._list_of_model_types = [
AdaBoostRegressor(),
Expand All @@ -53,7 +57,7 @@ def __init__(self, include_slower_models: bool = False):
SupportVectorRegressor(),
]

if include_slower_models:
if extended_search:
self._list_of_model_types.extend(
[ElasticNetRegressor(), LassoRegressor(), GradientBoostingRegressor()],
) # pragma: no cover
Expand Down Expand Up @@ -95,7 +99,10 @@ def fit(self, train_data: TabularDataset) -> Self:

copied_model = copy.deepcopy(self)

with ProcessPoolExecutor(max_workers=len(self._list_of_model_types)) as executor:
with ProcessPoolExecutor(
max_workers=len(self._list_of_model_types),
mp_context=mp.get_context("spawn"),
) as executor:
futures = []
for model in self._list_of_model_types:
futures.append(executor.submit(_fit_single_model, model, train_data))
Expand Down Expand Up @@ -159,7 +166,10 @@ def predict(self, test_data: TabularDataset) -> dict[str, float]:
_check_columns_are_numeric(test_data_as_table, test_data.features.add_columns(test_data.target).column_names)

# Start Processes
with ProcessPoolExecutor(max_workers=len(self._list_of_model_types)) as executor:
with ProcessPoolExecutor(
max_workers=len(self._list_of_model_types),
mp_context=mp.get_context("spawn"),
) as executor:
results = []
futures = []
for model in self._fitted_models:
Expand Down

0 comments on commit fa24560

Please sign in to comment.