From 71e7a8bde8f6bc06241548bd5d7045d152abd9ef Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Sun, 11 Jun 2023 11:59:43 +0200
Subject: [PATCH 01/39] Add method to split off a subsample of the test set to file

---
 amlb/datasets/openml.py | 38 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/amlb/datasets/openml.py b/amlb/datasets/openml.py
index d037252d8..452ce9aa5 100644
--- a/amlb/datasets/openml.py
+++ b/amlb/datasets/openml.py
@@ -2,13 +2,14 @@
 **openml** module implements the abstractions defined in **data** module
 to expose `OpenML`_ datasets.
 """
+import pathlib
 from abc import abstractmethod
 import copy
 import functools
 import logging
 import os
 import re
-from typing import Generic, Tuple, TypeVar, Union
+from typing import Generic, Tuple, TypeVar

 import arff
 import pandas.api.types as pat
@@ -16,7 +17,7 @@
 import xmltodict

 from ..data import AM, DF, Dataset, DatasetType, Datasplit, Feature
-from ..resources import config as rconfig
+from ..resources import config as rconfig, get as rget
 from ..utils import as_list, lazy_property, path_from_split, profile, split_path, unsparsify
@@ -92,6 +93,39 @@ def test(self):
         self._ensure_split_created()
         return self._test

+    @profile(logger=log)
+    def _inference_subsample(self, fmt: str, n: int) -> pathlib.Path:
+        """ Write subset of `n` samples from the test split to disk in `fmt` format """
+        # Just a hack for now; the splitters all work specifically with openml tasks.
+        # The important thing is that we split to disk and can load it later.
+        if fmt not in ["csv", "arff", "parquet"]:
+            msg = f"{fmt=}, but must be one of 'csv', 'arff', or 'parquet'."
+            raise ValueError(msg)
+
+        # We should consider taking a stratified sample if n is large enough,
+        # since inference time might differ based on class
+        subsample = self._test.X.sample(
+            n=n,
+            replace=True,
+            random_state=rget().seed(self.fold)
+        )
+
+        _, test_path = self._get_split_paths()
+        test_path = pathlib.Path(test_path)
+        subsample_path = test_path.parent / f"{test_path.stem}_{n}.{fmt}"
+        if fmt == "csv":
+            subsample.to_csv(subsample_path, header=True, index=False)
+        elif fmt == "arff":
+            ArffSplitter(self)._save_split(
+                subsample,
+                subsample_path,
+                name=f"{self._oml_dataset.name}_inference_{self.fold}_{n}"
+            )
+        elif fmt == "parquet":
+            subsample.to_parquet(subsample_path)
+
+        return subsample_path
+
     @lazy_property
     @profile(logger=log)
     def features(self):

From 87f80aabf26d656e329ba1177cfe768ad3211b77 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Mon, 12 Jun 2023 14:11:41 +0300
Subject: [PATCH 02/39] Add first draft for improving inference time measurements

---
 amlb/datasets/openml.py              |  6 ++++++
 amlb/results.py                      |  6 +++++-
 frameworks/AutoGluon/__init__.py     |  3 ++-
 frameworks/AutoGluon/exec.py         | 25 +++++++++++++++++--------
 frameworks/TPOT/__init__.py          |  3 ++-
 frameworks/TPOT/exec.py              | 21 +++++++++++++++++++--
 frameworks/constantpredictor/exec.py | 11 ++++++++++-
 frameworks/shared/callee.py          | 14 +++++++++++++-
 8 files changed, 74 insertions(+), 15 deletions(-)

diff --git a/amlb/datasets/openml.py b/amlb/datasets/openml.py
index 452ce9aa5..445cf46c0 100644
--- a/amlb/datasets/openml.py
+++ b/amlb/datasets/openml.py
@@ -93,6 +93,12 @@ def test(self):
         self._ensure_split_created()
         return self._test

+    def inference_subsample_files(self, fmt: str) -> list[Tuple[int, str]]:
+        return [
+            (n, str(self._inference_subsample(fmt=fmt, n=n)))
+            for n in [1, 1000, 10_000]
+        ]
+
     @profile(logger=log)
     def _inference_subsample(self, fmt: str, n: int) -> pathlib.Path:
         """ Write subset of `n` samples from the test split to disk in `fmt` format """
diff --git a/amlb/results.py b/amlb/results.py
index 052460fbd..48bd6e447 100644
--- a/amlb/results.py
+++ b/amlb/results.py
@@ -444,6 +444,10 @@ def compute_score(self, result=None, meta_result=None):
         required_meta_res = ['training_duration', 'predict_duration', 'models_count']
         for m in required_meta_res:
             entry[m] = meta_result[m] if m in meta_result else nan
+
+        if inference_times := Namespace.get(meta_result, "inference_times"):
+            for n_samples, measured_times in Namespace.dict(inference_times).items():
+                entry[f"inference_{n_samples}_rows"] = np.mean(measured_times)

         result = self.get_result() if result is None else result
         scoring_errors = []
@@ -473,7 +477,7 @@ def set_score(score):
         entry.info = result.info
         if scoring_errors:
             entry.info = "; ".join(filter(lambda it: it, [entry.info, *scoring_errors]))
-        entry |= Namespace({k: v for k, v in meta_result if k not in required_meta_res})
+        entry |= Namespace({k: v for k, v in meta_result if k not in required_meta_res and k != "inference_times"})
         log.info("Metric scores: %s", entry)
         return entry
diff --git a/frameworks/AutoGluon/__init__.py b/frameworks/AutoGluon/__init__.py
index c8694148c..4c92d08f1 100644
--- a/frameworks/AutoGluon/__init__.py
+++ b/frameworks/AutoGluon/__init__.py
@@ -25,7 +25,8 @@ def run_autogluon_tabular(dataset: Dataset, config: TaskConfig):
             name=dataset.target.name,
             classes=dataset.target.values
         ),
-        problem_type=dataset.type.name  # AutoGluon problem_type is using same names as amlb.data.DatasetType
+        problem_type=dataset.type.name,  # AutoGluon problem_type is using same names as amlb.data.DatasetType
+        inference_subsample_files=dataset.inference_subsample_files(fmt="parquet"),
     )

     return run_in_venv(__file__, "exec.py",
diff --git a/frameworks/AutoGluon/exec.py b/frameworks/AutoGluon/exec.py
index 6fe76769b..2a50ecf37 100644
--- a/frameworks/AutoGluon/exec.py
+++ b/frameworks/AutoGluon/exec.py
@@ -18,7 +18,8 @@
 import autogluon.core.metrics as metrics
 from autogluon.tabular.version import __version__

-from frameworks.shared.callee import call_run, result, output_subdir
+from frameworks.shared.callee import call_run, result, output_subdir, \
+    measure_inference_times
 from frameworks.shared.utils import Timer, zip_path

 log = logging.getLogger(__name__)
@@ -68,14 +69,21 @@ def run(dataset, config):
     # Persist model in memory that is going to be predicting to get correct inference latency
     predictor.persist_models('best')

+    def inference_time_classification(path: str):
+        data = TabularDataset(path)
+        return None, predictor.predict_proba(data, as_multiclass=True)
+
+    def inference_time_regression(path: str):
+        data = TabularDataset(path)
+        return predictor.predict(data, as_pandas=False), None
+
+    infer = inference_time_classification if is_classification else inference_time_regression
+    with Timer() as predict:
+        predictions, probabilities = infer(test_data)
     if is_classification:
-        with Timer() as predict:
-            probabilities = predictor.predict_proba(test_data, as_multiclass=True)
         predictions = probabilities.idxmax(axis=1).to_numpy()
-    else:
-        with Timer() as predict:
-            predictions = predictor.predict(test_data, as_pandas=False)
-        probabilities = None
+
+    inference_times = measure_inference_times(infer, dataset.inference_subsample_files)

     prob_labels = probabilities.columns.values.astype(str).tolist() if probabilities is not None else None
@@ -107,7 +115,8 @@ def run(dataset, config):
                   models_count=num_models_trained,
                   models_ensemble_count=num_models_ensemble,
                   training_duration=training.duration,
-                  predict_duration=predict.duration)
+                  predict_duration=predict.duration,
+                  inference_times=inference_times,)
diff --git a/frameworks/TPOT/__init__.py b/frameworks/TPOT/__init__.py
index 9828c6473..1aa3192ea 100644
--- a/frameworks/TPOT/__init__.py
+++ b/frameworks/TPOT/__init__.py
@@ -21,7 +21,8 @@ def run(dataset: Dataset, config: TaskConfig):
         test=dict(
             X=X_test,
             y=y_test
-        )
+        ),
+        inference_subsample_files=dataset.inference_subsample_files(fmt="parquet"),
     )

     def process_results(results):
diff --git a/frameworks/TPOT/exec.py b/frameworks/TPOT/exec.py
index ce70cb7f9..a7e0858ed 100644
--- a/frameworks/TPOT/exec.py
+++ b/frameworks/TPOT/exec.py
@@ -4,6 +4,8 @@
 import sys
 import tempfile as tmp

+import pandas as pd
+
 if sys.platform == 'darwin':
     os.environ['OBJC_DISABLE_INITIALIZE_FORK_SAFETY'] = 'YES'
 os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
@@ -13,7 +15,8 @@

 from tpot import TPOTClassifier, TPOTRegressor, __version__

-from frameworks.shared.callee import call_run, output_subdir, result
+from frameworks.shared.callee import call_run, output_subdir, result, \
+    measure_inference_times
 from frameworks.shared.utils import Timer, is_sparse
@@ -67,6 +70,18 @@ def run(dataset, config):
     y_test = dataset.test.y
     with Timer() as predict:
         predictions = tpot.predict(X_test)
+
+    def infer(path):
+        data = pd.read_parquet(path)
+        if is_classification:
+            try:
+                return tpot.predict_proba(data)
+            except RuntimeError:
+                return tpot.predict(data)
+        return tpot.predict(data)
+
+    inference_times = measure_inference_times(infer, dataset.inference_subsample_files)
+
     try:
         probabilities = tpot.predict_proba(X_test) if is_classification else None
     except RuntimeError:
@@ -82,7 +97,9 @@ def run(dataset, config):
         target_is_encoded=is_classification,
         models_count=len(tpot.evaluated_individuals_),
         training_duration=training.duration,
-        predict_duration=predict.duration)
+        predict_duration=predict.duration,
+        inference_times=inference_times,
+    )
diff --git a/frameworks/constantpredictor/exec.py b/frameworks/constantpredictor/exec.py
index 6a7aae69c..4a2c7cf9d 100644
--- a/frameworks/constantpredictor/exec.py
+++ b/frameworks/constantpredictor/exec.py
@@ -1,11 +1,13 @@
 import logging

+import pandas as pd
 from sklearn.dummy import DummyClassifier, DummyRegressor

 from amlb.benchmark import TaskConfig
 from amlb.data import Dataset
 from amlb.results import save_predictions
 from amlb.utils import Timer, unsparsify
+from frameworks.shared.callee import measure_inference_times

 log = logging.getLogger(__name__)
@@ -29,6 +31,12 @@ def run(dataset: Dataset, config: TaskConfig):
         predictions = predictor.predict(X_test)
         probabilities = predictor.predict_proba(X_test) if is_classification else None

+    def infer(path):
+        data = pd.read_parquet(path)
+        return predictor.predict(data)
+
+    inference_times = measure_inference_times(infer, dataset.inference_subsample_files(fmt="parquet"))
+
     save_predictions(dataset=dataset,
                      output_file=config.output_predictions_file,
                      probabilities=probabilities,
@@ -39,5 +47,6 @@ def run(dataset: Dataset, config: TaskConfig):
     return dict(
         models_count=1,
         training_duration=training.duration,
-        predict_duration=predict.duration
+        predict_duration=predict.duration,
+        inference_times=inference_times,
     )
diff --git a/frameworks/shared/callee.py b/frameworks/shared/callee.py
index c596e01c5..3c13d144a 100644
--- a/frameworks/shared/callee.py
+++ b/frameworks/shared/callee.py
@@ -1,11 +1,14 @@
 import logging
 import os
+import pathlib
 import re
 import signal
 import sys
+from collections import defaultdict
+from typing import Callable, Any, Tuple

 from .utils import InterruptTimeout, Namespace as ns, json_dump, json_loads, kill_proc_tree, touch
-from .utils import deserialize_data, serialize_data
+from .utils import deserialize_data, serialize_data, Timer

 log = logging.getLogger(__name__)
@@ -86,3 +89,12 @@ def load_data(name, path, **_):
         kill_proc_tree(include_parent=False, timeout=5)

     json_dump(res, config.result_file, style='compact')
+
+
+def measure_inference_times(predict_fn: Callable[[str], Any], files: list[Tuple[int, str]]) -> dict[int, list[float]]:
+    inference_times = defaultdict(list)
+    for subsample_size, subsample_path in files:
+        for _ in range(10):
+            with Timer() as predict:
+                predict_fn(subsample_path)
+            inference_times[subsample_size].append(predict.duration)
+    return inference_times
\ No newline at end of file

From 94ba496420128cfc8dc595244735fb2498ecfab9 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 10:43:45 +0300
Subject: [PATCH 03/39] Store all measured inference times to disk

---
 frameworks/shared/callee.py | 5 +++++
 frameworks/shared/caller.py | 9 +++++++++
 2 files changed, 14 insertions(+)

diff --git a/frameworks/shared/callee.py b/frameworks/shared/callee.py
index 3c13d144a..ca807024c 100644
--- a/frameworks/shared/callee.py
+++ b/frameworks/shared/callee.py
@@ -88,6 +88,11 @@ def load_data(name, path, **_):
     # ensure there's no subprocess left
     kill_proc_tree(include_parent=False, timeout=5)

+    inference_measurements = res.get("others", {}).get("inference_times")
+    if inference_measurements:
+        inference_file = pathlib.Path(config.result_file).parent / "inference_times.json"
+        json_dump(inference_measurements, inference_file, style="compact")
+        res["others"]["inference_times"] = str(inference_file)
     json_dump(res, config.result_file, style='compact')
diff --git a/frameworks/shared/caller.py b/frameworks/shared/caller.py
index c833b994b..8422b98d4 100644
--- a/frameworks/shared/caller.py
+++ b/frameworks/shared/caller.py
@@ -1,6 +1,7 @@
 import gc
 import logging
 import os
+import pathlib
 import re
 from tempfile import TemporaryDirectory, mktemp
 from typing import List, Optional, Union
@@ -11,6 +12,7 @@
 from amlb.data import Dataset
 from amlb.resources import config as rconfig
 from amlb.results import NoResultError, save_predictions
+from amlb.utils import json_dump, Namespace

 from .utils import Namespace as ns, Timer, dir_of, run_cmd, json_dumps, json_load, profile
 from .utils import is_serializable_data, deserialize_data, serialize_data
@@ -152,6 +154,13 @@ def run_in_venv(caller_file, script_file: str, *args,
             for name in ['predictions', 'truth', 'probabilities', 'optional_columns']:
                 res[name] = deserialize_data(res[name], config=ser_config) if res[name] is not None else None

+            inference_filepath = Namespace.dict(res.others).get("inference_times")
+            if inference_filepath:
+                inference_times = json_load(inference_filepath)
+                inference_filepath = pathlib.Path(res.output_file).parent / "inference.json"
+                json_dump(inference_times, inference_filepath)
+                res["others"]["inference_times"] = inference_times
+
             if callable(process_results):
                 res = process_results(res)

From 0c7bf7d91938006028e708291f8502eaa6c5ee9d Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 10:46:42 +0300
Subject: [PATCH 04/39] Also accept a dataframe to allow inference without disk load

---
 frameworks/AutoGluon/exec.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/frameworks/AutoGluon/exec.py b/frameworks/AutoGluon/exec.py
index 2a50ecf37..53f8a42f0 100644
--- a/frameworks/AutoGluon/exec.py
+++ b/frameworks/AutoGluon/exec.py
@@ -4,6 +4,8 @@
 import warnings
 import sys
 import tempfile
+from typing import Union
+
 warnings.simplefilter("ignore")

 if sys.platform == 'darwin':
@@ -65,26 +67,24 @@ def run(dataset, config):
         **training_params
     )

-    test_data = TabularDataset(test_path)
     # Persist model in memory that is going to be predicting to get correct inference latency
     predictor.persist_models('best')

-    def inference_time_classification(path: str):
-        data = TabularDataset(path)
+    def inference_time_classification(data: Union[str, pd.DataFrame]):
         return None, predictor.predict_proba(data, as_multiclass=True)

-    def inference_time_regression(path: str):
-        data = TabularDataset(path)
+    def inference_time_regression(data: Union[str, pd.DataFrame]):
         return predictor.predict(data, as_pandas=False), None

     infer = inference_time_classification if is_classification else inference_time_regression
+    inference_times = measure_inference_times(infer, dataset.inference_subsample_files)
+
+    test_data = TabularDataset(test_path)
     with Timer() as predict:
         predictions, probabilities = infer(test_data)
     if is_classification:
         predictions = probabilities.idxmax(axis=1).to_numpy()

-    inference_times = measure_inference_times(infer, dataset.inference_subsample_files)
-
     prob_labels = probabilities.columns.values.astype(str).tolist() if probabilities is not None else None

From df46e1b63cee2d8ca3f9ef88081e06940391f2e5 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 12:20:43 +0300
Subject: [PATCH 05/39] Make repeats and batch sizes configurable

---
 amlb/datasets/openml.py     | 3 ++-
 frameworks/shared/callee.py | 7 +++----
 resources/config.yaml       | 5 +++++
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/amlb/datasets/openml.py b/amlb/datasets/openml.py
index 445cf46c0..bccbd6318 100644
--- a/amlb/datasets/openml.py
+++ b/amlb/datasets/openml.py
@@ -96,7 +96,8 @@ def test(self):
     def inference_subsample_files(self, fmt: str) -> list[Tuple[int, str]]:
         return [
             (n, str(self._inference_subsample(fmt=fmt, n=n)))
-            for n in [1, 1000, 10_000]
+            for n in rconfig().inference_time_measurements.batch_sizes
+            for _ in range(rconfig().inference_time_measurements.repeats)
         ]
diff --git a/frameworks/shared/callee.py b/frameworks/shared/callee.py
index ca807024c..a77be3e18 100644
--- a/frameworks/shared/callee.py
+++ b/frameworks/shared/callee.py
@@ -98,8 +98,7 @@ def load_data(name, path, **_):
 def measure_inference_times(predict_fn: Callable[[str], Any], files: list[Tuple[int, str]]) -> dict[int, list[float]]:
     inference_times = defaultdict(list)
     for subsample_size, subsample_path in files:
-        for _ in range(10):
-            with Timer() as predict:
-                predict_fn(subsample_path)
-            inference_times[subsample_size].append(predict.duration)
+        with Timer() as predict:
+            predict_fn(subsample_path)
+        inference_times[subsample_size].append(predict.duration)
     return inference_times
\ No newline at end of file
diff --git a/resources/config.yaml b/resources/config.yaml
index 91ff68642..73bc81d64 100644
--- a/resources/config.yaml
+++ b/resources/config.yaml
@@ -84,6 +84,11 @@ results:  # configuration namespace for the results.csv file.
   global_lock_timeout: 5  # the timeout used to wait for the lock on the global results file.
   incremental_save: true  # if true save results after each job, otherwise save results only when all jobs are completed.

+inference_time_measurements:  # configuration namespace for performing additional inference time measurements on various batch sizes
+  enabled: true
+  batch_sizes: [1, 10, 100, 1000, 10000]  # the batch sizes for which inference speed should be measured
+  repeats: 100  # the number of times to repeat the inference measurement for each batch size
+
 openml:  # configuration namespace for openML.
   apikey: c1994bdb7ecb3c6f3c8f3b35f4b47f1f
   infer_dtypes: False

From 0621ee24ef6668910b196abef2ba75adb6e3f2b5 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 14:52:07 +0300
Subject: [PATCH 06/39] Forward inference measurement configuration through task config

---
 amlb/benchmark.py            | 4 +++-
 frameworks/AutoGluon/exec.py | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/amlb/benchmark.py b/amlb/benchmark.py
index d2c87ce6a..dee7c5c14 100644
--- a/amlb/benchmark.py
+++ b/amlb/benchmark.py
@@ -381,7 +381,7 @@ class TaskConfig:

     def __init__(self, name, fold, metrics, seed,
                  max_runtime_seconds, cores, max_mem_size_mb, min_vol_size_mb,
-                 input_dir, output_dir):
+                 input_dir, output_dir, measure_inference_time: bool = False):
         self.framework = None
         self.framework_params = None
         self.framework_version = None
@@ -397,6 +397,7 @@ def __init__(self, name, fold, metrics, seed,
         self.input_dir = input_dir
         self.output_dir = output_dir
         self.output_predictions_file = os.path.join(output_dir, "predictions.csv")
+        self.measure_inference_time = measure_inference_time
         self.ext = ns()  # used if frameworks require extra config points

     def __setattr__(self, name, value):
@@ -477,6 +478,7 @@ def __init__(self, benchmark: Benchmark, task_def, fold):
             min_vol_size_mb=task_def.min_vol_size_mb,
             input_dir=rconfig().input_dir,
             output_dir=benchmark.output_dirs.session,
+            measure_inference_time=rconfig().inference_time_measurements.enabled,
         )
         # allowing to override some task parameters through command line, e.g.: -Xt.max_runtime_seconds=60
         if rconfig()['t'] is not None:
diff --git a/frameworks/AutoGluon/exec.py b/frameworks/AutoGluon/exec.py
index 53f8a42f0..79c4d37d6 100644
--- a/frameworks/AutoGluon/exec.py
+++ b/frameworks/AutoGluon/exec.py
@@ -77,7 +77,9 @@ def inference_time_regression(data: Union[str, pd.DataFrame]):
         return predictor.predict(data, as_pandas=False), None

     infer = inference_time_classification if is_classification else inference_time_regression
-    inference_times = measure_inference_times(infer, dataset.inference_subsample_files)
+    inference_times = None
+    if config.measure_inference_time:
+        inference_times = measure_inference_times(infer, dataset.inference_subsample_files)

     test_data = TabularDataset(test_path)
     with Timer() as predict:

From 1a72ae16531a8d56f59ab71df693dfdf2901971e Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 15:08:54 +0300
Subject: [PATCH 07/39] Randomize samples within the same batch size

---
 amlb/datasets/openml.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/amlb/datasets/openml.py b/amlb/datasets/openml.py
index bccbd6318..e926b8e01 100644
--- a/amlb/datasets/openml.py
+++ b/amlb/datasets/openml.py
@@ -94,14 +94,15 @@ def test(self):
         return self._test

     def inference_subsample_files(self, fmt: str) -> list[Tuple[int, str]]:
+        seed = rget().seed(self.fold)
         return [
-            (n, str(self._inference_subsample(fmt=fmt, n=n)))
+            (n, str(self._inference_subsample(fmt=fmt, n=n, seed=seed + i)))
             for n in rconfig().inference_time_measurements.batch_sizes
-            for _ in range(rconfig().inference_time_measurements.repeats)
+            for i in range(rconfig().inference_time_measurements.repeats)
         ]

     @profile(logger=log)
-    def _inference_subsample(self, fmt: str, n: int) -> pathlib.Path:
+    def _inference_subsample(self, fmt: str, n: int, seed: int = 0) -> pathlib.Path:
         """ Write subset of `n` samples from the test split to disk in `fmt` format """
         # Just a hack for now; the splitters all work specifically with openml tasks.
         # The important thing is that we split to disk and can load it later.
@@ -114,7 +115,7 @@ def _inference_subsample(self, fmt: str, n: int) -> pathlib.Path:
         subsample = self._test.X.sample(
             n=n,
             replace=True,
-            random_state=rget().seed(self.fold)
+            random_state=seed,
         )

         _, test_path = self._get_split_paths()

From 7100391b7df7e4815775f8bca099d12bdc6d3102 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 15:17:04 +0300
Subject: [PATCH 08/39] Rename inference_X_rows column to infer_batch_size_X

---
 amlb/results.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/amlb/results.py b/amlb/results.py
index 48bd6e447..3d6cffd8c 100644
--- a/amlb/results.py
+++ b/amlb/results.py
@@ -447,7 +447,7 @@ def compute_score(self, result=None, meta_result=None):

         if inference_times := Namespace.get(meta_result, "inference_times"):
             for n_samples, measured_times in Namespace.dict(inference_times).items():
-                entry[f"inference_{n_samples}_rows"] = np.mean(measured_times)
+                entry[f"infer_batch_size_{n_samples}"] = np.mean(measured_times)

         result = self.get_result() if result is None else result
         scoring_errors = []

From f76e75550a9a67fc9c57f8167346ae930b379a96 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 15:23:55 +0300
Subject: [PATCH 09/39] Add docstring and move value checking of `fmt`

Moving the value check makes it less error-prone when the accepted values
change.
---
 amlb/datasets/openml.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/amlb/datasets/openml.py b/amlb/datasets/openml.py
index e926b8e01..1f8adde43 100644
--- a/amlb/datasets/openml.py
+++ b/amlb/datasets/openml.py
@@ -94,6 +94,14 @@ def test(self):
         return self._test

     def inference_subsample_files(self, fmt: str) -> list[Tuple[int, str]]:
+        """Generates subsamples of the test dataset in `fmt` data format.
+
+        We measure the inference time of the models for various batch sizes
+        (number of rows). We generate config.inference_time_measurements.repeats
+        subsamples for each of the config.inference_time_measurements.batch_sizes.
+        These subsamples are stored to file in the `fmt` format (parquet, arff, or csv).
+        The function returns a list of tuples of (batch size, file path).
+        """
         seed = rget().seed(self.fold)
         return [
             (n, str(self._inference_subsample(fmt=fmt, n=n, seed=seed + i)))
             for n in rconfig().inference_time_measurements.batch_sizes
@@ -106,9 +114,6 @@ def _inference_subsample(self, fmt: str, n: int, seed: int = 0) -> pathlib.Path:
         """ Write subset of `n` samples from the test split to disk in `fmt` format """
         # Just a hack for now; the splitters all work specifically with openml tasks.
         # The important thing is that we split to disk and can load it later.
-        if fmt not in ["csv", "arff", "parquet"]:
-            msg = f"{fmt=}, but must be one of 'csv', 'arff', or 'parquet'."
-            raise ValueError(msg)

         # We should consider taking a stratified sample if n is large enough,
         # since inference time might differ based on class
         subsample = self._test.X.sample(
             n=n,
             replace=True,
             random_state=seed,
         )

         _, test_path = self._get_split_paths()
         test_path = pathlib.Path(test_path)
         subsample_path = test_path.parent / f"{test_path.stem}_{n}.{fmt}"
         if fmt == "csv":
             subsample.to_csv(subsample_path, header=True, index=False)
         elif fmt == "arff":
             ArffSplitter(self)._save_split(
                 subsample,
                 subsample_path,
                 name=f"{self._oml_dataset.name}_inference_{self.fold}_{n}"
             )
         elif fmt == "parquet":
             subsample.to_parquet(subsample_path)
+        else:
+            msg = f"{fmt=}, but must be one of 'csv', 'arff', or 'parquet'."
+            raise ValueError(msg)

         return subsample_path

From bf8cbc750961a526e6968406c487e848e9b99bc0 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 18:02:11 +0300
Subject: [PATCH 10/39] Add inference time measurements for flaml

---
 frameworks/flaml/__init__.py |  3 ++-
 frameworks/flaml/exec.py     | 16 ++++++++++++++--
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/frameworks/flaml/__init__.py b/frameworks/flaml/__init__.py
index c911edf3b..bca1b6893 100644
--- a/frameworks/flaml/__init__.py
+++ b/frameworks/flaml/__init__.py
@@ -17,7 +17,8 @@ def run(dataset, config):
             X=dataset.test.X,
             y=dataset.test.y
         ),
-        problem_type=dataset.type.name
+        problem_type=dataset.type.name,
+        inference_subsample_files=dataset.inference_subsample_files(fmt="parquet"),
     )
     options = dict(
         serialization=dict(sparse_dataframe_deserialized_format='dense')
diff --git a/frameworks/flaml/exec.py b/frameworks/flaml/exec.py
index a8a5131af..d0acebde1 100644
--- a/frameworks/flaml/exec.py
+++ b/frameworks/flaml/exec.py
@@ -1,9 +1,11 @@
 import logging
 import os

+import pandas as pd
 from flaml import AutoML, __version__

-from frameworks.shared.callee import call_run, result, output_subdir
+from frameworks.shared.callee import call_run, result, output_subdir, \
+    measure_inference_times
 from frameworks.shared.utils import Timer

 log = logging.getLogger(__name__)
@@ -49,7 +51,16 @@ def run(dataset, config):
                 n_jobs=n_jobs,
                 log_file_name= flaml_log_file_name,
                 time_budget=time_budget, **training_params)
-
+
+    def infer(path: str):
+        data = pd.read_parquet(path)
+        predict_fn = aml.predict_proba if is_classification else aml.predict
+        return predict_fn(data)
+
+    inference_times = None
+    if config.measure_inference_time:
+        inference_times = measure_inference_times(infer, dataset.inference_subsample_files)
+
     with Timer() as predict:
         predictions = aml.predict(X_test)
     probabilities = aml.predict_proba(X_test) if is_classification else None
@@ -65,6 +76,7 @@ def run(dataset, config):
         training_duration=training.duration,
         predict_duration=predict.duration,
         probabilities_labels=labels,
+        inference_times=inference_times,
     )

From cb04989cbc638ecbb75dc00fde4b35f943e9f87f Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 21:53:58 +0300
Subject: [PATCH 11/39] Add inference time measurements

---
 frameworks/GAMA/__init__.py |  1 +
 frameworks/GAMA/exec.py     | 21 +++++++++++++++++----
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/frameworks/GAMA/__init__.py b/frameworks/GAMA/__init__.py
index f660e2f8f..5476600cb 100644
--- a/frameworks/GAMA/__init__.py
+++ b/frameworks/GAMA/__init__.py
@@ -22,6 +22,7 @@ def run(dataset: Dataset, config: TaskConfig):
             X=dataset.test.X,
             y=dataset.test.y
         ),
+        inference_subsample_files=dataset.inference_subsample_files(fmt="parquet"),
     )
     options = dict(
         serialization=dict(sparse_dataframe_deserialized_format='dense')
diff --git a/frameworks/GAMA/exec.py b/frameworks/GAMA/exec.py
index e0880bf34..54498c8ac 100644
--- a/frameworks/GAMA/exec.py
+++ b/frameworks/GAMA/exec.py
@@ -3,6 +3,8 @@
 import sys
 import tempfile as tmp

+import pandas as pd
+
 if sys.platform == 'darwin':
     os.environ['OBJC_DISABLE_INITIALIZE_FORK_SAFETY'] = 'YES'
 os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
@@ -18,7 +20,8 @@
 from gama.data_loading import file_to_pandas
 from gama import GamaClassifier, GamaRegressor, __version__

-from frameworks.shared.callee import call_run, result, output_subdir
+from frameworks.shared.callee import call_run, result, output_subdir, \
+    measure_inference_times
 from frameworks.shared.utils import Timer, touch
@@ -83,12 +86,21 @@ def run(dataset, config):
     # data = file_to_pandas(dataset.test.path, encoding='utf-8')
     # X_test, y_test = data.loc[:, data.columns != dataset.target], data.loc[:, dataset.target]

+    def infer(path: str):
+        test_data = pd.read_parquet(path)
+        predict_fn = gama_automl.predict_proba if is_classification else gama_automl.predict
+        return predict_fn(test_data)
+
+    inference_times = None
+    if config.measure_inference_time:
+        inference_times = measure_inference_times(infer, dataset.inference_subsample_files)
+
     with Timer() as predict_timer:
         predictions = gama_automl.predict(X_test)
+
+    probabilities = None
     if is_classification:
         probabilities = gama_automl.predict_proba(X_test)
-    else:
-        probabilities = None

     return result(
         output_file=config.output_predictions_file,
@@ -98,7 +110,8 @@ def run(dataset, config):
         target_is_encoded=False,
         models_count=len(gama_automl._final_pop),
         training_duration=training_timer.duration,
-        predict_duration=predict_timer.duration
+        predict_duration=predict_timer.duration,
+        inference_times=inference_times,
     )

From 8ea90338c214b19d5d5c4d39b201c87c9034824b Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 22:07:19 +0300
Subject: [PATCH 12/39] Add inference time measurements

---
 frameworks/lightautoml/__init__.py |  3 ++-
 frameworks/lightautoml/exec.py     | 15 ++++++++++++++-
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/frameworks/lightautoml/__init__.py b/frameworks/lightautoml/__init__.py
index 97c09fa0e..fedabacf3 100644
--- a/frameworks/lightautoml/__init__.py
+++ b/frameworks/lightautoml/__init__.py
@@ -21,7 +21,8 @@ def run(dataset: Dataset, config: TaskConfig):
         target=dict(
             name=dataset.target.name,
         ),
-        problem_type=dataset.type.name
+        problem_type=dataset.type.name,
+        inference_subsample_files=dataset.inference_subsample_files(fmt="parquet"),
     )
     options = dict(
         serialization=dict(sparse_dataframe_deserialized_format='dense')
diff --git a/frameworks/lightautoml/exec.py b/frameworks/lightautoml/exec.py
index aee255902..56cbe87c5 100644
--- a/frameworks/lightautoml/exec.py
+++ b/frameworks/lightautoml/exec.py
@@ -5,13 +5,16 @@

 import matplotlib
 import numpy as np
+import pandas as pd
+
 matplotlib.use("agg")  # no need for tk

 from lightautoml.tasks import Task
 from lightautoml.automl.presets.tabular_presets import TabularAutoML, TabularUtilizedAutoML
 from lightautoml import __version__

-from frameworks.shared.callee import call_run, result, output_subdir
+from frameworks.shared.callee import call_run, result, output_subdir, \
+    measure_inference_times
 from frameworks.shared.utils import Timer

 log = logging.getLogger(__name__)
@@ -37,6 +40,15 @@ def run(dataset, config):
     with Timer() as training:
         automl.fit_predict(train_data=df_train, roles={'target': label})

+    def infer(path: str):
+        batch = pd.read_parquet(path)
+        return automl.predict(batch)
+
+    inference_times = None
+    if config.measure_inference_time:
+        inference_times = measure_inference_times(infer,
+                                                  dataset.inference_subsample_files)
+
     X_test, y_test = dataset.test.X, dataset.test.y
     log.info("Predicting on the test set...")
     with Timer() as predict:
@@ -75,6 +87,7 @@ def run(dataset, config):
         predictions=predictions,
         training_duration=training.duration,
         predict_duration=predict.duration,
+        inference_times=inference_times,
     )

From 338fc94e2851d32eee337400a222ec5974f78895 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 22:09:39 +0300
Subject: [PATCH 13/39] Add inference time measurements

---
 frameworks/mljarsupervised/__init__.py |  3 ++-
 frameworks/mljarsupervised/exec.py     | 18 ++++++++++++++++--
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/frameworks/mljarsupervised/__init__.py b/frameworks/mljarsupervised/__init__.py
index 9bee9f4a5..3cd6003ce 100644
--- a/frameworks/mljarsupervised/__init__.py
+++ b/frameworks/mljarsupervised/__init__.py
@@ -19,7 +19,8 @@ def run(dataset: Dataset, config: TaskConfig):
             X=dataset.test.X,
             y=dataset.test.y
         ),
-        problem_type=dataset.type.name
+        problem_type=dataset.type.name,
+        inference_subsample_files=dataset.inference_subsample_files(fmt="parquet"),
     )
     options = dict(
         serialization=dict(sparse_dataframe_deserialized_format='dense')
diff --git a/frameworks/mljarsupervised/exec.py b/frameworks/mljarsupervised/exec.py
index 653d9cfd6..3287e5be3 100644
--- a/frameworks/mljarsupervised/exec.py
+++ b/frameworks/mljarsupervised/exec.py
@@ -4,12 +4,15 @@

 import numpy as np
 import matplotlib
+import pandas as pd
+
 matplotlib.use("agg")  # no need for tk

 import supervised
 from supervised.automl import AutoML

-from frameworks.shared.callee import call_run, result, output_subdir
+from frameworks.shared.callee import call_run, result, output_subdir, \
+    measure_inference_times
 from frameworks.shared.utils import Timer

 log = logging.getLogger(os.path.basename(__file__))
@@ -56,6 +59,16 @@ def run(dataset, config):

     with Timer() as training:
         automl.fit(X_train, y_train)

+    def infer(path: str):
+        batch = pd.read_parquet(path)
+        return automl.predict_all(batch)
+
+    inference_times = None
+    if config.measure_inference_time:
+        inference_times = measure_inference_times(infer,
+                                                  dataset.inference_subsample_files)
+
     with Timer() as predict:
         preds = automl.predict_all(X_test)

@@ -88,7 +101,8 @@ def run(dataset, config):
         probabilities_labels=probabilities_labels,
         models_count=len(automl._models),
         training_duration=training.duration,
-        predict_duration=predict.duration
+        predict_duration=predict.duration,
+        inference_times=inference_times,
     )

From 9b024e2a46c63ca849b237c50d47ac181ac33eef Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 22:11:44 +0300
Subject: [PATCH 14/39] Document shortcoming of measuring inference time for TPOT

---
 frameworks/TPOT/exec.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/frameworks/TPOT/exec.py b/frameworks/TPOT/exec.py
index a7e0858ed..ff445eae7 100644
--- a/frameworks/TPOT/exec.py
+++ b/frameworks/TPOT/exec.py
@@ -80,7 +80,10 @@ def infer(path):
                 return tpot.predict(data)
         return tpot.predict(data)

-    inference_times = measure_inference_times(infer, dataset.inference_subsample_files)
+    inference_times = None
+    if config.measure_inference_time:
+        log.info("TPOT inference time measurements exclude preprocessing time of AMLB.")
+        inference_times = measure_inference_times(infer, dataset.inference_subsample_files)

     try:
         probabilities = tpot.predict_proba(X_test) if is_classification else None

From 7b3455f8ffb0b04c8b3877378a60c3df9d0b6b2d Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 22:29:38 +0300
Subject: [PATCH 15/39] Add inference time measurement

---
 frameworks/autosklearn/__init__.py |  3 ++-
 frameworks/autosklearn/exec.py     | 21 +++++++++++++++++----
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/frameworks/autosklearn/__init__.py b/frameworks/autosklearn/__init__.py
index 3e31b6d64..92e059ef7 100644
--- a/frameworks/autosklearn/__init__.py
+++ b/frameworks/autosklearn/__init__.py
@@ -21,7 +21,8 @@ def run(dataset: Dataset, config: TaskConfig):
             X=X_test,
             y=y_test
         ),
-        predictors_type=['Numerical' if p.is_numerical() else 'Categorical' for p in dataset.predictors]
+        predictors_type=['Numerical' if p.is_numerical() else 'Categorical' for p in dataset.predictors],
+        inference_subsample_files=dataset.inference_subsample_files(fmt="parquet"),
     )

     return run_in_venv(__file__, "exec.py",
diff --git a/frameworks/autosklearn/exec.py b/frameworks/autosklearn/exec.py
index 11690e09c..39f301fc5 100644
--- a/frameworks/autosklearn/exec.py
+++ b/frameworks/autosklearn/exec.py
@@ -5,6 +5,8 @@
 import tempfile as tmp
 import warnings

+import pandas as pd
+
 os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
 os.environ['OMP_NUM_THREADS'] = '1'
 os.environ['OPENBLAS_NUM_THREADS'] = '1'
@@ -15,7 +17,8 @@
 import autosklearn.metrics as metrics
 from packaging import version

-from frameworks.shared.callee import call_run, result, output_subdir
+from frameworks.shared.callee import call_run, result, output_subdir, \
+    measure_inference_times
 from frameworks.shared.utils import Timer, system_memory_mb, walk_apply, zip_path

 log = logging.getLogger(__name__)
@@ -130,10 +133,18 @@ def run(dataset, config):
     with Timer() as training:
         auto_sklearn.fit(X_train, y_train, feat_type=predictors_type, **fit_extra_params)

+    def infer(path: str):
+        test_data = pd.read_parquet(path)
+        predict_fn = auto_sklearn.predict_proba if is_classification else auto_sklearn.predict
+        return predict_fn(test_data)
+
+    inference_times = None
+    if config.measure_inference_time:
+        inference_times = measure_inference_times(infer, dataset.inference_subsample_files)
+
     # Convert output to strings for classification
     log.info("Predicting on the test set.")
     X_test = dataset.test.X
-    y_test = dataset.test.y
     with Timer() as predict:
         predictions = auto_sklearn.predict(X_test)
         probabilities = auto_sklearn.predict_proba(X_test) if is_classification else None
@@ -142,12 +153,14 @@ def run(dataset, config):

     return result(output_file=config.output_predictions_file,
                   predictions=predictions,
-                  truth=y_test,
+                  truth=dataset.test.y,
                   probabilities=probabilities,
                   target_is_encoded=is_classification,
                   models_count=len(auto_sklearn.get_models_with_weights()),
                   training_duration=training.duration,
-                  predict_duration=predict.duration)
+                  predict_duration=predict.duration,
+                  inference_times=inference_times,
+                  )

From 573322338ccba61d4605961387a4b65969175632 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 22:30:02 +0300
Subject: [PATCH 16/39] Bump ubuntu base to 22.04

---
 amlb/runners/docker.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/amlb/runners/docker.py b/amlb/runners/docker.py
index 27cfedcf3..4dc601d63 100644
--- a/amlb/runners/docker.py
+++ b/amlb/runners/docker.py
@@ -116,7 +116,7 @@ def _upload_image(self, image):
         log.info(f"Successfully published docker image {image}.")

     def _generate_script(self, custom_commands):
-        docker_content = """FROM ubuntu:18.04
+        docker_content = """FROM ubuntu:22.04
 ENV DEBIAN_FRONTEND noninteractive
 RUN apt-get update

From 25a1bd443ac04096549fcecfb5463fb7534aee54 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 23:02:43 +0300
Subject: [PATCH 17/39] Add inference measurement for dataframe

---
 amlb/results.py              | 5 +++--
 frameworks/AutoGluon/exec.py | 9 +++++++--
 frameworks/shared/callee.py  | 7 +++++--
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/amlb/results.py b/amlb/results.py
index 3d6cffd8c..ab3568430 100644
--- a/amlb/results.py
+++ b/amlb/results.py
@@ -446,8 +446,9 @@ def compute_score(self, result=None, meta_result=None):
             entry[m] = meta_result[m] if m in meta_result else nan

         if inference_times := Namespace.get(meta_result, "inference_times"):
-            for n_samples, measured_times in Namespace.dict(inference_times).items():
-                entry[f"infer_batch_size_{n_samples}"] = np.mean(measured_times)
+            for data_type, measurements in Namespace.dict(inference_times).items():
+                for n_samples, measured_times in Namespace.dict(measurements).items():
+                    entry[f"infer_batch_size_{data_type}_{n_samples}"] = np.mean(measured_times)

         result = self.get_result() if result is None else result
         scoring_errors = []
diff --git a/frameworks/AutoGluon/exec.py b/frameworks/AutoGluon/exec.py
index 79c4d37d6..1884c7919 100644
--- a/frameworks/AutoGluon/exec.py
+++ b/frameworks/AutoGluon/exec.py
@@ -77,9 +77,14 @@ def inference_time_regression(data: Union[str, pd.DataFrame]):
         return predictor.predict(data, as_pandas=False), None

     infer = inference_time_classification if is_classification else inference_time_regression
-    inference_times = None
+    inference_times = {}
     if config.measure_inference_time:
-        inference_times = measure_inference_times(infer, dataset.inference_subsample_files)
+        inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files)
+        test_data = pd.read_parquet(dataset.test.path)
+        inference_times["df"] = measure_inference_times(
+            infer,
+            [(1, test_data.sample(1, random_state=i)) for i in range(100)],
+        )

     test_data = TabularDataset(test_path)
     with Timer() as predict:
diff --git a/frameworks/shared/callee.py b/frameworks/shared/callee.py
index a77be3e18..2f94e5262 100644
--- a/frameworks/shared/callee.py
+++ b/frameworks/shared/callee.py
@@ -5,7 +5,9 @@
 import signal
 import sys
 from collections import defaultdict
-from typing import Callable, Any, Tuple
+from typing import Callable, Any, Tuple, Union, TypeVar
+
+import pandas as pd

 from .utils import InterruptTimeout, Namespace as ns, json_dump, json_loads, kill_proc_tree, touch
 from .utils import deserialize_data, serialize_data, Timer
@@ -95,7 +97,8 @@ def load_data(name, path, **_):
         res["others"]["inference_times"] = str(inference_file)
     json_dump(res, config.result_file, style='compact')

-def measure_inference_times(predict_fn: Callable[[str], Any], files: list[Tuple[int, str]]) -> dict[int, list[float]]:
+DATA_INPUT = TypeVar("DATA_INPUT", bound=Union[str, pd.DataFrame])
+def measure_inference_times(predict_fn: Callable[[DATA_INPUT], Any], files: list[Tuple[int, DATA_INPUT]]) -> dict[int, list[float]]:
     inference_times = defaultdict(list)
     for subsample_size, subsample_path in files:
         with Timer() as predict:

From a6f7a5213e273fd7d7a8d3c1ffa7bc1b0a1d8a4a Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 23:07:45 +0300
Subject: [PATCH 18/39] Default to displaying the median inference time

This mitigates the effect of outliers, such as cold-start runs.
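A quick illustration of why the median is the safer aggregate here, using made-up timings rather than measured data: with a single cold-start outlier among the repeats, the mean is dominated by it while the median stays at the steady-state latency.

    import numpy as np

    # Hypothetical timings (seconds) for one batch size; the first
    # call is a cold start, the rest are steady-state.
    timings = [2.31, 0.05, 0.04, 0.05, 0.04, 0.05]
    print(np.mean(timings))    # ~0.42, dominated by the cold start
    print(np.median(timings))  # 0.05, the steady-state latency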
---
 amlb/results.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/amlb/results.py b/amlb/results.py
index ab3568430..6e1a5bc60 100644
--- a/amlb/results.py
+++ b/amlb/results.py
@@ -448,7 +448,7 @@ def compute_score(self, result=None, meta_result=None):
         if inference_times := Namespace.get(meta_result, "inference_times"):
             for data_type, measurements in Namespace.dict(inference_times).items():
                 for n_samples, measured_times in Namespace.dict(measurements).items():
-                    entry[f"infer_batch_size_{data_type}_{n_samples}"] = np.mean(measured_times)
+                    entry[f"infer_batch_size_{data_type}_{n_samples}"] = np.median(measured_times)

         result = self.get_result() if result is None else result
         scoring_errors = []

From 94f11f6b12fadb67da936a30e09dce488d573552 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 23:17:19 +0300
Subject: [PATCH 19/39] Add seed to filename

Otherwise only one file is actually kept and used for experiments, thus
not actually mitigating the variance of sampling.
---
 amlb/datasets/openml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/amlb/datasets/openml.py b/amlb/datasets/openml.py
index 1f8adde43..1a5d0f805 100644
--- a/amlb/datasets/openml.py
+++ b/amlb/datasets/openml.py
@@ -125,7 +125,7 @@ def _inference_subsample(self, fmt: str, n: int, seed: int = 0) -> pathlib.Path:

         _, test_path = self._get_split_paths()
         test_path = pathlib.Path(test_path)
-        subsample_path = test_path.parent / f"{test_path.stem}_{n}.{fmt}"
+        subsample_path = test_path.parent / f"{test_path.stem}_{n}_{seed}.{fmt}"
         if fmt == "csv":
             subsample.to_csv(subsample_path, header=True, index=False)
         elif fmt == "arff":

From 4119d5664e2b48c6b55bae135dc7d5ebc965228d Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 23:41:48 +0300
Subject: [PATCH 20/39] Start with inference measurements (broken)

---
 amlb/datasets/openml.py          |  9 +++++----
 frameworks/H2OAutoML/__init__.py | 13 ++-----------
 frameworks/H2OAutoML/exec.py     | 18 ++++++++++++++++--
 3 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/amlb/datasets/openml.py b/amlb/datasets/openml.py
index 1a5d0f805..3471fe7eb 100644
--- a/amlb/datasets/openml.py
+++ b/amlb/datasets/openml.py
@@ -93,7 +93,7 @@ def test(self):
         self._ensure_split_created()
         return self._test

-    def inference_subsample_files(self, fmt: str) -> list[Tuple[int, str]]:
+    def inference_subsample_files(self, fmt: str, with_labels: bool = False) -> list[Tuple[int, str]]:
         """Generates subsamples of the test dataset in `fmt` data format.
         seed = rget().seed(self.fold)
         return [
-            (n, str(self._inference_subsample(fmt=fmt, n=n, seed=seed + i)))
+            (n, str(self._inference_subsample(fmt=fmt, n=n, seed=seed + i, with_labels=with_labels)))
             for n in rconfig().inference_time_measurements.batch_sizes
             for i in range(rconfig().inference_time_measurements.repeats)
         ]

     @profile(logger=log)
-    def _inference_subsample(self, fmt: str, n: int, seed: int = 0) -> pathlib.Path:
+    def _inference_subsample(self, fmt: str, n: int, seed: int = 0, with_labels: bool = False) -> pathlib.Path:
         """ Write subset of `n` samples from the test split to disk in `fmt` format """
         # Just a hack for now; the splitters all work specifically with openml tasks.
         # The important thing is that we split to disk and can load it later.

         # We should consider taking a stratified sample if n is large enough,
         # since inference time might differ based on class
-        subsample = self._test.X.sample(
+        test = self._test.data if with_labels else self._test.X
+        subsample = test.sample(
             n=n,
             replace=True,
             random_state=seed,
diff --git a/frameworks/H2OAutoML/__init__.py b/frameworks/H2OAutoML/__init__.py
index 2b45dc6d3..ce51582ef 100644
--- a/frameworks/H2OAutoML/__init__.py
+++ b/frameworks/H2OAutoML/__init__.py
@@ -8,25 +8,16 @@ def setup(*args, **kwargs):
     call_script_in_same_dir(__file__, "setup.sh", *args, **kwargs)

-# def version():
-#     from frameworks.shared.caller import run_cmd_in_venv
-#     out, err = run_cmd_in_venv(__file__, """{py} -c "from h2o import __version__; print(__version__)" | grep "^\d\." """)
-#     if err:
-#         raise ValueError(err)
-#     return out
-
-
 def run(dataset: Dataset, config: TaskConfig):
     from frameworks.shared.caller import run_in_venv
-
     data = dict(
         train=dict(path=dataset.train.path),
         test=dict(path=dataset.test.path),
         target=dict(index=dataset.target.index),
         domains=dict(cardinalities=[0 if f.values is None else len(f.values) for f in dataset.features]),
-        format=dataset.train.format
+        format=dataset.train.format,
+        inference_subsample_files=dataset.inference_subsample_files(fmt=dataset.train.format, with_labels=True),
     )
-
     config.ext.monitoring = rconfig().monitoring
     return run_in_venv(__file__, "exec.py",
                        input_data=data, dataset=dataset, config=config)
diff --git a/frameworks/H2OAutoML/exec.py b/frameworks/H2OAutoML/exec.py
index 026d1d062..5a9e9d7fa 100644
--- a/frameworks/H2OAutoML/exec.py
+++ b/frameworks/H2OAutoML/exec.py
@@ -1,6 +1,8 @@
 import contextlib
 import logging
 import os
+import pathlib
+
 import psutil
 import re
@@ -10,7 +12,8 @@
 import h2o
 from h2o.automl import H2OAutoML

-from frameworks.shared.callee import FrameworkError, call_run, output_subdir, result
+from frameworks.shared.callee import FrameworkError, call_run, output_subdir, result, \
+    measure_inference_times
 from frameworks.shared.utils import Monitoring, Namespace as ns, Timer, clean_dir, touch, zip_path

 log = logging.getLogger(__name__)
@@ -115,6 +118,16 @@ def run(dataset, config):
         if not aml.leader:
             raise FrameworkError("H2O could not produce any model in the requested time.")

+        def infer(path: str):
+            filename = pathlib.Path(path).name
+            batch = h2o.import_file(path, destination_frame=frame_name(filename, config), **import_kwargs)
+            return aml.predict(batch)
+
+        inference_times = None
+        if config.measure_inference_time:
+            inference_times = measure_inference_times(infer,
+                                                      dataset.inference_subsample_files)
+
         with Timer() as predict:
             preds = aml.predict(test)
@@ -129,7 +142,8 @@ def run(dataset, config):
             probabilities_labels=preds.probabilities_labels,
             models_count=len(aml.leaderboard),
             training_duration=training.duration,
-            predict_duration=predict.duration
+            predict_duration=predict.duration,
+            inference_times=inference_times,
         )

     finally:

From 0000c63cce54301eae940213c1ca957fdb858c20 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 14 Jun 2023 11:10:18 +0300
Subject: [PATCH 21/39] Add inference measurement on dataframe

---
 frameworks/GAMA/exec.py | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/frameworks/GAMA/exec.py b/frameworks/GAMA/exec.py
index 54498c8ac..12fa75111 100644
--- a/frameworks/GAMA/exec.py
+++ b/frameworks/GAMA/exec.py
@@ -75,27 +75,24 @@ def run(dataset, config):
     gama_automl = estimator(**kwargs)

     X_train, y_train = dataset.train.X, dataset.train.y
-    # data = file_to_pandas(dataset.train.path, encoding='utf-8')
-    # X_train, y_train = data.loc[:, data.columns != dataset.target], data.loc[:, dataset.target]
-
     with Timer() as training_timer:
         gama_automl.fit(X_train, y_train)

     log.info('Predicting on the test set.')
-    X_test, y_test = dataset.test.X, dataset.test.y
-    # data = file_to_pandas(dataset.test.path, encoding='utf-8')
-    # X_test, y_test = data.loc[:, data.columns != dataset.target], data.loc[:, dataset.target]
-
-    def infer(path: str):
-        test_data = pd.read_parquet(path)
+    def infer(data):
+        test_data = pd.read_parquet(data) if isinstance(data, str) else data
         predict_fn = gama_automl.predict_proba if is_classification else gama_automl.predict
         return predict_fn(test_data)

-    inference_times = None
+    inference_times = {}
     if config.measure_inference_time:
-        inference_times = measure_inference_times(infer, dataset.inference_subsample_files)
-
+        inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files)
+        inference_times["df"] = measure_inference_times(
+            infer,
+            [(1, dataset.test.X.sample(1, random_state=i)) for i in range(100)],
+        )
     with Timer() as predict_timer:
+        X_test, y_test = dataset.test.X, dataset.test.y
         predictions = gama_automl.predict(X_test)

     probabilities = None

From 1d2054a3d7d97938bfcc65746d4d084deff08155 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 14 Jun 2023 11:22:01 +0300
Subject: [PATCH 22/39] Add inference time measurement with dataframes

---
 frameworks/flaml/exec.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/frameworks/flaml/exec.py b/frameworks/flaml/exec.py
index d0acebde1..510f02b9c 100644
--- a/frameworks/flaml/exec.py
+++ b/frameworks/flaml/exec.py
@@ -1,5 +1,6 @@
 import logging
 import os
+from typing import Union

 import pandas as pd
 from flaml import AutoML, __version__
@@ -15,7 +16,6 @@ def run(dataset, config):
     log.info(f"\n**** FLAML [v{__version__}] ****\n")

     X_train, y_train = dataset.train.X, dataset.train.y.squeeze()
-    X_test, y_test = dataset.test.X, dataset.test.y.squeeze()

     is_classification = config.type == 'classification'
     time_budget = config.max_runtime_seconds
@@ -52,16 +52,21 @@ def run(dataset, config):
                 log_file_name= flaml_log_file_name,
                 time_budget=time_budget, **training_params)

-    def infer(path: str):
-        data = pd.read_parquet(path)
+    def infer(data: Union[str, pd.DataFrame]):
+        data = pd.read_parquet(data) if isinstance(data, str) else data
         predict_fn = aml.predict_proba if is_classification else aml.predict
         return predict_fn(data)

-    inference_times = None
+    inference_times = {}
     if config.measure_inference_time:
-        inference_times = measure_inference_times(infer, dataset.inference_subsample_files)
+        inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files)
+        inference_times["df"] = measure_inference_times(
+            infer,
+            [(1, dataset.test.X.sample(1, random_state=i)) for i in range(100)],
+        )

     with Timer() as predict:
+        X_test, y_test = dataset.test.X, dataset.test.y.squeeze()
         predictions = aml.predict(X_test)
     probabilities = aml.predict_proba(X_test) if is_classification else None
     labels = None

From 357a63fd9087038bc6b3e93e71d1fbd6e4e30fa7 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 14 Jun 2023 11:40:52 +0300
Subject: [PATCH 23/39] Add dataframe inference time measurement

---
 frameworks/mljarsupervised/exec.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/frameworks/mljarsupervised/exec.py b/frameworks/mljarsupervised/exec.py
index 3287e5be3..66d2c6d64 100644
--- a/frameworks/mljarsupervised/exec.py
+++ b/frameworks/mljarsupervised/exec.py
@@ -1,6 +1,7 @@
 import os
 import shutil
 import logging
+from typing import Union

 import numpy as np
 import matplotlib
@@ -45,7 +46,6 @@ def run(dataset, config):
     }

     X_train, y_train = dataset.train.X, dataset.train.y.squeeze()
-    X_test, y_test = dataset.test.X, dataset.test.y.squeeze()

     automl = AutoML(
         results_path=results_path,
@@ -60,16 +60,20 @@ def run(dataset, config):
         automl.fit(X_train, y_train)

-    def infer(path: str):
-        batch = pd.read_parquet(path)
+    def infer(data: Union[str, pd.DataFrame]):
+        batch = pd.read_parquet(data) if isinstance(data, str) else data
         return automl.predict_all(batch)

-    inference_times = None
+    inference_times = {}
     if config.measure_inference_time:
-        inference_times = measure_inference_times(infer,
-                                                  dataset.inference_subsample_files)
+        inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files)
+        inference_times["df"] = measure_inference_times(
+            infer,
+            [(1, dataset.test.X.sample(1, random_state=i)) for i in range(100)],
+        )

     with Timer() as predict:
+        X_test, y_test = dataset.test.X, dataset.test.y.squeeze()
         preds = automl.predict_all(X_test)

     predictions, probabilities, probabilities_labels = None, None, None

From 2dd1ffc7d26eaf3c800860a17c6b4fb221dcb175 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 14 Jun 2023 11:42:33 +0300
Subject: [PATCH 24/39] Add dataframe inference time measurement

---
 frameworks/GAMA/exec.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/frameworks/GAMA/exec.py b/frameworks/GAMA/exec.py
index 12fa75111..d9e89a8e8 100644
--- a/frameworks/GAMA/exec.py
+++ b/frameworks/GAMA/exec.py
@@ -2,6 +2,7 @@
 import os
 import sys
 import tempfile as tmp
+from typing import Union

 import pandas as pd

@@ -79,7 +80,7 @@ def run(dataset, config):
         gama_automl.fit(X_train, y_train)

     log.info('Predicting on the test set.')
-    def infer(data):
+    def infer(data: Union[str, pd.DataFrame]):
         test_data = pd.read_parquet(data) if isinstance(data, str) else data
         predict_fn = gama_automl.predict_proba if is_classification else gama_automl.predict
         return predict_fn(test_data)

From 1e5a61dd965768fdb3b79f0709b384e705a6b05c Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 14 Jun 2023 11:58:53 +0300
Subject: [PATCH 25/39] Add dataframe inference measurement (ignores encoding)

---
 frameworks/TPOT/exec.py | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/frameworks/TPOT/exec.py b/frameworks/TPOT/exec.py
index ff445eae7..56e72609e 100644
--- a/frameworks/TPOT/exec.py
+++ b/frameworks/TPOT/exec.py
@@ -5,6 +5,7 @@
 import tempfile as tmp

 import pandas as pd
+from numpy.random import default_rng

 if sys.platform == 'darwin':
     os.environ['OBJC_DISABLE_INITIALIZE_FORK_SAFETY'] = 'YES'
@@ -65,14 +66,8 @@ def run(dataset, config):
     with Timer() as training:
         tpot.fit(X_train, y_train)

-    log.info('Predicting on the test set.')
-    X_test = dataset.test.X
-    y_test = dataset.test.y
-    with Timer() as predict:
-        predictions = tpot.predict(X_test)
-
-    def infer(path):
-        data = pd.read_parquet(path)
+    def infer(data):
+        data = pd.read_parquet(data) if isinstance(data, str) else data
         if is_classification:
             try:
                 return tpot.predict_proba(data)
@@ -80,10 +75,22 @@ def infer(path):
                 return tpot.predict(data)
         return tpot.predict(data)

-    inference_times = None
+    inference_times = {}
     if config.measure_inference_time:
         log.info("TPOT inference time measurements exclude preprocessing time of AMLB.")
-        inference_times = measure_inference_times(infer, dataset.inference_subsample_files)
+        inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files)
+        inference_times["df"] = measure_inference_times(
+            infer, [
+                (1, dataset.test.X[default_rng(seed=i).integers(len(dataset.test.X)), :].reshape(1, -1))
+                for i in range(100)
+            ],
+        )
+
+    log.info('Predicting on the test set.')
+    y_test = dataset.test.y
+    with Timer() as predict:
+        X_test = dataset.test.X
+        predictions = tpot.predict(X_test)

     try:
         probabilities = tpot.predict_proba(X_test) if is_classification else None

From b48c5edc4c91c96cbf2ce74626b74fb2a7da43c1 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 14 Jun 2023 12:04:59 +0300
Subject: [PATCH 26/39] Add dataframe inference measurement

---
 frameworks/autosklearn/exec.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/frameworks/autosklearn/exec.py b/frameworks/autosklearn/exec.py
index 39f301fc5..e7ca55bab 100644
--- a/frameworks/autosklearn/exec.py
+++ b/frameworks/autosklearn/exec.py
@@ -4,8 +4,10 @@
 import shutil
 import tempfile as tmp
 import warnings
+from typing import Union

 import pandas as pd
+from numpy.random import default_rng

 os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
 os.environ['OMP_NUM_THREADS'] = '1'
 os.environ['OPENBLAS_NUM_THREADS'] = '1'
@@ -133,19 +135,25 @@ def run(dataset, config):
     with Timer() as training:
         auto_sklearn.fit(X_train, y_train, feat_type=predictors_type, **fit_extra_params)

-    def infer(path: str):
-        test_data = pd.read_parquet(path)
+    def infer(data: Union[str, pd.DataFrame]):
+        test_data = pd.read_parquet(data) if isinstance(data, str) else data
         predict_fn = auto_sklearn.predict_proba if is_classification else auto_sklearn.predict
         return predict_fn(test_data)

-    inference_times = None
+    inference_times = {}
     if config.measure_inference_time:
-        inference_times = measure_inference_times(infer, dataset.inference_subsample_files)
+        inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files)
+        inference_times["df"] = measure_inference_times(
+            infer, [
+                (1, dataset.test.X[default_rng(seed=i).integers(len(dataset.test.X)), :].reshape(1, -1))
+                for i in range(100)
+            ],
+        )

     # Convert output to strings for classification
     log.info("Predicting on the test set.")
-    X_test = dataset.test.X
     with Timer() as predict:
+        X_test = dataset.test.X
         predictions = auto_sklearn.predict(X_test)
         probabilities = auto_sklearn.predict_proba(X_test) if is_classification else None

From 42d38aca074f59672de655e032655da0af3f49b2 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 14 Jun 2023 12:38:19 +0300
Subject: [PATCH 27/39] Add inference time measurements

It seems lightautoml inference is considerably slower than that of any
other framework.
--- frameworks/lightautoml/exec.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/frameworks/lightautoml/exec.py b/frameworks/lightautoml/exec.py index 56cbe87c5..01217abd1 100644 --- a/frameworks/lightautoml/exec.py +++ b/frameworks/lightautoml/exec.py @@ -2,6 +2,7 @@ import os import pickle import warnings +from typing import Union import matplotlib import numpy as np @@ -40,18 +41,21 @@ def run(dataset, config): with Timer() as training: automl.fit_predict(train_data=df_train, roles={'target': label}) - def infer(path: str): - batch = pd.read_parquet(path) + def infer(data: Union[str, pd.DataFrame]): + batch = pd.read_parquet(data) if isinstance(data, str) else data return automl.predict(batch) - inference_times = None + inference_times = {} if config.measure_inference_time: - inference_times = measure_inference_times(infer, - dataset.inference_subsample_files) + inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files) + inference_times["df"] = measure_inference_times( + infer, + [(1, dataset.test.X.sample(1, random_state=i)) for i in range(100)], + ) - X_test, y_test = dataset.test.X, dataset.test.y log.info("Predicting on the test set...") with Timer() as predict: + X_test, y_test = dataset.test.X, dataset.test.y preds = automl.predict(X_test).data probabilities_labels = None From 4a38f6cad3cc66e561cacac38b9dd9944d7180b1 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 14 Jun 2023 13:04:25 +0300 Subject: [PATCH 28/39] Add inference time measurements --- frameworks/H2OAutoML/exec.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/frameworks/H2OAutoML/exec.py b/frameworks/H2OAutoML/exec.py index 5a9e9d7fa..f6603c886 100644 --- a/frameworks/H2OAutoML/exec.py +++ b/frameworks/H2OAutoML/exec.py @@ -123,10 +123,16 @@ def infer(path: str): batch = h2o.import_file(path, destination_frame=frame_name(filename, config), **import_kwargs) return aml.predict(batch) - inference_times = None + inference_times = {} if config.measure_inference_time: - inference_times = measure_inference_times(infer, - dataset.inference_subsample_files) + # H2O can't do inference on single row arff: + # https://github.com/h2oai/h2o-3/issues/15572 + without_single_row_files = [ + (subsample_size, subsample_path) + for subsample_size, subsample_path in dataset.inference_subsample_files + if subsample_size > 1 + ] + inference_times["file"] = measure_inference_times(infer, without_single_row_files) with Timer() as predict: preds = aml.predict(test) From 6c9ccb36bb5d7ca235064517dcc8cd5d621c6a26 Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Fri, 16 Jun 2023 10:04:04 +0300 Subject: [PATCH 29/39] Allow newer autosklearn versions to use the pandas data instead (#534) --- frameworks/autosklearn/__init__.py | 14 ++++++++------ frameworks/autosklearn/exec.py | 28 ++++++++++++++++++---------- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/frameworks/autosklearn/__init__.py b/frameworks/autosklearn/__init__.py index 92e059ef7..a00a7d833 100644 --- a/frameworks/autosklearn/__init__.py +++ b/frameworks/autosklearn/__init__.py @@ -10,16 +10,18 @@ def setup(*args, **kwargs): def run(dataset: Dataset, config: TaskConfig): from frameworks.shared.caller import run_in_venv - X_train, X_test = dataset.train.X_enc, dataset.test.X_enc - y_train, y_test = unsparsify(dataset.train.y_enc, dataset.test.y_enc) data = dict( train=dict( - X=X_train, - y=y_train + X=dataset.train.X, + y=dataset.train.y, + 
X_enc=dataset.train.X_enc, + y_enc=unsparsify(dataset.train.y_enc), ), test=dict( - X=X_test, - y=y_test + X=dataset.test.X, + y=dataset.test.y, + X_enc=dataset.test.X_enc, + y_enc=unsparsify(dataset.test.y_enc), ), predictors_type=['Numerical' if p.is_numerical() else 'Categorical' for p in dataset.predictors], inference_subsample_files=dataset.inference_subsample_files(fmt="parquet"), diff --git a/frameworks/autosklearn/exec.py b/frameworks/autosklearn/exec.py index e7ca55bab..6c98fc199 100644 --- a/frameworks/autosklearn/exec.py +++ b/frameworks/autosklearn/exec.py @@ -67,8 +67,9 @@ def run(dataset, config): ) log.info("Environment: %s", os.environ) - X_train = dataset.train.X - y_train = dataset.train.y + use_pandas = askl_version >= version.parse("0.15") + X_train = dataset.train.X if use_pandas else dataset.train.X_enc + y_train = dataset.train.y if use_pandas else dataset.train.y_enc predictors_type = dataset.predictors_type log.debug("predictors_type=%s", predictors_type) @@ -123,6 +124,10 @@ def run(dataset, config): else: fit_extra_params['metric'] = perf_metric + if not use_pandas: + fit_extra_params["feat_type"] = predictors_type + + constr_params["time_left_for_this_task"] = config.max_runtime_seconds constr_params["n_jobs"] = n_jobs constr_params["seed"] = config.seed @@ -133,7 +138,7 @@ def run(dataset, config): auto_sklearn = estimator(**constr_params, **training_params) with Timer() as training: - auto_sklearn.fit(X_train, y_train, feat_type=predictors_type, **fit_extra_params) + auto_sklearn.fit(X_train, y_train, **fit_extra_params) def infer(data: Union[str, pd.DataFrame]): test_data = pd.read_parquet(data) if isinstance(data, str) else data @@ -143,17 +148,20 @@ def infer(data: Union[str, pd.DataFrame]): inference_times = {} if config.measure_inference_time: inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files) + test_data = dataset.test.X if use_pandas else dataset.test.X_enc + def sample_one_test_row(seed: int): + if use_pandas: + return test_data.sample(1, random_state=seed) + return test_data[default_rng(seed=seed).integers(len(test_data)), :] + inference_times["df"] = measure_inference_times( - infer, [ - (1, dataset.test.X[default_rng(seed=i).integers(len(dataset.test.X)), :].reshape(1, -1)) - for i in range(100) - ], + infer, [(1, sample_one_test_row(seed=i)) for i in range(100)], ) # Convert output to strings for classification log.info("Predicting on the test set.") with Timer() as predict: - X_test = dataset.test.X + X_test = dataset.test.X if use_pandas else dataset.test.X_enc predictions = auto_sklearn.predict(X_test) probabilities = auto_sklearn.predict_proba(X_test) if is_classification else None @@ -161,9 +169,9 @@ def infer(data: Union[str, pd.DataFrame]): return result(output_file=config.output_predictions_file, predictions=predictions, - truth=dataset.test.y, + truth=dataset.test.y if use_pandas else dataset.test.y_enc, probabilities=probabilities, - target_is_encoded=is_classification, + target_is_encoded=is_classification and not use_pandas, models_count=len(auto_sklearn.get_models_with_weights()), training_duration=training.duration, predict_duration=predict.duration, From b24c12c801698b22dd583fe771b9dc2f2fc714e1 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 16 Jun 2023 15:45:33 +0300 Subject: [PATCH 30/39] Add single row file inference for H2O --- frameworks/H2OAutoML/exec.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/frameworks/H2OAutoML/exec.py 
b/frameworks/H2OAutoML/exec.py index f6603c886..d0288903a 100644 --- a/frameworks/H2OAutoML/exec.py +++ b/frameworks/H2OAutoML/exec.py @@ -120,19 +120,14 @@ def run(dataset, config): def infer(path: str): filename = pathlib.Path(path).name - batch = h2o.import_file(path, destination_frame=frame_name(filename, config), **import_kwargs) + # H2O can't do inference on single row arff, it needs columns explicitly: + # https://github.com/h2oai/h2o-3/issues/15572 + batch = h2o.import_file(path, col_names=train.names, destination_frame=frame_name(filename, config), **import_kwargs) return aml.predict(batch) inference_times = {} if config.measure_inference_time: - # H2O can't do inference on single row arff: - # https://github.com/h2oai/h2o-3/issues/15572 - without_single_row_files = [ - (subsample_size, subsample_path) - for subsample_size, subsample_path in dataset.inference_subsample_files - if subsample_size > 1 - ] - inference_times["file"] = measure_inference_times(infer, without_single_row_files) + inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files) with Timer() as predict: preds = aml.predict(test) From d5da2bd63cfae3d066d2d8f76223125667ed3bbd Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 16 Jun 2023 15:47:19 +0300 Subject: [PATCH 31/39] Update inference measurement to record its from file --- frameworks/constantpredictor/exec.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/frameworks/constantpredictor/exec.py b/frameworks/constantpredictor/exec.py index 4a2c7cf9d..332de6592 100644 --- a/frameworks/constantpredictor/exec.py +++ b/frameworks/constantpredictor/exec.py @@ -35,7 +35,8 @@ def infer(path): data = pd.read_parquet(path) return predictor.predict(data) - inference_times = measure_inference_times(infer, dataset.inference_subsample_files(fmt="parquet")) + inference_times = {} + inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files(fmt="parquet")) save_predictions(dataset=dataset, output_file=config.output_predictions_file, From cdb08284ea20af4b2f6c0a2f39b65d5b61155743 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 16 Jun 2023 19:28:09 +0300 Subject: [PATCH 32/39] Dynamically set type depending on presence of pandas --- frameworks/shared/callee.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/frameworks/shared/callee.py b/frameworks/shared/callee.py index 2f94e5262..84fe1c6dc 100644 --- a/frameworks/shared/callee.py +++ b/frameworks/shared/callee.py @@ -5,13 +5,12 @@ import signal import sys from collections import defaultdict -from typing import Callable, Any, Tuple, Union, TypeVar - -import pandas as pd +from typing import Callable, Any, Tuple, Union, TypeVar, TYPE_CHECKING from .utils import InterruptTimeout, Namespace as ns, json_dump, json_loads, kill_proc_tree, touch from .utils import deserialize_data, serialize_data, Timer + log = logging.getLogger(__name__) @@ -97,7 +96,14 @@ def load_data(name, path, **_): res["others"]["inference_times"] = str(inference_file) json_dump(res, config.result_file, style='compact') -DATA_INPUT = TypeVar("DATA_INPUT", bound=Union[str, pd.DataFrame]) +try: + import pandas as pd + DATA_TYPES = Union[str, pd.DataFrame] +except ImportError: + DATA_TYPES = str + +DATA_INPUT = TypeVar("DATA_INPUT", bound=DATA_TYPES) + def measure_inference_times(predict_fn: Callable[[DATA_INPUT], Any], files: list[Tuple[int, DATA_INPUT]]) -> dict[int, list[float]]: inference_times = defaultdict(list) for subsample_size, 
subsample_path in files: From 4c14dbee45585306eb37407a987cc979bfa0aa16 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 16 Jun 2023 19:43:34 +0300 Subject: [PATCH 33/39] Add time to job timeout if inference measurements enabled --- amlb/benchmark.py | 7 +++++-- resources/config.yaml | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/amlb/benchmark.py b/amlb/benchmark.py index dee7c5c14..2f913a8df 100644 --- a/amlb/benchmark.py +++ b/amlb/benchmark.py @@ -404,8 +404,11 @@ def __setattr__(self, name, value): if name == 'metrics': self.metric = value[0] if isinstance(value, list) else value elif name == 'max_runtime_seconds': - self.job_timeout_seconds = min(value * 2, - value + rconfig().benchmarks.overhead_time_seconds) + inference_time_extension = 0 + if rconfig().inference_time_measurements.enabled: + inference_time_extension = rconfig().inference_time_measurements.additional_job_time + self.job_timeout_seconds = min(value * 2 + inference_time_extension, + value + rconfig().benchmarks.overhead_time_seconds + inference_time_extension) super().__setattr__(name, value) def __json__(self): diff --git a/resources/config.yaml b/resources/config.yaml index 73bc81d64..5fbbd968b 100644 --- a/resources/config.yaml +++ b/resources/config.yaml @@ -88,6 +88,7 @@ inference_time_measurements: # configuration namespace for performing additiona enabled: true batch_sizes: [1, 10, 100, 1000, 10000] # the batch sizes for which inference speed should be measured repeats: 100 # the number of times to repeat the inference measurement for each batch size + additional_job_time: 300 # the time in seconds that will be added to the maximum job time if inference time is measured openml: # configuration namespace for openML. apikey: c1994bdb7ecb3c6f3c8f3b35f4b47f1f From acae403c5f7842ffd5fd73f17fbcb1632928e699 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 16 Jun 2023 20:56:41 +0300 Subject: [PATCH 34/39] Disable inference time measurements for CI --- .github/workflows/run_all_frameworks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_all_frameworks.yml b/.github/workflows/run_all_frameworks.yml index b11ce4629..ed772acab 100644 --- a/.github/workflows/run_all_frameworks.yml +++ b/.github/workflows/run_all_frameworks.yml @@ -156,6 +156,6 @@ jobs: - name: Run ${{ matrix.framework }} on ${{ matrix.task }} run: | source venv/bin/activate - python runbenchmark.py ${{ matrix.framework }} ${{ matrix.benchmark }} test -f 0 -t ${{ matrix.task }} -e + python runbenchmark.py ${{ matrix.framework }} ${{ matrix.benchmark }} test -f 0 -t ${{ matrix.task }} -e --Xinference_time_measurements.enabled=False env: GITHUB_PAT: ${{ secrets.PUBLIC_ACCESS_GITHUB_PAT }} From 8caf62918a1b807b40ba8392394c5fdb517cc9ef Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Sat, 17 Jun 2023 16:10:17 +0200 Subject: [PATCH 35/39] Remove one dash --- .github/workflows/run_all_frameworks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_all_frameworks.yml b/.github/workflows/run_all_frameworks.yml index ed772acab..15750019a 100644 --- a/.github/workflows/run_all_frameworks.yml +++ b/.github/workflows/run_all_frameworks.yml @@ -156,6 +156,6 @@ jobs: - name: Run ${{ matrix.framework }} on ${{ matrix.task }} run: | source venv/bin/activate - python runbenchmark.py ${{ matrix.framework }} ${{ matrix.benchmark }} test -f 0 -t ${{ matrix.task }} -e --Xinference_time_measurements.enabled=False + python runbenchmark.py ${{ matrix.framework }} ${{ 
matrix.benchmark }} test -f 0 -t ${{ matrix.task }} -e -Xinference_time_measurements.enabled=False env: GITHUB_PAT: ${{ secrets.PUBLIC_ACCESS_GITHUB_PAT }} From f0cbfc055b3b489640bbb516b83148cb0ed11f60 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Sat, 17 Jun 2023 16:30:49 +0200 Subject: [PATCH 36/39] Disable inference time measurement by default --- resources/config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/config.yaml b/resources/config.yaml index 5fbbd968b..e0d526a7a 100644 --- a/resources/config.yaml +++ b/resources/config.yaml @@ -85,7 +85,7 @@ results: # configuration namespace for the results.csv file. incremental_save: true # if true save results after each job., otherwise save results only when all jobs are completed. inference_time_measurements: # configuration namespace for performing additional inference time measurements on various batch sizes - enabled: true + enabled: false batch_sizes: [1, 10, 100, 1000, 10000] # the batch sizes for which inference speed should be measured repeats: 100 # the number of times to repeat the inference measurement for each batch size additional_job_time: 300 # the time in seconds that will be added to the maximum job time if inference time is measured From b2c9b385fe58568e09bc254826479a83cf23efdd Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Sat, 17 Jun 2023 16:36:24 +0200 Subject: [PATCH 37/39] Make measuring inference time optional, also measure single row df --- frameworks/constantpredictor/exec.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/frameworks/constantpredictor/exec.py b/frameworks/constantpredictor/exec.py index 332de6592..049e19b76 100644 --- a/frameworks/constantpredictor/exec.py +++ b/frameworks/constantpredictor/exec.py @@ -31,12 +31,18 @@ def run(dataset: Dataset, config: TaskConfig): predictions = predictor.predict(X_test) probabilities = predictor.predict_proba(X_test) if is_classification else None - def infer(path): - data = pd.read_parquet(path) + def infer(data): + data = pd.read_parquet(data) if isinstance(data, str) else data return predictor.predict(data) inference_times = {} - inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files(fmt="parquet")) + if config.measure_inference_time: + inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files(fmt="parquet")) + test_data = X_test if isinstance(X_test, pd.DataFrame) else pd.DataFrame(X_test) + inference_times["df"] = measure_inference_times( + infer, + [(1, test_data.sample(1, random_state=i)) for i in range(100)], + ) save_predictions(dataset=dataset, output_file=config.output_predictions_file, From 7a3f433674666450331c8b860349f9425ba614bc Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Sat, 17 Jun 2023 17:10:55 +0200 Subject: [PATCH 38/39] Add inference time measurement to (T)RF baselines --- frameworks/RandomForest/__init__.py | 3 ++- frameworks/RandomForest/exec.py | 20 ++++++++++++++++++-- frameworks/TunedRandomForest/__init__.py | 3 ++- frameworks/TunedRandomForest/exec.py | 18 ++++++++++++++++-- 4 files changed, 38 insertions(+), 6 deletions(-) diff --git a/frameworks/RandomForest/__init__.py b/frameworks/RandomForest/__init__.py index a25cbeee7..3de306f59 100644 --- a/frameworks/RandomForest/__init__.py +++ b/frameworks/RandomForest/__init__.py @@ -23,7 +23,8 @@ def run(dataset: Dataset, config: TaskConfig): test=dict( X=X_test, y=y_test - ) + ), + inference_subsample_files=dataset.inference_subsample_files(fmt="parquet"), 
) return run_in_venv(__file__, "exec.py", diff --git a/frameworks/RandomForest/exec.py b/frameworks/RandomForest/exec.py index 77bdc99ef..dd23a763d 100644 --- a/frameworks/RandomForest/exec.py +++ b/frameworks/RandomForest/exec.py @@ -3,6 +3,8 @@ import tempfile as tmp from typing import List +import pandas as pd + os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir() os.environ['OMP_NUM_THREADS'] = '1' os.environ['OPENBLAS_NUM_THREADS'] = '1' @@ -12,7 +14,7 @@ import sklearn from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor -from frameworks.shared.callee import call_run, result +from frameworks.shared.callee import call_run, result, measure_inference_times from frameworks.shared.utils import Timer log = logging.getLogger(os.path.basename(__file__)) @@ -86,6 +88,19 @@ def run(dataset, config): predictions = rf.predict(X_test) probabilities = rf.predict_proba(X_test) if is_classification else None + def infer(data): + data = pd.read_parquet(data) if isinstance(data, str) else data + return rf.predict(data) + + inference_times = {} + if config.measure_inference_time: + inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files) + test_data = X_test if isinstance(X_test, pd.DataFrame) else pd.DataFrame(X_test) + inference_times["df"] = measure_inference_times( + infer, + [(1, test_data.sample(1, random_state=i)) for i in range(100)], + ) + return result(output_file=config.output_predictions_file, predictions=predictions, truth=y_test, @@ -93,7 +108,8 @@ def run(dataset, config): target_is_encoded=encode, models_count=len(rf), training_duration=training.duration, - predict_duration=predict.duration) + predict_duration=predict.duration, + inference_times=inference_times,) if __name__ == '__main__': diff --git a/frameworks/TunedRandomForest/__init__.py b/frameworks/TunedRandomForest/__init__.py index 561678497..dc0cad908 100644 --- a/frameworks/TunedRandomForest/__init__.py +++ b/frameworks/TunedRandomForest/__init__.py @@ -21,7 +21,8 @@ def run(dataset: Dataset, config: TaskConfig): test=dict( X=X_test, y=y_test - ) + ), + inference_subsample_files=dataset.inference_subsample_files(fmt="parquet"), ) return run_in_venv(__file__, "exec.py", diff --git a/frameworks/TunedRandomForest/exec.py b/frameworks/TunedRandomForest/exec.py index 7c7a7dc15..c724487d0 100644 --- a/frameworks/TunedRandomForest/exec.py +++ b/frameworks/TunedRandomForest/exec.py @@ -21,7 +21,7 @@ import sklearn from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor -from frameworks.shared.callee import call_run, result +from frameworks.shared.callee import call_run, result, measure_inference_times from frameworks.shared.utils import Timer from custom_validate import cross_validate @@ -211,6 +211,19 @@ def run(dataset, config): predictions = rf.predict(X_test) probabilities = rf.predict_proba(X_test) if is_classification else None + def infer(data): + data = pd.read_parquet(data) if isinstance(data, str) else data + return rf.predict(data) + + inference_times = {} + if config.measure_inference_time: + inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files) + test_data = X_test if isinstance(X_test, pd.DataFrame) else pd.DataFrame(X_test) + inference_times["df"] = measure_inference_times( + infer, + [(1, test_data.sample(1, random_state=i)) for i in range(100)], + ) + return result( output_file=config.output_predictions_file, predictions=predictions, @@ -219,7 +232,8 @@ def run(dataset, config): 
         target_is_encoded=is_classification,
         models_count=len(rf),
         training_duration=training.duration,
-        predict_duration=predict.duration
+        predict_duration=predict.duration,
+        inference_times=inference_times,
     )

From 54a2dd6257fdc9cf1f7097add446affc6930c0fc Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Sat, 17 Jun 2023 17:13:35 +0200
Subject: [PATCH 39/39] Remove skip-inference-measurement override since it's the default now

---
 .github/workflows/run_all_frameworks.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/run_all_frameworks.yml b/.github/workflows/run_all_frameworks.yml
index 15750019a..b11ce4629 100644
--- a/.github/workflows/run_all_frameworks.yml
+++ b/.github/workflows/run_all_frameworks.yml
@@ -156,6 +156,6 @@ jobs:
       - name: Run ${{ matrix.framework }} on ${{ matrix.task }}
         run: |
           source venv/bin/activate
-          python runbenchmark.py ${{ matrix.framework }} ${{ matrix.benchmark }} test -f 0 -t ${{ matrix.task }} -e -Xinference_time_measurements.enabled=False
+          python runbenchmark.py ${{ matrix.framework }} ${{ matrix.benchmark }} test -f 0 -t ${{ matrix.task }} -e
         env:
           GITHUB_PAT: ${{ secrets.PUBLIC_ACCESS_GITHUB_PAT }}
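
Across frameworks, these integrations converge on a single pattern; a condensed
sketch of that shape, with `automl_model`, `config`, `dataset`, and `test_data`
as stand-ins for the framework-specific objects used in the diffs above:

    import pandas as pd

    from frameworks.shared.callee import measure_inference_times

    def run_inference_measurements(automl_model, config, dataset,
                                   test_data: pd.DataFrame) -> dict:
        def infer(data):
            # Accept either a path to an on-disk subsample or an in-memory frame.
            batch = pd.read_parquet(data) if isinstance(data, str) else data
            return automl_model.predict(batch)

        inference_times = {}
        if config.measure_inference_time:
            # Batch-size latencies are measured from the pre-written subsample files,
            inference_times["file"] = measure_inference_times(
                infer, dataset.inference_subsample_files)
            # while single-row latency is measured on 100 distinct in-memory rows.
            inference_times["df"] = measure_inference_times(
                infer, [(1, test_data.sample(1, random_state=i)) for i in range(100)])
        return inference_times

Because patch 36 turns `inference_time_measurements.enabled` off by default, a
run now has to opt in explicitly, e.g. with
`-Xinference_time_measurements.enabled=True` on the runbenchmark.py command line
(the inverse of the CI override that patches 34/35 added and patch 39 removes).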