From 71e7a8bde8f6bc06241548bd5d7045d152abd9ef Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Sun, 11 Jun 2023 11:59:43 +0200
Subject: [PATCH 01/39] Add method to split off a subsample of the test set to
file
---
amlb/datasets/openml.py | 38 ++++++++++++++++++++++++++++++++++++--
1 file changed, 36 insertions(+), 2 deletions(-)
diff --git a/amlb/datasets/openml.py b/amlb/datasets/openml.py
index d037252d8..452ce9aa5 100644
--- a/amlb/datasets/openml.py
+++ b/amlb/datasets/openml.py
@@ -2,13 +2,14 @@
**openml** module implements the abstractions defined in **data** module
to expose `OpenML`_ datasets.
"""
+import pathlib
from abc import abstractmethod
import copy
import functools
import logging
import os
import re
-from typing import Generic, Tuple, TypeVar, Union
+from typing import Generic, Tuple, TypeVar
import arff
import pandas.api.types as pat
@@ -16,7 +17,7 @@
import xmltodict
from ..data import AM, DF, Dataset, DatasetType, Datasplit, Feature
-from ..resources import config as rconfig
+from ..resources import config as rconfig, get as rget
from ..utils import as_list, lazy_property, path_from_split, profile, split_path, unsparsify
@@ -92,6 +93,39 @@ def test(self):
self._ensure_split_created()
return self._test
+ @profile(logger=log)
+ def _inference_subsample(self, fmt: str, n: int) -> pathlib.Path:
+ """ Write subset of `n` samples from the test split to disk in `fmt` format """
+ # Just a hack for now, the splitters all work specifically with openml tasks.
+ # The important thing is that we split to disk and can load it later.
+ if fmt not in ["csv", "arff", "parquet"]:
+ msg = f"{fmt=}, but must be one of 'csv', 'arff', or 'parquet'."
+ raise ValueError(msg)
+
+    # We should consider taking a stratified sample if n is large enough,
+    # since inference time might differ by class
+ subsample = self._test.X.sample(
+ n=n,
+ replace=True,
+ random_state=rget().seed(self.fold)
+ )
+
+ _, test_path = self._get_split_paths()
+ test_path = pathlib.Path(test_path)
+ subsample_path = test_path.parent / f"{test_path.stem}_{n}.{fmt}"
+ if fmt == "csv":
+ subsample.to_csv(subsample_path, header=True, index=False)
+ elif fmt == "arff":
+ ArffSplitter(self)._save_split(
+ subsample,
+ subsample_path,
+ name=f"{self._oml_dataset.name}_inference_{self.fold}_{n}"
+ )
+ elif fmt == "parquet":
+ subsample.to_parquet(subsample_path)
+
+ return subsample_path
+
@lazy_property
@profile(logger=log)
def features(self):
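In isolation, the new method amounts to sampling the test split with replacement and serializing the sample next to the existing split file. A minimal standalone sketch, assuming a pandas DataFrame `test_X` and omitting the arff branch (which relies on the benchmark's internal ArffSplitter):

    import pathlib
    import pandas as pd

    def subsample_to_disk(test_X: pd.DataFrame, test_path: pathlib.Path,
                          n: int, fmt: str, seed: int = 0) -> pathlib.Path:
        # Sampling with replacement lets n exceed the number of test rows.
        subsample = test_X.sample(n=n, replace=True, random_state=seed)
        out = test_path.parent / f"{test_path.stem}_{n}.{fmt}"
        if fmt == "csv":
            subsample.to_csv(out, header=True, index=False)
        elif fmt == "parquet":
            subsample.to_parquet(out)  # requires pyarrow or fastparquet
        else:
            raise ValueError(f"{fmt=}, but must be one of 'csv' or 'parquet'.")
        return out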
From 87f80aabf26d656e329ba1177cfe768ad3211b77 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Mon, 12 Jun 2023 14:11:41 +0300
Subject: [PATCH 02/39] Add first draft for improving inference time
measurements
---
amlb/datasets/openml.py | 6 ++++++
amlb/results.py | 6 +++++-
frameworks/AutoGluon/__init__.py | 3 ++-
frameworks/AutoGluon/exec.py | 25 +++++++++++++++++--------
frameworks/TPOT/__init__.py | 3 ++-
frameworks/TPOT/exec.py | 21 +++++++++++++++++++--
frameworks/constantpredictor/exec.py | 11 ++++++++++-
frameworks/shared/callee.py | 14 +++++++++++++-
8 files changed, 74 insertions(+), 15 deletions(-)
diff --git a/amlb/datasets/openml.py b/amlb/datasets/openml.py
index 452ce9aa5..445cf46c0 100644
--- a/amlb/datasets/openml.py
+++ b/amlb/datasets/openml.py
@@ -93,6 +93,12 @@ def test(self):
self._ensure_split_created()
return self._test
+ def inference_subsample_files(self, fmt: str) -> list[Tuple[int, str]]:
+ return [
+ (n, str(self._inference_subsample(fmt=fmt, n=n)))
+ for n in [1, 1000, 10_000]
+ ]
+
@profile(logger=log)
def _inference_subsample(self, fmt: str, n: int) -> pathlib.Path:
""" Write subset of `n` samples from the test split to disk in `fmt` format """
diff --git a/amlb/results.py b/amlb/results.py
index 052460fbd..48bd6e447 100644
--- a/amlb/results.py
+++ b/amlb/results.py
@@ -444,6 +444,10 @@ def compute_score(self, result=None, meta_result=None):
required_meta_res = ['training_duration', 'predict_duration', 'models_count']
for m in required_meta_res:
entry[m] = meta_result[m] if m in meta_result else nan
+
+ if inference_times := Namespace.get(meta_result, "inference_times"):
+ for n_samples, measured_times in Namespace.dict(inference_times).items():
+ entry[f"inference_{n_samples}_rows"] = np.mean(measured_times)
result = self.get_result() if result is None else result
scoring_errors = []
@@ -473,7 +477,7 @@ def set_score(score):
entry.info = result.info
if scoring_errors:
entry.info = "; ".join(filter(lambda it: it, [entry.info, *scoring_errors]))
- entry |= Namespace({k: v for k, v in meta_result if k not in required_meta_res})
+ entry |= Namespace({k: v for k, v in meta_result if k not in required_meta_res and k != "inference_times"})
log.info("Metric scores: %s", entry)
return entry
diff --git a/frameworks/AutoGluon/__init__.py b/frameworks/AutoGluon/__init__.py
index c8694148c..4c92d08f1 100644
--- a/frameworks/AutoGluon/__init__.py
+++ b/frameworks/AutoGluon/__init__.py
@@ -25,7 +25,8 @@ def run_autogluon_tabular(dataset: Dataset, config: TaskConfig):
name=dataset.target.name,
classes=dataset.target.values
),
- problem_type=dataset.type.name # AutoGluon problem_type is using same names as amlb.data.DatasetType
+        problem_type=dataset.type.name,  # AutoGluon problem_type uses the same names as amlb.data.DatasetType
+ inference_subsample_files=dataset.inference_subsample_files(fmt="parquet"),
)
return run_in_venv(__file__, "exec.py",
diff --git a/frameworks/AutoGluon/exec.py b/frameworks/AutoGluon/exec.py
index 6fe76769b..2a50ecf37 100644
--- a/frameworks/AutoGluon/exec.py
+++ b/frameworks/AutoGluon/exec.py
@@ -18,7 +18,8 @@
import autogluon.core.metrics as metrics
from autogluon.tabular.version import __version__
-from frameworks.shared.callee import call_run, result, output_subdir
+from frameworks.shared.callee import call_run, result, output_subdir, \
+ measure_inference_times
from frameworks.shared.utils import Timer, zip_path
log = logging.getLogger(__name__)
@@ -68,14 +69,21 @@ def run(dataset, config):
# Persist the model that is going to be predicting in memory, to get accurate inference latency
predictor.persist_models('best')
+ def inference_time_classification(path: str):
+ data = TabularDataset(path)
+ return None, predictor.predict_proba(data, as_multiclass=True)
+
+ def inference_time_regression(path: str):
+ data = TabularDataset(path)
+ return predictor.predict(data, as_pandas=False), None
+
+ infer = inference_time_classification if is_classification else inference_time_regression
+ with Timer() as predict:
+ predictions, probabilities = infer(test_data)
if is_classification:
- with Timer() as predict:
- probabilities = predictor.predict_proba(test_data, as_multiclass=True)
predictions = probabilities.idxmax(axis=1).to_numpy()
- else:
- with Timer() as predict:
- predictions = predictor.predict(test_data, as_pandas=False)
- probabilities = None
+
+ inference_times = measure_inference_times(infer, dataset.inference_subsample_files)
prob_labels = probabilities.columns.values.astype(str).tolist() if probabilities is not None else None
@@ -107,7 +115,8 @@ def run(dataset, config):
models_count=num_models_trained,
models_ensemble_count=num_models_ensemble,
training_duration=training.duration,
- predict_duration=predict.duration)
+ predict_duration=predict.duration,
+ inference_times=inference_times,)
def save_artifacts(predictor, leaderboard, config):
diff --git a/frameworks/TPOT/__init__.py b/frameworks/TPOT/__init__.py
index 9828c6473..1aa3192ea 100644
--- a/frameworks/TPOT/__init__.py
+++ b/frameworks/TPOT/__init__.py
@@ -21,7 +21,8 @@ def run(dataset: Dataset, config: TaskConfig):
test=dict(
X=X_test,
y=y_test
- )
+ ),
+ inference_subsample_files=dataset.inference_subsample_files(fmt="parquet"),
)
def process_results(results):
diff --git a/frameworks/TPOT/exec.py b/frameworks/TPOT/exec.py
index ce70cb7f9..a7e0858ed 100644
--- a/frameworks/TPOT/exec.py
+++ b/frameworks/TPOT/exec.py
@@ -4,6 +4,8 @@
import sys
import tempfile as tmp
+import pandas as pd
+
if sys.platform == 'darwin':
os.environ['OBJC_DISABLE_INITIALIZE_FORK_SAFETY'] = 'YES'
os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
@@ -13,7 +15,8 @@
from tpot import TPOTClassifier, TPOTRegressor, __version__
-from frameworks.shared.callee import call_run, output_subdir, result
+from frameworks.shared.callee import call_run, output_subdir, result, \
+ measure_inference_times
from frameworks.shared.utils import Timer, is_sparse
@@ -67,6 +70,18 @@ def run(dataset, config):
y_test = dataset.test.y
with Timer() as predict:
predictions = tpot.predict(X_test)
+
+ def infer(path):
+ data = pd.read_parquet(path)
+ if is_classification:
+ try:
+ return tpot.predict_proba(data)
+ except RuntimeError:
+ return tpot.predict(data)
+ return tpot.predict(data)
+
+ inference_times = measure_inference_times(infer, dataset.inference_subsample_files)
+
try:
probabilities = tpot.predict_proba(X_test) if is_classification else None
except RuntimeError:
@@ -82,7 +97,9 @@ def run(dataset, config):
target_is_encoded=is_classification,
models_count=len(tpot.evaluated_individuals_),
training_duration=training.duration,
- predict_duration=predict.duration)
+ predict_duration=predict.duration,
+ inference_times=inference_times,
+ )
def save_artifacts(estimator, config):
diff --git a/frameworks/constantpredictor/exec.py b/frameworks/constantpredictor/exec.py
index 6a7aae69c..4a2c7cf9d 100644
--- a/frameworks/constantpredictor/exec.py
+++ b/frameworks/constantpredictor/exec.py
@@ -1,11 +1,13 @@
import logging
+import pandas as pd
from sklearn.dummy import DummyClassifier, DummyRegressor
from amlb.benchmark import TaskConfig
from amlb.data import Dataset
from amlb.results import save_predictions
from amlb.utils import Timer, unsparsify
+from frameworks.shared.callee import measure_inference_times
log = logging.getLogger(__name__)
@@ -29,6 +31,12 @@ def run(dataset: Dataset, config: TaskConfig):
predictions = predictor.predict(X_test)
probabilities = predictor.predict_proba(X_test) if is_classification else None
+ def infer(path):
+ data = pd.read_parquet(path)
+ return predictor.predict(data)
+
+ inference_times = measure_inference_times(infer, dataset.inference_subsample_files(fmt="parquet"))
+
save_predictions(dataset=dataset,
output_file=config.output_predictions_file,
probabilities=probabilities,
@@ -39,5 +47,6 @@ def run(dataset: Dataset, config: TaskConfig):
return dict(
models_count=1,
training_duration=training.duration,
- predict_duration=predict.duration
+ predict_duration=predict.duration,
+ inference_times=inference_times,
)
diff --git a/frameworks/shared/callee.py b/frameworks/shared/callee.py
index c596e01c5..3c13d144a 100644
--- a/frameworks/shared/callee.py
+++ b/frameworks/shared/callee.py
@@ -1,11 +1,14 @@
import logging
import os
+import pathlib
import re
import signal
import sys
+from collections import defaultdict
+from typing import Callable, Any, Tuple
from .utils import InterruptTimeout, Namespace as ns, json_dump, json_loads, kill_proc_tree, touch
-from .utils import deserialize_data, serialize_data
+from .utils import deserialize_data, serialize_data, Timer
log = logging.getLogger(__name__)
@@ -86,3 +89,12 @@ def load_data(name, path, **_):
kill_proc_tree(include_parent=False, timeout=5)
json_dump(res, config.result_file, style='compact')
+
+def measure_inference_times(predict_fn: Callable[[str], Any], files: list[Tuple[int, str]]) -> dict[int, list[float]]:
+ inference_times = defaultdict(list)
+ for subsample_size, subsample_path in files:
+ for _ in range(10):
+ with Timer() as predict:
+ predict_fn(subsample_path)
+ inference_times[subsample_size].append(predict.duration)
+ return inference_times
\ No newline at end of file
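A framework integration only has to supply a callable that loads one subsample and predicts on it; `measure_inference_times` handles the timing and bookkeeping. A usage sketch with a hypothetical fitted `model` and the `files` list returned by `inference_subsample_files`:

    import pandas as pd

    def infer(path: str):
        data = pd.read_parquet(path)  # deserialization is part of the timing
        return model.predict(data)

    inference_times = measure_inference_times(infer, files)
    # -> {1: [...], 1000: [...], 10000: [...]}, durations in seconds per call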
From 94ba496420128cfc8dc595244735fb2498ecfab9 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 10:43:45 +0300
Subject: [PATCH 03/39] Store all measured inference times to disk
---
frameworks/shared/callee.py | 5 +++++
frameworks/shared/caller.py | 9 +++++++++
2 files changed, 14 insertions(+)
diff --git a/frameworks/shared/callee.py b/frameworks/shared/callee.py
index 3c13d144a..ca807024c 100644
--- a/frameworks/shared/callee.py
+++ b/frameworks/shared/callee.py
@@ -88,6 +88,11 @@ def load_data(name, path, **_):
# ensure there's no subprocess left
kill_proc_tree(include_parent=False, timeout=5)
+ inference_measurements = res.get("others", {}).get("inference_times")
+ if inference_measurements:
+ inference_file = pathlib.Path(config.result_file).parent / "inference_times.json"
+ json_dump(inference_measurements, inference_file, style="compact")
+ res["others"]["inference_times"] = str(inference_file)
json_dump(res, config.result_file, style='compact')
def measure_inference_times(predict_fn: Callable[[str], Any], files: list[Tuple[int, str]]) -> dict[int, list[float]]:
diff --git a/frameworks/shared/caller.py b/frameworks/shared/caller.py
index c833b994b..8422b98d4 100644
--- a/frameworks/shared/caller.py
+++ b/frameworks/shared/caller.py
@@ -1,6 +1,7 @@
import gc
import logging
import os
+import pathlib
import re
from tempfile import TemporaryDirectory, mktemp
from typing import List, Optional, Union
@@ -11,6 +12,7 @@
from amlb.data import Dataset
from amlb.resources import config as rconfig
from amlb.results import NoResultError, save_predictions
+from amlb.utils import json_dump, Namespace
from .utils import Namespace as ns, Timer, dir_of, run_cmd, json_dumps, json_load, profile
from .utils import is_serializable_data, deserialize_data, serialize_data
@@ -152,6 +154,13 @@ def run_in_venv(caller_file, script_file: str, *args,
for name in ['predictions', 'truth', 'probabilities', 'optional_columns']:
res[name] = deserialize_data(res[name], config=ser_config) if res[name] is not None else None
+ inference_filepath = Namespace.dict(res.others).get("inference_times")
+ if inference_filepath:
+ inference_times = json_load(inference_filepath)
+ inference_filepath = pathlib.Path(res.output_file).parent / "inference.json"
+ json_dump(inference_times, inference_filepath)
+ res["others"]["inference_times"] = inference_times
+
if callable(process_results):
res = process_results(res)
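Since results cross the virtualenv boundary as JSON, the callee writes the raw measurements to a side file and the caller reads them back in. A sketch of that round trip using only the standard library (the benchmark itself goes through its `json_dump`/`json_load` utilities):

    import json
    import pathlib

    def dump_measurements(inference_times: dict, result_file: str) -> str:
        # Store the full measurement lists next to the subprocess' result file;
        # only this path travels inside the JSON result payload.
        side_file = pathlib.Path(result_file).parent / "inference_times.json"
        side_file.write_text(json.dumps(inference_times))
        return str(side_file)

    def load_measurements(side_file: str) -> dict:
        return json.loads(pathlib.Path(side_file).read_text())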
From 0c7bf7d91938006028e708291f8502eaa6c5ee9d Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 10:46:42 +0300
Subject: [PATCH 04/39] Also accept a dataframe to allow to infer without disk
load
---
frameworks/AutoGluon/exec.py | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/frameworks/AutoGluon/exec.py b/frameworks/AutoGluon/exec.py
index 2a50ecf37..53f8a42f0 100644
--- a/frameworks/AutoGluon/exec.py
+++ b/frameworks/AutoGluon/exec.py
@@ -4,6 +4,8 @@
import warnings
import sys
import tempfile
+from typing import Union
+
warnings.simplefilter("ignore")
if sys.platform == 'darwin':
@@ -65,26 +67,24 @@ def run(dataset, config):
**training_params
)
- test_data = TabularDataset(test_path)
# Persist the model that is going to be predicting in memory, to get accurate inference latency
predictor.persist_models('best')
- def inference_time_classification(path: str):
- data = TabularDataset(path)
+ def inference_time_classification(data: Union[str, pd.DataFrame]):
return None, predictor.predict_proba(data, as_multiclass=True)
- def inference_time_regression(path: str):
- data = TabularDataset(path)
+ def inference_time_regression(data: Union[str, pd.DataFrame]):
return predictor.predict(data, as_pandas=False), None
infer = inference_time_classification if is_classification else inference_time_regression
+ inference_times = measure_inference_times(infer, dataset.inference_subsample_files)
+
+ test_data = TabularDataset(test_path)
with Timer() as predict:
predictions, probabilities = infer(test_data)
if is_classification:
predictions = probabilities.idxmax(axis=1).to_numpy()
- inference_times = measure_inference_times(infer, dataset.inference_subsample_files)
-
prob_labels = probabilities.columns.values.astype(str).tolist() if probabilities is not None else None
_leaderboard_extra_info = config.framework_params.get('_leaderboard_extra_info', False) # whether to get extra model info (very verbose)
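AutoGluon's predictor accepts both a file path and an in-memory frame, so the same `infer` callable can be timed on either input. Frameworks without that flexibility use an explicit dispatch in later patches; the general shape, with a hypothetical `predict_fn`:

    from typing import Union
    import pandas as pd

    def infer(data: Union[str, pd.DataFrame]):
        # Only pay the parquet deserialization cost when given a path.
        batch = pd.read_parquet(data) if isinstance(data, str) else data
        return predict_fn(batch)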
From df46e1b63cee2d8ca3f9ef88081e06940391f2e5 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 12:20:43 +0300
Subject: [PATCH 05/39] Make repeats and batch sizes configurable
---
amlb/datasets/openml.py | 3 ++-
frameworks/shared/callee.py | 7 +++----
resources/config.yaml | 5 +++++
3 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/amlb/datasets/openml.py b/amlb/datasets/openml.py
index 445cf46c0..bccbd6318 100644
--- a/amlb/datasets/openml.py
+++ b/amlb/datasets/openml.py
@@ -96,7 +96,8 @@ def test(self):
def inference_subsample_files(self, fmt: str) -> list[Tuple[int, str]]:
return [
(n, str(self._inference_subsample(fmt=fmt, n=n)))
- for n in [1, 1000, 10_000]
+ for n in rconfig().inference_time_measurements.batch_sizes
+ for _ in range(rconfig().inference_time_measurements.repeats)
]
@profile(logger=log)
diff --git a/frameworks/shared/callee.py b/frameworks/shared/callee.py
index ca807024c..a77be3e18 100644
--- a/frameworks/shared/callee.py
+++ b/frameworks/shared/callee.py
@@ -98,8 +98,7 @@ def load_data(name, path, **_):
def measure_inference_times(predict_fn: Callable[[str], Any], files: list[Tuple[int, str]]) -> dict[int, list[float]]:
inference_times = defaultdict(list)
for subsample_size, subsample_path in files:
- for _ in range(10):
- with Timer() as predict:
- predict_fn(subsample_path)
- inference_times[subsample_size].append(predict.duration)
+ with Timer() as predict:
+ predict_fn(subsample_path)
+ inference_times[subsample_size].append(predict.duration)
return inference_times
\ No newline at end of file
diff --git a/resources/config.yaml b/resources/config.yaml
index 91ff68642..73bc81d64 100644
--- a/resources/config.yaml
+++ b/resources/config.yaml
@@ -84,6 +84,11 @@ results: # configuration namespace for the results.csv file.
global_lock_timeout: 5 # the timeout used to wait for the lock on the global results file.
incremental_save: true # if true, save results after each job, otherwise save results only when all jobs are completed.
+inference_time_measurements: # configuration namespace for performing additional inference time measurements on various batch sizes
+ enabled: true
+ batch_sizes: [1, 10, 100, 1000, 10000] # the batch sizes for which inference speed should be measured
+ repeats: 100 # the number of times to repeat the inference measurement for each batch size
+
openml: # configuration namespace for openML.
apikey: c1994bdb7ecb3c6f3c8f3b35f4b47f1f
infer_dtypes: False
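Under these defaults the benchmark prepares batch_sizes x repeats subsamples, i.e. 500 timed predictions per model. A sketch of the expansion; note that at this stage every repeat of a batch size still resolves to the same file path, which patch 19 later fixes:

    batch_sizes = [1, 10, 100, 1000, 10000]
    repeats = 100

    files = [
        (n, f"test_{n}.parquet")  # placeholder path; see _inference_subsample
        for n in batch_sizes
        for _ in range(repeats)
    ]
    assert len(files) == len(batch_sizes) * repeats  # 500 measurements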
From 0621ee24ef6668910b196abef2ba75adb6e3f2b5 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 14:52:07 +0300
Subject: [PATCH 06/39] Forward inference measurement configuration through
task config
---
amlb/benchmark.py | 4 +++-
frameworks/AutoGluon/exec.py | 4 +++-
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/amlb/benchmark.py b/amlb/benchmark.py
index d2c87ce6a..dee7c5c14 100644
--- a/amlb/benchmark.py
+++ b/amlb/benchmark.py
@@ -381,7 +381,7 @@ class TaskConfig:
def __init__(self, name, fold, metrics, seed,
max_runtime_seconds, cores, max_mem_size_mb, min_vol_size_mb,
- input_dir, output_dir):
+ input_dir, output_dir, measure_inference_time: bool = False):
self.framework = None
self.framework_params = None
self.framework_version = None
@@ -397,6 +397,7 @@ def __init__(self, name, fold, metrics, seed,
self.input_dir = input_dir
self.output_dir = output_dir
self.output_predictions_file = os.path.join(output_dir, "predictions.csv")
+ self.measure_inference_time = measure_inference_time
self.ext = ns() # used if frameworks require extra config points
def __setattr__(self, name, value):
@@ -477,6 +478,7 @@ def __init__(self, benchmark: Benchmark, task_def, fold):
min_vol_size_mb=task_def.min_vol_size_mb,
input_dir=rconfig().input_dir,
output_dir=benchmark.output_dirs.session,
+ measure_inference_time=rconfig().inference_time_measurements.enabled,
)
# allowing to override some task parameters through command line, e.g.: -Xt.max_runtime_seconds=60
if rconfig()['t'] is not None:
diff --git a/frameworks/AutoGluon/exec.py b/frameworks/AutoGluon/exec.py
index 53f8a42f0..79c4d37d6 100644
--- a/frameworks/AutoGluon/exec.py
+++ b/frameworks/AutoGluon/exec.py
@@ -77,7 +77,9 @@ def inference_time_regression(data: Union[str, pd.DataFrame]):
return predictor.predict(data, as_pandas=False), None
infer = inference_time_classification if is_classification else inference_time_regression
- inference_times = measure_inference_times(infer, dataset.inference_subsample_files)
+ inference_times = None
+ if config.measure_inference_time:
+ inference_times = measure_inference_times(infer, dataset.inference_subsample_files)
test_data = TabularDataset(test_path)
with Timer() as predict:
From 1a72ae16531a8d56f59ab71df693dfdf2901971e Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 15:08:54 +0300
Subject: [PATCH 07/39] Randomize samples within the same batch size
---
amlb/datasets/openml.py | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/amlb/datasets/openml.py b/amlb/datasets/openml.py
index bccbd6318..e926b8e01 100644
--- a/amlb/datasets/openml.py
+++ b/amlb/datasets/openml.py
@@ -94,14 +94,15 @@ def test(self):
return self._test
def inference_subsample_files(self, fmt: str) -> list[Tuple[int, str]]:
+ seed = rget().seed(self.fold)
return [
- (n, str(self._inference_subsample(fmt=fmt, n=n)))
+ (n, str(self._inference_subsample(fmt=fmt, n=n, seed=seed + i)))
for n in rconfig().inference_time_measurements.batch_sizes
- for _ in range(rconfig().inference_time_measurements.repeats)
+            for i in range(rconfig().inference_time_measurements.repeats)
]
@profile(logger=log)
- def _inference_subsample(self, fmt: str, n: int) -> pathlib.Path:
+ def _inference_subsample(self, fmt: str, n: int, seed: int = 0) -> pathlib.Path:
""" Write subset of `n` samples from the test split to disk in `fmt` format """
# Just a hack for now, the splitters all work specifically with openml tasks.
# The important thing is that we split to disk and can load it later.
@@ -114,7 +115,7 @@ def _inference_subsample(self, fmt: str, n: int) -> pathlib.Path:
subsample = self._test.X.sample(
n=n,
replace=True,
- random_state=rget().seed(self.fold)
+ random_state=seed,
)
_, test_path = self._get_split_paths()
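Offsetting the fold seed by the repeat index gives every repeat its own draw while keeping the whole series reproducible per fold. The resulting (batch size, seed) pairs, with a stand-in `base_seed` for `rget().seed(self.fold)`:

    base_seed = 42  # stand-in for rget().seed(self.fold)
    batch_sizes, repeats = [1, 1000], 3

    pairs = [
        (n, base_seed + i)  # a distinct subsample per repeat of each batch size
        for n in batch_sizes
        for i in range(repeats)
    ]
    # -> [(1, 42), (1, 43), (1, 44), (1000, 42), (1000, 43), (1000, 44)]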
From 7100391b7df7e4815775f8bca099d12bdc6d3102 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 15:17:04 +0300
Subject: [PATCH 08/39] Rename inference_X_rows column to infer_batch_size_X
---
amlb/results.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/amlb/results.py b/amlb/results.py
index 48bd6e447..3d6cffd8c 100644
--- a/amlb/results.py
+++ b/amlb/results.py
@@ -447,7 +447,7 @@ def compute_score(self, result=None, meta_result=None):
if inference_times := Namespace.get(meta_result, "inference_times"):
for n_samples, measured_times in Namespace.dict(inference_times).items():
- entry[f"inference_{n_samples}_rows"] = np.mean(measured_times)
+ entry[f"infer_batch_size_{n_samples}"] = np.mean(measured_times)
result = self.get_result() if result is None else result
scoring_errors = []
From f76e75550a9a67fc9c57f8167346ae930b379a96 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 15:23:55 +0300
Subject: [PATCH 09/39] Add docstring and move value checking of `fmt`
Moving the value check makes it less error-prone when the set of
accepted values changes.
---
amlb/datasets/openml.py | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)
diff --git a/amlb/datasets/openml.py b/amlb/datasets/openml.py
index e926b8e01..1f8adde43 100644
--- a/amlb/datasets/openml.py
+++ b/amlb/datasets/openml.py
@@ -94,6 +94,14 @@ def test(self):
return self._test
def inference_subsample_files(self, fmt: str) -> list[Tuple[int, str]]:
+ """Generates n subsamples of size k from the test dataset in `fmt` data format.
+
+ We measure the inference time of the models for various batch sizes
+ (number of rows). We generate config.inference_time_measurements.repeats
+ subsamples for each of the config.inference_time_measurements.batch_sizes.
+ These subsamples are stored to file in the `fmt` format (parquet, arff, or csv).
+ The function returns a list of tuples of (batch size, file path).
+ """
seed = rget().seed(self.fold)
return [
(n, str(self._inference_subsample(fmt=fmt, n=n, seed=seed + i)))
@@ -106,9 +114,6 @@ def _inference_subsample(self, fmt: str, n: int, seed: int = 0) -> pathlib.Path:
""" Write subset of `n` samples from the test split to disk in `fmt` format """
# Just a hack for now, the splitters all work specifically with openml tasks.
# The important thing is that we split to disk and can load it later.
- if fmt not in ["csv", "arff", "parquet"]:
- msg = f"{fmt=}, but must be one of 'csv', 'arff', or 'parquet'."
- raise ValueError(msg)
# We should consider taking a stratified sample if n is large enough,
# since inference time might differ by class
@@ -131,6 +136,9 @@ def _inference_subsample(self, fmt: str, n: int, seed: int = 0) -> pathlib.Path:
)
elif fmt == "parquet":
subsample.to_parquet(subsample_path)
+ else:
+ msg = f"{fmt=}, but must be one of 'csv', 'arff', or 'parquet'."
+ raise ValueError(msg)
return subsample_path
From bf8cbc750961a526e6968406c487e848e9b99bc0 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 18:02:11 +0300
Subject: [PATCH 10/39] Add inference time measurements for flaml
---
frameworks/flaml/__init__.py | 3 ++-
frameworks/flaml/exec.py | 16 ++++++++++++++--
2 files changed, 16 insertions(+), 3 deletions(-)
diff --git a/frameworks/flaml/__init__.py b/frameworks/flaml/__init__.py
index c911edf3b..bca1b6893 100644
--- a/frameworks/flaml/__init__.py
+++ b/frameworks/flaml/__init__.py
@@ -17,7 +17,8 @@ def run(dataset, config):
X=dataset.test.X,
y=dataset.test.y
),
- problem_type=dataset.type.name
+ problem_type=dataset.type.name,
+ inference_subsample_files=dataset.inference_subsample_files(fmt="parquet"),
)
options = dict(
serialization=dict(sparse_dataframe_deserialized_format='dense')
diff --git a/frameworks/flaml/exec.py b/frameworks/flaml/exec.py
index a8a5131af..d0acebde1 100644
--- a/frameworks/flaml/exec.py
+++ b/frameworks/flaml/exec.py
@@ -1,9 +1,11 @@
import logging
import os
+import pandas as pd
from flaml import AutoML, __version__
-from frameworks.shared.callee import call_run, result, output_subdir
+from frameworks.shared.callee import call_run, result, output_subdir, \
+ measure_inference_times
from frameworks.shared.utils import Timer
log = logging.getLogger(__name__)
@@ -49,7 +51,16 @@ def run(dataset, config):
n_jobs=n_jobs,
log_file_name= flaml_log_file_name,
time_budget=time_budget, **training_params)
-
+
+ def infer(path: str):
+ data = pd.read_parquet(path)
+ predict_fn = aml.predict_proba if is_classification else aml.predict
+ return predict_fn(data)
+
+ inference_times = None
+ if config.measure_inference_time:
+ inference_times = measure_inference_times(infer, dataset.inference_subsample_files)
+
with Timer() as predict:
predictions = aml.predict(X_test)
probabilities = aml.predict_proba(X_test) if is_classification else None
@@ -65,6 +76,7 @@ def run(dataset, config):
training_duration=training.duration,
predict_duration=predict.duration,
probabilities_labels=labels,
+ inference_times=inference_times,
)
From cb04989cbc638ecbb75dc00fde4b35f943e9f87f Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 21:53:58 +0300
Subject: [PATCH 11/39] Add inference time measurements for GAMA
---
frameworks/GAMA/__init__.py | 1 +
frameworks/GAMA/exec.py | 21 +++++++++++++++++----
2 files changed, 18 insertions(+), 4 deletions(-)
diff --git a/frameworks/GAMA/__init__.py b/frameworks/GAMA/__init__.py
index f660e2f8f..5476600cb 100644
--- a/frameworks/GAMA/__init__.py
+++ b/frameworks/GAMA/__init__.py
@@ -22,6 +22,7 @@ def run(dataset: Dataset, config: TaskConfig):
X=dataset.test.X,
y=dataset.test.y
),
+ inference_subsample_files=dataset.inference_subsample_files(fmt="parquet"),
)
options = dict(
serialization=dict(sparse_dataframe_deserialized_format='dense')
diff --git a/frameworks/GAMA/exec.py b/frameworks/GAMA/exec.py
index e0880bf34..54498c8ac 100644
--- a/frameworks/GAMA/exec.py
+++ b/frameworks/GAMA/exec.py
@@ -3,6 +3,8 @@
import sys
import tempfile as tmp
+import pandas as pd
+
if sys.platform == 'darwin':
os.environ['OBJC_DISABLE_INITIALIZE_FORK_SAFETY'] = 'YES'
os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
@@ -18,7 +20,8 @@
from gama.data_loading import file_to_pandas
from gama import GamaClassifier, GamaRegressor, __version__
-from frameworks.shared.callee import call_run, result, output_subdir
+from frameworks.shared.callee import call_run, result, output_subdir, \
+ measure_inference_times
from frameworks.shared.utils import Timer, touch
@@ -83,12 +86,21 @@ def run(dataset, config):
# data = file_to_pandas(dataset.test.path, encoding='utf-8')
# X_test, y_test = data.loc[:, data.columns != dataset.target], data.loc[:, dataset.target]
+ def infer(path: str):
+ test_data = pd.read_parquet(path)
+ predict_fn = gama_automl.predict_proba if is_classification else gama_automl.predict
+ return predict_fn(test_data)
+
+ inference_times = None
+ if config.measure_inference_time:
+ inference_times = measure_inference_times(infer, dataset.inference_subsample_files)
+
with Timer() as predict_timer:
predictions = gama_automl.predict(X_test)
+
+ probabilities = None
if is_classification:
probabilities = gama_automl.predict_proba(X_test)
- else:
- probabilities = None
return result(
output_file=config.output_predictions_file,
@@ -98,7 +110,8 @@ def run(dataset, config):
target_is_encoded=False,
models_count=len(gama_automl._final_pop),
training_duration=training_timer.duration,
- predict_duration=predict_timer.duration
+ predict_duration=predict_timer.duration,
+ inference_times=inference_times,
)
From 8ea90338c214b19d5d5c4d39b201c87c9034824b Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 22:07:19 +0300
Subject: [PATCH 12/39] Add inference time measurements for lightautoml
---
frameworks/lightautoml/__init__.py | 3 ++-
frameworks/lightautoml/exec.py | 15 ++++++++++++++-
2 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/frameworks/lightautoml/__init__.py b/frameworks/lightautoml/__init__.py
index 97c09fa0e..fedabacf3 100644
--- a/frameworks/lightautoml/__init__.py
+++ b/frameworks/lightautoml/__init__.py
@@ -21,7 +21,8 @@ def run(dataset: Dataset, config: TaskConfig):
target=dict(
name=dataset.target.name,
),
- problem_type=dataset.type.name
+ problem_type=dataset.type.name,
+ inference_subsample_files=dataset.inference_subsample_files(fmt="parquet"),
)
options = dict(
serialization=dict(sparse_dataframe_deserialized_format='dense')
diff --git a/frameworks/lightautoml/exec.py b/frameworks/lightautoml/exec.py
index aee255902..56cbe87c5 100644
--- a/frameworks/lightautoml/exec.py
+++ b/frameworks/lightautoml/exec.py
@@ -5,13 +5,16 @@
import matplotlib
import numpy as np
+import pandas as pd
+
matplotlib.use("agg") # no need for tk
from lightautoml.tasks import Task
from lightautoml.automl.presets.tabular_presets import TabularAutoML, TabularUtilizedAutoML
from lightautoml import __version__
-from frameworks.shared.callee import call_run, result, output_subdir
+from frameworks.shared.callee import call_run, result, output_subdir, \
+ measure_inference_times
from frameworks.shared.utils import Timer
log = logging.getLogger(__name__)
@@ -37,6 +40,15 @@ def run(dataset, config):
with Timer() as training:
automl.fit_predict(train_data=df_train, roles={'target': label})
+ def infer(path: str):
+ batch = pd.read_parquet(path)
+ return automl.predict(batch)
+
+ inference_times = None
+ if config.measure_inference_time:
+ inference_times = measure_inference_times(infer,
+ dataset.inference_subsample_files)
+
X_test, y_test = dataset.test.X, dataset.test.y
log.info("Predicting on the test set...")
with Timer() as predict:
@@ -75,6 +87,7 @@ def run(dataset, config):
predictions=predictions,
training_duration=training.duration,
predict_duration=predict.duration,
+ inference_times=inference_times,
)
From 338fc94e2851d32eee337400a222ec5974f78895 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 22:09:39 +0300
Subject: [PATCH 13/39] Add inference time measurements for mljarsupervised
---
frameworks/mljarsupervised/__init__.py | 3 ++-
frameworks/mljarsupervised/exec.py | 18 ++++++++++++++++--
2 files changed, 18 insertions(+), 3 deletions(-)
diff --git a/frameworks/mljarsupervised/__init__.py b/frameworks/mljarsupervised/__init__.py
index 9bee9f4a5..3cd6003ce 100644
--- a/frameworks/mljarsupervised/__init__.py
+++ b/frameworks/mljarsupervised/__init__.py
@@ -19,7 +19,8 @@ def run(dataset: Dataset, config: TaskConfig):
X=dataset.test.X,
y=dataset.test.y
),
- problem_type=dataset.type.name
+ problem_type=dataset.type.name,
+ inference_subsample_files=dataset.inference_subsample_files(fmt="parquet"),
)
options = dict(
serialization=dict(sparse_dataframe_deserialized_format='dense')
diff --git a/frameworks/mljarsupervised/exec.py b/frameworks/mljarsupervised/exec.py
index 653d9cfd6..3287e5be3 100644
--- a/frameworks/mljarsupervised/exec.py
+++ b/frameworks/mljarsupervised/exec.py
@@ -4,12 +4,15 @@
import numpy as np
import matplotlib
+import pandas as pd
+
matplotlib.use("agg") # no need for tk
import supervised
from supervised.automl import AutoML
-from frameworks.shared.callee import call_run, result, output_subdir
+from frameworks.shared.callee import call_run, result, output_subdir, \
+ measure_inference_times
from frameworks.shared.utils import Timer
log = logging.getLogger(os.path.basename(__file__))
@@ -56,6 +59,16 @@ def run(dataset, config):
with Timer() as training:
automl.fit(X_train, y_train)
+
+ def infer(path: str):
+ batch = pd.read_parquet(path)
+ return automl.predict_all(batch)
+
+ inference_times = None
+ if config.measure_inference_time:
+ inference_times = measure_inference_times(infer,
+ dataset.inference_subsample_files)
+
with Timer() as predict:
preds = automl.predict_all(X_test)
@@ -88,7 +101,8 @@ def run(dataset, config):
probabilities_labels=probabilities_labels,
models_count=len(automl._models),
training_duration=training.duration,
- predict_duration=predict.duration
+ predict_duration=predict.duration,
+ inference_times=inference_times,
)
From 9b024e2a46c63ca849b237c50d47ac181ac33eef Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 22:11:44 +0300
Subject: [PATCH 14/39] Document shortcoming of measuring inference time for
tpot
---
frameworks/TPOT/exec.py | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/frameworks/TPOT/exec.py b/frameworks/TPOT/exec.py
index a7e0858ed..ff445eae7 100644
--- a/frameworks/TPOT/exec.py
+++ b/frameworks/TPOT/exec.py
@@ -80,7 +80,10 @@ def infer(path):
return tpot.predict(data)
return tpot.predict(data)
- inference_times = measure_inference_times(infer, dataset.inference_subsample_files)
+ inference_times = None
+ if config.measure_inference_time:
+ log.info("TPOT inference time measurements exclude preprocessing time of AMLB.")
+ inference_times = measure_inference_times(infer, dataset.inference_subsample_files)
try:
probabilities = tpot.predict_proba(X_test) if is_classification else None
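TPOT's `predict_proba` raises a RuntimeError when the evolved pipeline's final estimator exposes no probability estimates, which is why both the scoring path and `infer` fall back to hard predictions. The pattern in isolation, with a hypothetical fitted `tpot` estimator:

    def infer(data):
        if is_classification:
            try:
                # Not every evolved pipeline supports probability estimates.
                return tpot.predict_proba(data)
            except RuntimeError:
                return tpot.predict(data)
        return tpot.predict(data)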
From 7b3455f8ffb0b04c8b3877378a60c3df9d0b6b2d Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 22:29:38 +0300
Subject: [PATCH 15/39] Add inference time measurements for autosklearn
---
frameworks/autosklearn/__init__.py | 3 ++-
frameworks/autosklearn/exec.py | 21 +++++++++++++++++----
2 files changed, 19 insertions(+), 5 deletions(-)
diff --git a/frameworks/autosklearn/__init__.py b/frameworks/autosklearn/__init__.py
index 3e31b6d64..92e059ef7 100644
--- a/frameworks/autosklearn/__init__.py
+++ b/frameworks/autosklearn/__init__.py
@@ -21,7 +21,8 @@ def run(dataset: Dataset, config: TaskConfig):
X=X_test,
y=y_test
),
- predictors_type=['Numerical' if p.is_numerical() else 'Categorical' for p in dataset.predictors]
+ predictors_type=['Numerical' if p.is_numerical() else 'Categorical' for p in dataset.predictors],
+ inference_subsample_files=dataset.inference_subsample_files(fmt="parquet"),
)
return run_in_venv(__file__, "exec.py",
diff --git a/frameworks/autosklearn/exec.py b/frameworks/autosklearn/exec.py
index 11690e09c..39f301fc5 100644
--- a/frameworks/autosklearn/exec.py
+++ b/frameworks/autosklearn/exec.py
@@ -5,6 +5,8 @@
import tempfile as tmp
import warnings
+import pandas as pd
+
os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
os.environ['OMP_NUM_THREADS'] = '1'
os.environ['OPENBLAS_NUM_THREADS'] = '1'
@@ -15,7 +17,8 @@
import autosklearn.metrics as metrics
from packaging import version
-from frameworks.shared.callee import call_run, result, output_subdir
+from frameworks.shared.callee import call_run, result, output_subdir, \
+ measure_inference_times
from frameworks.shared.utils import Timer, system_memory_mb, walk_apply, zip_path
log = logging.getLogger(__name__)
@@ -130,10 +133,18 @@ def run(dataset, config):
with Timer() as training:
auto_sklearn.fit(X_train, y_train, feat_type=predictors_type, **fit_extra_params)
+ def infer(path: str):
+ test_data = pd.read_parquet(path)
+ predict_fn = auto_sklearn.predict_proba if is_classification else auto_sklearn.predict
+ return predict_fn(test_data)
+
+ inference_times = None
+ if config.measure_inference_time:
+ inference_times = measure_inference_times(infer, dataset.inference_subsample_files)
+
# Convert output to strings for classification
log.info("Predicting on the test set.")
X_test = dataset.test.X
- y_test = dataset.test.y
with Timer() as predict:
predictions = auto_sklearn.predict(X_test)
probabilities = auto_sklearn.predict_proba(X_test) if is_classification else None
@@ -142,12 +153,14 @@ def run(dataset, config):
return result(output_file=config.output_predictions_file,
predictions=predictions,
- truth=y_test,
+ truth=dataset.test.y,
probabilities=probabilities,
target_is_encoded=is_classification,
models_count=len(auto_sklearn.get_models_with_weights()),
training_duration=training.duration,
- predict_duration=predict.duration)
+ predict_duration=predict.duration,
+ inference_times=inference_times,
+ )
def save_artifacts(estimator, config):
From 573322338ccba61d4605961387a4b65969175632 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 22:30:02 +0300
Subject: [PATCH 16/39] Bump ubuntu base to 22.04
---
amlb/runners/docker.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/amlb/runners/docker.py b/amlb/runners/docker.py
index 27cfedcf3..4dc601d63 100644
--- a/amlb/runners/docker.py
+++ b/amlb/runners/docker.py
@@ -116,7 +116,7 @@ def _upload_image(self, image):
log.info(f"Successfully published docker image {image}.")
def _generate_script(self, custom_commands):
- docker_content = """FROM ubuntu:18.04
+ docker_content = """FROM ubuntu:22.04
ENV DEBIAN_FRONTEND noninteractive
RUN apt-get update
From 25a1bd443ac04096549fcecfb5463fb7534aee54 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 23:02:43 +0300
Subject: [PATCH 17/39] Add inference measurement for dataframe
---
amlb/results.py | 5 +++--
frameworks/AutoGluon/exec.py | 9 +++++++--
frameworks/shared/callee.py | 7 +++++--
3 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/amlb/results.py b/amlb/results.py
index 3d6cffd8c..ab3568430 100644
--- a/amlb/results.py
+++ b/amlb/results.py
@@ -446,8 +446,9 @@ def compute_score(self, result=None, meta_result=None):
entry[m] = meta_result[m] if m in meta_result else nan
if inference_times := Namespace.get(meta_result, "inference_times"):
- for n_samples, measured_times in Namespace.dict(inference_times).items():
- entry[f"infer_batch_size_{n_samples}"] = np.mean(measured_times)
+ for data_type, measurements in Namespace.dict(inference_times).items():
+ for n_samples, measured_times in Namespace.dict(measurements).items():
+ entry[f"infer_batch_size_{data_type}_{n_samples}"] = np.mean(measured_times)
result = self.get_result() if result is None else result
scoring_errors = []
diff --git a/frameworks/AutoGluon/exec.py b/frameworks/AutoGluon/exec.py
index 79c4d37d6..1884c7919 100644
--- a/frameworks/AutoGluon/exec.py
+++ b/frameworks/AutoGluon/exec.py
@@ -77,9 +77,14 @@ def inference_time_regression(data: Union[str, pd.DataFrame]):
return predictor.predict(data, as_pandas=False), None
infer = inference_time_classification if is_classification else inference_time_regression
- inference_times = None
+ inference_times = {}
if config.measure_inference_time:
- inference_times = measure_inference_times(infer, dataset.inference_subsample_files)
+ inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files)
+ test_data = pd.read_parquet(dataset.test.path)
+ inference_times["df"] = measure_inference_times(
+ infer,
+ [(1, test_data.sample(1, random_state=i)) for i in range(100)],
+ )
test_data = TabularDataset(test_path)
with Timer() as predict:
diff --git a/frameworks/shared/callee.py b/frameworks/shared/callee.py
index a77be3e18..2f94e5262 100644
--- a/frameworks/shared/callee.py
+++ b/frameworks/shared/callee.py
@@ -5,7 +5,9 @@
import signal
import sys
from collections import defaultdict
-from typing import Callable, Any, Tuple
+from typing import Callable, Any, Tuple, Union, TypeVar
+
+import pandas as pd
from .utils import InterruptTimeout, Namespace as ns, json_dump, json_loads, kill_proc_tree, touch
from .utils import deserialize_data, serialize_data, Timer
@@ -95,7 +97,8 @@ def load_data(name, path, **_):
res["others"]["inference_times"] = str(inference_file)
json_dump(res, config.result_file, style='compact')
-def measure_inference_times(predict_fn: Callable[[str], Any], files: list[Tuple[int, str]]) -> dict[int, list[float]]:
+DATA_INPUT = TypeVar("DATA_INPUT", bound=Union[str, pd.DataFrame])
+def measure_inference_times(predict_fn: Callable[[DATA_INPUT], Any], files: list[Tuple[int, DATA_INPUT]]) -> dict[int, list[float]]:
inference_times = defaultdict(list)
for subsample_size, subsample_path in files:
with Timer() as predict:
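Keeping a file-backed series next to an in-memory series separates deserialization cost from pure model latency. A sketch of assembling the single-row inputs, assuming a pandas `test_X` and the `infer`/`files` names from the patch:

    single_rows = [(1, test_X.sample(1, random_state=i)) for i in range(100)]
    inference_times = {
        "file": measure_inference_times(infer, files),      # load + predict
        "df": measure_inference_times(infer, single_rows),  # predict only
    }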
From a6f7a5213e273fd7d7a8d3c1ffa7bc1b0a1d8a4a Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 23:07:45 +0300
Subject: [PATCH 18/39] Defaults to display median inference time
This mitigates the effect of outliers, such as cold-start runs.
---
amlb/results.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/amlb/results.py b/amlb/results.py
index ab3568430..6e1a5bc60 100644
--- a/amlb/results.py
+++ b/amlb/results.py
@@ -448,7 +448,7 @@ def compute_score(self, result=None, meta_result=None):
if inference_times := Namespace.get(meta_result, "inference_times"):
for data_type, measurements in Namespace.dict(inference_times).items():
for n_samples, measured_times in Namespace.dict(measurements).items():
- entry[f"infer_batch_size_{data_type}_{n_samples}"] = np.mean(measured_times)
+ entry[f"infer_batch_size_{data_type}_{n_samples}"] = np.median(measured_times)
result = self.get_result() if result is None else result
scoring_errors = []
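On an illustrative series the difference is stark: one cold-start outlier dominates the mean but leaves the median untouched:

    import numpy as np

    times = [0.9, 0.012, 0.011, 0.013, 0.010]  # first call hits cold caches
    print(np.mean(times))    # ~0.189, dominated by the outlier
    print(np.median(times))  # 0.012, the steady-state latency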
From 94f11f6b12fadb67da936a30e09dce488d573552 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 23:17:19 +0300
Subject: [PATCH 19/39] Add seed to filename
Otherwise only one file is actually kept and used for experiments,
which does not actually mitigate sampling variance.
---
amlb/datasets/openml.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/amlb/datasets/openml.py b/amlb/datasets/openml.py
index 1f8adde43..1a5d0f805 100644
--- a/amlb/datasets/openml.py
+++ b/amlb/datasets/openml.py
@@ -125,7 +125,7 @@ def _inference_subsample(self, fmt: str, n: int, seed: int = 0) -> pathlib.Path:
_, test_path = self._get_split_paths()
test_path = pathlib.Path(test_path)
- subsample_path = test_path.parent / f"{test_path.stem}_{n}.{fmt}"
+ subsample_path = test_path.parent / f"{test_path.stem}_{n}_{seed}.{fmt}"
if fmt == "csv":
subsample.to_csv(subsample_path, header=True, index=False)
elif fmt == "arff":
From 4119d5664e2b48c6b55bae135dc7d5ebc965228d Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 13 Jun 2023 23:41:48 +0300
Subject: [PATCH 20/39] Start with inference measurements (broken)
---
amlb/datasets/openml.py | 9 +++++----
frameworks/H2OAutoML/__init__.py | 13 ++-----------
frameworks/H2OAutoML/exec.py | 18 ++++++++++++++++--
3 files changed, 23 insertions(+), 17 deletions(-)
diff --git a/amlb/datasets/openml.py b/amlb/datasets/openml.py
index 1a5d0f805..3471fe7eb 100644
--- a/amlb/datasets/openml.py
+++ b/amlb/datasets/openml.py
@@ -93,7 +93,7 @@ def test(self):
self._ensure_split_created()
return self._test
- def inference_subsample_files(self, fmt: str) -> list[Tuple[int, str]]:
+ def inference_subsample_files(self, fmt: str, with_labels: bool = False) -> list[Tuple[int, str]]:
"""Generates n subsamples of size k from the test dataset in `fmt` data format.
We measure the inference time of the models for various batch sizes
@@ -104,20 +104,21 @@ def inference_subsample_files(self, fmt: str) -> list[Tuple[int, str]]:
"""
seed = rget().seed(self.fold)
return [
- (n, str(self._inference_subsample(fmt=fmt, n=n, seed=seed + i)))
+ (n, str(self._inference_subsample(fmt=fmt, n=n, seed=seed + i, with_labels=with_labels)))
for n in rconfig().inference_time_measurements.batch_sizes
for i in range(rconfig().inference_time_measurements.repeats)
]
@profile(logger=log)
- def _inference_subsample(self, fmt: str, n: int, seed: int = 0) -> pathlib.Path:
+ def _inference_subsample(self, fmt: str, n: int, seed: int = 0, with_labels: bool = False) -> pathlib.Path:
""" Write subset of `n` samples from the test split to disk in `fmt` format """
# Just a hack for now, the splitters all work specifically with openml tasks.
# The important thing is that we split to disk and can load it later.
# We should consider taking a stratified sample if n is large enough,
# since inference time might differ by class
- subsample = self._test.X.sample(
+ test = self._test.data if with_labels else self._test.X
+ subsample = test.sample(
n=n,
replace=True,
random_state=seed,
diff --git a/frameworks/H2OAutoML/__init__.py b/frameworks/H2OAutoML/__init__.py
index 2b45dc6d3..ce51582ef 100644
--- a/frameworks/H2OAutoML/__init__.py
+++ b/frameworks/H2OAutoML/__init__.py
@@ -8,25 +8,16 @@ def setup(*args, **kwargs):
call_script_in_same_dir(__file__, "setup.sh", *args, **kwargs)
-# def version():
-# from frameworks.shared.caller import run_cmd_in_venv
-# out, err = run_cmd_in_venv(__file__, """{py} -c "from h2o import __version__; print(__version__)" | grep "^\d\." """)
-# if err:
-# raise ValueError(err)
-# return out
-
-
def run(dataset: Dataset, config: TaskConfig):
from frameworks.shared.caller import run_in_venv
-
data = dict(
train=dict(path=dataset.train.path),
test=dict(path=dataset.test.path),
target=dict(index=dataset.target.index),
domains=dict(cardinalities=[0 if f.values is None else len(f.values) for f in dataset.features]),
- format=dataset.train.format
+ format=dataset.train.format,
+ inference_subsample_files=dataset.inference_subsample_files(fmt=dataset.train.format, with_labels=True),
)
-
config.ext.monitoring = rconfig().monitoring
return run_in_venv(__file__, "exec.py",
input_data=data, dataset=dataset, config=config)
diff --git a/frameworks/H2OAutoML/exec.py b/frameworks/H2OAutoML/exec.py
index 026d1d062..5a9e9d7fa 100644
--- a/frameworks/H2OAutoML/exec.py
+++ b/frameworks/H2OAutoML/exec.py
@@ -1,6 +1,8 @@
import contextlib
import logging
import os
+import pathlib
+
import psutil
import re
@@ -10,7 +12,8 @@
import h2o
from h2o.automl import H2OAutoML
-from frameworks.shared.callee import FrameworkError, call_run, output_subdir, result
+from frameworks.shared.callee import FrameworkError, call_run, output_subdir, result, \
+ measure_inference_times
from frameworks.shared.utils import Monitoring, Namespace as ns, Timer, clean_dir, touch, zip_path
log = logging.getLogger(__name__)
@@ -115,6 +118,16 @@ def run(dataset, config):
if not aml.leader:
raise FrameworkError("H2O could not produce any model in the requested time.")
+ def infer(path: str):
+ filename = pathlib.Path(path).name
+ batch = h2o.import_file(path, destination_frame=frame_name(filename, config), **import_kwargs)
+ return aml.predict(batch)
+
+ inference_times = None
+ if config.measure_inference_time:
+ inference_times = measure_inference_times(infer,
+ dataset.inference_subsample_files)
+
with Timer() as predict:
preds = aml.predict(test)
@@ -129,7 +142,8 @@ def run(dataset, config):
probabilities_labels=preds.probabilities_labels,
models_count=len(aml.leaderboard),
training_duration=training.duration,
- predict_duration=predict.duration
+ predict_duration=predict.duration,
+ inference_times=inference_times,
)
finally:
From 0000c63cce54301eae940213c1ca957fdb858c20 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 14 Jun 2023 11:10:18 +0300
Subject: [PATCH 21/39] Add inference measurement on dataframe
---
frameworks/GAMA/exec.py | 21 +++++++++------------
1 file changed, 9 insertions(+), 12 deletions(-)
diff --git a/frameworks/GAMA/exec.py b/frameworks/GAMA/exec.py
index 54498c8ac..12fa75111 100644
--- a/frameworks/GAMA/exec.py
+++ b/frameworks/GAMA/exec.py
@@ -75,27 +75,24 @@ def run(dataset, config):
gama_automl = estimator(**kwargs)
X_train, y_train = dataset.train.X, dataset.train.y
- # data = file_to_pandas(dataset.train.path, encoding='utf-8')
- # X_train, y_train = data.loc[:, data.columns != dataset.target], data.loc[:, dataset.target]
-
with Timer() as training_timer:
gama_automl.fit(X_train, y_train)
log.info('Predicting on the test set.')
- X_test, y_test = dataset.test.X, dataset.test.y
- # data = file_to_pandas(dataset.test.path, encoding='utf-8')
- # X_test, y_test = data.loc[:, data.columns != dataset.target], data.loc[:, dataset.target]
-
- def infer(path: str):
- test_data = pd.read_parquet(path)
+ def infer(data):
+ test_data = pd.read_parquet(data) if isinstance(data, str) else data
predict_fn = gama_automl.predict_proba if is_classification else gama_automl.predict
return predict_fn(test_data)
- inference_times = None
+ inference_times = {}
if config.measure_inference_time:
- inference_times = measure_inference_times(infer, dataset.inference_subsample_files)
-
+ inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files)
+ inference_times["df"] = measure_inference_times(
+ infer,
+ [(1, dataset.test.X.sample(1, random_state=i)) for i in range(100)],
+ )
with Timer() as predict_timer:
+ X_test, y_test = dataset.test.X, dataset.test.y
predictions = gama_automl.predict(X_test)
probabilities = None
From 1d2054a3d7d97938bfcc65746d4d084deff08155 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 14 Jun 2023 11:22:01 +0300
Subject: [PATCH 22/39] Add inference time measurement with dataframes
---
frameworks/flaml/exec.py | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/frameworks/flaml/exec.py b/frameworks/flaml/exec.py
index d0acebde1..510f02b9c 100644
--- a/frameworks/flaml/exec.py
+++ b/frameworks/flaml/exec.py
@@ -1,5 +1,6 @@
import logging
import os
+from typing import Union
import pandas as pd
from flaml import AutoML, __version__
@@ -15,7 +16,6 @@ def run(dataset, config):
log.info(f"\n**** FLAML [v{__version__}] ****\n")
X_train, y_train = dataset.train.X, dataset.train.y.squeeze()
- X_test, y_test = dataset.test.X, dataset.test.y.squeeze()
is_classification = config.type == 'classification'
time_budget = config.max_runtime_seconds
@@ -52,16 +52,21 @@ def run(dataset, config):
log_file_name= flaml_log_file_name,
time_budget=time_budget, **training_params)
- def infer(path: str):
- data = pd.read_parquet(path)
+ def infer(data: Union[str, pd.DataFrame]):
+ data = pd.read_parquet(data) if isinstance(data, str) else data
predict_fn = aml.predict_proba if is_classification else aml.predict
return predict_fn(data)
- inference_times = None
+ inference_times = {}
if config.measure_inference_time:
- inference_times = measure_inference_times(infer, dataset.inference_subsample_files)
+ inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files)
+ inference_times["df"] = measure_inference_times(
+ infer,
+ [(1, dataset.test.X.sample(1, random_state=i)) for i in range(100)],
+ )
with Timer() as predict:
+ X_test, y_test = dataset.test.X, dataset.test.y.squeeze()
predictions = aml.predict(X_test)
probabilities = aml.predict_proba(X_test) if is_classification else None
labels = None
From 357a63fd9087038bc6b3e93e71d1fbd6e4e30fa7 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 14 Jun 2023 11:40:52 +0300
Subject: [PATCH 23/39] Add dataframe inference time measurement
---
frameworks/mljarsupervised/exec.py | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/frameworks/mljarsupervised/exec.py b/frameworks/mljarsupervised/exec.py
index 3287e5be3..66d2c6d64 100644
--- a/frameworks/mljarsupervised/exec.py
+++ b/frameworks/mljarsupervised/exec.py
@@ -1,6 +1,7 @@
import os
import shutil
import logging
+from typing import Union
import numpy as np
import matplotlib
@@ -45,7 +46,6 @@ def run(dataset, config):
}
X_train, y_train = dataset.train.X, dataset.train.y.squeeze()
- X_test, y_test = dataset.test.X, dataset.test.y.squeeze()
automl = AutoML(
results_path=results_path,
@@ -60,16 +60,20 @@ def run(dataset, config):
automl.fit(X_train, y_train)
- def infer(path: str):
- batch = pd.read_parquet(path)
+ def infer(data: Union[str, pd.DataFrame]):
+ batch = pd.read_parquet(data) if isinstance(data, str) else data
return automl.predict_all(batch)
- inference_times = None
+ inference_times = {}
if config.measure_inference_time:
- inference_times = measure_inference_times(infer,
- dataset.inference_subsample_files)
+ inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files)
+ inference_times["df"] = measure_inference_times(
+ infer,
+ [(1, dataset.test.X.sample(1, random_state=i)) for i in range(100)],
+ )
with Timer() as predict:
+ X_test, y_test = dataset.test.X, dataset.test.y.squeeze()
preds = automl.predict_all(X_test)
predictions, probabilities, probabilities_labels = None, None, None
From 2dd1ffc7d26eaf3c800860a17c6b4fb221dcb175 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 14 Jun 2023 11:42:33 +0300
Subject: [PATCH 24/39] Add dataframe inference time measurement
---
frameworks/GAMA/exec.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/frameworks/GAMA/exec.py b/frameworks/GAMA/exec.py
index 12fa75111..d9e89a8e8 100644
--- a/frameworks/GAMA/exec.py
+++ b/frameworks/GAMA/exec.py
@@ -2,6 +2,7 @@
import os
import sys
import tempfile as tmp
+from typing import Union
import pandas as pd
@@ -79,7 +80,7 @@ def run(dataset, config):
gama_automl.fit(X_train, y_train)
log.info('Predicting on the test set.')
- def infer(data):
+ def infer(data: Union[str, pd.DataFrame]):
test_data = pd.read_parquet(data) if isinstance(data, str) else data
predict_fn = gama_automl.predict_proba if is_classification else gama_automl.predict
return predict_fn(test_data)
From 1e5a61dd965768fdb3b79f0709b384e705a6b05c Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 14 Jun 2023 11:58:53 +0300
Subject: [PATCH 25/39] Add dataframe inference measurement (ignores encoding)
---
frameworks/TPOT/exec.py | 27 +++++++++++++++++----------
1 file changed, 17 insertions(+), 10 deletions(-)
diff --git a/frameworks/TPOT/exec.py b/frameworks/TPOT/exec.py
index ff445eae7..56e72609e 100644
--- a/frameworks/TPOT/exec.py
+++ b/frameworks/TPOT/exec.py
@@ -5,6 +5,7 @@
import tempfile as tmp
import pandas as pd
+from numpy.random import default_rng
if sys.platform == 'darwin':
os.environ['OBJC_DISABLE_INITIALIZE_FORK_SAFETY'] = 'YES'
@@ -65,14 +66,8 @@ def run(dataset, config):
with Timer() as training:
tpot.fit(X_train, y_train)
- log.info('Predicting on the test set.')
- X_test = dataset.test.X
- y_test = dataset.test.y
- with Timer() as predict:
- predictions = tpot.predict(X_test)
-
- def infer(path):
- data = pd.read_parquet(path)
+ def infer(data):
+ data = pd.read_parquet(data) if isinstance(data, str) else data
if is_classification:
try:
return tpot.predict_proba(data)
@@ -80,10 +75,22 @@ def infer(path):
return tpot.predict(data)
return tpot.predict(data)
- inference_times = None
+ inference_times = {}
if config.measure_inference_time:
log.info("TPOT inference time measurements exclude preprocessing time of AMLB.")
- inference_times = measure_inference_times(infer, dataset.inference_subsample_files)
+ inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files)
+ inference_times["df"] = measure_inference_times(
+ infer, [
+ (1, dataset.test.X[default_rng(seed=i).integers(len(dataset.test.X)), :].reshape(1, -1))
+ for i in range(100)
+ ],
+ )
+
+ log.info('Predicting on the test set.')
+ y_test = dataset.test.y
+ with Timer() as predict:
+ X_test = dataset.test.X
+ predictions = tpot.predict(X_test)
try:
probabilities = tpot.predict_proba(X_test) if is_classification else None
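
TPOT receives the label-encoded test set as a plain numpy matrix, which is why its single-row sampling differs from the pandas-based frameworks: default_rng gives a reproducible row index, and the selected row must be reshaped to 2-D before predict. A small sketch with a made-up matrix:

    import numpy as np
    from numpy.random import default_rng

    X = np.arange(20.0).reshape(5, 4)                 # stand-in for the encoded test matrix
    row = X[default_rng(seed=0).integers(len(X)), :]  # scalar index -> 1-D row, shape (4,)
    batch = row.reshape(1, -1)                        # sklearn-style predict wants 2-D: (1, 4)
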
From b48c5edc4c91c96cbf2ce74626b74fb2a7da43c1 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 14 Jun 2023 12:04:59 +0300
Subject: [PATCH 26/39] Add dataframe inference measurement
---
frameworks/autosklearn/exec.py | 18 +++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)
diff --git a/frameworks/autosklearn/exec.py b/frameworks/autosklearn/exec.py
index 39f301fc5..e7ca55bab 100644
--- a/frameworks/autosklearn/exec.py
+++ b/frameworks/autosklearn/exec.py
@@ -4,8 +4,10 @@
import shutil
import tempfile as tmp
import warnings
+from typing import Union
import pandas as pd
+from numpy.random import default_rng
os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
os.environ['OMP_NUM_THREADS'] = '1'
@@ -133,19 +135,25 @@ def run(dataset, config):
with Timer() as training:
auto_sklearn.fit(X_train, y_train, feat_type=predictors_type, **fit_extra_params)
- def infer(path: str):
- test_data = pd.read_parquet(path)
+ def infer(data: Union[str, pd.DataFrame]):
+ test_data = pd.read_parquet(data) if isinstance(data, str) else data
predict_fn = auto_sklearn.predict_proba if is_classification else auto_sklearn.predict
return predict_fn(test_data)
- inference_times = None
+ inference_times = {}
if config.measure_inference_time:
- inference_times = measure_inference_times(infer, dataset.inference_subsample_files)
+ inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files)
+ inference_times["df"] = measure_inference_times(
+ infer, [
+ (1, dataset.test.X[default_rng(seed=i).integers(len(dataset.test.X)), :].reshape(1, -1))
+ for i in range(100)
+ ],
+ )
# Convert output to strings for classification
log.info("Predicting on the test set.")
- X_test = dataset.test.X
with Timer() as predict:
+ X_test = dataset.test.X
predictions = auto_sklearn.predict(X_test)
probabilities = auto_sklearn.predict_proba(X_test) if is_classification else None
From 42d38aca074f59672de655e032655da0af3f49b2 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 14 Jun 2023 12:38:19 +0300
Subject: [PATCH 27/39] Add inference time measurements
It seems lightautoml inference is considerably slower than that of
any other framework.
---
frameworks/lightautoml/exec.py | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/frameworks/lightautoml/exec.py b/frameworks/lightautoml/exec.py
index 56cbe87c5..01217abd1 100644
--- a/frameworks/lightautoml/exec.py
+++ b/frameworks/lightautoml/exec.py
@@ -2,6 +2,7 @@
import os
import pickle
import warnings
+from typing import Union
import matplotlib
import numpy as np
@@ -40,18 +41,21 @@ def run(dataset, config):
with Timer() as training:
automl.fit_predict(train_data=df_train, roles={'target': label})
- def infer(path: str):
- batch = pd.read_parquet(path)
+ def infer(data: Union[str, pd.DataFrame]):
+ batch = pd.read_parquet(data) if isinstance(data, str) else data
return automl.predict(batch)
- inference_times = None
+ inference_times = {}
if config.measure_inference_time:
- inference_times = measure_inference_times(infer,
- dataset.inference_subsample_files)
+ inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files)
+ inference_times["df"] = measure_inference_times(
+ infer,
+ [(1, dataset.test.X.sample(1, random_state=i)) for i in range(100)],
+ )
- X_test, y_test = dataset.test.X, dataset.test.y
log.info("Predicting on the test set...")
with Timer() as predict:
+ X_test, y_test = dataset.test.X, dataset.test.y
preds = automl.predict(X_test).data
probabilities_labels = None
From 4a38f6cad3cc66e561cacac38b9dd9944d7180b1 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 14 Jun 2023 13:04:25 +0300
Subject: [PATCH 28/39] Add inference time measurements
---
frameworks/H2OAutoML/exec.py | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/frameworks/H2OAutoML/exec.py b/frameworks/H2OAutoML/exec.py
index 5a9e9d7fa..f6603c886 100644
--- a/frameworks/H2OAutoML/exec.py
+++ b/frameworks/H2OAutoML/exec.py
@@ -123,10 +123,16 @@ def infer(path: str):
batch = h2o.import_file(path, destination_frame=frame_name(filename, config), **import_kwargs)
return aml.predict(batch)
- inference_times = None
+ inference_times = {}
if config.measure_inference_time:
- inference_times = measure_inference_times(infer,
- dataset.inference_subsample_files)
+ # H2O can't do inference on single row arff:
+ # https://github.com/h2oai/h2o-3/issues/15572
+ without_single_row_files = [
+ (subsample_size, subsample_path)
+ for subsample_size, subsample_path in dataset.inference_subsample_files
+ if subsample_size > 1
+ ]
+ inference_times["file"] = measure_inference_times(infer, without_single_row_files)
with Timer() as predict:
preds = aml.predict(test)
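
Since inference_subsample_files yields (batch_size, path) pairs, skipping the problematic single-row ARFF is a one-line filter. Illustrative pairs (dataset name and paths are made up):

    files = [(1, "kc1_1.arff"), (1000, "kc1_1000.arff"), (10000, "kc1_10000.arff")]
    without_single_row_files = [(size, path) for size, path in files if size > 1]
    assert without_single_row_files == [(1000, "kc1_1000.arff"), (10000, "kc1_10000.arff")]
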
From 6c9ccb36bb5d7ca235064517dcc8cd5d621c6a26 Mon Sep 17 00:00:00 2001
From: Pieter Gijsbers
Date: Fri, 16 Jun 2023 10:04:04 +0300
Subject: [PATCH 29/39] Allow newer autosklearn versions to use the pandas data
instead (#534)
---
frameworks/autosklearn/__init__.py | 14 ++++++++------
frameworks/autosklearn/exec.py | 28 ++++++++++++++++++----------
2 files changed, 26 insertions(+), 16 deletions(-)
diff --git a/frameworks/autosklearn/__init__.py b/frameworks/autosklearn/__init__.py
index 92e059ef7..a00a7d833 100644
--- a/frameworks/autosklearn/__init__.py
+++ b/frameworks/autosklearn/__init__.py
@@ -10,16 +10,18 @@ def setup(*args, **kwargs):
def run(dataset: Dataset, config: TaskConfig):
from frameworks.shared.caller import run_in_venv
- X_train, X_test = dataset.train.X_enc, dataset.test.X_enc
- y_train, y_test = unsparsify(dataset.train.y_enc, dataset.test.y_enc)
data = dict(
train=dict(
- X=X_train,
- y=y_train
+ X=dataset.train.X,
+ y=dataset.train.y,
+ X_enc=dataset.train.X_enc,
+ y_enc=unsparsify(dataset.train.y_enc),
),
test=dict(
- X=X_test,
- y=y_test
+ X=dataset.test.X,
+ y=dataset.test.y,
+ X_enc=dataset.test.X_enc,
+ y_enc=unsparsify(dataset.test.y_enc),
),
predictors_type=['Numerical' if p.is_numerical() else 'Categorical' for p in dataset.predictors],
inference_subsample_files=dataset.inference_subsample_files(fmt="parquet"),
diff --git a/frameworks/autosklearn/exec.py b/frameworks/autosklearn/exec.py
index e7ca55bab..6c98fc199 100644
--- a/frameworks/autosklearn/exec.py
+++ b/frameworks/autosklearn/exec.py
@@ -67,8 +67,9 @@ def run(dataset, config):
)
log.info("Environment: %s", os.environ)
- X_train = dataset.train.X
- y_train = dataset.train.y
+ use_pandas = askl_version >= version.parse("0.15")
+ X_train = dataset.train.X if use_pandas else dataset.train.X_enc
+ y_train = dataset.train.y if use_pandas else dataset.train.y_enc
predictors_type = dataset.predictors_type
log.debug("predictors_type=%s", predictors_type)
@@ -123,6 +124,10 @@ def run(dataset, config):
else:
fit_extra_params['metric'] = perf_metric
+ if not use_pandas:
+ fit_extra_params["feat_type"] = predictors_type
+
constr_params["time_left_for_this_task"] = config.max_runtime_seconds
constr_params["n_jobs"] = n_jobs
constr_params["seed"] = config.seed
@@ -133,7 +138,7 @@ def run(dataset, config):
auto_sklearn = estimator(**constr_params, **training_params)
with Timer() as training:
- auto_sklearn.fit(X_train, y_train, feat_type=predictors_type, **fit_extra_params)
+ auto_sklearn.fit(X_train, y_train, **fit_extra_params)
def infer(data: Union[str, pd.DataFrame]):
test_data = pd.read_parquet(data) if isinstance(data, str) else data
@@ -143,17 +148,20 @@ def infer(data: Union[str, pd.DataFrame]):
inference_times = {}
if config.measure_inference_time:
inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files)
+ test_data = dataset.test.X if use_pandas else dataset.test.X_enc
+ def sample_one_test_row(seed: int):
+ if use_pandas:
+ return test_data.sample(1, random_state=seed)
+            return test_data[default_rng(seed=seed).integers(len(test_data)), :].reshape(1, -1)
+
inference_times["df"] = measure_inference_times(
- infer, [
- (1, dataset.test.X[default_rng(seed=i).integers(len(dataset.test.X)), :].reshape(1, -1))
- for i in range(100)
- ],
+ infer, [(1, sample_one_test_row(seed=i)) for i in range(100)],
)
# Convert output to strings for classification
log.info("Predicting on the test set.")
with Timer() as predict:
- X_test = dataset.test.X
+ X_test = dataset.test.X if use_pandas else dataset.test.X_enc
predictions = auto_sklearn.predict(X_test)
probabilities = auto_sklearn.predict_proba(X_test) if is_classification else None
@@ -161,9 +169,9 @@ def infer(data: Union[str, pd.DataFrame]):
return result(output_file=config.output_predictions_file,
predictions=predictions,
- truth=dataset.test.y,
+ truth=dataset.test.y if use_pandas else dataset.test.y_enc,
probabilities=probabilities,
- target_is_encoded=is_classification,
+ target_is_encoded=is_classification and not use_pandas,
models_count=len(auto_sklearn.get_models_with_weights()),
training_duration=training.duration,
predict_duration=predict.duration,
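
The version gate hinges on comparing the installed auto-sklearn release against 0.15, taken here as the first release that accepts pandas input directly. A minimal sketch of the comparison (askl_version is normally detected from the installed package; the import, presumably from packaging, sits outside this excerpt):

    from packaging import version

    askl_version = version.parse("0.15.2")  # illustrative; normally detected at runtime
    use_pandas = askl_version >= version.parse("0.15")
    # With pandas input the raw, unencoded data is used end to end, so
    # classification targets no longer need to be decoded afterwards.
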
From b24c12c801698b22dd583fe771b9dc2f2fc714e1 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Fri, 16 Jun 2023 15:45:33 +0300
Subject: [PATCH 30/39] Add single row file inference for H2O
---
frameworks/H2OAutoML/exec.py | 13 ++++---------
1 file changed, 4 insertions(+), 9 deletions(-)
diff --git a/frameworks/H2OAutoML/exec.py b/frameworks/H2OAutoML/exec.py
index f6603c886..d0288903a 100644
--- a/frameworks/H2OAutoML/exec.py
+++ b/frameworks/H2OAutoML/exec.py
@@ -120,19 +120,14 @@ def run(dataset, config):
def infer(path: str):
filename = pathlib.Path(path).name
- batch = h2o.import_file(path, destination_frame=frame_name(filename, config), **import_kwargs)
+        # H2O can't do inference on a single-row arff; it needs explicit column names:
+ # https://github.com/h2oai/h2o-3/issues/15572
+ batch = h2o.import_file(path, col_names=train.names, destination_frame=frame_name(filename, config), **import_kwargs)
return aml.predict(batch)
inference_times = {}
if config.measure_inference_time:
- # H2O can't do inference on single row arff:
- # https://github.com/h2oai/h2o-3/issues/15572
- without_single_row_files = [
- (subsample_size, subsample_path)
- for subsample_size, subsample_path in dataset.inference_subsample_files
- if subsample_size > 1
- ]
- inference_times["file"] = measure_inference_times(infer, without_single_row_files)
+ inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files)
with Timer() as predict:
preds = aml.predict(test)
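
h2o.import_file accepts a col_names argument, which is what makes the single-row case importable: with only one data row, H2O's parser needs the column names supplied, so the training frame's names are passed in. A hedged sketch, assuming a running H2O cluster and files that exist on disk (paths are illustrative):

    import h2o

    h2o.init()
    train = h2o.import_file("train.arff")
    # Explicit column names work around the single-row ARFF parser issue
    # (https://github.com/h2oai/h2o-3/issues/15572):
    batch = h2o.import_file("test_1.arff", col_names=train.names)
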
From d5da2bd63cfae3d066d2d8f76223125667ed3bbd Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Fri, 16 Jun 2023 15:47:19 +0300
Subject: [PATCH 31/39] Update inference measurement to record that it's from file
---
frameworks/constantpredictor/exec.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/frameworks/constantpredictor/exec.py b/frameworks/constantpredictor/exec.py
index 4a2c7cf9d..332de6592 100644
--- a/frameworks/constantpredictor/exec.py
+++ b/frameworks/constantpredictor/exec.py
@@ -35,7 +35,8 @@ def infer(path):
data = pd.read_parquet(path)
return predictor.predict(data)
- inference_times = measure_inference_times(infer, dataset.inference_subsample_files(fmt="parquet"))
+ inference_times = {}
+ inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files(fmt="parquet"))
save_predictions(dataset=dataset,
output_file=config.output_predictions_file,
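
With measurements keyed first by mode and then by batch size (measure_inference_times returns dict[int, list[float]], per its signature in patch 32 below), the combined result object looks roughly like this, with illustrative timings in seconds:

    inference_times = {
        "file": {1: [0.0012, 0.0011], 1000: [0.0310, 0.0295]},
        "df": {1: [0.0009, 0.0008]},
    }
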
From cdb08284ea20af4b2f6c0a2f39b65d5b61155743 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Fri, 16 Jun 2023 19:28:09 +0300
Subject: [PATCH 32/39] Dynamically set type depending on presence of pandas
---
frameworks/shared/callee.py | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/frameworks/shared/callee.py b/frameworks/shared/callee.py
index 2f94e5262..84fe1c6dc 100644
--- a/frameworks/shared/callee.py
+++ b/frameworks/shared/callee.py
@@ -5,13 +5,12 @@
import signal
import sys
from collections import defaultdict
-from typing import Callable, Any, Tuple, Union, TypeVar
-
-import pandas as pd
+from typing import Callable, Any, Tuple, Union, TypeVar, TYPE_CHECKING
from .utils import InterruptTimeout, Namespace as ns, json_dump, json_loads, kill_proc_tree, touch
from .utils import deserialize_data, serialize_data, Timer
+
log = logging.getLogger(__name__)
@@ -97,7 +96,14 @@ def load_data(name, path, **_):
res["others"]["inference_times"] = str(inference_file)
json_dump(res, config.result_file, style='compact')
-DATA_INPUT = TypeVar("DATA_INPUT", bound=Union[str, pd.DataFrame])
+try:
+ import pandas as pd
+ DATA_TYPES = Union[str, pd.DataFrame]
+except ImportError:
+ DATA_TYPES = str
+
+DATA_INPUT = TypeVar("DATA_INPUT", bound=DATA_TYPES)
+
def measure_inference_times(predict_fn: Callable[[DATA_INPUT], Any], files: list[Tuple[int, DATA_INPUT]]) -> dict[int, list[float]]:
inference_times = defaultdict(list)
for subsample_size, subsample_path in files:
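
The hunk above truncates the body of measure_inference_times; a plausible completion, consistent with its signature and with the Timer/duration pattern used throughout these exec scripts (not necessarily the benchmark's exact code):

    from collections import defaultdict
    from typing import Any, Callable, Tuple

    from frameworks.shared.utils import Timer  # as imported in callee.py above

    def measure_inference_times(predict_fn: Callable[..., Any],
                                files: list[Tuple[int, Any]]) -> dict[int, list[float]]:
        # Time one predict call per (batch_size, data) pair and group the
        # measured durations by batch size.
        inference_times = defaultdict(list)
        for subsample_size, subsample_path in files:
            with Timer() as inference:
                predict_fn(subsample_path)
            inference_times[subsample_size].append(inference.duration)
        return dict(inference_times)
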
From 4c14dbee45585306eb37407a987cc979bfa0aa16 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Fri, 16 Jun 2023 19:43:34 +0300
Subject: [PATCH 33/39] Add time to job timeout if inference measurements are
 enabled
---
amlb/benchmark.py | 7 +++++--
resources/config.yaml | 1 +
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/amlb/benchmark.py b/amlb/benchmark.py
index dee7c5c14..2f913a8df 100644
--- a/amlb/benchmark.py
+++ b/amlb/benchmark.py
@@ -404,8 +404,11 @@ def __setattr__(self, name, value):
if name == 'metrics':
self.metric = value[0] if isinstance(value, list) else value
elif name == 'max_runtime_seconds':
- self.job_timeout_seconds = min(value * 2,
- value + rconfig().benchmarks.overhead_time_seconds)
+ inference_time_extension = 0
+ if rconfig().inference_time_measurements.enabled:
+ inference_time_extension = rconfig().inference_time_measurements.additional_job_time
+ self.job_timeout_seconds = min(value * 2 + inference_time_extension,
+ value + rconfig().benchmarks.overhead_time_seconds + inference_time_extension)
super().__setattr__(name, value)
def __json__(self):
diff --git a/resources/config.yaml b/resources/config.yaml
index 73bc81d64..5fbbd968b 100644
--- a/resources/config.yaml
+++ b/resources/config.yaml
@@ -88,6 +88,7 @@ inference_time_measurements: # configuration namespace for performing additiona
enabled: true
batch_sizes: [1, 10, 100, 1000, 10000] # the batch sizes for which inference speed should be measured
repeats: 100 # the number of times to repeat the inference measurement for each batch size
+ additional_job_time: 300 # the time in seconds that will be added to the maximum job time if inference time is measured
openml: # configuration namespace for OpenML.
apikey: c1994bdb7ecb3c6f3c8f3b35f4b47f1f
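
The effect of additional_job_time on the timeout rule is easiest to see with numbers. A small sketch of the computation in amlb/benchmark.py above (the overhead_time_seconds value is illustrative; its actual default is not shown in this series):

    def job_timeout_seconds(max_runtime, overhead_time, inference_extension):
        # Twice the time budget, capped at budget + overhead, with the
        # inference extension added to both bounds.
        return min(max_runtime * 2 + inference_extension,
                   max_runtime + overhead_time + inference_extension)

    print(job_timeout_seconds(600, 3600, 300))   # 1500  -> doubled budget is binding
    print(job_timeout_seconds(7200, 3600, 300))  # 11100 -> overhead cap is binding
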
From acae403c5f7842ffd5fd73f17fbcb1632928e699 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Fri, 16 Jun 2023 20:56:41 +0300
Subject: [PATCH 34/39] Disable inference time measurements for CI
---
.github/workflows/run_all_frameworks.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/run_all_frameworks.yml b/.github/workflows/run_all_frameworks.yml
index b11ce4629..ed772acab 100644
--- a/.github/workflows/run_all_frameworks.yml
+++ b/.github/workflows/run_all_frameworks.yml
@@ -156,6 +156,6 @@ jobs:
- name: Run ${{ matrix.framework }} on ${{ matrix.task }}
run: |
source venv/bin/activate
- python runbenchmark.py ${{ matrix.framework }} ${{ matrix.benchmark }} test -f 0 -t ${{ matrix.task }} -e
+ python runbenchmark.py ${{ matrix.framework }} ${{ matrix.benchmark }} test -f 0 -t ${{ matrix.task }} -e --Xinference_time_measurements.enabled=False
env:
GITHUB_PAT: ${{ secrets.PUBLIC_ACCESS_GITHUB_PAT }}
From 8caf62918a1b807b40ba8392394c5fdb517cc9ef Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Sat, 17 Jun 2023 16:10:17 +0200
Subject: [PATCH 35/39] Remove one dash
---
.github/workflows/run_all_frameworks.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/run_all_frameworks.yml b/.github/workflows/run_all_frameworks.yml
index ed772acab..15750019a 100644
--- a/.github/workflows/run_all_frameworks.yml
+++ b/.github/workflows/run_all_frameworks.yml
@@ -156,6 +156,6 @@ jobs:
- name: Run ${{ matrix.framework }} on ${{ matrix.task }}
run: |
source venv/bin/activate
- python runbenchmark.py ${{ matrix.framework }} ${{ matrix.benchmark }} test -f 0 -t ${{ matrix.task }} -e --Xinference_time_measurements.enabled=False
+ python runbenchmark.py ${{ matrix.framework }} ${{ matrix.benchmark }} test -f 0 -t ${{ matrix.task }} -e -Xinference_time_measurements.enabled=False
env:
GITHUB_PAT: ${{ secrets.PUBLIC_ACCESS_GITHUB_PAT }}
From f0cbfc055b3b489640bbb516b83148cb0ed11f60 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Sat, 17 Jun 2023 16:30:49 +0200
Subject: [PATCH 36/39] Disable inference time measurement by default
---
resources/config.yaml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/resources/config.yaml b/resources/config.yaml
index 5fbbd968b..e0d526a7a 100644
--- a/resources/config.yaml
+++ b/resources/config.yaml
@@ -85,7 +85,7 @@ results: # configuration namespace for the results.csv file.
incremental_save: true # if true, save results after each job, otherwise save results only when all jobs are completed.
inference_time_measurements: # configuration namespace for performing additional inference time measurements on various batch sizes
- enabled: true
+ enabled: false
batch_sizes: [1, 10, 100, 1000, 10000] # the batch sizes for which inference speed should be measured
repeats: 100 # the number of times to repeat the inference measurement for each batch size
additional_job_time: 300 # the time in seconds that will be added to the maximum job time if inference time is measured
From b2c9b385fe58568e09bc254826479a83cf23efdd Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Sat, 17 Jun 2023 16:36:24 +0200
Subject: [PATCH 37/39] Make measuring inference time optional; also measure
 single-row df
---
frameworks/constantpredictor/exec.py | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/frameworks/constantpredictor/exec.py b/frameworks/constantpredictor/exec.py
index 332de6592..049e19b76 100644
--- a/frameworks/constantpredictor/exec.py
+++ b/frameworks/constantpredictor/exec.py
@@ -31,12 +31,18 @@ def run(dataset: Dataset, config: TaskConfig):
predictions = predictor.predict(X_test)
probabilities = predictor.predict_proba(X_test) if is_classification else None
- def infer(path):
- data = pd.read_parquet(path)
+ def infer(data):
+ data = pd.read_parquet(data) if isinstance(data, str) else data
return predictor.predict(data)
inference_times = {}
- inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files(fmt="parquet"))
+ if config.measure_inference_time:
+ inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files(fmt="parquet"))
+ test_data = X_test if isinstance(X_test, pd.DataFrame) else pd.DataFrame(X_test)
+ inference_times["df"] = measure_inference_times(
+ infer,
+ [(1, test_data.sample(1, random_state=i)) for i in range(100)],
+ )
save_predictions(dataset=dataset,
output_file=config.output_predictions_file,
From 7a3f433674666450331c8b860349f9425ba614bc Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Sat, 17 Jun 2023 17:10:55 +0200
Subject: [PATCH 38/39] Add inference time measurement to (T)RF baselines
---
frameworks/RandomForest/__init__.py | 3 ++-
frameworks/RandomForest/exec.py | 20 ++++++++++++++++++--
frameworks/TunedRandomForest/__init__.py | 3 ++-
frameworks/TunedRandomForest/exec.py | 18 ++++++++++++++++--
4 files changed, 38 insertions(+), 6 deletions(-)
diff --git a/frameworks/RandomForest/__init__.py b/frameworks/RandomForest/__init__.py
index a25cbeee7..3de306f59 100644
--- a/frameworks/RandomForest/__init__.py
+++ b/frameworks/RandomForest/__init__.py
@@ -23,7 +23,8 @@ def run(dataset: Dataset, config: TaskConfig):
test=dict(
X=X_test,
y=y_test
- )
+ ),
+ inference_subsample_files=dataset.inference_subsample_files(fmt="parquet"),
)
return run_in_venv(__file__, "exec.py",
diff --git a/frameworks/RandomForest/exec.py b/frameworks/RandomForest/exec.py
index 77bdc99ef..dd23a763d 100644
--- a/frameworks/RandomForest/exec.py
+++ b/frameworks/RandomForest/exec.py
@@ -3,6 +3,8 @@
import tempfile as tmp
from typing import List
+import pandas as pd
+
os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
os.environ['OMP_NUM_THREADS'] = '1'
os.environ['OPENBLAS_NUM_THREADS'] = '1'
@@ -12,7 +14,7 @@
import sklearn
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
-from frameworks.shared.callee import call_run, result
+from frameworks.shared.callee import call_run, result, measure_inference_times
from frameworks.shared.utils import Timer
log = logging.getLogger(os.path.basename(__file__))
@@ -86,6 +88,19 @@ def run(dataset, config):
predictions = rf.predict(X_test)
probabilities = rf.predict_proba(X_test) if is_classification else None
+ def infer(data):
+ data = pd.read_parquet(data) if isinstance(data, str) else data
+ return rf.predict(data)
+
+ inference_times = {}
+ if config.measure_inference_time:
+ inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files)
+ test_data = X_test if isinstance(X_test, pd.DataFrame) else pd.DataFrame(X_test)
+ inference_times["df"] = measure_inference_times(
+ infer,
+ [(1, test_data.sample(1, random_state=i)) for i in range(100)],
+ )
+
return result(output_file=config.output_predictions_file,
predictions=predictions,
truth=y_test,
@@ -93,7 +108,8 @@ def run(dataset, config):
target_is_encoded=encode,
models_count=len(rf),
training_duration=training.duration,
- predict_duration=predict.duration)
+ predict_duration=predict.duration,
+ inference_times=inference_times,)
if __name__ == '__main__':
diff --git a/frameworks/TunedRandomForest/__init__.py b/frameworks/TunedRandomForest/__init__.py
index 561678497..dc0cad908 100644
--- a/frameworks/TunedRandomForest/__init__.py
+++ b/frameworks/TunedRandomForest/__init__.py
@@ -21,7 +21,8 @@ def run(dataset: Dataset, config: TaskConfig):
test=dict(
X=X_test,
y=y_test
- )
+ ),
+ inference_subsample_files=dataset.inference_subsample_files(fmt="parquet"),
)
return run_in_venv(__file__, "exec.py",
diff --git a/frameworks/TunedRandomForest/exec.py b/frameworks/TunedRandomForest/exec.py
index 7c7a7dc15..c724487d0 100644
--- a/frameworks/TunedRandomForest/exec.py
+++ b/frameworks/TunedRandomForest/exec.py
@@ -21,7 +21,7 @@
import sklearn
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
-from frameworks.shared.callee import call_run, result
+from frameworks.shared.callee import call_run, result, measure_inference_times
from frameworks.shared.utils import Timer
from custom_validate import cross_validate
@@ -211,6 +211,19 @@ def run(dataset, config):
predictions = rf.predict(X_test)
probabilities = rf.predict_proba(X_test) if is_classification else None
+ def infer(data):
+ data = pd.read_parquet(data) if isinstance(data, str) else data
+ return rf.predict(data)
+
+ inference_times = {}
+ if config.measure_inference_time:
+ inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files)
+ test_data = X_test if isinstance(X_test, pd.DataFrame) else pd.DataFrame(X_test)
+ inference_times["df"] = measure_inference_times(
+ infer,
+ [(1, test_data.sample(1, random_state=i)) for i in range(100)],
+ )
+
return result(
output_file=config.output_predictions_file,
predictions=predictions,
@@ -219,7 +232,8 @@ def run(dataset, config):
target_is_encoded=is_classification,
models_count=len(rf),
training_duration=training.duration,
- predict_duration=predict.duration
+ predict_duration=predict.duration,
+ inference_times=inference_times,
)
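
In the (Tuned)RandomForest baselines, X_test may arrive as either a DataFrame or an encoded numpy matrix depending on the integration, hence the isinstance check above. A self-contained sketch of the fallback path with a made-up matrix:

    import numpy as np
    import pandas as pd

    X_test = np.arange(12.0).reshape(4, 3)  # stand-in for the encoded test matrix
    # .sample only exists on DataFrames, so wrap plain arrays first:
    test_data = X_test if isinstance(X_test, pd.DataFrame) else pd.DataFrame(X_test)
    one_row_batches = [(1, test_data.sample(1, random_state=i)) for i in range(100)]
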
From 54a2dd6257fdc9cf1f7097add446affc6930c0fc Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Sat, 17 Jun 2023 17:13:35 +0200
Subject: [PATCH 39/39] Remove skip-inference-measurement override since it's
 the default now
---
.github/workflows/run_all_frameworks.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/run_all_frameworks.yml b/.github/workflows/run_all_frameworks.yml
index 15750019a..b11ce4629 100644
--- a/.github/workflows/run_all_frameworks.yml
+++ b/.github/workflows/run_all_frameworks.yml
@@ -156,6 +156,6 @@ jobs:
- name: Run ${{ matrix.framework }} on ${{ matrix.task }}
run: |
source venv/bin/activate
- python runbenchmark.py ${{ matrix.framework }} ${{ matrix.benchmark }} test -f 0 -t ${{ matrix.task }} -e -Xinference_time_measurements.enabled=False
+ python runbenchmark.py ${{ matrix.framework }} ${{ matrix.benchmark }} test -f 0 -t ${{ matrix.task }} -e
env:
GITHUB_PAT: ${{ secrets.PUBLIC_ACCESS_GITHUB_PAT }}