From 4029472948c50239029affcceceb70832c718691 Mon Sep 17 00:00:00 2001 From: Nick Erickson Date: Mon, 10 Oct 2022 09:01:11 -0700 Subject: [PATCH] AutoGluon TimeSeries Support (first version) (#494) * Add AutoGluon TimeSeries Prototype * AutoMLBenchmark TimeSeries Prototype. (#6) * fixed loading test & train, changed pred.-l. 5->30 * ignore launch.json of vscode * ensuring timestamp parsing * pass config, save pred, add results * remove unused code * add readability, remove slice from timer * ensure autogluonts has required info * add comments for readability * setting defaults for timeseries task * remove outer context manipulation * corrected spelling error for quantiles * adding mape, correct available metrics * beautify config options * fixed config for public access * Update readme * Autogluon timeseries, addressed comments by sebhrusen (#7) * fixed loading test & train, changed pred.-l. 5->30 * ignore launch.json of vscode * ensuring timestamp parsing * pass config, save pred, add results * remove unused code * add readability, remove slice from timer * ensure autogluonts has required info * add comments for readability * setting defaults for timeseries task * remove outer context manipulation * corrected spelling error for quantiles * adding mape, correct available metrics * beautify config options * fixed config for public access * no outer context manipulation, add dataset subdir * add more datasets * include error raising for too large pred. length. * mergin AutoGluonTS framework folder into AutoGluon * renaming ts.yaml to timeseries.yaml, plus ext. 
* removing presets, correct latest config for AGTS * move dataset timeseries ext to datasets/file.py * dont bypass test mode * move quantiles and y_past_period_error to opt_cols * remove whitespaces * deleting merge artifacts * delete merge artifacts * renaming prediction_length to forecast_range_in_steps * use public dataset, reduced range to maximum * fix format string works * fix key error bug, remove magic time limit * Addressed minor comments, and fixed version call for tabular and timeseries modularities (#8) * fixed loading test & train, changed pred.-l. 5->30 * ignore launch.json of vscode * ensuring timestamp parsing * pass config, save pred, add results * remove unused code * add readability, remove slice from timer * ensure autogluonts has required info * add comments for readability * setting defaults for timeseries task * remove outer context manipulation * corrected spelling error for quantiles * adding mape, correct available metrics * beautify config options * fixed config for public access * no outer context manipulation, add dataset subdir * add more datasets * include error raising for too large pred. length. * mergin AutoGluonTS framework folder into AutoGluon * renaming ts.yaml to timeseries.yaml, plus ext. 
* removing presets, correct latest config for AGTS * move dataset timeseries ext to datasets/file.py * dont bypass test mode * move quantiles and y_past_period_error to opt_cols * remove whitespaces * deleting merge artifacts * delete merge artifacts * renaming prediction_length to forecast_range_in_steps * use public dataset, reduced range to maximum * fix format string works * fix key error bug, remove magic time limit * swapped timeseries and tabular to set version * make warning message more explicit * remove outer context manipulation * split timeseries / tabular into functions Co-authored-by: Leo --- .gitignore | 1 + amlb/benchmark.py | 12 +- amlb/data.py | 1 + amlb/datasets/file.py | 76 +++++++++--- amlb/datautils.py | 10 +- amlb/results.py | 82 ++++++++++++- frameworks/AutoGluon/README.md | 16 +++ frameworks/AutoGluon/__init__.py | 43 ++++++- frameworks/AutoGluon/exec_ts.py | 172 +++++++++++++++++++++++++++ frameworks/AutoGluon/setup.sh | 9 +- frameworks/shared/callee.py | 3 +- frameworks/shared/caller.py | 3 +- resources/benchmarks/timeseries.yaml | 13 ++ resources/config.yaml | 1 + resources/frameworks.yaml | 21 +++- resources/frameworks_latest.yaml | 12 +- 16 files changed, 437 insertions(+), 38 deletions(-) create mode 100644 frameworks/AutoGluon/README.md create mode 100644 frameworks/AutoGluon/exec_ts.py create mode 100644 resources/benchmarks/timeseries.yaml diff --git a/.gitignore b/.gitignore index 4dba33db1..bc9c76adc 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ venv/ .idea/ *.iml *.swp +launch.json # tmp files .ipynb_checkpoints/ diff --git a/amlb/benchmark.py b/amlb/benchmark.py index b9975efdc..7c54a344c 100644 --- a/amlb/benchmark.py +++ b/amlb/benchmark.py @@ -489,7 +489,9 @@ def load_data(self): # TODO raise NotImplementedError("OpenML datasets without task_id are not supported yet.") elif hasattr(self._task_def, 'dataset'): - self._dataset = Benchmark.data_loader.load(DataSourceType.file, dataset=self._task_def.dataset, 
fold=self.fold) + dataset_name_and_config = copy(self._task_def.dataset) + dataset_name_and_config.name = self._task_def.name + self._dataset = Benchmark.data_loader.load(DataSourceType.file, dataset=dataset_name_and_config, fold=self.fold) else: raise ValueError("Tasks should have one property among [openml_task_id, openml_dataset_id, dataset].") @@ -522,7 +524,12 @@ def run(self): predictions_dir=self.benchmark.output_dirs.predictions) framework_def = self.benchmark.framework_def task_config = copy(self.task_config) - task_config.type = 'regression' if self._dataset.type == DatasetType.regression else 'classification' + if self._dataset.type == DatasetType.regression: + task_config.type = 'regression' + elif self._dataset.type == DatasetType.timeseries: + task_config.type = 'timeseries' + else: + task_config.type = 'classification' task_config.type_ = self._dataset.type.name task_config.framework = self.benchmark.framework_name task_config.framework_params = framework_def.params @@ -552,4 +559,3 @@ def run(self): finally: self._dataset.release() return results.compute_score(result=result, meta_result=meta_result) - diff --git a/amlb/data.py b/amlb/data.py index 4e4cea879..acca17841 100644 --- a/amlb/data.py +++ b/amlb/data.py @@ -172,6 +172,7 @@ class DatasetType(Enum): binary = 1 multiclass = 2 regression = 3 + timeseries = 4 class Dataset(ABC): diff --git a/amlb/datasets/file.py b/amlb/datasets/file.py index 6ddca4042..526c131e7 100644 --- a/amlb/datasets/file.py +++ b/amlb/datasets/file.py @@ -16,7 +16,7 @@ from ..utils import Namespace as ns, as_list, lazy_property, list_all_files, memoize, path_from_split, profile, repr_def, split_path from .fileutils import is_archive, is_valid_url, unarchive_file, get_file_handler - +from copy import deepcopy log = logging.getLogger(__name__) @@ -33,7 +33,7 @@ def __init__(self, cache_dir=None): def load(self, dataset, fold=0): dataset = dataset if isinstance(dataset, ns) else ns(path=dataset) log.debug("Loading dataset 
%s", dataset) - paths = self._extract_train_test_paths(dataset.path if 'path' in dataset else dataset, fold=fold) + paths = self._extract_train_test_paths(dataset.path if 'path' in dataset else dataset, fold=fold, name=dataset['name'] if 'name' in dataset else None) assert fold < len(paths['train']), f"No training dataset available for fold {fold} among dataset files {paths['train']}" # seed = rget().seed(fold) # if len(paths['test']) == 0: @@ -51,21 +51,28 @@ def load(self, dataset, fold=0): if ext == '.arff': return ArffDataset(train_path, test_path, target=target, features=features, type=type_) elif ext == '.csv': - return CsvDataset(train_path, test_path, target=target, features=features, type=type_) + if DatasetType[dataset['type']] == DatasetType.timeseries and dataset['timestamp_column'] is None: + log.warning("Warning: For timeseries task setting undefined timestamp column to `timestamp`.") + dataset = deepcopy(dataset) + dataset['timestamp_column'] = "timestamp" + csv_dataset = CsvDataset(train_path, test_path, target=target, features=features, type=type_, timestamp_column=dataset['timestamp_column'] if 'timestamp_column' in dataset else None) + if csv_dataset.type == DatasetType.timeseries: + csv_dataset = self.extend_dataset_with_timeseries_config(csv_dataset, dataset) + return csv_dataset else: raise ValueError(f"Unsupported file type: {ext}") - def _extract_train_test_paths(self, dataset, fold=None): + def _extract_train_test_paths(self, dataset, fold=None, name=None): if isinstance(dataset, (tuple, list)): assert len(dataset) % 2 == 0, "dataset list must contain an even number of paths: [train_0, test_0, train_1, test_1, ...]." 
return self._extract_train_test_paths(ns(train=[p for i, p in enumerate(dataset) if i % 2 == 0], test=[p for i, p in enumerate(dataset) if i % 2 == 1]), - fold=fold) + fold=fold, name=name) elif isinstance(dataset, ns): - return dict(train=[self._extract_train_test_paths(p)['train'][0] + return dict(train=[self._extract_train_test_paths(p, name=name)['train'][0] if i == fold else None for i, p in enumerate(as_list(dataset.train))], - test=[self._extract_train_test_paths(p)['train'][0] + test=[self._extract_train_test_paths(p, name=name)['train'][0] if i == fold else None for i, p in enumerate(as_list(dataset.test))]) else: @@ -116,7 +123,10 @@ def _extract_train_test_paths(self, dataset, fold=None): assert len(paths) > 0, f"No dataset file found in {dataset}: they should follow the naming xxxx_train.ext, xxxx_test.ext or xxxx_train_0.ext, xxxx_test_0.ext, xxxx_train_1.ext, ..." return paths elif is_valid_url(dataset): - cached_file = os.path.join(self._cache_dir, os.path.basename(dataset)) + if name is None: + cached_file = os.path.join(self._cache_dir, os.path.basename(dataset)) + else: + cached_file = os.path.join(self._cache_dir, name, os.path.basename(dataset)) if not os.path.exists(cached_file): # don't download if previously done handler = get_file_handler(dataset) assert handler.exists(dataset), f"Invalid path/url: {dataset}" @@ -129,6 +139,40 @@ def __repr__(self): return repr_def(self) + def extend_dataset_with_timeseries_config(self, dataset, dataset_config): + dataset = deepcopy(dataset) + dataset_config = deepcopy(dataset_config) + if dataset_config['id_column'] is None: + log.warning("Warning: For timeseries task setting undefined `id_column` to `item_id`.") + dataset_config['id_column'] = "item_id" + if dataset_config['forecast_range_in_steps'] is None: + log.warning("Warning: For timeseries task setting undefined `forecast_range_in_steps` to `1`.") + dataset_config['forecast_range_in_steps'] = "1" + + 
dataset.timestamp_column=dataset_config['timestamp_column'] + dataset.id_column=dataset_config['id_column'] + dataset.forecast_range_in_steps=int(dataset_config['forecast_range_in_steps']) + + train_seqs_lengths = dataset.train.X.groupby(dataset.id_column).count() + test_seqs_lengths = dataset.test.X.groupby(dataset.id_column).count() + forecast_range_in_steps_mean_diff_train_test = int((test_seqs_lengths - train_seqs_lengths).mean()) + forecast_range_in_steps_max_min_train_test = int(min(int(test_seqs_lengths.min()), int(train_seqs_lengths.min()))) - 1 + if not dataset.forecast_range_in_steps == forecast_range_in_steps_mean_diff_train_test: + msg = f"Warning: Forecast range {dataset.forecast_range_in_steps}, does not equal mean difference between test and train sequence lengths {forecast_range_in_steps_mean_diff_train_test}." + log.warning(msg) + if not (test_seqs_lengths - train_seqs_lengths).var().item() == 0.: + msg = f"Error: Not all sequences of train and test set have same sequence length difference." + raise ValueError(msg) + if dataset.forecast_range_in_steps > forecast_range_in_steps_mean_diff_train_test: + msg = f"Error: Forecast range {dataset.forecast_range_in_steps} longer than difference between test and train sequence lengths {forecast_range_in_steps_mean_diff_train_test}." + raise ValueError(msg) + if dataset.forecast_range_in_steps > forecast_range_in_steps_max_min_train_test: + msg = f"Error: Forecast range {dataset.forecast_range_in_steps} longer than minimum sequence length + 1, {forecast_range_in_steps_max_min_train_test}." 
+ raise ValueError(msg) + return dataset + + + class FileDataset(Dataset): def __init__(self, train: Datasplit, test: Datasplit, @@ -302,25 +346,26 @@ def release(self, properties=None): class CsvDataset(FileDataset): def __init__(self, train_path, test_path, - target=None, features=None, type=None): + target=None, features=None, type=None, timestamp_column=None): # todo: handle auto-split (if test_path is None): requires loading the training set, split, save super().__init__(None, None, target=target, features=features, type=type) - self._train = CsvDatasplit(self, train_path) - self._test = CsvDatasplit(self, test_path) + self._train = CsvDatasplit(self, train_path, timestamp_column=timestamp_column) + self._test = CsvDatasplit(self, test_path, timestamp_column=timestamp_column) self._dtypes = None class CsvDatasplit(FileDatasplit): - def __init__(self, dataset, path): + def __init__(self, dataset, path, timestamp_column=None): super().__init__(dataset, format='csv', path=path) self._ds = None + self.timestamp_column = timestamp_column def _ensure_loaded(self): if self._ds is None: if self.dataset._dtypes is None: - df = read_csv(self.path) + df = read_csv(self.path, timestamp_column=self.timestamp_column) # df = df.convert_dtypes() dt_conversions = {name: 'category' for name, dtype in zip(df.dtypes.index, df.dtypes.values) @@ -336,8 +381,9 @@ def _ensure_loaded(self): self._ds = df self.dataset._dtypes = self._ds.dtypes + else: - self._ds = read_csv(self.path, dtype=self.dataset._dtypes.to_dict()) + self._ds = read_csv(self.path, dtype=self.dataset._dtypes.to_dict(), timestamp_column=self.timestamp_column) @profile(logger=log) def load_metadata(self): @@ -348,7 +394,7 @@ def load_metadata(self): else 'number' if pat.is_numeric_dtype(dt) else 'category' if pat.is_categorical_dtype(dt) else 'string' if pat.is_string_dtype(dt) - # else 'datetime' if pat.is_datetime64_dtype(dt) + else 'datetime' if pat.is_datetime64_dtype(dt) else 'object') features = [Feature(i, 
col, to_feature_type(dtypes[i])) for i, col in enumerate(self._ds.columns)] diff --git a/amlb/datautils.py b/amlb/datautils.py index f3eeeb2a5..7946fdc1c 100644 --- a/amlb/datautils.py +++ b/amlb/datautils.py @@ -26,7 +26,7 @@ log = logging.getLogger(__name__) -def read_csv(path, nrows=None, header=True, index=False, as_data_frame=True, dtype=None): +def read_csv(path, nrows=None, header=True, index=False, as_data_frame=True, dtype=None, timestamp_column=None): """ read csv file to DataFrame. @@ -37,13 +37,19 @@ def read_csv(path, nrows=None, header=True, index=False, as_data_frame=True, dty :param header: if the columns header should be read. :param as_data_frame: if the result should be returned as a data frame (default) or a numpy array. :param dtype: data type for columns. + :param timestamp_column: column name for timestamp, to ensure dates are correctly parsed by pandas. :return: a DataFrame """ + if dtype is not None and timestamp_column is not None and timestamp_column in dtype: + dtype = dtype.copy() # to avoid outer context manipulation + del dtype[timestamp_column] + df = pd.read_csv(path, nrows=nrows, header=0 if header else None, index_col=0 if index else None, - dtype=dtype) + dtype=dtype, + parse_dates=[timestamp_column] if timestamp_column is not None else None) return df if as_data_frame else df.values diff --git a/amlb/results.py b/amlb/results.py index 2f547b4ec..3887203f6 100644 --- a/amlb/results.py +++ b/amlb/results.py @@ -228,12 +228,17 @@ def load_predictions(predictions_file): try: df = read_csv(predictions_file, dtype=object) log.debug("Predictions preview:\n %s\n", df.head(10).to_string()) + if rconfig().test_mode: TaskResult.validate_predictions(df) - if df.shape[1] > 2: - return ClassificationResult(df) + + if 'y_past_period_error' in df.columns: + return TimeSeriesResult(df) else: - return RegressionResult(df) + if df.shape[1] > 2: + return ClassificationResult(df) + else: + return RegressionResult(df) except Exception as e: return 
ErrorResult(ResultError(e)) else: @@ -254,6 +259,7 @@ def load_metadata(metadata_file): def save_predictions(dataset: Dataset, output_file: str, predictions: Union[A, DF, S] = None, truth: Union[A, DF, S] = None, probabilities: Union[A, DF] = None, probabilities_labels: Union[list, A] = None, + optional_columns: Union[A, DF] = None, target_is_encoded: bool = False, preview: bool = True): """ Save class probabilities and predicted labels to file in csv format. @@ -264,6 +270,7 @@ def save_predictions(dataset: Dataset, output_file: str, :param predictions: :param truth: :param probabilities_labels: + :param optional_columns: :param target_is_encoded: :param preview: :return: None @@ -308,6 +315,10 @@ def save_predictions(dataset: Dataset, output_file: str, df = df.assign(predictions=preds) df = df.assign(truth=truth) + + if optional_columns is not None: + df = pd.concat([df, optional_columns], axis=1) + if preview: log.info("Predictions preview:\n %s\n", df.head(20).to_string()) backup_file(output_file) @@ -656,6 +667,71 @@ def r2(self): """R^2""" return float(r2_score(self.truth, self.predictions)) +class TimeSeriesResult(RegressionResult): + + def __init__(self, predictions_df, info=None): + super().__init__(predictions_df, info) + self.truth = self.df['truth'].values if self.df is not None else None #.iloc[:, 1].values if self.df is not None else None + self.predictions = self.df['predictions'].values if self.df is not None else None #.iloc[:, -2].values if self.df is not None else None + self.y_past_period_error = self.df['y_past_period_error'].values + self.quantiles = self.df.iloc[:, 2:-1].values + self.quantiles_probs = np.array([float(q) for q in self.df.columns[2:-1]]) + self.truth = self.truth.astype(float, copy=False) + self.predictions = self.predictions.astype(float, copy=False) + self.quantiles = self.quantiles.astype(float, copy=False) + self.y_past_period_error = self.y_past_period_error.astype(float, copy=False) + + self.target = Feature(0, 'target', 
'real', is_target=True) + self.type = DatasetType.timeseries + + @metric(higher_is_better=False) + def mase(self): + """Mean Absolute Scaled Error""" + return float(np.nanmean(np.abs(self.truth/self.y_past_period_error - self.predictions/self.y_past_period_error))) + + @metric(higher_is_better=False) + def smape(self): + """Symmetric Mean Absolute Percentage Error""" + num = np.abs(self.truth - self.predictions) + denom = (np.abs(self.truth) + np.abs(self.predictions)) / 2 + # If the denominator is 0, we set it to float('inf') such that any division yields 0 (this + # might not be fully mathematically correct, but at least we don't get NaNs) + denom[denom == 0] = math.inf + return np.mean(num / denom) + + @metric(higher_is_better=False) + def mape(self): + """Mean Absolute Percentage Error""" + num = np.abs(self.truth - self.predictions) + denom = np.abs(self.truth) + # If the denominator is 0, we set it to float('inf') such that any division yields 0 (this + # might not be fully mathematically correct, but at least we don't get NaNs) + denom[denom == 0] = math.inf + return np.mean(num / denom) + + @metric(higher_is_better=False) + def nrmse(self): + """Normalized Root Mean Square Error""" + return self.rmse() / np.mean(np.abs(self.truth)) + + @metric(higher_is_better=False) + def wape(self): + """Weighted Average Percentage Error""" + return np.sum(np.abs(self.truth - self.predictions)) / np.sum(np.abs(self.truth)) + + @metric(higher_is_better=False) + def ncrps(self): + """Normalized Continuous Ranked Probability Score""" + quantile_losses = 2 * np.sum( + np.abs( + (self.quantiles - self.truth[:, None]) + * ((self.quantiles >= self.truth[:, None]) - self.quantiles_probs[None, :]) + ), + axis=0, + ) + denom = np.sum(np.abs(self.truth)) # scalar: total absolute truth, used to normalise the losses + weighted_losses = quantile_losses.sum(0) / denom # scalar: .sum(0) adds up the per-quantile losses (shape [num_quantiles]); NOTE(review): the final .mean() is then a no-op -- confirm a mean over quantiles was not intended + return weighted_losses.mean() _encode_predictions_and_truth_ = False diff --git 
a/frameworks/AutoGluon/README.md b/frameworks/AutoGluon/README.md new file mode 100644 index 000000000..51286533e --- /dev/null +++ b/frameworks/AutoGluon/README.md @@ -0,0 +1,16 @@ +# AutoGluon + +To run v0.5.2: ```python3 ../automlbenchmark/runbenchmark.py autogluon ...``` + +To run mainline: ```python3 ../automlbenchmark/runbenchmark.py autogluon:latest ...``` + + +# AutoGluonTS + +AutoGluonTS stands for autogluon.timeseries. This framework handles time series problems. + +## Run Steps + +To run v0.5.2: ```python3 ../automlbenchmark/runbenchmark.py autogluonts timeseries ...``` + +To run mainline: ```python3 ../automlbenchmark/runbenchmark.py autogluonts:latest timeseries ...``` diff --git a/frameworks/AutoGluon/__init__.py b/frameworks/AutoGluon/__init__.py index be2c15147..c8694148c 100644 --- a/frameworks/AutoGluon/__init__.py +++ b/frameworks/AutoGluon/__init__.py @@ -1,15 +1,23 @@ -from amlb.benchmark import TaskConfig -from amlb.data import Dataset + from amlb.utils import call_script_in_same_dir +from amlb.benchmark import TaskConfig +from amlb.data import Dataset, DatasetType +from copy import deepcopy def setup(*args, **kwargs): call_script_in_same_dir(__file__, "setup.sh", *args, **kwargs) - def run(dataset: Dataset, config: TaskConfig): - from frameworks.shared.caller import run_in_venv + if dataset.type is not DatasetType.timeseries: + return run_autogluon_tabular(dataset, config) + + else: + return run_autogluon_timeseries(dataset, config) + +def run_autogluon_tabular(dataset: Dataset, config: TaskConfig): + from frameworks.shared.caller import run_in_venv data = dict( train=dict(path=dataset.train.data_path('parquet')), test=dict(path=dataset.test.data_path('parquet')), @@ -23,3 +31,30 @@ def run(dataset: Dataset, config: TaskConfig): return run_in_venv(__file__, "exec.py", input_data=data, dataset=dataset, config=config) +def run_autogluon_timeseries(dataset: Dataset, config: TaskConfig): + from frameworks.shared.caller import run_in_venv + 
dataset = deepcopy(dataset) + if not hasattr(dataset, 'timestamp_column'): + dataset.timestamp_column = None + if not hasattr(dataset, 'id_column'): + dataset.id_column = None + if not hasattr(dataset, 'forecast_range_in_steps'): + raise AttributeError("Unspecified `forecast_range_in_steps`.") + + data = dict( + # train=dict(path=dataset.train.data_path('parquet')), + # test=dict(path=dataset.test.data_path('parquet')), + train=dict(path=dataset.train.path), + test=dict(path=dataset.test.path), + target=dict( + name=dataset.target.name, + classes=dataset.target.values + ), + problem_type=dataset.type.name, # AutoGluon problem_type is using same names as amlb.data.DatasetType + timestamp_column=dataset.timestamp_column, + id_column=dataset.id_column, + forecast_range_in_steps=dataset.forecast_range_in_steps + ) + + return run_in_venv(__file__, "exec_ts.py", + input_data=data, dataset=dataset, config=config) diff --git a/frameworks/AutoGluon/exec_ts.py b/frameworks/AutoGluon/exec_ts.py new file mode 100644 index 000000000..ab7c4110f --- /dev/null +++ b/frameworks/AutoGluon/exec_ts.py @@ -0,0 +1,172 @@ +import logging +import os +import shutil +import warnings +import sys +import tempfile +import numpy as np +warnings.simplefilter("ignore") + +if sys.platform == 'darwin': + os.environ['OMP_NUM_THREADS'] = '1' + +import pandas as pd + +from autogluon.core.utils.savers import save_pd, save_pkl +from autogluon.tabular import TabularDataset +from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame +from autogluon.timeseries.version import __version__ + +from frameworks.shared.callee import call_run, result, output_subdir +from frameworks.shared.utils import Timer, zip_path + +log = logging.getLogger(__name__) + + +def run(dataset, config): + log.info(f"\n**** AutoGluon TimeSeries [v{__version__}] ****\n") + + timestamp_column = dataset.timestamp_column + id_column = dataset.id_column + prediction_length = dataset.forecast_range_in_steps + + eval_metric = 
get_eval_metric(config) + label = dataset.target.name + time_limit = config.max_runtime_seconds + + training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')} + + train_data, test_data = load_data(train_path=dataset.train.path, + test_path=dataset.test.path, + timestamp_column=timestamp_column, + id_column=id_column) + test_data_past = test_data.copy().slice_by_timestep(slice(None, -prediction_length)) + + predictor_path = tempfile.mkdtemp() + os.sep + with Timer() as training: + predictor = TimeSeriesPredictor( + target=label, + path=predictor_path, + prediction_length=prediction_length, + eval_metric=eval_metric, + ) + predictor.fit( + train_data=train_data, + time_limit=time_limit, + **training_params, + ) + + with Timer() as predict: + predictions = predictor.predict(test_data_past) + log.info(predictions) + + predictions_only = predictions['mean'].values + test_data_future = test_data.copy().slice_by_timestep(slice(-prediction_length, None)) + truth_only = test_data_future[label].values + + log.info(predictions_only) + log.info(truth_only) + + leaderboard = predictor.leaderboard(test_data, silent=True) + + with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.width', 1000): + log.info(leaderboard) + + num_models_trained = len(leaderboard) + + save_artifacts(predictor=predictor, leaderboard=leaderboard, config=config) + shutil.rmtree(predictor.path, ignore_errors=True) + + quantiles = predictions.drop(columns=['mean']).reset_index(drop=True) + period_length = 1 # TODO: This period length could be adapted to the Dataset, but then we need to pass this information as well. As of now this works. + + # we aim to calculate the mean period error from the past for each sequence: 1/N sum_{i=1}^N |x(t_i) - x(t_i - T)| + # 1. retrieve item_ids for each sequence/item + #dataset..X /. 
y + item_ids, inverse_item_ids = np.unique(test_data.reset_index()["item_id"].squeeze().to_numpy(), return_index=False, return_inverse=True) + # 2. capture sequences in a list + y_past = [test_data[label].squeeze().to_numpy()[inverse_item_ids == i][:-prediction_length] for i in range(len(item_ids))] + # 3. calculate period error per sequence + y_past_period_error = [np.abs(y_past_item[period_length:] - y_past_item[:-period_length]).mean() for y_past_item in y_past] + # 4. repeat period error for each sequence, to save one for each element + y_past_period_error_rep = np.repeat(y_past_period_error, prediction_length) + + optional_columns = quantiles + optional_columns = optional_columns.assign(y_past_period_error=y_past_period_error_rep) + + return result(output_file=config.output_predictions_file, + predictions=predictions_only, + truth=truth_only, + probabilities=None, + probabilities_labels=None, + target_is_encoded=False, + models_count=num_models_trained, + training_duration=training.duration, + predict_duration=predict.duration, + optional_columns=optional_columns) + +def load_data(train_path, test_path, timestamp_column, id_column): + + train_df = pd.read_csv( + train_path, + parse_dates=[timestamp_column], + ) + + train_data = TimeSeriesDataFrame.from_data_frame( + train_df, + id_column=id_column, + timestamp_column=timestamp_column, + ) + + test_df = pd.read_csv( + test_path, + parse_dates=[timestamp_column], + ) + + test_data = TimeSeriesDataFrame.from_data_frame( + test_df, + id_column=id_column, + timestamp_column=timestamp_column, + ) + + return train_data, test_data + + +def get_eval_metric(config): + # TODO: Support more metrics + metrics_mapping = dict( + mape="MAPE", + smape="sMAPE", + mase="MASE", + mse="MSE", + rmse="RMSE", + ) + + eval_metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else None + if eval_metric is None: + log.warning("Performance metric %s not supported.", config.metric) + return eval_metric + + +def 
save_artifacts(predictor, leaderboard, config): + artifacts = config.framework_params.get('_save_artifacts', ['leaderboard']) + try: + if 'leaderboard' in artifacts: + leaderboard_dir = output_subdir("leaderboard", config) + save_pd.save(path=os.path.join(leaderboard_dir, "leaderboard.csv"), df=leaderboard) + + if 'info' in artifacts: + ag_info = predictor.info() + info_dir = output_subdir("info", config) + save_pkl.save(path=os.path.join(info_dir, "info.pkl"), object=ag_info) + + if 'models' in artifacts: + shutil.rmtree(os.path.join(predictor.path, "utils"), ignore_errors=True) + models_dir = output_subdir("models", config) + zip_path(predictor.path, os.path.join(models_dir, "models.zip")) + except Exception: + log.warning("Error when saving artifacts.", exc_info=True) + + +if __name__ == '__main__': + call_run(run) diff --git a/frameworks/AutoGluon/setup.sh b/frameworks/AutoGluon/setup.sh index 6ef50ed8c..967c9d40c 100755 --- a/frameworks/AutoGluon/setup.sh +++ b/frameworks/AutoGluon/setup.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash + HERE=$(dirname "$0") VERSION=${1:-"stable"} REPO=${2:-"https://github.com/awslabs/autogluon.git"} @@ -36,4 +37,10 @@ else PIP install -e tabular/[skex] fi -PY -c "from autogluon.tabular.version import __version__; print(__version__)" >> "${HERE}/.setup/installed" +if [[ ${MODULE} == "timeseries" ]]; then + PY -c "from autogluon.timeseries.version import __version__; print(__version__)" >> "${HERE}/.setup/installed" + # TODO: GPU version install + PIP install "mxnet<2.0" +else + PY -c "from autogluon.tabular.version import __version__; print(__version__)" >> "${HERE}/.setup/installed" +fi diff --git a/frameworks/shared/callee.py b/frameworks/shared/callee.py index 3bf70dd3c..c596e01c5 100644 --- a/frameworks/shared/callee.py +++ b/frameworks/shared/callee.py @@ -17,6 +17,7 @@ class FrameworkError(Exception): def result(output_file=None, predictions=None, truth=None, probabilities=None, probabilities_labels=None, + optional_columns=None, 
target_is_encoded=False, error_message=None, models_count=None, @@ -69,7 +70,7 @@ def load_data(name, path, **_): wait_retry_secs=10): result = run_fn(ds, config) res = dict(result) - for name in ['predictions', 'truth', 'probabilities']: + for name in ['predictions', 'truth', 'probabilities', 'optional_columns']: arr = result[name] if arr is not None: path = os.path.join(config.result_dir, '.'.join([name, 'data'])) diff --git a/frameworks/shared/caller.py b/frameworks/shared/caller.py index da8cea0e5..09654dc32 100644 --- a/frameworks/shared/caller.py +++ b/frameworks/shared/caller.py @@ -149,7 +149,7 @@ def run_in_venv(caller_file, script_file: str, *args, if res.error_message is not None: raise NoResultError(res.error_message) - for name in ['predictions', 'truth', 'probabilities']: + for name in ['predictions', 'truth', 'probabilities', 'optional_columns']: res[name] = deserialize_data(res[name], config=ser_config) if res[name] is not None else None if callable(process_results): @@ -164,6 +164,7 @@ def run_in_venv(caller_file, script_file: str, *args, else dataset.test.y), probabilities=res.probabilities, probabilities_labels=res.probabilities_labels, + optional_columns=res.optional_columns, target_is_encoded=res.target_is_encoded) return dict( diff --git a/resources/benchmarks/timeseries.yaml b/resources/benchmarks/timeseries.yaml new file mode 100644 index 000000000..26af06497 --- /dev/null +++ b/resources/benchmarks/timeseries.yaml @@ -0,0 +1,13 @@ +--- + +- name: covid + dataset: + train: https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/train.csv + test: https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/test.csv + target: ConfirmedCases + type: timeseries + forecast_range_in_steps: 19 + id_column: name + timestamp_column: Date + + folds: 1 diff --git a/resources/config.yaml b/resources/config.yaml index ba3a9f930..0e237584e 100644 --- a/resources/config.yaml +++ b/resources/config.yaml @@ -54,6 +54,7 @@ benchmarks: # 
configuration namespace for the benchmarks def binary: ['auc', 'logloss', 'acc', 'balacc'] # available metrics: auc (AUC), acc (Accuracy), balacc (Balanced Accuracy), pr_auc (Precision Recall AUC), logloss (Log Loss), f1, f2, f05 (F-beta scores with beta=1, 2, or 0.5), max_pce, mean_pce (Max/Mean Per-Class Error). multiclass: ['logloss', 'acc', 'balacc'] # available metrics: same as for binary, except auc, replaced by auc_ovo (AUC One-vs-One), auc_ovr (AUC One-vs-Rest). AUC metrics and F-beta metrics are computed with weighted average. regression: ['rmse', 'r2', 'mae'] # available metrics: mae (Mean Absolute Error), mse (Mean Squared Error), msle (Mean Squared Logarithmic Error), rmse (Root Mean Square Error), rmsle (Root Mean Square Logarithmic Error), r2 (R^2). + timeseries: ['mase', 'mape', 'smape', 'rmse', 'mse', 'nrmse', 'wape', 'ncrps'] defaults: # the default constraints, usually overridden by a constraint. folds: 10 # the amount of fold-runs executed for each dataset. max_runtime_seconds: 3600 # default time allocated to the framework to train a model. diff --git a/resources/frameworks.yaml b/resources/frameworks.yaml index 9b0f14827..513c99586 100644 --- a/resources/frameworks.yaml +++ b/resources/frameworks.yaml @@ -86,9 +86,9 @@ autoxgboost: flaml: version: 'stable' description: | - FLAML is a lightweight Python library that finds accurate machine learning models - automatically, efficiently and economically. It frees users from selecting learners - and hyperparameters for each learner. It is fast and cheap. + FLAML is a lightweight Python library that finds accurate machine learning models + automatically, efficiently and economically. It frees users from selecting learners + and hyperparameters for each learner. It is fast and cheap. 
project: https://github.com/microsoft/FLAML refs: [https://arxiv.org/pdf/1911.04706.pdf] @@ -139,12 +139,12 @@ mljarsupervised_compete: description: "MLJAR is using 'Compete' mode to provide the most accurate predictor" params: mode: Compete # set mode for Compete, default mode is Explain - + MLNet: version: 'latest' description: | MLNET.CLI is a automated machine learning tool implemented by ml.net. - + MLPlan: version: 'stable' abstract: true @@ -191,7 +191,17 @@ TPOT: # population_size: 25 # verbosity: 2 +#################################### +### TimeSeries AutoML frameworks ### +#################################### +AutoGluonTS: + extends: AutoGluon + version: "stable" + description: | + AutoGluon-TimeSeries + setup_env: + MODULE: timeseries ####################################### ### Non AutoML reference frameworks ### @@ -234,4 +244,3 @@ TunedRandomForest: # _n_jobs: 1 # cf. RandomForest # _tuning: # n_estimators: 500 - diff --git a/resources/frameworks_latest.yaml b/resources/frameworks_latest.yaml index d2e8afff3..becdc4e3e 100644 --- a/resources/frameworks_latest.yaml +++ b/resources/frameworks_latest.yaml @@ -80,8 +80,17 @@ oboe: TPOT: version: 'latest' +#################################### +### TimeSeries AutoML frameworks ### +#################################### - +AutoGluonTS: + extends: AutoGluon + version: "latest" + description: | + AutoGluon-TimeSeries + setup_env: + MODULE: timeseries ####################################### ### Non AutoML reference frameworks ### @@ -104,4 +113,3 @@ TunedRandomForest: version: 'latest' params: n_estimators: 2000 -