From fdac87d788e219575b68bec8b5d5b3fde115fa72 Mon Sep 17 00:00:00 2001 From: sommerle Date: Wed, 14 Sep 2022 13:49:37 +0000 Subject: [PATCH 01/30] fixed loading test & train, changed pred.-l. 5->30 --- frameworks/AutoGluonTS/exec.py | 49 ++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/frameworks/AutoGluonTS/exec.py b/frameworks/AutoGluonTS/exec.py index 9b172e129..88c46c0fb 100644 --- a/frameworks/AutoGluonTS/exec.py +++ b/frameworks/AutoGluonTS/exec.py @@ -32,7 +32,7 @@ def run(dataset, config): # TODO: Need to pass the following info somehow timestamp_column = "Date" id_column = "name" - prediction_length = 5 + prediction_length = 30 ################# eval_metric = get_eval_metric(config) @@ -41,10 +41,10 @@ def run(dataset, config): training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')} - train_data, test_data, test_data_leaderboard = load_data(train_path=dataset.train.path, - timestamp_column=timestamp_column, - id_column=id_column, - prediction_length=prediction_length) + train_data, test_data = load_data(train_path=dataset.train.path, + test_path=dataset.test.path, + timestamp_column=timestamp_column, + id_column=id_column) predictor_path = tempfile.mkdtemp() + os.sep with Timer() as training: @@ -61,16 +61,18 @@ def run(dataset, config): ) with Timer() as predict: - predictions = predictor.predict(train_data) + test_data_past = test_data.copy().slice_by_timestep(slice(None, -prediction_length)) + predictions = predictor.predict(test_data_past) log.info(predictions) predictions_only = predictions['mean'].values - truth_only = test_data[label].values + test_data_future = test_data.copy().slice_by_timestep(slice(-prediction_length, None)) + truth_only = test_data_future[label].values log.info(predictions_only) log.info(truth_only) - leaderboard = predictor.leaderboard(test_data_leaderboard) + leaderboard = predictor.leaderboard(test_data) with 
pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.width', 1000): log.info(leaderboard) @@ -91,18 +93,31 @@ def run(dataset, config): predict_duration=predict.duration) -def load_data(train_path, timestamp_column, id_column, prediction_length): - df = TabularDataset(train_path) - df[timestamp_column] = pd.to_datetime(df[timestamp_column].astype('object')) - train_data = TimeSeriesDataFrame.from_data_frame(df, id_column=id_column, timestamp_column=timestamp_column) +def load_data(train_path, test_path, timestamp_column, id_column): - test_data_leaderboard = train_data.copy() - # the data set with the last prediction_length time steps included, i.e., akin to `a[:-5]` - train_data = train_data.slice_by_timestep(slice(None, -prediction_length)) + train_df = pd.read_csv( + train_path, + parse_dates=[timestamp_column], + ) + + train_data = TimeSeriesDataFrame.from_data_frame( + train_df, + id_column=id_column, + timestamp_column=timestamp_column, + ) - test_data = test_data_leaderboard.slice_by_timestep(slice(-prediction_length, None)) + test_df = pd.read_csv( + test_path, + parse_dates=[timestamp_column], + ) + + test_data = TimeSeriesDataFrame.from_data_frame( + test_df, + id_column=id_column, + timestamp_column=timestamp_column, + ) - return train_data, test_data, test_data_leaderboard + return train_data, test_data def get_eval_metric(config): From acae465e418f94e7a9f69c53b6354f11f1df1bc6 Mon Sep 17 00:00:00 2001 From: sommerle Date: Wed, 14 Sep 2022 13:51:22 +0000 Subject: [PATCH 02/30] ignore launch.json of vscode --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 4dba33db1..bc9c76adc 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ venv/ .idea/ *.iml *.swp +launch.json # tmp files .ipynb_checkpoints/ From b5723cfa4f5ede93beadf97d31825fe1902ca674 Mon Sep 17 00:00:00 2001 From: sommerle Date: Fri, 16 Sep 2022 15:41:08 +0000 Subject: [PATCH 03/30] ensuring timestamp parsing --- 
amlb/datautils.py | 44 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/amlb/datautils.py b/amlb/datautils.py index f3eeeb2a5..1c489ef8b 100644 --- a/amlb/datautils.py +++ b/amlb/datautils.py @@ -26,7 +26,7 @@ log = logging.getLogger(__name__) -def read_csv(path, nrows=None, header=True, index=False, as_data_frame=True, dtype=None): +def read_csv(path, nrows=None, header=True, index=False, as_data_frame=True, dtype=None, timestamp_column=None): """ read csv file to DataFrame. @@ -39,11 +39,15 @@ def read_csv(path, nrows=None, header=True, index=False, as_data_frame=True, dty :param dtype: data type for columns. :return: a DataFrame """ + if dtype is not None and timestamp_column is not None and timestamp_column in dtype: + del dtype[timestamp_column] + df = pd.read_csv(path, nrows=nrows, header=0 if header else None, index_col=0 if index else None, - dtype=dtype) + dtype=dtype, + parse_dates=[timestamp_column] if timestamp_column is not None else None) return df if as_data_frame else df.values @@ -344,3 +348,39 @@ def _restore_dtypes(X_np, X_ori): return X_np.astype(X_ori.dtype, copy=False) else: return X_np + + +DEFAULT_SEASONALITIES = { + "S": 3600, # 1 hour + "T": 1440, # 1 day + "H": 24, # 1 day + "D": 1, # 1 day + "W": 1, # 1 week + "M": 12, + "B": 5, + "Q": 4, +} + + +def norm_freq_str(freq_str: str) -> str: + return freq_str.split("-")[0] + +def get_seasonality(freq: str, seasonalities=DEFAULT_SEASONALITIES) -> int: + """ + Return the seasonality of a given frequency: + >>> get_seasonality("2H") + 12 + """ + offset = pd.tseries.frequencies.to_offset(freq) + + base_seasonality = seasonalities.get(norm_freq_str(offset.name), 1) + + seasonality, remainder = divmod(base_seasonality, offset.n) + if not remainder: + return seasonality + + log.warning( + f"Multiple {offset.n} does not divide base seasonality " + f"{base_seasonality}. Falling back to seasonality 1." 
+ ) + return 1 From 55c63e9cb6167d2afe4f34ce2b60c86a2826bf3c Mon Sep 17 00:00:00 2001 From: sommerle Date: Fri, 16 Sep 2022 15:44:03 +0000 Subject: [PATCH 04/30] pass config, save pred, add results --- amlb/benchmark.py | 14 +++- amlb/data.py | 1 + amlb/datasets/file.py | 20 ++--- amlb/results.py | 114 +++++++++++++++++++++++++++-- frameworks/AutoGluonTS/__init__.py | 6 +- frameworks/AutoGluonTS/exec.py | 10 ++- frameworks/shared/callee.py | 3 +- frameworks/shared/caller.py | 5 +- resources/benchmarks/ts.yaml | 8 +- resources/config.yaml | 1 + 10 files changed, 152 insertions(+), 30 deletions(-) diff --git a/amlb/benchmark.py b/amlb/benchmark.py index b9975efdc..43f64e9f9 100644 --- a/amlb/benchmark.py +++ b/amlb/benchmark.py @@ -489,7 +489,11 @@ def load_data(self): # TODO raise NotImplementedError("OpenML datasets without task_id are not supported yet.") elif hasattr(self._task_def, 'dataset'): - self._dataset = Benchmark.data_loader.load(DataSourceType.file, dataset=self._task_def.dataset, fold=self.fold) + self._dataset = Benchmark.data_loader.load(DataSourceType.file, dataset=self._task_def.dataset, fold=self.fold, timestamp_column=self._task_def.dataset['timestamp_column']) + if self._dataset.type == DatasetType.timeseries: + self._dataset.timestamp_column=self._task_def.dataset['timestamp_column'] + self._dataset.id_column=self._task_def.dataset['id_column'] + self._dataset.prediction_length=self._task_def.dataset['prediction_length'] else: raise ValueError("Tasks should have one property among [openml_task_id, openml_dataset_id, dataset].") @@ -522,7 +526,12 @@ def run(self): predictions_dir=self.benchmark.output_dirs.predictions) framework_def = self.benchmark.framework_def task_config = copy(self.task_config) - task_config.type = 'regression' if self._dataset.type == DatasetType.regression else 'classification' + if self._dataset.type == DatasetType.regression: + task_config.type = 'regression' + elif self._dataset.type == DatasetType.timeseries: + 
task_config.type = 'timeseries' + else: + task_config.type = 'classification' task_config.type_ = self._dataset.type.name task_config.framework = self.benchmark.framework_name task_config.framework_params = framework_def.params @@ -552,4 +561,3 @@ def run(self): finally: self._dataset.release() return results.compute_score(result=result, meta_result=meta_result) - diff --git a/amlb/data.py b/amlb/data.py index 4e4cea879..acca17841 100644 --- a/amlb/data.py +++ b/amlb/data.py @@ -172,6 +172,7 @@ class DatasetType(Enum): binary = 1 multiclass = 2 regression = 3 + timeseries = 4 class Dataset(ABC): diff --git a/amlb/datasets/file.py b/amlb/datasets/file.py index 6ddca4042..0bfa9453b 100644 --- a/amlb/datasets/file.py +++ b/amlb/datasets/file.py @@ -30,7 +30,7 @@ def __init__(self, cache_dir=None): self._cache_dir = cache_dir if cache_dir else tempfile.mkdtemp(prefix='amlb_cache') @profile(logger=log) - def load(self, dataset, fold=0): + def load(self, dataset, fold=0, timestamp_column=None): dataset = dataset if isinstance(dataset, ns) else ns(path=dataset) log.debug("Loading dataset %s", dataset) paths = self._extract_train_test_paths(dataset.path if 'path' in dataset else dataset, fold=fold) @@ -51,7 +51,7 @@ def load(self, dataset, fold=0): if ext == '.arff': return ArffDataset(train_path, test_path, target=target, features=features, type=type_) elif ext == '.csv': - return CsvDataset(train_path, test_path, target=target, features=features, type=type_) + return CsvDataset(train_path, test_path, target=target, features=features, type=type_, timestamp_column=timestamp_column) else: raise ValueError(f"Unsupported file type: {ext}") @@ -302,25 +302,26 @@ def release(self, properties=None): class CsvDataset(FileDataset): def __init__(self, train_path, test_path, - target=None, features=None, type=None): + target=None, features=None, type=None, timestamp_column=None): # todo: handle auto-split (if test_path is None): requires loading the training set, split, save 
super().__init__(None, None, target=target, features=features, type=type) - self._train = CsvDatasplit(self, train_path) - self._test = CsvDatasplit(self, test_path) + self._train = CsvDatasplit(self, train_path, timestamp_column=timestamp_column) + self._test = CsvDatasplit(self, test_path, timestamp_column=timestamp_column) self._dtypes = None class CsvDatasplit(FileDatasplit): - def __init__(self, dataset, path): + def __init__(self, dataset, path, timestamp_column=None): super().__init__(dataset, format='csv', path=path) self._ds = None + self.timestamp_column = timestamp_column def _ensure_loaded(self): if self._ds is None: if self.dataset._dtypes is None: - df = read_csv(self.path) + df = read_csv(self.path, timestamp_column=self.timestamp_column) # df = df.convert_dtypes() dt_conversions = {name: 'category' for name, dtype in zip(df.dtypes.index, df.dtypes.values) @@ -336,8 +337,9 @@ def _ensure_loaded(self): self._ds = df self.dataset._dtypes = self._ds.dtypes + else: - self._ds = read_csv(self.path, dtype=self.dataset._dtypes.to_dict()) + self._ds = read_csv(self.path, dtype=self.dataset._dtypes.to_dict(), timestamp_column=self.timestamp_column) @profile(logger=log) def load_metadata(self): @@ -348,7 +350,7 @@ def load_metadata(self): else 'number' if pat.is_numeric_dtype(dt) else 'category' if pat.is_categorical_dtype(dt) else 'string' if pat.is_string_dtype(dt) - # else 'datetime' if pat.is_datetime64_dtype(dt) + else 'datetime' if pat.is_datetime64_dtype(dt) else 'object') features = [Feature(i, col, to_feature_type(dtypes[i])) for i, col in enumerate(self._ds.columns)] diff --git a/amlb/results.py b/amlb/results.py index 2f547b4ec..f86a7eebe 100644 --- a/amlb/results.py +++ b/amlb/results.py @@ -20,7 +20,7 @@ from .data import Dataset, DatasetType, Feature from .datautils import accuracy_score, auc, average_precision_score, balanced_accuracy_score, confusion_matrix, fbeta_score, log_loss, \ mean_absolute_error, mean_squared_error, 
mean_squared_log_error, precision_recall_curve, r2_score, roc_auc_score, \ - read_csv, write_csv, is_data_frame, to_data_frame + read_csv, write_csv, is_data_frame, to_data_frame, get_seasonality from .resources import get as rget, config as rconfig, output_dirs from .utils import Namespace, backup_file, cached, datetime_iso, get_metadata, json_load, memoize, profile, set_metadata @@ -228,12 +228,16 @@ def load_predictions(predictions_file): try: df = read_csv(predictions_file, dtype=object) log.debug("Predictions preview:\n %s\n", df.head(10).to_string()) - if rconfig().test_mode: - TaskResult.validate_predictions(df) - if df.shape[1] > 2: - return ClassificationResult(df) + if 'y_past_period_error' in df.columns: + return TimeSeriesResult(df) else: - return RegressionResult(df) + if rconfig().test_mode: + TaskResult.validate_predictions(df) + + if df.shape[1] > 2: + return ClassificationResult(df) + else: + return RegressionResult(df) except Exception as e: return ErrorResult(ResultError(e)) else: @@ -255,7 +259,8 @@ def save_predictions(dataset: Dataset, output_file: str, predictions: Union[A, DF, S] = None, truth: Union[A, DF, S] = None, probabilities: Union[A, DF] = None, probabilities_labels: Union[list, A] = None, target_is_encoded: bool = False, - preview: bool = True): + preview: bool = True, + quantiles: Union[A, DF] = None): """ Save class probabilities and predicted labels to file in csv format. :param dataset: @@ -308,6 +313,16 @@ def save_predictions(dataset: Dataset, output_file: str, df = df.assign(predictions=preds) df = df.assign(truth=truth) + if quantiles is not None: + quantiles.reset_index(drop=True, inplace=True) + df = pd.concat([df, quantiles], axis=1) + if dataset.type == DatasetType.timeseries: + period_length = 1 # this period length could be adapted to the Dataset, but then we need to pass this information as well. As of now this should be fine. 
+ item_ids, inverse_item_ids = np.unique(dataset.test.X[dataset.id_column].squeeze().to_numpy(), return_index=False, return_inverse=True) + y_past = [dataset.test.y.squeeze().to_numpy()[inverse_item_ids == i][:-dataset.prediction_length] for i in range(len(item_ids))] + y_past_period_error = [np.abs(y_past_item[period_length:] - y_past_item[:-period_length]).mean() for y_past_item in y_past] + y_past_period_error_rep = np.repeat(y_past_period_error, dataset.prediction_length) + df = df.assign(y_past_period_error=y_past_period_error_rep) if preview: log.info("Predictions preview:\n %s\n", df.head(20).to_string()) backup_file(output_file) @@ -656,6 +671,91 @@ def r2(self): """R^2""" return float(r2_score(self.truth, self.predictions)) +class TimeSeriesResult(Result): + + def __init__(self, predictions_df, info=None): + super().__init__(predictions_df, info) + self.truth = self.df['truth'].values if self.df is not None else None #.iloc[:, 1].values if self.df is not None else None + self.predictions = self.df['predictions'].values if self.df is not None else None #.iloc[:, -2].values if self.df is not None else None + self.y_past_period_error = self.df['y_past_period_error'].values + self.quantiles = self.df.iloc[:, 2:-1].values + self.quantiles_probs = np.array([float(q) for q in self.df.columns[2:-1]]) + self.truth = self.truth.astype(float, copy=False) + self.predictions = self.predictions.astype(float, copy=False) + self.quantiles = self.quantiles.astype(float, copy=False) + self.y_past_period_error = self.y_past_period_error.astype(float, copy=False) + + self.target = Feature(0, 'target', 'real', is_target=True) + self.type = DatasetType.timeseries + + @metric(higher_is_better=False) + def mae(self): + """Mean Absolute Error""" + return float(mean_absolute_error(self.truth, self.predictions)) + + @metric(higher_is_better=False) + def mse(self): + """Mean Squared Error""" + return float(mean_squared_error(self.truth, self.predictions)) + + 
@metric(higher_is_better=False) + def msle(self): + """Mean Squared Logarithmic Error""" + return float(mean_squared_log_error(self.truth, self.predictions)) + + @metric(higher_is_better=False) + def rmse(self): + """Root Mean Square Error""" + return math.sqrt(self.mse()) + + @metric(higher_is_better=False) + def rmsle(self): + """Root Mean Square Logarithmic Error""" + return math.sqrt(self.msle()) + + @metric(higher_is_better=True) + def r2(self): + """R^2""" + return float(r2_score(self.truth, self.predictions)) + + @metric(higher_is_better=False) + def mase(self): + """Mean Absolute Scaled Error""" + return float(np.nanmean(np.abs(self.truth/self.y_past_period_error - self.predictions/self.y_past_period_error))) + + @metric(higher_is_better=False) + def smape(self): + """Symmetric Mean Absolute Percentage Error""" + num = np.abs(self.truth - self.predictions) + denom = (np.abs(self.truth) + np.abs(self.predictions)) / 2 + # If the denominator is 0, we set it to float('inf') such that any division yields 0 (this + # might not be fully mathematically correct, but at least we don't get NaNs) + denom[denom == 0] = math.inf + return np.mean(num / denom) + + @metric(higher_is_better=False) + def nrmse(self): + """Normalized Root Mean Square Error""" + return self.rmse() / np.mean(np.abs(self.truth)) + + @metric(higher_is_better=False) + def nd(self): + """nd = ?""" + return np.sum(np.abs(self.truth - self.predictions)) / np.sum(np.abs(self.truth)) + + @metric(higher_is_better=False) + def ncrps(self): + """Normalized Continuous Ranked Probability Score""" + quantile_losses = 2 * np.sum( + np.abs( + (self.quantiles - self.truth[:, None]) + * ((self.quantiles >= self.truth[:, None]) - self.quantiles_probs[None, :]) + ), + axis=0, + ) + denom = np.sum(np.abs(self.truth)) # shape [num_time_series, num_quantiles] + weighted_losses = quantile_losses.sum(0) / denom # shape [num_quantiles] + return weighted_losses.mean() _encode_predictions_and_truth_ = False diff --git 
a/frameworks/AutoGluonTS/__init__.py b/frameworks/AutoGluonTS/__init__.py index 4e3f16e1f..3e1744ee7 100644 --- a/frameworks/AutoGluonTS/__init__.py +++ b/frameworks/AutoGluonTS/__init__.py @@ -19,9 +19,11 @@ def run(dataset: Dataset, config: TaskConfig): name=dataset.target.name, classes=dataset.target.values ), - problem_type=dataset.type.name # AutoGluon problem_type is using same names as amlb.data.DatasetType + problem_type=dataset.type.name, # AutoGluon problem_type is using same names as amlb.data.DatasetType + timestamp_column=dataset.timestamp_column if dataset.timestamp_column is not None else None, + id_column=dataset.id_column if dataset.id_column is not None else None, + prediction_length=dataset.prediction_length if dataset.prediction_length is not None else None ) return run_in_venv(__file__, "exec.py", input_data=data, dataset=dataset, config=config) - diff --git a/frameworks/AutoGluonTS/exec.py b/frameworks/AutoGluonTS/exec.py index 88c46c0fb..f14579515 100644 --- a/frameworks/AutoGluonTS/exec.py +++ b/frameworks/AutoGluonTS/exec.py @@ -30,9 +30,9 @@ def run(dataset, config): ################# # TODO: Need to pass the following info somehow - timestamp_column = "Date" - id_column = "name" - prediction_length = 30 + timestamp_column = dataset.timestamp_column + id_column = dataset.id_column + prediction_length = dataset.prediction_length ################# eval_metric = get_eval_metric(config) @@ -90,7 +90,8 @@ def run(dataset, config): target_is_encoded=False, models_count=num_models_trained, training_duration=training.duration, - predict_duration=predict.duration) + predict_duration=predict.duration, + quantiles=predictions.iloc[:, 1:]) def load_data(train_path, test_path, timestamp_column, id_column): @@ -125,6 +126,7 @@ def get_eval_metric(config): metrics_mapping = dict( mse="MSE", rmse="RMSE", + mase="MASE", ) eval_metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else None diff --git a/frameworks/shared/callee.py 
b/frameworks/shared/callee.py index 3bf70dd3c..70b5a3be0 100644 --- a/frameworks/shared/callee.py +++ b/frameworks/shared/callee.py @@ -22,6 +22,7 @@ def result(output_file=None, models_count=None, training_duration=None, predict_duration=None, + quantiles=None, **others): return locals() @@ -69,7 +70,7 @@ def load_data(name, path, **_): wait_retry_secs=10): result = run_fn(ds, config) res = dict(result) - for name in ['predictions', 'truth', 'probabilities']: + for name in ['predictions', 'truth', 'probabilities', 'quantiles']: arr = result[name] if arr is not None: path = os.path.join(config.result_dir, '.'.join([name, 'data'])) diff --git a/frameworks/shared/caller.py b/frameworks/shared/caller.py index da8cea0e5..68963a820 100644 --- a/frameworks/shared/caller.py +++ b/frameworks/shared/caller.py @@ -149,7 +149,7 @@ def run_in_venv(caller_file, script_file: str, *args, if res.error_message is not None: raise NoResultError(res.error_message) - for name in ['predictions', 'truth', 'probabilities']: + for name in ['predictions', 'truth', 'probabilities', 'quantiles']: res[name] = deserialize_data(res[name], config=ser_config) if res[name] is not None else None if callable(process_results): @@ -164,7 +164,8 @@ def run_in_venv(caller_file, script_file: str, *args, else dataset.test.y), probabilities=res.probabilities, probabilities_labels=res.probabilities_labels, - target_is_encoded=res.target_is_encoded) + target_is_encoded=res.target_is_encoded, + quantiles=res.quantiles) return dict( models_count=res.models_count if res.models_count is not None else 1, diff --git a/resources/benchmarks/ts.yaml b/resources/benchmarks/ts.yaml index 5a67366b7..04cb86bb9 100644 --- a/resources/benchmarks/ts.yaml +++ b/resources/benchmarks/ts.yaml @@ -3,7 +3,11 @@ - name: covid dataset: train: https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/train.csv - test: https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/test.csv + test: 
https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/test.csv target: ConfirmedCases - folds: 1 + type: timeseries + prediction_length: 30 + id_column: name + timestamp_column: Date + folds: 1 diff --git a/resources/config.yaml b/resources/config.yaml index ba3a9f930..835758c76 100644 --- a/resources/config.yaml +++ b/resources/config.yaml @@ -54,6 +54,7 @@ benchmarks: # configuration namespace for the benchmarks def binary: ['auc', 'logloss', 'acc', 'balacc'] # available metrics: auc (AUC), acc (Accuracy), balacc (Balanced Accuracy), pr_auc (Precision Recall AUC), logloss (Log Loss), f1, f2, f05 (F-beta scores with beta=1, 2, or 0.5), max_pce, mean_pce (Max/Mean Per-Class Error). multiclass: ['logloss', 'acc', 'balacc'] # available metrics: same as for binary, except auc, replaced by auc_ovo (AUC One-vs-One), auc_ovr (AUC One-vs-Rest). AUC metrics and F-beta metrics are computed with weighted average. regression: ['rmse', 'r2', 'mae'] # available metrics: mae (Mean Absolute Error), mse (Mean Squared Error), msle (Mean Squared Logarithmic Error), rmse (Root Mean Square Error), rmsle (Root Mean Square Logarithmic Error), r2 (R^2). + timeseries: ['mase', 'smape', 'nrmse', 'nd', 'ncrps', 'rmse'] defaults: # the default constraints, usually overridden by a constraint. folds: 10 # the amount of fold-runs executed for each dataset. max_runtime_seconds: 3600 # default time allocated to the framework to train a model. 
From 0f3898678f2d6ba6c1ed751f0cb2fe7167520d24 Mon Sep 17 00:00:00 2001 From: sommerle Date: Fri, 16 Sep 2022 15:58:48 +0000 Subject: [PATCH 05/30] remove unused code --- amlb/datautils.py | 36 ------------------------------------ amlb/results.py | 2 +- 2 files changed, 1 insertion(+), 37 deletions(-) diff --git a/amlb/datautils.py b/amlb/datautils.py index 1c489ef8b..efc5b2c6d 100644 --- a/amlb/datautils.py +++ b/amlb/datautils.py @@ -348,39 +348,3 @@ def _restore_dtypes(X_np, X_ori): return X_np.astype(X_ori.dtype, copy=False) else: return X_np - - -DEFAULT_SEASONALITIES = { - "S": 3600, # 1 hour - "T": 1440, # 1 day - "H": 24, # 1 day - "D": 1, # 1 day - "W": 1, # 1 week - "M": 12, - "B": 5, - "Q": 4, -} - - -def norm_freq_str(freq_str: str) -> str: - return freq_str.split("-")[0] - -def get_seasonality(freq: str, seasonalities=DEFAULT_SEASONALITIES) -> int: - """ - Return the seasonality of a given frequency: - >>> get_seasonality("2H") - 12 - """ - offset = pd.tseries.frequencies.to_offset(freq) - - base_seasonality = seasonalities.get(norm_freq_str(offset.name), 1) - - seasonality, remainder = divmod(base_seasonality, offset.n) - if not remainder: - return seasonality - - log.warning( - f"Multiple {offset.n} does not divide base seasonality " - f"{base_seasonality}. Falling back to seasonality 1." 
- ) - return 1 diff --git a/amlb/results.py b/amlb/results.py index f86a7eebe..186c5a6c7 100644 --- a/amlb/results.py +++ b/amlb/results.py @@ -20,7 +20,7 @@ from .data import Dataset, DatasetType, Feature from .datautils import accuracy_score, auc, average_precision_score, balanced_accuracy_score, confusion_matrix, fbeta_score, log_loss, \ mean_absolute_error, mean_squared_error, mean_squared_log_error, precision_recall_curve, r2_score, roc_auc_score, \ - read_csv, write_csv, is_data_frame, to_data_frame, get_seasonality + read_csv, write_csv, is_data_frame, to_data_frame from .resources import get as rget, config as rconfig, output_dirs from .utils import Namespace, backup_file, cached, datetime_iso, get_metadata, json_load, memoize, profile, set_metadata From f93266994634c23fb3d1466481419ca60f8d8c37 Mon Sep 17 00:00:00 2001 From: sommerle Date: Tue, 20 Sep 2022 14:02:57 +0000 Subject: [PATCH 06/30] add readability, remove slice from timer --- frameworks/AutoGluonTS/exec.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/frameworks/AutoGluonTS/exec.py b/frameworks/AutoGluonTS/exec.py index f14579515..4720ac26a 100644 --- a/frameworks/AutoGluonTS/exec.py +++ b/frameworks/AutoGluonTS/exec.py @@ -22,18 +22,12 @@ log = logging.getLogger(__name__) -# FIXME: Why does leaderboard claim a different test score than AMLB for RMSE? -# FIXME: Currently ignoring test_path, just using train data for evaluation -# TODO: How to evaluate more complex metrics? 
def run(dataset, config): log.info(f"\n**** AutoGluon TimeSeries [v{__version__}] ****\n") - ################# - # TODO: Need to pass the following info somehow timestamp_column = dataset.timestamp_column id_column = dataset.id_column prediction_length = dataset.prediction_length - ################# eval_metric = get_eval_metric(config) label = dataset.target.name @@ -45,6 +39,7 @@ def run(dataset, config): test_path=dataset.test.path, timestamp_column=timestamp_column, id_column=id_column) + test_data_past = test_data.copy().slice_by_timestep(slice(None, -prediction_length)) predictor_path = tempfile.mkdtemp() + os.sep with Timer() as training: @@ -61,7 +56,6 @@ def run(dataset, config): ) with Timer() as predict: - test_data_past = test_data.copy().slice_by_timestep(slice(None, -prediction_length)) predictions = predictor.predict(test_data_past) log.info(predictions) @@ -72,7 +66,7 @@ def run(dataset, config): log.info(predictions_only) log.info(truth_only) - leaderboard = predictor.leaderboard(test_data) + leaderboard = predictor.leaderboard(test_data, silent=True) with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.width', 1000): log.info(leaderboard) @@ -91,7 +85,7 @@ def run(dataset, config): models_count=num_models_trained, training_duration=training.duration, predict_duration=predict.duration, - quantiles=predictions.iloc[:, 1:]) + quantiles=predictions.drop(columns=['mean'])) def load_data(train_path, test_path, timestamp_column, id_column): From 16a165b890f9499dd4e07af6113876fde547b5cc Mon Sep 17 00:00:00 2001 From: sommerle Date: Tue, 20 Sep 2022 14:04:41 +0000 Subject: [PATCH 07/30] ensure autogluonts has required info --- frameworks/AutoGluonTS/__init__.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/frameworks/AutoGluonTS/__init__.py b/frameworks/AutoGluonTS/__init__.py index 3e1744ee7..70283c3e5 100644 --- a/frameworks/AutoGluonTS/__init__.py +++ 
b/frameworks/AutoGluonTS/__init__.py @@ -1,5 +1,5 @@ from amlb.benchmark import TaskConfig -from amlb.data import Dataset +from amlb.data import Dataset, DatasetType from amlb.utils import call_script_in_same_dir @@ -10,6 +10,15 @@ def setup(*args, **kwargs): def run(dataset: Dataset, config: TaskConfig): from frameworks.shared.caller import run_in_venv + if hasattr(dataset, 'timestamp_column') is False: + dataset.timestamp_column = None + if hasattr(dataset, 'id_column') is False: + dataset.id_column = None + if hasattr(dataset, 'prediction_length') is False: + raise AttributeError("Unspecified `prediction_length`.") + if dataset.type is not DatasetType.timeseries: + raise ValueError("AutoGluonTS only supports timeseries.") + data = dict( # train=dict(path=dataset.train.data_path('parquet')), # test=dict(path=dataset.test.data_path('parquet')), @@ -20,9 +29,9 @@ def run(dataset: Dataset, config: TaskConfig): classes=dataset.target.values ), problem_type=dataset.type.name, # AutoGluon problem_type is using same names as amlb.data.DatasetType - timestamp_column=dataset.timestamp_column if dataset.timestamp_column is not None else None, - id_column=dataset.id_column if dataset.id_column is not None else None, - prediction_length=dataset.prediction_length if dataset.prediction_length is not None else None + timestamp_column=dataset.timestamp_column, + id_column=dataset.id_column, + prediction_length=dataset.prediction_length ) return run_in_venv(__file__, "exec.py", From 758b92d25d38e0978f9f0ee6d23318e91f1bc666 Mon Sep 17 00:00:00 2001 From: sommerle Date: Tue, 20 Sep 2022 14:05:04 +0000 Subject: [PATCH 08/30] add comments for readability --- amlb/results.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/amlb/results.py b/amlb/results.py index 186c5a6c7..814c204ff 100644 --- a/amlb/results.py +++ b/amlb/results.py @@ -271,6 +271,7 @@ def save_predictions(dataset: Dataset, output_file: str, :param probabilities_labels: :param 
target_is_encoded: :param preview: + :param qunatiles: :return: None """ log.debug("Saving predictions to `%s`.", output_file) @@ -313,14 +314,22 @@ def save_predictions(dataset: Dataset, output_file: str, df = df.assign(predictions=preds) df = df.assign(truth=truth) - if quantiles is not None: - quantiles.reset_index(drop=True, inplace=True) - df = pd.concat([df, quantiles], axis=1) + if dataset.type == DatasetType.timeseries: - period_length = 1 # this period length could be adapted to the Dataset, but then we need to pass this information as well. As of now this should be fine. + if quantiles is not None: + quantiles = quantiles.reset_index(drop=True) + df = pd.concat([df, quantiles], axis=1) + + period_length = 1 # TODO: This period length could be adapted to the Dataset, but then we need to pass this information as well. As of now this works. + + # we aim to calculate the mean period error from the past for each sequence: 1/N sum_{i=1}^N |x(t_i) - x(t_i - T)| + # 1. retrieve item_ids for each sequence/item item_ids, inverse_item_ids = np.unique(dataset.test.X[dataset.id_column].squeeze().to_numpy(), return_index=False, return_inverse=True) + # 2. capture sequences in a list y_past = [dataset.test.y.squeeze().to_numpy()[inverse_item_ids == i][:-dataset.prediction_length] for i in range(len(item_ids))] + # 3. calculate period error per sequence y_past_period_error = [np.abs(y_past_item[period_length:] - y_past_item[:-period_length]).mean() for y_past_item in y_past] + # 4. 
repeat period error for each sequence, to save one for each element y_past_period_error_rep = np.repeat(y_past_period_error, dataset.prediction_length) df = df.assign(y_past_period_error=y_past_period_error_rep) if preview: From 04872e7b96a6d25836a876da1d098e3d02adcd74 Mon Sep 17 00:00:00 2001 From: sommerle Date: Tue, 20 Sep 2022 14:05:52 +0000 Subject: [PATCH 09/30] setting defaults for timeseries task --- amlb/benchmark.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/amlb/benchmark.py b/amlb/benchmark.py index 43f64e9f9..4ed79d6a1 100644 --- a/amlb/benchmark.py +++ b/amlb/benchmark.py @@ -489,8 +489,17 @@ def load_data(self): # TODO raise NotImplementedError("OpenML datasets without task_id are not supported yet.") elif hasattr(self._task_def, 'dataset'): + if self._task_def.dataset['type'] == 'timeseries' and self._task_def.dataset['timestamp_column'] is None: + log.warning("Warning: For timeseries task setting undefined timestamp column to `timestamp`.") + self._task_def.dataset['timestamp_column'] = "timestamp" self._dataset = Benchmark.data_loader.load(DataSourceType.file, dataset=self._task_def.dataset, fold=self.fold, timestamp_column=self._task_def.dataset['timestamp_column']) if self._dataset.type == DatasetType.timeseries: + if self._task_def.dataset['id_column'] is None: + log.warning("Warning: For timeseries task setting undefined itemid column to `item_id`.") + self._task_def.dataset['id_column'] = "item_id" + if self._task_def.dataset['prediction_length'] is None: + log.warning("Warning: For timeseries task setting undefined prediction length to `1`.") + self._task_def.dataset['prediction_length'] = "1" self._dataset.timestamp_column=self._task_def.dataset['timestamp_column'] self._dataset.id_column=self._task_def.dataset['id_column'] self._dataset.prediction_length=self._task_def.dataset['prediction_length'] From 888a1cb8f44c4b647d8d4b885a155fbee3a37efb Mon Sep 17 00:00:00 2001 From: sommerle Date: Tue, 20 Sep 2022 14:06:24 +0000 
Subject: [PATCH 10/30] remove outer context manipulation --- amlb/datautils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/amlb/datautils.py b/amlb/datautils.py index efc5b2c6d..7946fdc1c 100644 --- a/amlb/datautils.py +++ b/amlb/datautils.py @@ -37,9 +37,11 @@ def read_csv(path, nrows=None, header=True, index=False, as_data_frame=True, dty :param header: if the columns header should be read. :param as_data_frame: if the result should be returned as a data frame (default) or a numpy array. :param dtype: data type for columns. + :param timestamp_column: column name for timestamp, to ensure dates are correctly parsed by pandas. :return: a DataFrame """ if dtype is not None and timestamp_column is not None and timestamp_column in dtype: + dtype = dtype.copy() # to avoid outer context manipulation del dtype[timestamp_column] df = pd.read_csv(path, From e15de3eb4c4b5be5cc27eb3b5789abe04f2c367b Mon Sep 17 00:00:00 2001 From: sommerle Date: Tue, 20 Sep 2022 14:08:59 +0000 Subject: [PATCH 11/30] corrected spelling error for quantiles --- amlb/results.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/amlb/results.py b/amlb/results.py index 814c204ff..3f7320fa2 100644 --- a/amlb/results.py +++ b/amlb/results.py @@ -271,7 +271,7 @@ def save_predictions(dataset: Dataset, output_file: str, :param probabilities_labels: :param target_is_encoded: :param preview: - :param qunatiles: + :param quantiles: :return: None """ log.debug("Saving predictions to `%s`.", output_file) From 866492fddfa110828201d4e0b77cf9f324ce1a9b Mon Sep 17 00:00:00 2001 From: sommerle Date: Wed, 21 Sep 2022 12:34:51 +0000 Subject: [PATCH 12/30] adding mape, correct available metrics --- amlb/results.py | 46 ++++++++++------------------------ frameworks/AutoGluonTS/exec.py | 4 ++- resources/benchmarks/ts.yaml | 10 ++++---- resources/config.yaml | 2 +- 4 files changed, 22 insertions(+), 40 deletions(-) diff --git a/amlb/results.py b/amlb/results.py index 3f7320fa2..91228ca4e 100644 
--- a/amlb/results.py +++ b/amlb/results.py @@ -680,7 +680,7 @@ def r2(self): """R^2""" return float(r2_score(self.truth, self.predictions)) -class TimeSeriesResult(Result): +class TimeSeriesResult(RegressionResult): def __init__(self, predictions_df, info=None): super().__init__(predictions_df, info) @@ -697,36 +697,6 @@ def __init__(self, predictions_df, info=None): self.target = Feature(0, 'target', 'real', is_target=True) self.type = DatasetType.timeseries - @metric(higher_is_better=False) - def mae(self): - """Mean Absolute Error""" - return float(mean_absolute_error(self.truth, self.predictions)) - - @metric(higher_is_better=False) - def mse(self): - """Mean Squared Error""" - return float(mean_squared_error(self.truth, self.predictions)) - - @metric(higher_is_better=False) - def msle(self): - """Mean Squared Logarithmic Error""" - return float(mean_squared_log_error(self.truth, self.predictions)) - - @metric(higher_is_better=False) - def rmse(self): - """Root Mean Square Error""" - return math.sqrt(self.mse()) - - @metric(higher_is_better=False) - def rmsle(self): - """Root Mean Square Logarithmic Error""" - return math.sqrt(self.msle()) - - @metric(higher_is_better=True) - def r2(self): - """R^2""" - return float(r2_score(self.truth, self.predictions)) - @metric(higher_is_better=False) def mase(self): """Mean Absolute Scaled Error""" @@ -742,14 +712,24 @@ def smape(self): denom[denom == 0] = math.inf return np.mean(num / denom) + @metric(higher_is_better=False) + def mape(self): + """Symmetric Mean Absolute Percentage Error""" + num = np.abs(self.truth - self.predictions) + denom = np.abs(self.truth) + # If the denominator is 0, we set it to float('inf') such that any division yields 0 (this + # might not be fully mathematically correct, but at least we don't get NaNs) + denom[denom == 0] = math.inf + return np.mean(num / denom) + @metric(higher_is_better=False) def nrmse(self): """Normalized Root Mean Square Error""" return self.rmse() / 
np.mean(np.abs(self.truth)) @metric(higher_is_better=False) - def nd(self): - """nd = ?""" + def wape(self): + """Weighted Average Percentage Error""" return np.sum(np.abs(self.truth - self.predictions)) / np.sum(np.abs(self.truth)) @metric(higher_is_better=False) diff --git a/frameworks/AutoGluonTS/exec.py b/frameworks/AutoGluonTS/exec.py index 4720ac26a..87e7f44f3 100644 --- a/frameworks/AutoGluonTS/exec.py +++ b/frameworks/AutoGluonTS/exec.py @@ -118,9 +118,11 @@ def load_data(train_path, test_path, timestamp_column, id_column): def get_eval_metric(config): # TODO: Support more metrics metrics_mapping = dict( + mape="MAPE", + smape="sMAPE", + mase="MASE", mse="MSE", rmse="RMSE", - mase="MASE", ) eval_metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else None diff --git a/resources/benchmarks/ts.yaml b/resources/benchmarks/ts.yaml index 04cb86bb9..1b4850a44 100644 --- a/resources/benchmarks/ts.yaml +++ b/resources/benchmarks/ts.yaml @@ -2,12 +2,12 @@ - name: covid dataset: - train: https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/train.csv - test: https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/test.csv - target: ConfirmedCases + train: s3://autogluon-ts-bench/data/covid_deaths/csv/train.csv # /tmp/gluonts/train_df.csv # https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/train.csv + test: s3://autogluon-ts-bench/data/covid_deaths/csv/test.csv # /tmp/gluonts/test_df.csv # https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/test.csv + #target: target #target ConfirmedCases type: timeseries prediction_length: 30 - id_column: name - timestamp_column: Date + #id_column: item_id # item_id name + #timestamp_column: timestamp # timestamp Date folds: 1 diff --git a/resources/config.yaml b/resources/config.yaml index 835758c76..0e237584e 100644 --- a/resources/config.yaml +++ b/resources/config.yaml @@ -54,7 +54,7 @@ benchmarks: # configuration namespace for the 
benchmarks def binary: ['auc', 'logloss', 'acc', 'balacc'] # available metrics: auc (AUC), acc (Accuracy), balacc (Balanced Accuracy), pr_auc (Precision Recall AUC), logloss (Log Loss), f1, f2, f05 (F-beta scores with beta=1, 2, or 0.5), max_pce, mean_pce (Max/Mean Per-Class Error). multiclass: ['logloss', 'acc', 'balacc'] # available metrics: same as for binary, except auc, replaced by auc_ovo (AUC One-vs-One), auc_ovr (AUC One-vs-Rest). AUC metrics and F-beta metrics are computed with weighted average. regression: ['rmse', 'r2', 'mae'] # available metrics: mae (Mean Absolute Error), mse (Mean Squared Error), msle (Mean Squared Logarithmic Error), rmse (Root Mean Square Error), rmsle (Root Mean Square Logarithmic Error), r2 (R^2). - timeseries: ['mase', 'smape', 'nrmse', 'nd', 'ncrps', 'rmse'] + timeseries: ['mase', 'mape', 'smape', 'rmse', 'mse', 'nrmse', 'wape', 'ncrps'] defaults: # the default constraints, usually overridden by a constraint. folds: 10 # the amount of fold-runs executed for each dataset. max_runtime_seconds: 3600 # default time allocated to the framework to train a model. 
From 9252835982b58cb6fdaf23b1c2de4cdbf18c1050 Mon Sep 17 00:00:00 2001 From: sommerle Date: Wed, 21 Sep 2022 12:39:53 +0000 Subject: [PATCH 13/30] beautify config options --- resources/benchmarks/ts.yaml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/resources/benchmarks/ts.yaml b/resources/benchmarks/ts.yaml index 1b4850a44..463f243c9 100644 --- a/resources/benchmarks/ts.yaml +++ b/resources/benchmarks/ts.yaml @@ -2,12 +2,14 @@ - name: covid dataset: - train: s3://autogluon-ts-bench/data/covid_deaths/csv/train.csv # /tmp/gluonts/train_df.csv # https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/train.csv - test: s3://autogluon-ts-bench/data/covid_deaths/csv/test.csv # /tmp/gluonts/test_df.csv # https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/test.csv - #target: target #target ConfirmedCases + train: https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/train.csv + # s3://autogluon-ts-bench/data/covid_deaths/csv/train.csv | https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/train.csv + test: https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/test.csv + # s3://autogluon-ts-bench/data/covid_deaths/csv/test.csv | https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/test.csv + target: target # target | ConfirmedCases type: timeseries prediction_length: 30 - #id_column: item_id # item_id name - #timestamp_column: timestamp # timestamp Date + id_column: item_id # item_id | name + timestamp_column: timestamp # timestamp | Date folds: 1 From 18cc6aff7873e69e0887946561933c71235e0066 Mon Sep 17 00:00:00 2001 From: sommerle Date: Wed, 21 Sep 2022 12:49:37 +0000 Subject: [PATCH 14/30] fixed config for public access --- resources/benchmarks/ts.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/resources/benchmarks/ts.yaml b/resources/benchmarks/ts.yaml index 463f243c9..0a73c81fb 100644 --- 
a/resources/benchmarks/ts.yaml +++ b/resources/benchmarks/ts.yaml @@ -6,10 +6,10 @@ # s3://autogluon-ts-bench/data/covid_deaths/csv/train.csv | https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/train.csv test: https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/test.csv # s3://autogluon-ts-bench/data/covid_deaths/csv/test.csv | https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/test.csv - target: target # target | ConfirmedCases + target: ConfirmedCases # target | ConfirmedCases type: timeseries prediction_length: 30 - id_column: item_id # item_id | name - timestamp_column: timestamp # timestamp | Date + id_column: name # item_id | name + timestamp_column: Date # timestamp | Date folds: 1 From 3e8945a78852b8b2a10d7bf091fb728697e25db3 Mon Sep 17 00:00:00 2001 From: sommerle Date: Fri, 23 Sep 2022 09:56:17 +0000 Subject: [PATCH 15/30] no outer context manipulation, add dataset subdir --- amlb/benchmark.py | 4 +++- amlb/datasets/file.py | 19 +++++++++++-------- frameworks/AutoGluonTS/__init__.py | 3 ++- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/amlb/benchmark.py b/amlb/benchmark.py index 4ed79d6a1..16b5a671d 100644 --- a/amlb/benchmark.py +++ b/amlb/benchmark.py @@ -492,7 +492,9 @@ def load_data(self): if self._task_def.dataset['type'] == 'timeseries' and self._task_def.dataset['timestamp_column'] is None: log.warning("Warning: For timeseries task setting undefined timestamp column to `timestamp`.") self._task_def.dataset['timestamp_column'] = "timestamp" - self._dataset = Benchmark.data_loader.load(DataSourceType.file, dataset=self._task_def.dataset, fold=self.fold, timestamp_column=self._task_def.dataset['timestamp_column']) + dataset_name_and_config = copy(self._task_def.dataset) + dataset_name_and_config.name = self._task_def.name + self._dataset = Benchmark.data_loader.load(DataSourceType.file, dataset=dataset_name_and_config, fold=self.fold) if self._dataset.type == 
DatasetType.timeseries: if self._task_def.dataset['id_column'] is None: log.warning("Warning: For timeseries task setting undefined itemid column to `item_id`.") diff --git a/amlb/datasets/file.py b/amlb/datasets/file.py index 0bfa9453b..7c1080de8 100644 --- a/amlb/datasets/file.py +++ b/amlb/datasets/file.py @@ -30,10 +30,10 @@ def __init__(self, cache_dir=None): self._cache_dir = cache_dir if cache_dir else tempfile.mkdtemp(prefix='amlb_cache') @profile(logger=log) - def load(self, dataset, fold=0, timestamp_column=None): + def load(self, dataset, fold=0): dataset = dataset if isinstance(dataset, ns) else ns(path=dataset) log.debug("Loading dataset %s", dataset) - paths = self._extract_train_test_paths(dataset.path if 'path' in dataset else dataset, fold=fold) + paths = self._extract_train_test_paths(dataset.path if 'path' in dataset else dataset, fold=fold, name=dataset['name'] if 'name' in dataset else None) assert fold < len(paths['train']), f"No training dataset available for fold {fold} among dataset files {paths['train']}" # seed = rget().seed(fold) # if len(paths['test']) == 0: @@ -51,21 +51,21 @@ def load(self, dataset, fold=0, timestamp_column=None): if ext == '.arff': return ArffDataset(train_path, test_path, target=target, features=features, type=type_) elif ext == '.csv': - return CsvDataset(train_path, test_path, target=target, features=features, type=type_, timestamp_column=timestamp_column) + return CsvDataset(train_path, test_path, target=target, features=features, type=type_, timestamp_column=dataset['timestamp_column'] if 'timestamp_column' in dataset else None) else: raise ValueError(f"Unsupported file type: {ext}") - def _extract_train_test_paths(self, dataset, fold=None): + def _extract_train_test_paths(self, dataset, fold=None, name=None): if isinstance(dataset, (tuple, list)): assert len(dataset) % 2 == 0, "dataset list must contain an even number of paths: [train_0, test_0, train_1, test_1, ...]." 
return self._extract_train_test_paths(ns(train=[p for i, p in enumerate(dataset) if i % 2 == 0], test=[p for i, p in enumerate(dataset) if i % 2 == 1]), - fold=fold) + fold=fold, name=name) elif isinstance(dataset, ns): - return dict(train=[self._extract_train_test_paths(p)['train'][0] + return dict(train=[self._extract_train_test_paths(p, name=name)['train'][0] if i == fold else None for i, p in enumerate(as_list(dataset.train))], - test=[self._extract_train_test_paths(p)['train'][0] + test=[self._extract_train_test_paths(p, name=name)['train'][0] if i == fold else None for i, p in enumerate(as_list(dataset.test))]) else: @@ -116,7 +116,10 @@ def _extract_train_test_paths(self, dataset, fold=None): assert len(paths) > 0, f"No dataset file found in {dataset}: they should follow the naming xxxx_train.ext, xxxx_test.ext or xxxx_train_0.ext, xxxx_test_0.ext, xxxx_train_1.ext, ..." return paths elif is_valid_url(dataset): - cached_file = os.path.join(self._cache_dir, os.path.basename(dataset)) + if name is None: + cached_file = os.path.join(self._cache_dir, os.path.basename(dataset)) + else: + cached_file = os.path.join(self._cache_dir, name, os.path.basename(dataset)) if not os.path.exists(cached_file): # don't download if previously done handler = get_file_handler(dataset) assert handler.exists(dataset), f"Invalid path/url: {dataset}" diff --git a/frameworks/AutoGluonTS/__init__.py b/frameworks/AutoGluonTS/__init__.py index 70283c3e5..5be567305 100644 --- a/frameworks/AutoGluonTS/__init__.py +++ b/frameworks/AutoGluonTS/__init__.py @@ -1,7 +1,7 @@ from amlb.benchmark import TaskConfig from amlb.data import Dataset, DatasetType from amlb.utils import call_script_in_same_dir - +from copy import deepcopy def setup(*args, **kwargs): call_script_in_same_dir(__file__, "setup.sh", *args, **kwargs) @@ -10,6 +10,7 @@ def setup(*args, **kwargs): def run(dataset: Dataset, config: TaskConfig): from frameworks.shared.caller import run_in_venv + dataset = deepcopy(dataset) if 
hasattr(dataset, 'timestamp_column') is False: dataset.timestamp_column = None if hasattr(dataset, 'id_column') is False: From 4ca2118793a59001f1652daa40b6b730da6a1d88 Mon Sep 17 00:00:00 2001 From: sommerle Date: Fri, 23 Sep 2022 09:58:55 +0000 Subject: [PATCH 16/30] add more datasets --- resources/benchmarks/ts.yaml | 98 +++++++++++++++++++++++++++++++++--- 1 file changed, 90 insertions(+), 8 deletions(-) diff --git a/resources/benchmarks/ts.yaml b/resources/benchmarks/ts.yaml index 0a73c81fb..b800e8bf8 100644 --- a/resources/benchmarks/ts.yaml +++ b/resources/benchmarks/ts.yaml @@ -1,15 +1,97 @@ --- -- name: covid +- name: covid_deaths dataset: - train: https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/train.csv - # s3://autogluon-ts-bench/data/covid_deaths/csv/train.csv | https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/train.csv - test: https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/test.csv - # s3://autogluon-ts-bench/data/covid_deaths/csv/test.csv | https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/test.csv - target: ConfirmedCases # target | ConfirmedCases + train: s3://autogluon-ts-bench/data/covid_deaths/csv/train.csv + test: s3://autogluon-ts-bench/data/covid_deaths/csv/test.csv type: timeseries prediction_length: 30 - id_column: name # item_id | name - timestamp_column: Date # timestamp | Date + folds: 1 + +- name: hospital + dataset: + train: s3://autogluon-ts-bench/data/hospital/csv/train.csv + test: s3://autogluon-ts-bench/data/hospital/csv/test.csv + type: timeseries + prediction_length: 12 + folds: 1 + +- name: kdd_2018 + dataset: + train: s3://autogluon-ts-bench/data/kdd_2018/csv/train.csv + test: s3://autogluon-ts-bench/data/kdd_2018/csv/test.csv + type: timeseries + prediction_length: 48 + folds: 1 + +- name: m3_monthly + dataset: + train: s3://autogluon-ts-bench/data/m3_monthly/csv/train.csv + test: s3://autogluon-ts-bench/data/m3_monthly/csv/test.csv + type: 
timeseries + prediction_length: 18 + folds: 1 + +- name: m3_other + dataset: + train: s3://autogluon-ts-bench/data/m3_other/csv/train.csv + test: s3://autogluon-ts-bench/data/m3_other/csv/test.csv + type: timeseries + prediction_length: 8 + folds: 1 +- name: m3_quarterly + dataset: + train: s3://autogluon-ts-bench/data/m3_quarterly/csv/train.csv + test: s3://autogluon-ts-bench/data/m3_quarterly/csv/test.csv + type: timeseries + prediction_length: 8 + folds: 1 + +- name: m4_hourly + dataset: + train: s3://autogluon-ts-bench/data/m4_hourly/csv/train.csv + test: s3://autogluon-ts-bench/data/m4_hourly/csv/test.csv + type: timeseries + prediction_length: 48 + folds: 1 + +- name: m4_weekly + dataset: + train: s3://autogluon-ts-bench/data/m4_weekly/csv/train.csv + test: s3://autogluon-ts-bench/data/m4_weekly/csv/test.csv + type: timeseries + prediction_length: 13 + folds: 1 + +- name: nn5 + dataset: + train: s3://autogluon-ts-bench/data/nn5/csv/train.csv + test: s3://autogluon-ts-bench/data/nn5/csv/test.csv + type: timeseries + prediction_length: 56 + folds: 1 + +- name: solar + dataset: + train: s3://autogluon-ts-bench/data/solar/csv/train.csv + test: s3://autogluon-ts-bench/data/solar/csv/test.csv + type: timeseries + prediction_length: 24 + folds: 1 + +- name: tourism_monthly + dataset: + train: s3://autogluon-ts-bench/data/tourism_monthly/csv/train.csv + test: s3://autogluon-ts-bench/data/tourism_monthly/csv/test.csv + type: timeseries + prediction_length: 24 + folds: 1 + +- name: tourism_quarterly + dataset: + train: s3://autogluon-ts-bench/data/tourism_quarterly/csv/train.csv + test: s3://autogluon-ts-bench/data/tourism_quarterly/csv/test.csv + type: timeseries + prediction_length: 8 folds: 1 From f7f21fcb96fe964b341c6a3250313da337b53038 Mon Sep 17 00:00:00 2001 From: sommerle Date: Mon, 26 Sep 2022 10:13:42 +0000 Subject: [PATCH 17/30] include error raising for too large pred. length. 
--- amlb/benchmark.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/amlb/benchmark.py b/amlb/benchmark.py index 16b5a671d..43997513f 100644 --- a/amlb/benchmark.py +++ b/amlb/benchmark.py @@ -505,6 +505,20 @@ def load_data(self): self._dataset.timestamp_column=self._task_def.dataset['timestamp_column'] self._dataset.id_column=self._task_def.dataset['id_column'] self._dataset.prediction_length=self._task_def.dataset['prediction_length'] + + train_seqs_lengths = self._dataset.train.X.groupby(self._dataset.id_column).count() + test_seqs_lengths = self._dataset.test.X.groupby(self._dataset.id_column).count() + prediction_length_max_diff_train_test = int((test_seqs_lengths - train_seqs_lengths).mean()) + prediction_length_max_min_train_test = int(min(int(test_seqs_lengths.min()), int(train_seqs_lengths.min()))) - 1 + if not self._dataset.prediction_length == prediction_length_max_diff_train_test: + log.warning("Warning: Prediction length {}, does not equal difference between test and train sequence lengths {}.".format(self._dataset.prediction_length, prediction_length_max_diff_train_test)) + if not (test_seqs_lengths - train_seqs_lengths).var().item() == 0.: + raise ValueError("Error: Not all sequences of train and test set have same sequence length difference.") + if self._dataset.prediction_length > prediction_length_max_diff_train_test: + raise ValueError("Error: Prediction length {} longer than at least one difference between train and test sequence length.") + if self._dataset.prediction_length > prediction_length_max_min_train_test: + raise ValueError("Error: Prediction length {} longer than minimum sequence length + 1.".format()) + else: raise ValueError("Tasks should have one property among [openml_task_id, openml_dataset_id, dataset].") From fb429c65067c636d4a2e65146fd681e90ffbf910 Mon Sep 17 00:00:00 2001 From: sommerle Date: Wed, 5 Oct 2022 08:36:27 +0000 Subject: [PATCH 18/30] merging AutoGluonTS framework folder into AutoGluon ---
frameworks/AutoGluon/README.md | 16 +++++ frameworks/AutoGluon/__init__.py | 58 ++++++++++++++----- .../exec.py => AutoGluon/exec_ts.py} | 1 + frameworks/AutoGluon/setup.sh | 9 ++- frameworks/AutoGluonTS/README.md | 36 ------------ frameworks/AutoGluonTS/__init__.py | 39 ------------- frameworks/AutoGluonTS/setup.sh | 36 ------------ resources/frameworks.yaml | 17 +++--- 8 files changed, 78 insertions(+), 134 deletions(-) create mode 100644 frameworks/AutoGluon/README.md rename frameworks/{AutoGluonTS/exec.py => AutoGluon/exec_ts.py} (99%) delete mode 100644 frameworks/AutoGluonTS/README.md delete mode 100644 frameworks/AutoGluonTS/__init__.py delete mode 100755 frameworks/AutoGluonTS/setup.sh diff --git a/frameworks/AutoGluon/README.md b/frameworks/AutoGluon/README.md new file mode 100644 index 000000000..51286533e --- /dev/null +++ b/frameworks/AutoGluon/README.md @@ -0,0 +1,16 @@ +# AutoGluon + +To run v0.5.2: ```python3 ../automlbenchmark/runbenchmark.py autogluon ...``` + +To run mainline: ```python3 ../automlbenchmark/runbenchmark.py autogluon:latest ...``` + + +# AutoGluonTS + +AutoGluonTS stands for autogluon.timeseries. This framework handles time series problems.
+ +## Run Steps + +To run v0.5.2: ```python3 ../automlbenchmark/runbenchmark.py autogluonts timeseries ...``` + +To run mainline: ```python3 ../automlbenchmark/runbenchmark.py autogluonts:latest timeseries ...``` diff --git a/frameworks/AutoGluon/__init__.py b/frameworks/AutoGluon/__init__.py index be2c15147..bee3b99a2 100644 --- a/frameworks/AutoGluon/__init__.py +++ b/frameworks/AutoGluon/__init__.py @@ -1,25 +1,53 @@ -from amlb.benchmark import TaskConfig -from amlb.data import Dataset + from amlb.utils import call_script_in_same_dir +from amlb.benchmark import TaskConfig +from amlb.data import Dataset, DatasetType +from copy import deepcopy def setup(*args, **kwargs): call_script_in_same_dir(__file__, "setup.sh", *args, **kwargs) - def run(dataset: Dataset, config: TaskConfig): from frameworks.shared.caller import run_in_venv - data = dict( - train=dict(path=dataset.train.data_path('parquet')), - test=dict(path=dataset.test.data_path('parquet')), - target=dict( - name=dataset.target.name, - classes=dataset.target.values - ), - problem_type=dataset.type.name # AutoGluon problem_type is using same names as amlb.data.DatasetType - ) - - return run_in_venv(__file__, "exec.py", - input_data=data, dataset=dataset, config=config) + if dataset.type is not DatasetType.timeseries: + data = dict( + train=dict(path=dataset.train.data_path('parquet')), + test=dict(path=dataset.test.data_path('parquet')), + target=dict( + name=dataset.target.name, + classes=dataset.target.values + ), + problem_type=dataset.type.name # AutoGluon problem_type is using same names as amlb.data.DatasetType + ) + exec_file = "exec.py" + + else: + dataset = deepcopy(dataset) + if not hasattr(dataset, 'timestamp_column'): + dataset.timestamp_column = None + if not hasattr(dataset, 'id_column'): + dataset.id_column = None + if not hasattr(dataset, 'prediction_length'): + raise AttributeError("Unspecified `prediction_length`.") + + data = dict( + # train=dict(path=dataset.train.data_path('parquet')), 
+ # test=dict(path=dataset.test.data_path('parquet')), + train=dict(path=dataset.train.path), + test=dict(path=dataset.test.path), + target=dict( + name=dataset.target.name, + classes=dataset.target.values + ), + problem_type=dataset.type.name, # AutoGluon problem_type is using same names as amlb.data.DatasetType + timestamp_column=dataset.timestamp_column, + id_column=dataset.id_column, + prediction_length=dataset.prediction_length + ) + exec_file = "exec_ts.py" + + return run_in_venv(__file__, exec_file, + input_data=data, dataset=dataset, config=config) diff --git a/frameworks/AutoGluonTS/exec.py b/frameworks/AutoGluon/exec_ts.py similarity index 99% rename from frameworks/AutoGluonTS/exec.py rename to frameworks/AutoGluon/exec_ts.py index 87e7f44f3..85593932d 100644 --- a/frameworks/AutoGluonTS/exec.py +++ b/frameworks/AutoGluon/exec_ts.py @@ -32,6 +32,7 @@ def run(dataset, config): eval_metric = get_eval_metric(config) label = dataset.target.name time_limit = config.max_runtime_seconds + time_limit = 10. 
training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')} diff --git a/frameworks/AutoGluon/setup.sh b/frameworks/AutoGluon/setup.sh index 6ef50ed8c..7cbccbee9 100755 --- a/frameworks/AutoGluon/setup.sh +++ b/frameworks/AutoGluon/setup.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash + HERE=$(dirname "$0") VERSION=${1:-"stable"} REPO=${2:-"https://github.com/awslabs/autogluon.git"} @@ -36,4 +37,10 @@ else PIP install -e tabular/[skex] fi -PY -c "from autogluon.tabular.version import __version__; print(__version__)" >> "${HERE}/.setup/installed" +if [[ ${MODULE} == "timeseries" ]]; then + PY -c "from autogluon.tabular.version import __version__; print(__version__)" >> "${HERE}/.setup/installed" + # TODO: GPU version install + PIP install "mxnet<2.0" +else + PY -c "from autogluon.timeseries.version import __version__; print(__version__)" >> "${HERE}/.setup/installed" +fi diff --git a/frameworks/AutoGluonTS/README.md b/frameworks/AutoGluonTS/README.md deleted file mode 100644 index 627f8231c..000000000 --- a/frameworks/AutoGluonTS/README.md +++ /dev/null @@ -1,36 +0,0 @@ -# AutoGluonTS - -AutoGluonTS stands for autogluon.timeseries. This framework handles time series problems. - -This code is currently a prototype, since time series support is not fully defined in AutoMLBenchmark yet. -Consider the code a proof of concept. - -## Run Steps - -To run AutoGluonTS in AutoMLBenchmark on the covid dataset from the AutoGluon tutorial, do the following: - -1. Create a fresh Python environment -2. Follow automlbenchmark install instructions -3. Run the following command in terminal: ```python3 ../automlbenchmark/runbenchmark.py autogluonts ts test``` -4. Done. - -To run mainline AutoGluonTS instead of v0.5.2: ```python3 ../automlbenchmark/runbenchmark.py autogluonts:latest ts test``` - -## TODO - -### FIXME: Why does leaderboard claim a different test score than AutoMLBenchmark for RMSE? 
-### FIXME: Currently ignoring test_path, just using train data for evaluation -### TODO: How to evaluate more complex metrics like MAPE? -### How to pass timestamp_column? -### How to pass id_column? -### How to pass prediction_length? - - - - - - - - - - diff --git a/frameworks/AutoGluonTS/__init__.py b/frameworks/AutoGluonTS/__init__.py deleted file mode 100644 index 5be567305..000000000 --- a/frameworks/AutoGluonTS/__init__.py +++ /dev/null @@ -1,39 +0,0 @@ -from amlb.benchmark import TaskConfig -from amlb.data import Dataset, DatasetType -from amlb.utils import call_script_in_same_dir -from copy import deepcopy - -def setup(*args, **kwargs): - call_script_in_same_dir(__file__, "setup.sh", *args, **kwargs) - - -def run(dataset: Dataset, config: TaskConfig): - from frameworks.shared.caller import run_in_venv - - dataset = deepcopy(dataset) - if hasattr(dataset, 'timestamp_column') is False: - dataset.timestamp_column = None - if hasattr(dataset, 'id_column') is False: - dataset.id_column = None - if hasattr(dataset, 'prediction_length') is False: - raise AttributeError("Unspecified `prediction_length`.") - if dataset.type is not DatasetType.timeseries: - raise ValueError("AutoGluonTS only supports timeseries.") - - data = dict( - # train=dict(path=dataset.train.data_path('parquet')), - # test=dict(path=dataset.test.data_path('parquet')), - train=dict(path=dataset.train.path), - test=dict(path=dataset.test.path), - target=dict( - name=dataset.target.name, - classes=dataset.target.values - ), - problem_type=dataset.type.name, # AutoGluon problem_type is using same names as amlb.data.DatasetType - timestamp_column=dataset.timestamp_column, - id_column=dataset.id_column, - prediction_length=dataset.prediction_length - ) - - return run_in_venv(__file__, "exec.py", - input_data=data, dataset=dataset, config=config) diff --git a/frameworks/AutoGluonTS/setup.sh b/frameworks/AutoGluonTS/setup.sh deleted file mode 100755 index d9fc7e8da..000000000 --- 
a/frameworks/AutoGluonTS/setup.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env bash -HERE=$(dirname "$0") -VERSION=${1:-"stable"} -REPO=${2:-"https://github.com/awslabs/autogluon.git"} -PKG=${3:-"autogluon"} -if [[ "$VERSION" == "latest" ]]; then - VERSION="master" -fi - -# creating local venv -. ${HERE}/../shared/setup.sh ${HERE} true - -PIP install --upgrade pip -PIP install --upgrade setuptools wheel - -if [[ "$VERSION" == "stable" ]]; then - PIP install --no-cache-dir -U "${PKG}" - PIP install --no-cache-dir -U "${PKG}.tabular[skex]" -elif [[ "$VERSION" =~ ^[0-9] ]]; then - PIP install --no-cache-dir -U "${PKG}==${VERSION}" - PIP install --no-cache-dir -U "${PKG}.tabular[skex]==${VERSION}" -else - TARGET_DIR="${HERE}/lib/${PKG}" - rm -Rf ${TARGET_DIR} - git clone --depth 1 --single-branch --branch ${VERSION} --recurse-submodules ${REPO} ${TARGET_DIR} - cd ${TARGET_DIR} - PY_EXEC_NO_ARGS="$(cut -d' ' -f1 <<<"$py_exec")" - PY_EXEC_DIR=$(dirname "$PY_EXEC_NO_ARGS") - env PATH="$PY_EXEC_DIR:$PATH" bash -c ./full_install.sh - PIP install -e tabular/[skex] -fi - -# TODO: GPU version install -PIP install "mxnet<2.0" - -PY -c "from autogluon.timeseries.version import __version__; print(__version__)" >> "${HERE}/.setup/installed" diff --git a/resources/frameworks.yaml b/resources/frameworks.yaml index eb59c44bf..4358b7515 100644 --- a/resources/frameworks.yaml +++ b/resources/frameworks.yaml @@ -86,9 +86,9 @@ autoxgboost: flaml: version: 'stable' description: | - FLAML is a lightweight Python library that finds accurate machine learning models - automatically, efficiently and economically. It frees users from selecting learners - and hyperparameters for each learner. It is fast and cheap. + FLAML is a lightweight Python library that finds accurate machine learning models + automatically, efficiently and economically. It frees users from selecting learners + and hyperparameters for each learner. It is fast and cheap. 
project: https://github.com/microsoft/FLAML refs: [https://arxiv.org/pdf/1911.04706.pdf] @@ -139,12 +139,12 @@ mljarsupervised_compete: description: "MLJAR is using 'Compete' mode to provide the most accurate predictor" params: mode: Compete # set mode for Compete, default mode is Explain - + MLNet: version: 'latest' description: | MLNET.CLI is a automated machine learning tool implemented by ml.net. - + MLPlan: version: 'stable' abstract: true @@ -196,10 +196,14 @@ TPOT: #################################### AutoGluonTS: + extends: AutoGluon version: "stable" description: | AutoGluon-TimeSeries - project: https://auto.gluon.ai + setup_env: + MODULE: timeseries + params: + presets: good_quality ####################################### ### Non AutoML reference frameworks ### @@ -242,4 +246,3 @@ TunedRandomForest: # _n_jobs: 1 # cf. RandomForest # _tuning: # n_estimators: 500 - From 23d057a86b0ae213c21b8f7f20c8c57d3519dd11 Mon Sep 17 00:00:00 2001 From: sommerle Date: Wed, 5 Oct 2022 08:38:42 +0000 Subject: [PATCH 19/30] renaming ts.yaml to timeseries.yaml, plus ext. 
--- resources/benchmarks/{ts.yaml => timeseries.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename resources/benchmarks/{ts.yaml => timeseries.yaml} (100%) diff --git a/resources/benchmarks/ts.yaml b/resources/benchmarks/timeseries.yaml similarity index 100% rename from resources/benchmarks/ts.yaml rename to resources/benchmarks/timeseries.yaml From 1396d2007baedffa9db158bbe29d8801081303e9 Mon Sep 17 00:00:00 2001 From: sommerle Date: Wed, 5 Oct 2022 09:35:28 +0000 Subject: [PATCH 20/30] removing presets, correct latest config for AGTS --- resources/frameworks.yaml | 2 -- resources/frameworks_latest.yaml | 5 +++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/resources/frameworks.yaml b/resources/frameworks.yaml index 4358b7515..513c99586 100644 --- a/resources/frameworks.yaml +++ b/resources/frameworks.yaml @@ -202,8 +202,6 @@ AutoGluonTS: AutoGluon-TimeSeries setup_env: MODULE: timeseries - params: - presets: good_quality ####################################### ### Non AutoML reference frameworks ### diff --git a/resources/frameworks_latest.yaml b/resources/frameworks_latest.yaml index b23bf72b0..becdc4e3e 100644 --- a/resources/frameworks_latest.yaml +++ b/resources/frameworks_latest.yaml @@ -85,10 +85,12 @@ TPOT: #################################### AutoGluonTS: + extends: AutoGluon version: "latest" description: | AutoGluon-TimeSeries - project: https://auto.gluon.ai + setup_env: + MODULE: timeseries ####################################### ### Non AutoML reference frameworks ### @@ -111,4 +113,3 @@ TunedRandomForest: version: 'latest' params: n_estimators: 2000 - From 8332960ea36ba2ad5a5367c909d4041001017759 Mon Sep 17 00:00:00 2001 From: sommerle Date: Wed, 5 Oct 2022 09:45:56 +0000 Subject: [PATCH 21/30] move dataset timeseries ext to datasets/file.py --- amlb/benchmark.py | 26 -------------------------- amlb/datasets/file.py | 39 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 38 insertions(+), 27 deletions(-) diff 
--git a/amlb/benchmark.py b/amlb/benchmark.py index 43997513f..39458efe5 100644 --- a/amlb/benchmark.py +++ b/amlb/benchmark.py @@ -489,35 +489,9 @@ def load_data(self): # TODO raise NotImplementedError("OpenML datasets without task_id are not supported yet.") elif hasattr(self._task_def, 'dataset'): - if self._task_def.dataset['type'] == 'timeseries' and self._task_def.dataset['timestamp_column'] is None: - log.warning("Warning: For timeseries task setting undefined timestamp column to `timestamp`.") - self._task_def.dataset['timestamp_column'] = "timestamp" dataset_name_and_config = copy(self._task_def.dataset) dataset_name_and_config.name = self._task_def.name self._dataset = Benchmark.data_loader.load(DataSourceType.file, dataset=dataset_name_and_config, fold=self.fold) - if self._dataset.type == DatasetType.timeseries: - if self._task_def.dataset['id_column'] is None: - log.warning("Warning: For timeseries task setting undefined itemid column to `item_id`.") - self._task_def.dataset['id_column'] = "item_id" - if self._task_def.dataset['prediction_length'] is None: - log.warning("Warning: For timeseries task setting undefined prediction length to `1`.") - self._task_def.dataset['prediction_length'] = "1" - self._dataset.timestamp_column=self._task_def.dataset['timestamp_column'] - self._dataset.id_column=self._task_def.dataset['id_column'] - self._dataset.prediction_length=self._task_def.dataset['prediction_length'] - - train_seqs_lengths = self._dataset.train.X.groupby(self._dataset.id_column).count() - test_seqs_lengths = self._dataset.test.X.groupby(self._dataset.id_column).count() - prediction_length_max_diff_train_test = int((test_seqs_lengths - train_seqs_lengths).mean()) - prediction_length_max_min_train_test = int(min(int(test_seqs_lengths.min()), int(train_seqs_lengths.min()))) - 1 - if not self._dataset.prediction_length == prediction_length_max_diff_train_test: - log.warning("Warning: Prediction length {}, does not equal difference between test and 
train sequence lengths {}.".format(self._dataset.prediction_length, prediction_length_max_diff_train_test)) - if not (test_seqs_lengths - train_seqs_lengths).var().item() == 0.: - raise ValueError("Error: Not all sequences of train and test set have same sequence length difference.") - if self._dataset.prediction_length > prediction_length_max_diff_train_test: - raise ValueError("Error: Prediction length {} longer than at least one difference between train and test sequence length.") - if self._dataset.prediction_length > prediction_length_max_min_train_test: - raise ValueError("Error: Prediction length {} longer than minimum sequence length + 1.".format()) else: raise ValueError("Tasks should have one property among [openml_task_id, openml_dataset_id, dataset].") diff --git a/amlb/datasets/file.py b/amlb/datasets/file.py index 7c1080de8..c7154a76b 100644 --- a/amlb/datasets/file.py +++ b/amlb/datasets/file.py @@ -51,7 +51,16 @@ def load(self, dataset, fold=0): if ext == '.arff': return ArffDataset(train_path, test_path, target=target, features=features, type=type_) elif ext == '.csv': - return CsvDataset(train_path, test_path, target=target, features=features, type=type_, timestamp_column=dataset['timestamp_column'] if 'timestamp_column' in dataset else None) + if DatasetType[dataset['type']] == DatasetType.timeseries and dataset['timestamp_column'] is None: + log.warning("Warning: For timeseries task setting undefined timestamp column to `timestamp`.") + dataset['timestamp_column'] = "timestamp" + + csv_dataset = CsvDataset(train_path, test_path, target=target, features=features, type=type_, timestamp_column=dataset['timestamp_column'] if 'timestamp_column' in dataset else None) + + if csv_dataset.type == DatasetType.timeseries: + csv_dataset = self.extend_dataset_with_timeseries_config(csv_dataset, dataset) + + return csv_dataset else: raise ValueError(f"Unsupported file type: {ext}") @@ -132,6 +141,34 @@ def __repr__(self): return repr_def(self) + def 
extend_dataset_with_timeseries_config(self, dataset, dataset_config): + if dataset_config['id_column'] is None: + log.warning("Warning: For timeseries task setting undefined itemid column to `item_id`.") + dataset_config['id_column'] = "item_id" + if dataset_config['prediction_length'] is None: + log.warning("Warning: For timeseries task setting undefined prediction length to `1`.") + dataset_config['prediction_length'] = "1" + + dataset.timestamp_column=dataset_config['timestamp_column'] + dataset.id_column=dataset_config['id_column'] + dataset.prediction_length=dataset_config['prediction_length'] + + train_seqs_lengths = dataset.train.X.groupby(dataset.id_column).count() + test_seqs_lengths = dataset.test.X.groupby(dataset.id_column).count() + prediction_length_max_diff_train_test = int((test_seqs_lengths - train_seqs_lengths).mean()) + prediction_length_max_min_train_test = int(min(int(test_seqs_lengths.min()), int(train_seqs_lengths.min()))) - 1 + if not dataset.prediction_length == prediction_length_max_diff_train_test: + log.warning("Warning: Prediction length {}, does not equal difference between test and train sequence lengths {}.".format(dataset.prediction_length, prediction_length_max_diff_train_test)) + if not (test_seqs_lengths - train_seqs_lengths).var().item() == 0.: + raise ValueError("Error: Not all sequences of train and test set have same sequence length difference.") + if dataset.prediction_length > prediction_length_max_diff_train_test: + raise ValueError("Error: Prediction length {} longer than at least one difference between train and test sequence length.") + if dataset.prediction_length > prediction_length_max_min_train_test: + raise ValueError("Error: Prediction length {} longer than minimum sequence length + 1.".format()) + return dataset + + + class FileDataset(Dataset): def __init__(self, train: Datasplit, test: Datasplit, From d41f6328965a1ca2a8e4b210a12417b6e28c7fcf Mon Sep 17 00:00:00 2001 From: sommerle Date: Wed, 5 Oct 2022 09:54:31 
+0000 Subject: [PATCH 22/30] dont bypass test mode --- amlb/results.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/amlb/results.py b/amlb/results.py index 91228ca4e..2af80dca4 100644 --- a/amlb/results.py +++ b/amlb/results.py @@ -228,12 +228,13 @@ def load_predictions(predictions_file): try: df = read_csv(predictions_file, dtype=object) log.debug("Predictions preview:\n %s\n", df.head(10).to_string()) + + if rconfig().test_mode: + TaskResult.validate_predictions(df) + if 'y_past_period_error' in df.columns: return TimeSeriesResult(df) else: - if rconfig().test_mode: - TaskResult.validate_predictions(df) - if df.shape[1] > 2: return ClassificationResult(df) else: From 3935e9e688913737b001047693f0d92d923d44eb Mon Sep 17 00:00:00 2001 From: sommerle Date: Wed, 5 Oct 2022 16:21:50 +0000 Subject: [PATCH 23/30] move quantiles and y_past_period_error to opt_cols --- amlb/results.py | 26 ++++++-------------------- frameworks/AutoGluon/exec_ts.py | 21 +++++++++++++++++++-- frameworks/shared/callee.py | 4 ++-- frameworks/shared/caller.py | 6 +++--- 4 files changed, 30 insertions(+), 27 deletions(-) diff --git a/amlb/results.py b/amlb/results.py index 2af80dca4..3887203f6 100644 --- a/amlb/results.py +++ b/amlb/results.py @@ -259,9 +259,9 @@ def load_metadata(metadata_file): def save_predictions(dataset: Dataset, output_file: str, predictions: Union[A, DF, S] = None, truth: Union[A, DF, S] = None, probabilities: Union[A, DF] = None, probabilities_labels: Union[list, A] = None, + optional_columns: Union[A, DF] = None, target_is_encoded: bool = False, - preview: bool = True, - quantiles: Union[A, DF] = None): + preview: bool = True): """ Save class probabilities and predicted labels to file in csv format. 
:param dataset: @@ -270,9 +270,9 @@ def save_predictions(dataset: Dataset, output_file: str, :param predictions: :param truth: :param probabilities_labels: + :param optional_columns: :param target_is_encoded: :param preview: - :param quantiles: :return: None """ log.debug("Saving predictions to `%s`.", output_file) @@ -316,23 +316,9 @@ def save_predictions(dataset: Dataset, output_file: str, df = df.assign(predictions=preds) df = df.assign(truth=truth) - if dataset.type == DatasetType.timeseries: - if quantiles is not None: - quantiles = quantiles.reset_index(drop=True) - df = pd.concat([df, quantiles], axis=1) - - period_length = 1 # TODO: This period length could be adapted to the Dataset, but then we need to pass this information as well. As of now this works. - - # we aim to calculate the mean period error from the past for each sequence: 1/N sum_{i=1}^N |x(t_i) - x(t_i - T)| - # 1. retrieve item_ids for each sequence/item - item_ids, inverse_item_ids = np.unique(dataset.test.X[dataset.id_column].squeeze().to_numpy(), return_index=False, return_inverse=True) - # 2. capture sequences in a list - y_past = [dataset.test.y.squeeze().to_numpy()[inverse_item_ids == i][:-dataset.prediction_length] for i in range(len(item_ids))] - # 3. calculate period error per sequence - y_past_period_error = [np.abs(y_past_item[period_length:] - y_past_item[:-period_length]).mean() for y_past_item in y_past] - # 4. 
repeat period error for each sequence, to save one for each element - y_past_period_error_rep = np.repeat(y_past_period_error, dataset.prediction_length) - df = df.assign(y_past_period_error=y_past_period_error_rep) + if optional_columns is not None: + df = pd.concat([df, optional_columns], axis=1) + if preview: log.info("Predictions preview:\n %s\n", df.head(20).to_string()) backup_file(output_file) diff --git a/frameworks/AutoGluon/exec_ts.py b/frameworks/AutoGluon/exec_ts.py index 85593932d..6ec5a77ae 100644 --- a/frameworks/AutoGluon/exec_ts.py +++ b/frameworks/AutoGluon/exec_ts.py @@ -4,6 +4,7 @@ import warnings import sys import tempfile +import numpy as np warnings.simplefilter("ignore") if sys.platform == 'darwin': @@ -77,6 +78,23 @@ def run(dataset, config): save_artifacts(predictor=predictor, leaderboard=leaderboard, config=config) shutil.rmtree(predictor.path, ignore_errors=True) + quantiles = predictions.drop(columns=['mean']).reset_index(drop=True) + period_length = 1 # TODO: This period length could be adapted to the Dataset, but then we need to pass this information as well. As of now this works. + + # we aim to calculate the mean period error from the past for each sequence: 1/N sum_{i=1}^N |x(t_i) - x(t_i - T)| + # 1. retrieve item_ids for each sequence/item + #dataset..X /. y + item_ids, inverse_item_ids = np.unique(test_data.reset_index()[dataset.id_column].squeeze().to_numpy(), return_index=False, return_inverse=True) + # 2. capture sequences in a list + y_past = [test_data[label].squeeze().to_numpy()[inverse_item_ids == i][:-dataset.prediction_length] for i in range(len(item_ids))] + # 3. calculate period error per sequence + y_past_period_error = [np.abs(y_past_item[period_length:] - y_past_item[:-period_length]).mean() for y_past_item in y_past] + # 4. 
repeat period error for each sequence, to save one for each element + y_past_period_error_rep = np.repeat(y_past_period_error, dataset.prediction_length) + + optional_columns = quantiles + optional_columns = optional_columns.assign(y_past_period_error=y_past_period_error_rep) + return result(output_file=config.output_predictions_file, predictions=predictions_only, truth=truth_only, @@ -86,8 +104,7 @@ def run(dataset, config): models_count=num_models_trained, training_duration=training.duration, predict_duration=predict.duration, - quantiles=predictions.drop(columns=['mean'])) - + optional_columns=optional_columns) def load_data(train_path, test_path, timestamp_column, id_column): diff --git a/frameworks/shared/callee.py b/frameworks/shared/callee.py index 70b5a3be0..c596e01c5 100644 --- a/frameworks/shared/callee.py +++ b/frameworks/shared/callee.py @@ -17,12 +17,12 @@ class FrameworkError(Exception): def result(output_file=None, predictions=None, truth=None, probabilities=None, probabilities_labels=None, + optional_columns=None, target_is_encoded=False, error_message=None, models_count=None, training_duration=None, predict_duration=None, - quantiles=None, **others): return locals() @@ -70,7 +70,7 @@ def load_data(name, path, **_): wait_retry_secs=10): result = run_fn(ds, config) res = dict(result) - for name in ['predictions', 'truth', 'probabilities', 'quantiles']: + for name in ['predictions', 'truth', 'probabilities', 'optional_columns']: arr = result[name] if arr is not None: path = os.path.join(config.result_dir, '.'.join([name, 'data'])) diff --git a/frameworks/shared/caller.py b/frameworks/shared/caller.py index 68963a820..09654dc32 100644 --- a/frameworks/shared/caller.py +++ b/frameworks/shared/caller.py @@ -149,7 +149,7 @@ def run_in_venv(caller_file, script_file: str, *args, if res.error_message is not None: raise NoResultError(res.error_message) - for name in ['predictions', 'truth', 'probabilities', 'quantiles']: + for name in ['predictions', 'truth', 
'probabilities', 'optional_columns']: res[name] = deserialize_data(res[name], config=ser_config) if res[name] is not None else None if callable(process_results): @@ -164,8 +164,8 @@ def run_in_venv(caller_file, script_file: str, *args, else dataset.test.y), probabilities=res.probabilities, probabilities_labels=res.probabilities_labels, - target_is_encoded=res.target_is_encoded, - quantiles=res.quantiles) + optional_columns=res.optional_columns, + target_is_encoded=res.target_is_encoded) return dict( models_count=res.models_count if res.models_count is not None else 1, From 1f7c5748ac90d401100f2f998ac9c98c65f23136 Mon Sep 17 00:00:00 2001 From: sommerle Date: Wed, 5 Oct 2022 16:22:04 +0000 Subject: [PATCH 24/30] remove whitespaces --- amlb/benchmark.py | 1 - amlb/datasets/file.py | 3 --- 2 files changed, 4 deletions(-) diff --git a/amlb/benchmark.py b/amlb/benchmark.py index 39458efe5..7c54a344c 100644 --- a/amlb/benchmark.py +++ b/amlb/benchmark.py @@ -492,7 +492,6 @@ def load_data(self): dataset_name_and_config = copy(self._task_def.dataset) dataset_name_and_config.name = self._task_def.name self._dataset = Benchmark.data_loader.load(DataSourceType.file, dataset=dataset_name_and_config, fold=self.fold) - else: raise ValueError("Tasks should have one property among [openml_task_id, openml_dataset_id, dataset].") diff --git a/amlb/datasets/file.py b/amlb/datasets/file.py index c7154a76b..77d6474e0 100644 --- a/amlb/datasets/file.py +++ b/amlb/datasets/file.py @@ -54,12 +54,9 @@ def load(self, dataset, fold=0): if DatasetType[dataset['type']] == DatasetType.timeseries and dataset['timestamp_column'] is None: log.warning("Warning: For timeseries task setting undefined timestamp column to `timestamp`.") dataset['timestamp_column'] = "timestamp" - csv_dataset = CsvDataset(train_path, test_path, target=target, features=features, type=type_, timestamp_column=dataset['timestamp_column'] if 'timestamp_column' in dataset else None) - if csv_dataset.type == 
DatasetType.timeseries: csv_dataset = self.extend_dataset_with_timeseries_config(csv_dataset, dataset) - return csv_dataset else: raise ValueError(f"Unsupported file type: {ext}") From 79e54c93dd9223e1fa91c77f2f15054c89552964 Mon Sep 17 00:00:00 2001 From: sommerle Date: Thu, 6 Oct 2022 09:44:29 +0000 Subject: [PATCH 25/30] deleting merge artifacts --- frameworks/AutoGluonTS/README.md | 16 ------------- frameworks/AutoGluonTS/__init__.py | 38 ------------------------------ 2 files changed, 54 deletions(-) delete mode 100644 frameworks/AutoGluonTS/README.md delete mode 100644 frameworks/AutoGluonTS/__init__.py diff --git a/frameworks/AutoGluonTS/README.md b/frameworks/AutoGluonTS/README.md deleted file mode 100644 index 938b459c4..000000000 --- a/frameworks/AutoGluonTS/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# AutoGluonTS - -AutoGluonTS stands for autogluon.timeseries. This framework handles time series problems. - -This code is currently a prototype, since time series support is not fully defined in AutoMLBenchmark yet. -Consider the code a proof of concept. - -## Run Steps - -To run AutoGluonTS in AutoMLBenchmark on the covid dataset from the AutoGluon tutorial, do the following: - -1. Create a fresh Python environment -2. Follow automlbenchmark install instructions -3. 
Run the following command in terminal: ```python3 ../automlbenchmark/runbenchmark.py autogluonts ts test``` - -To run mainline AutoGluonTS instead of v0.5.2: ```python3 ../automlbenchmark/runbenchmark.py autogluonts:latest ts test``` diff --git a/frameworks/AutoGluonTS/__init__.py b/frameworks/AutoGluonTS/__init__.py deleted file mode 100644 index 70283c3e5..000000000 --- a/frameworks/AutoGluonTS/__init__.py +++ /dev/null @@ -1,38 +0,0 @@ -from amlb.benchmark import TaskConfig -from amlb.data import Dataset, DatasetType -from amlb.utils import call_script_in_same_dir - - -def setup(*args, **kwargs): - call_script_in_same_dir(__file__, "setup.sh", *args, **kwargs) - - -def run(dataset: Dataset, config: TaskConfig): - from frameworks.shared.caller import run_in_venv - - if hasattr(dataset, 'timestamp_column') is False: - dataset.timestamp_column = None - if hasattr(dataset, 'id_column') is False: - dataset.id_column = None - if hasattr(dataset, 'prediction_length') is False: - raise AttributeError("Unspecified `prediction_length`.") - if dataset.type is not DatasetType.timeseries: - raise ValueError("AutoGluonTS only supports timeseries.") - - data = dict( - # train=dict(path=dataset.train.data_path('parquet')), - # test=dict(path=dataset.test.data_path('parquet')), - train=dict(path=dataset.train.path), - test=dict(path=dataset.test.path), - target=dict( - name=dataset.target.name, - classes=dataset.target.values - ), - problem_type=dataset.type.name, # AutoGluon problem_type is using same names as amlb.data.DatasetType - timestamp_column=dataset.timestamp_column, - id_column=dataset.id_column, - prediction_length=dataset.prediction_length - ) - - return run_in_venv(__file__, "exec.py", - input_data=data, dataset=dataset, config=config) From 6a251707a710b3af0de149d13ef9a3b2c8ceb800 Mon Sep 17 00:00:00 2001 From: sommerle Date: Thu, 6 Oct 2022 09:48:03 +0000 Subject: [PATCH 26/30] delete merge artifacts --- resources/benchmarks/ts.yaml | 15 --------------- 1 file 
changed, 15 deletions(-) delete mode 100644 resources/benchmarks/ts.yaml diff --git a/resources/benchmarks/ts.yaml b/resources/benchmarks/ts.yaml deleted file mode 100644 index 0a73c81fb..000000000 --- a/resources/benchmarks/ts.yaml +++ /dev/null @@ -1,15 +0,0 @@ ---- - -- name: covid - dataset: - train: https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/train.csv - # s3://autogluon-ts-bench/data/covid_deaths/csv/train.csv | https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/train.csv - test: https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/test.csv - # s3://autogluon-ts-bench/data/covid_deaths/csv/test.csv | https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/test.csv - target: ConfirmedCases # target | ConfirmedCases - type: timeseries - prediction_length: 30 - id_column: name # item_id | name - timestamp_column: Date # timestamp | Date - - folds: 1 From 928c2cf4bde2f9119768f9a7602c2155f2e89c50 Mon Sep 17 00:00:00 2001 From: sommerle Date: Thu, 6 Oct 2022 10:23:05 +0000 Subject: [PATCH 27/30] renaming prediction_length to forecast_range_in_steps --- amlb/datasets/file.py | 24 ++++++++++++------------ frameworks/AutoGluon/__init__.py | 6 +++--- frameworks/AutoGluon/exec_ts.py | 6 +++--- resources/benchmarks/timeseries.yaml | 24 ++++++++++++------------ 4 files changed, 30 insertions(+), 30 deletions(-) diff --git a/amlb/datasets/file.py b/amlb/datasets/file.py index 77d6474e0..4c7ba9b34 100644 --- a/amlb/datasets/file.py +++ b/amlb/datasets/file.py @@ -142,26 +142,26 @@ def extend_dataset_with_timeseries_config(self, dataset, dataset_config): if dataset_config['id_column'] is None: log.warning("Warning: For timeseries task setting undefined itemid column to `item_id`.") dataset_config['id_column'] = "item_id" - if dataset_config['prediction_length'] is None: - log.warning("Warning: For timeseries task setting undefined prediction length to `1`.") - dataset_config['prediction_length'] = 
"1" + if dataset_config['forecast_range_in_steps'] is None: + log.warning("Warning: For timeseries task setting undefined forecast_range_in_steps to `1`.") + dataset_config['forecast_range_in_steps'] = "1" dataset.timestamp_column=dataset_config['timestamp_column'] dataset.id_column=dataset_config['id_column'] - dataset.prediction_length=dataset_config['prediction_length'] + dataset.forecast_range_in_steps=int(dataset_config['forecast_range_in_steps']) train_seqs_lengths = dataset.train.X.groupby(dataset.id_column).count() test_seqs_lengths = dataset.test.X.groupby(dataset.id_column).count() - prediction_length_max_diff_train_test = int((test_seqs_lengths - train_seqs_lengths).mean()) - prediction_length_max_min_train_test = int(min(int(test_seqs_lengths.min()), int(train_seqs_lengths.min()))) - 1 - if not dataset.prediction_length == prediction_length_max_diff_train_test: - log.warning("Warning: Prediction length {}, does not equal difference between test and train sequence lengths {}.".format(dataset.prediction_length, prediction_length_max_diff_train_test)) + forecast_range_in_steps_max_diff_train_test = int((test_seqs_lengths - train_seqs_lengths).mean()) + forecast_range_in_steps_max_min_train_test = int(min(int(test_seqs_lengths.min()), int(train_seqs_lengths.min()))) - 1 + if not dataset.forecast_range_in_steps == forecast_range_in_steps_max_diff_train_test: + log.warning("Warning: Forecast range {}, does not equal difference between test and train sequence lengths {}.".format(dataset.forecast_range_in_steps, forecast_range_in_steps_max_diff_train_test)) if not (test_seqs_lengths - train_seqs_lengths).var().item() == 0.: raise ValueError("Error: Not all sequences of train and test set have same sequence length difference.") - if dataset.prediction_length > prediction_length_max_diff_train_test: - raise ValueError("Error: Prediction length {} longer than at least one difference between train and test sequence length.") - if dataset.prediction_length > 
prediction_length_max_min_train_test: - raise ValueError("Error: Prediction length {} longer than minimum sequence length + 1.".format()) + if dataset.forecast_range_in_steps > forecast_range_in_steps_max_diff_train_test: + raise ValueError("Error: Forecast range {} longer than at least one difference between train and test sequence length.") + if dataset.forecast_range_in_steps > forecast_range_in_steps_max_min_train_test: + raise ValueError("Error: Forecast range {} longer than minimum sequence length + 1.".format()) return dataset diff --git a/frameworks/AutoGluon/__init__.py b/frameworks/AutoGluon/__init__.py index bee3b99a2..025f16590 100644 --- a/frameworks/AutoGluon/__init__.py +++ b/frameworks/AutoGluon/__init__.py @@ -30,8 +30,8 @@ def run(dataset: Dataset, config: TaskConfig): dataset.timestamp_column = None if not hasattr(dataset, 'id_column'): dataset.id_column = None - if not hasattr(dataset, 'prediction_length'): - raise AttributeError("Unspecified `prediction_length`.") + if not hasattr(dataset, 'forecast_range_in_steps'): + raise AttributeError("Unspecified `forecast_range_in_steps`.") data = dict( # train=dict(path=dataset.train.data_path('parquet')), @@ -45,7 +45,7 @@ def run(dataset: Dataset, config: TaskConfig): problem_type=dataset.type.name, # AutoGluon problem_type is using same names as amlb.data.DatasetType timestamp_column=dataset.timestamp_column, id_column=dataset.id_column, - prediction_length=dataset.prediction_length + forecast_range_in_steps=dataset.forecast_range_in_steps ) exec_file = "exec_ts.py" diff --git a/frameworks/AutoGluon/exec_ts.py b/frameworks/AutoGluon/exec_ts.py index 6ec5a77ae..ac1b65b6b 100644 --- a/frameworks/AutoGluon/exec_ts.py +++ b/frameworks/AutoGluon/exec_ts.py @@ -28,7 +28,7 @@ def run(dataset, config): timestamp_column = dataset.timestamp_column id_column = dataset.id_column - prediction_length = dataset.prediction_length + prediction_length = dataset.forecast_range_in_steps eval_metric = 
get_eval_metric(config) label = dataset.target.name @@ -86,11 +86,11 @@ def run(dataset, config): #dataset..X /. y item_ids, inverse_item_ids = np.unique(test_data.reset_index()[dataset.id_column].squeeze().to_numpy(), return_index=False, return_inverse=True) # 2. capture sequences in a list - y_past = [test_data[label].squeeze().to_numpy()[inverse_item_ids == i][:-dataset.prediction_length] for i in range(len(item_ids))] + y_past = [test_data[label].squeeze().to_numpy()[inverse_item_ids == i][:-prediction_length] for i in range(len(item_ids))] # 3. calculate period error per sequence y_past_period_error = [np.abs(y_past_item[period_length:] - y_past_item[:-period_length]).mean() for y_past_item in y_past] # 4. repeat period error for each sequence, to save one for each element - y_past_period_error_rep = np.repeat(y_past_period_error, dataset.prediction_length) + y_past_period_error_rep = np.repeat(y_past_period_error, prediction_length) optional_columns = quantiles optional_columns = optional_columns.assign(y_past_period_error=y_past_period_error_rep) diff --git a/resources/benchmarks/timeseries.yaml b/resources/benchmarks/timeseries.yaml index b800e8bf8..a6cd3b51e 100644 --- a/resources/benchmarks/timeseries.yaml +++ b/resources/benchmarks/timeseries.yaml @@ -5,7 +5,7 @@ train: s3://autogluon-ts-bench/data/covid_deaths/csv/train.csv test: s3://autogluon-ts-bench/data/covid_deaths/csv/test.csv type: timeseries - prediction_length: 30 + forecast_range_in_steps: 30 folds: 1 - name: hospital @@ -13,7 +13,7 @@ train: s3://autogluon-ts-bench/data/hospital/csv/train.csv test: s3://autogluon-ts-bench/data/hospital/csv/test.csv type: timeseries - prediction_length: 12 + forecast_range_in_steps: 12 folds: 1 - name: kdd_2018 @@ -21,7 +21,7 @@ train: s3://autogluon-ts-bench/data/kdd_2018/csv/train.csv test: s3://autogluon-ts-bench/data/kdd_2018/csv/test.csv type: timeseries - prediction_length: 48 + forecast_range_in_steps: 48 folds: 1 - name: m3_monthly @@ -29,7 +29,7 @@ 
train: s3://autogluon-ts-bench/data/m3_monthly/csv/train.csv test: s3://autogluon-ts-bench/data/m3_monthly/csv/test.csv type: timeseries - prediction_length: 18 + forecast_range_in_steps: 18 folds: 1 - name: m3_other @@ -37,7 +37,7 @@ train: s3://autogluon-ts-bench/data/m3_other/csv/train.csv test: s3://autogluon-ts-bench/data/m3_other/csv/test.csv type: timeseries - prediction_length: 8 + forecast_range_in_steps: 8 folds: 1 - name: m3_quarterly @@ -45,7 +45,7 @@ train: s3://autogluon-ts-bench/data/m3_quarterly/csv/train.csv test: s3://autogluon-ts-bench/data/m3_quarterly/csv/test.csv type: timeseries - prediction_length: 8 + forecast_range_in_steps: 8 folds: 1 - name: m4_hourly @@ -53,7 +53,7 @@ train: s3://autogluon-ts-bench/data/m4_hourly/csv/train.csv test: s3://autogluon-ts-bench/data/m4_hourly/csv/test.csv type: timeseries - prediction_length: 48 + forecast_range_in_steps: 48 folds: 1 - name: m4_weekly @@ -61,7 +61,7 @@ train: s3://autogluon-ts-bench/data/m4_weekly/csv/train.csv test: s3://autogluon-ts-bench/data/m4_weekly/csv/test.csv type: timeseries - prediction_length: 13 + forecast_range_in_steps: 13 folds: 1 - name: nn5 @@ -69,7 +69,7 @@ train: s3://autogluon-ts-bench/data/nn5/csv/train.csv test: s3://autogluon-ts-bench/data/nn5/csv/test.csv type: timeseries - prediction_length: 56 + forecast_range_in_steps: 56 folds: 1 - name: solar @@ -77,7 +77,7 @@ train: s3://autogluon-ts-bench/data/solar/csv/train.csv test: s3://autogluon-ts-bench/data/solar/csv/test.csv type: timeseries - prediction_length: 24 + forecast_range_in_steps: 24 folds: 1 - name: tourism_monthly @@ -85,7 +85,7 @@ train: s3://autogluon-ts-bench/data/tourism_monthly/csv/train.csv test: s3://autogluon-ts-bench/data/tourism_monthly/csv/test.csv type: timeseries - prediction_length: 24 + forecast_range_in_steps: 24 folds: 1 - name: tourism_quarterly @@ -93,5 +93,5 @@ train: s3://autogluon-ts-bench/data/tourism_quarterly/csv/train.csv test: 
s3://autogluon-ts-bench/data/tourism_quarterly/csv/test.csv type: timeseries - prediction_length: 8 + forecast_range_in_steps: 8 folds: 1 From 47d311c3a09db52389ca0f87e8ae4d46cd00e501 Mon Sep 17 00:00:00 2001 From: sommerle Date: Thu, 6 Oct 2022 10:59:30 +0000 Subject: [PATCH 28/30] use public dataset, reduced range to maximum --- resources/benchmarks/timeseries.yaml | 98 ++-------------------------- 1 file changed, 7 insertions(+), 91 deletions(-) diff --git a/resources/benchmarks/timeseries.yaml b/resources/benchmarks/timeseries.yaml index a6cd3b51e..26af06497 100644 --- a/resources/benchmarks/timeseries.yaml +++ b/resources/benchmarks/timeseries.yaml @@ -1,97 +1,13 @@ --- -- name: covid_deaths +- name: covid dataset: - train: s3://autogluon-ts-bench/data/covid_deaths/csv/train.csv - test: s3://autogluon-ts-bench/data/covid_deaths/csv/test.csv + train: https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/train.csv + test: https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/test.csv + target: ConfirmedCases type: timeseries - forecast_range_in_steps: 30 - folds: 1 - -- name: hospital - dataset: - train: s3://autogluon-ts-bench/data/hospital/csv/train.csv - test: s3://autogluon-ts-bench/data/hospital/csv/test.csv - type: timeseries - forecast_range_in_steps: 12 - folds: 1 - -- name: kdd_2018 - dataset: - train: s3://autogluon-ts-bench/data/kdd_2018/csv/train.csv - test: s3://autogluon-ts-bench/data/kdd_2018/csv/test.csv - type: timeseries - forecast_range_in_steps: 48 - folds: 1 - -- name: m3_monthly - dataset: - train: s3://autogluon-ts-bench/data/m3_monthly/csv/train.csv - test: s3://autogluon-ts-bench/data/m3_monthly/csv/test.csv - type: timeseries - forecast_range_in_steps: 18 - folds: 1 - -- name: m3_other - dataset: - train: s3://autogluon-ts-bench/data/m3_other/csv/train.csv - test: s3://autogluon-ts-bench/data/m3_other/csv/test.csv - type: timeseries - forecast_range_in_steps: 8 - folds: 1 + forecast_range_in_steps: 19 + 
id_column: name + timestamp_column: Date -- name: m3_quarterly - dataset: - train: s3://autogluon-ts-bench/data/m3_quarterly/csv/train.csv - test: s3://autogluon-ts-bench/data/m3_quarterly/csv/test.csv - type: timeseries - forecast_range_in_steps: 8 - folds: 1 - -- name: m4_hourly - dataset: - train: s3://autogluon-ts-bench/data/m4_hourly/csv/train.csv - test: s3://autogluon-ts-bench/data/m4_hourly/csv/test.csv - type: timeseries - forecast_range_in_steps: 48 - folds: 1 - -- name: m4_weekly - dataset: - train: s3://autogluon-ts-bench/data/m4_weekly/csv/train.csv - test: s3://autogluon-ts-bench/data/m4_weekly/csv/test.csv - type: timeseries - forecast_range_in_steps: 13 - folds: 1 - -- name: nn5 - dataset: - train: s3://autogluon-ts-bench/data/nn5/csv/train.csv - test: s3://autogluon-ts-bench/data/nn5/csv/test.csv - type: timeseries - forecast_range_in_steps: 56 - folds: 1 - -- name: solar - dataset: - train: s3://autogluon-ts-bench/data/solar/csv/train.csv - test: s3://autogluon-ts-bench/data/solar/csv/test.csv - type: timeseries - forecast_range_in_steps: 24 - folds: 1 - -- name: tourism_monthly - dataset: - train: s3://autogluon-ts-bench/data/tourism_monthly/csv/train.csv - test: s3://autogluon-ts-bench/data/tourism_monthly/csv/test.csv - type: timeseries - forecast_range_in_steps: 24 - folds: 1 - -- name: tourism_quarterly - dataset: - train: s3://autogluon-ts-bench/data/tourism_quarterly/csv/train.csv - test: s3://autogluon-ts-bench/data/tourism_quarterly/csv/test.csv - type: timeseries - forecast_range_in_steps: 8 folds: 1 From b244e9c9d02cc7c2d2f9c0d1841c72375c4608fd Mon Sep 17 00:00:00 2001 From: sommerle Date: Thu, 6 Oct 2022 11:00:22 +0000 Subject: [PATCH 29/30] fix format string works --- amlb/datasets/file.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/amlb/datasets/file.py b/amlb/datasets/file.py index 4c7ba9b34..ffcfa8094 100644 --- a/amlb/datasets/file.py +++ b/amlb/datasets/file.py @@ -152,16 +152,20 @@ def 
extend_dataset_with_timeseries_config(self, dataset, dataset_config): train_seqs_lengths = dataset.train.X.groupby(dataset.id_column).count() test_seqs_lengths = dataset.test.X.groupby(dataset.id_column).count() - forecast_range_in_steps_max_diff_train_test = int((test_seqs_lengths - train_seqs_lengths).mean()) + forecast_range_in_steps_mean_diff_train_test = int((test_seqs_lengths - train_seqs_lengths).mean()) forecast_range_in_steps_max_min_train_test = int(min(int(test_seqs_lengths.min()), int(train_seqs_lengths.min()))) - 1 - if not dataset.forecast_range_in_steps == forecast_range_in_steps_max_diff_train_test: - log.warning("Warning: Forecast range {}, does not equal difference between test and train sequence lengths {}.".format(dataset.forecast_range_in_steps, forecast_range_in_steps_max_diff_train_test)) + if not dataset.forecast_range_in_steps == forecast_range_in_steps_mean_diff_train_test: + msg = f"Warning: Forecast range {dataset.forecast_range_in_steps}, does not equal mean difference between test and train sequence lengths {forecast_range_in_steps_mean_diff_train_test}." + log.warning(msg) if not (test_seqs_lengths - train_seqs_lengths).var().item() == 0.: - raise ValueError("Error: Not all sequences of train and test set have same sequence length difference.") - if dataset.forecast_range_in_steps > forecast_range_in_steps_max_diff_train_test: - raise ValueError("Error: Forecast range {} longer than at least one difference between train and test sequence length.") + msg = f"Error: Not all sequences of train and test set have same sequence length difference." + raise ValueError(msg) + if dataset.forecast_range_in_steps > forecast_range_in_steps_mean_diff_train_test: + msg = f"Error: Forecast range {dataset.forecast_range_in_steps} longer than difference between test and train sequence lengths {forecast_range_in_steps_mean_diff_train_test}." 
+ raise ValueError(msg) if dataset.forecast_range_in_steps > forecast_range_in_steps_max_min_train_test: - raise ValueError("Error: Forecast range {} longer than minimum sequence length + 1.".format()) + msg = f"Error: Forecast range {dataset.forecast_range_in_steps} longer than minimum sequence length + 1, {forecast_range_in_steps_max_min_train_test}." + raise ValueError(msg) return dataset From 3074f4212ce90af0f8dc4269ff1388aae21b57c9 Mon Sep 17 00:00:00 2001 From: sommerle Date: Thu, 6 Oct 2022 11:57:33 +0000 Subject: [PATCH 30/30] fix key error bug, remove magic time limit --- frameworks/AutoGluon/exec_ts.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/frameworks/AutoGluon/exec_ts.py b/frameworks/AutoGluon/exec_ts.py index ac1b65b6b..ab7c4110f 100644 --- a/frameworks/AutoGluon/exec_ts.py +++ b/frameworks/AutoGluon/exec_ts.py @@ -33,7 +33,6 @@ def run(dataset, config): eval_metric = get_eval_metric(config) label = dataset.target.name time_limit = config.max_runtime_seconds - time_limit = 10. training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')} @@ -84,7 +83,7 @@ def run(dataset, config): # we aim to calculate the mean period error from the past for each sequence: 1/N sum_{i=1}^N |x(t_i) - x(t_i - T)| # 1. retrieve item_ids for each sequence/item #dataset..X /. y - item_ids, inverse_item_ids = np.unique(test_data.reset_index()[dataset.id_column].squeeze().to_numpy(), return_index=False, return_inverse=True) + item_ids, inverse_item_ids = np.unique(test_data.reset_index()["item_id"].squeeze().to_numpy(), return_index=False, return_inverse=True) # 2. capture sequences in a list y_past = [test_data[label].squeeze().to_numpy()[inverse_item_ids == i][:-prediction_length] for i in range(len(item_ids))] # 3. calculate period error per sequence