diff --git a/amlb/benchmark.py b/amlb/benchmark.py
index 4ed79d6a1..7c54a344c 100644
--- a/amlb/benchmark.py
+++ b/amlb/benchmark.py
@@ -489,20 +489,9 @@ def load_data(self):
             # TODO
             raise NotImplementedError("OpenML datasets without task_id are not supported yet.")
         elif hasattr(self._task_def, 'dataset'):
-            if self._task_def.dataset['type'] == 'timeseries' and self._task_def.dataset['timestamp_column'] is None:
-                log.warning("Warning: For timeseries task setting undefined timestamp column to `timestamp`.")
-                self._task_def.dataset['timestamp_column'] = "timestamp"
-            self._dataset = Benchmark.data_loader.load(DataSourceType.file, dataset=self._task_def.dataset, fold=self.fold, timestamp_column=self._task_def.dataset['timestamp_column'])
-            if self._dataset.type == DatasetType.timeseries:
-                if self._task_def.dataset['id_column'] is None:
-                    log.warning("Warning: For timeseries task setting undefined itemid column to `item_id`.")
-                    self._task_def.dataset['id_column'] = "item_id"
-                if self._task_def.dataset['prediction_length'] is None:
-                    log.warning("Warning: For timeseries task setting undefined prediction length to `1`.")
-                    self._task_def.dataset['prediction_length'] = "1"
-                self._dataset.timestamp_column=self._task_def.dataset['timestamp_column']
-                self._dataset.id_column=self._task_def.dataset['id_column']
-                self._dataset.prediction_length=self._task_def.dataset['prediction_length']
+            dataset_name_and_config = copy(self._task_def.dataset)
+            dataset_name_and_config.name = self._task_def.name
+            self._dataset = Benchmark.data_loader.load(DataSourceType.file, dataset=dataset_name_and_config, fold=self.fold)
         else:
             raise ValueError("Tasks should have one property among [openml_task_id, openml_dataset_id, dataset].")
 
diff --git a/amlb/datasets/file.py b/amlb/datasets/file.py
index 0bfa9453b..ffcfa8094 100644
--- a/amlb/datasets/file.py
+++ b/amlb/datasets/file.py
@@ -30,10 +30,10 @@ def __init__(self, cache_dir=None):
         self._cache_dir = cache_dir if cache_dir else tempfile.mkdtemp(prefix='amlb_cache')
 
     @profile(logger=log)
-    def load(self, dataset, fold=0, timestamp_column=None):
+    def load(self, dataset, fold=0):
         dataset = dataset if isinstance(dataset, ns) else ns(path=dataset)
         log.debug("Loading dataset %s", dataset)
-        paths = self._extract_train_test_paths(dataset.path if 'path' in dataset else dataset, fold=fold)
+        paths = self._extract_train_test_paths(dataset.path if 'path' in dataset else dataset, fold=fold, name=dataset['name'] if 'name' in dataset else None)
         assert fold < len(paths['train']), f"No training dataset available for fold {fold} among dataset files {paths['train']}"
         # seed = rget().seed(fold)
         # if len(paths['test']) == 0:
@@ -51,21 +51,27 @@ def load(self, dataset, fold=0, timestamp_column=None):
         if ext == '.arff':
             return ArffDataset(train_path, test_path, target=target, features=features, type=type_)
         elif ext == '.csv':
-            return CsvDataset(train_path, test_path, target=target, features=features, type=type_, timestamp_column=timestamp_column)
+            if DatasetType[dataset['type']] == DatasetType.timeseries and dataset['timestamp_column'] is None:
+                log.warning("Warning: For timeseries task setting undefined timestamp column to `timestamp`.")
+                dataset['timestamp_column'] = "timestamp"
+            csv_dataset = CsvDataset(train_path, test_path, target=target, features=features, type=type_, timestamp_column=dataset['timestamp_column'] if 'timestamp_column' in dataset else None)
+            if csv_dataset.type == DatasetType.timeseries:
+                csv_dataset = self.extend_dataset_with_timeseries_config(csv_dataset, dataset)
+            return csv_dataset
         else:
             raise ValueError(f"Unsupported file type: {ext}")
 
-    def _extract_train_test_paths(self, dataset, fold=None):
+    def _extract_train_test_paths(self, dataset, fold=None, name=None):
         if isinstance(dataset, (tuple, list)):
             assert len(dataset) % 2 == 0, "dataset list must contain an even number of paths: [train_0, test_0, train_1, test_1, ...]."
             return self._extract_train_test_paths(ns(train=[p for i, p in enumerate(dataset) if i % 2 == 0],
                                                      test=[p for i, p in enumerate(dataset) if i % 2 == 1]),
-                                                  fold=fold)
+                                                  fold=fold, name=name)
         elif isinstance(dataset, ns):
-            return dict(train=[self._extract_train_test_paths(p)['train'][0]
+            return dict(train=[self._extract_train_test_paths(p, name=name)['train'][0]
                                if i == fold else None
                                for i, p in enumerate(as_list(dataset.train))],
-                        test=[self._extract_train_test_paths(p)['train'][0]
+                        test=[self._extract_train_test_paths(p, name=name)['train'][0]
                               if i == fold else None
                               for i, p in enumerate(as_list(dataset.test))])
         else:
@@ -116,7 +122,10 @@ def _extract_train_test_paths(self, dataset, fold=None):
                 assert len(paths) > 0, f"No dataset file found in {dataset}: they should follow the naming xxxx_train.ext, xxxx_test.ext or xxxx_train_0.ext, xxxx_test_0.ext, xxxx_train_1.ext, ..."
                 return paths
         elif is_valid_url(dataset):
-            cached_file = os.path.join(self._cache_dir, os.path.basename(dataset))
+            if name is None:
+                cached_file = os.path.join(self._cache_dir, os.path.basename(dataset))
+            else:
+                cached_file = os.path.join(self._cache_dir, name, os.path.basename(dataset))
             if not os.path.exists(cached_file):  # don't download if previously done
                 handler = get_file_handler(dataset)
                 assert handler.exists(dataset), f"Invalid path/url: {dataset}"
@@ -129,6 +138,38 @@ def __repr__(self):
         return repr_def(self)
 
 
+    def extend_dataset_with_timeseries_config(self, dataset, dataset_config):
+        """Copy the time series settings of `dataset_config` onto `dataset` and validate the forecast range.
+
+        Undefined `id_column`/`forecast_range_in_steps` default to `item_id`/`1`; raises ValueError if the range does not fit the data.
+        """
+        if dataset_config['id_column'] is None:
+            log.warning("Warning: For timeseries task setting undefined itemid column to `item_id`.")
+            dataset_config['id_column'] = "item_id"
+        if dataset_config['forecast_range_in_steps'] is None:
+            log.warning("Warning: For timeseries task setting undefined forecast_range_in_steps to `1`.")
+            dataset_config['forecast_range_in_steps'] = "1"
+        dataset.timestamp_column = dataset_config['timestamp_column']
+        dataset.id_column = dataset_config['id_column']
+        dataset.forecast_range_in_steps = int(dataset_config['forecast_range_in_steps'])
+        # size() yields exactly one length per sequence, whatever the number of feature columns in X.
+        train_seqs_lengths = dataset.train.X.groupby(dataset.id_column).size()
+        test_seqs_lengths = dataset.test.X.groupby(dataset.id_column).size()
+        seqs_lengths_diff = test_seqs_lengths - train_seqs_lengths
+        forecast_range_in_steps_mean_diff_train_test = int(seqs_lengths_diff.mean())
+        forecast_range_in_steps_max_min_train_test = int(min(int(test_seqs_lengths.min()), int(train_seqs_lengths.min()))) - 1
+        if dataset.forecast_range_in_steps != forecast_range_in_steps_mean_diff_train_test:
+            log.warning(f"Warning: Forecast range {dataset.forecast_range_in_steps}, does not equal mean difference between test and train sequence lengths {forecast_range_in_steps_mean_diff_train_test}.")
+        if seqs_lengths_diff.nunique() > 1:  # unlike a variance test, this also holds for a single sequence (variance would be NaN)
+            raise ValueError("Error: Not all sequences of train and test set have same sequence length difference.")
+        if dataset.forecast_range_in_steps > forecast_range_in_steps_mean_diff_train_test:
+            raise ValueError(f"Error: Forecast range {dataset.forecast_range_in_steps} longer than difference between test and train sequence lengths {forecast_range_in_steps_mean_diff_train_test}.")
+        if dataset.forecast_range_in_steps > forecast_range_in_steps_max_min_train_test:
+            raise ValueError(f"Error: Forecast range {dataset.forecast_range_in_steps} longer than minimum sequence length - 1, {forecast_range_in_steps_max_min_train_test}.")
+        return dataset
+
+
+
 class FileDataset(Dataset):
 
     def __init__(self, train: Datasplit, test: Datasplit,
diff --git a/amlb/results.py b/amlb/results.py
index 91228ca4e..3887203f6 100644
--- a/amlb/results.py
+++ b/amlb/results.py
@@ -228,12 +228,13 @@ def load_predictions(predictions_file):
             try:
                 df = read_csv(predictions_file, dtype=object)
                 log.debug("Predictions preview:\n %s\n", df.head(10).to_string())
+
+                if rconfig().test_mode:
+                    TaskResult.validate_predictions(df)
+
                 if  'y_past_period_error' in df.columns:
                     return TimeSeriesResult(df)
                 else:
-                    if rconfig().test_mode:
-                        TaskResult.validate_predictions(df)
-
                     if df.shape[1] > 2:
                         return ClassificationResult(df)
                     else:
@@ -258,9 +259,9 @@ def load_metadata(metadata_file):
     def save_predictions(dataset: Dataset, output_file: str,
                          predictions: Union[A, DF, S] = None, truth: Union[A, DF, S] = None,
                          probabilities: Union[A, DF] = None, probabilities_labels: Union[list, A] = None,
+                         optional_columns: Union[A, DF] = None,
                          target_is_encoded: bool = False,
-                         preview: bool = True,
-                         quantiles: Union[A, DF] = None):
+                         preview: bool = True):
         """ Save class probabilities and predicted labels to file in csv format.
 
         :param dataset:
@@ -269,9 +270,9 @@ def save_predictions(dataset: Dataset, output_file: str,
         :param predictions:
         :param truth:
         :param probabilities_labels:
+        :param optional_columns:
         :param target_is_encoded:
         :param preview:
-        :param quantiles:
         :return: None
         """
         log.debug("Saving predictions to `%s`.", output_file)
@@ -315,23 +316,9 @@ def save_predictions(dataset: Dataset, output_file: str,
         df = df.assign(predictions=preds)
         df = df.assign(truth=truth)
 
-        if dataset.type == DatasetType.timeseries:
-            if quantiles is not None:
-                quantiles = quantiles.reset_index(drop=True)
-                df = pd.concat([df, quantiles], axis=1)
-
-            period_length = 1 # TODO: This period length could be adapted to the Dataset, but then we need to pass this information as well. As of now this works.
-
-            # we aim to calculate the mean period error from the past for each sequence: 1/N sum_{i=1}^N |x(t_i) - x(t_i - T)|
-            # 1. retrieve item_ids for each sequence/item
-            item_ids, inverse_item_ids = np.unique(dataset.test.X[dataset.id_column].squeeze().to_numpy(), return_index=False, return_inverse=True)
-            # 2. capture sequences in a list
-            y_past = [dataset.test.y.squeeze().to_numpy()[inverse_item_ids == i][:-dataset.prediction_length] for i in range(len(item_ids))]
-            # 3. calculate period error per sequence
-            y_past_period_error = [np.abs(y_past_item[period_length:] - y_past_item[:-period_length]).mean() for y_past_item in y_past]
-            # 4. repeat period error for each sequence, to save one for each element
-            y_past_period_error_rep = np.repeat(y_past_period_error, dataset.prediction_length)
-            df = df.assign(y_past_period_error=y_past_period_error_rep)
+        if optional_columns is not None:
+            df = pd.concat([df, optional_columns], axis=1)
+
         if preview:
             log.info("Predictions preview:\n %s\n", df.head(20).to_string())
         backup_file(output_file)
diff --git a/frameworks/AutoGluon/README.md b/frameworks/AutoGluon/README.md
new file mode 100644
index 000000000..51286533e
--- /dev/null
+++ b/frameworks/AutoGluon/README.md
@@ -0,0 +1,16 @@
+# AutoGluon
+
+To run v0.5.2: ```python3 ../automlbenchmark/runbenchmark.py autogluon ...```
+
+To run mainline: ```python3 ../automlbenchmark/runbenchmark.py autogluon:latest ...```
+
+
+# AutoGluonTS
+
+AutoGluonTS stands for autogluon.timeseries. This framework handles time series problems.
+
+## Run Steps
+
+To run v0.5.2: ```python3 ../automlbenchmark/runbenchmark.py autogluonts timeseries ...```
+
+To run mainline: ```python3 ../automlbenchmark/runbenchmark.py autogluonts:latest timeseries ...```
diff --git a/frameworks/AutoGluon/__init__.py b/frameworks/AutoGluon/__init__.py
index be2c15147..025f16590 100644
--- a/frameworks/AutoGluon/__init__.py
+++ b/frameworks/AutoGluon/__init__.py
@@ -1,25 +1,53 @@
-from amlb.benchmark import TaskConfig
-from amlb.data import Dataset
+
 from amlb.utils import call_script_in_same_dir
+from amlb.benchmark import TaskConfig
+from amlb.data import Dataset, DatasetType
+from copy import deepcopy
 
 
 def setup(*args, **kwargs):
     call_script_in_same_dir(__file__, "setup.sh", *args, **kwargs)
 
-
 def run(dataset: Dataset, config: TaskConfig):
     from frameworks.shared.caller import run_in_venv
 
-    data = dict(
-        train=dict(path=dataset.train.data_path('parquet')),
-        test=dict(path=dataset.test.data_path('parquet')),
-        target=dict(
-            name=dataset.target.name,
-            classes=dataset.target.values
-        ),
-        problem_type=dataset.type.name  # AutoGluon problem_type is using same names as amlb.data.DatasetType
-    )
-
-    return run_in_venv(__file__, "exec.py",
-                       input_data=data, dataset=dataset, config=config)
+    if dataset.type is not DatasetType.timeseries:
 
+        data = dict(
+            train=dict(path=dataset.train.data_path('parquet')),
+            test=dict(path=dataset.test.data_path('parquet')),
+            target=dict(
+                name=dataset.target.name,
+                classes=dataset.target.values
+            ),
+            problem_type=dataset.type.name  # AutoGluon problem_type is using same names as amlb.data.DatasetType
+        )
+        exec_file = "exec.py"
+
+    else:
+        dataset = deepcopy(dataset)
+        if not hasattr(dataset, 'timestamp_column'):
+            dataset.timestamp_column = None
+        if not hasattr(dataset, 'id_column'):
+            dataset.id_column = None
+        if not hasattr(dataset, 'forecast_range_in_steps'):
+            raise AttributeError("Unspecified `forecast_range_in_steps`.")
+
+        data = dict(
+            # train=dict(path=dataset.train.data_path('parquet')),
+            # test=dict(path=dataset.test.data_path('parquet')),
+            train=dict(path=dataset.train.path),
+            test=dict(path=dataset.test.path),
+            target=dict(
+                name=dataset.target.name,
+                classes=dataset.target.values
+            ),
+            problem_type=dataset.type.name,  # AutoGluon problem_type is using same names as amlb.data.DatasetType
+            timestamp_column=dataset.timestamp_column,
+            id_column=dataset.id_column,
+            forecast_range_in_steps=dataset.forecast_range_in_steps
+        )
+        exec_file = "exec_ts.py"
+
+    return run_in_venv(__file__, exec_file,
+                       input_data=data, dataset=dataset, config=config)
diff --git a/frameworks/AutoGluonTS/exec.py b/frameworks/AutoGluon/exec_ts.py
similarity index 78%
rename from frameworks/AutoGluonTS/exec.py
rename to frameworks/AutoGluon/exec_ts.py
index 87e7f44f3..ab7c4110f 100644
--- a/frameworks/AutoGluonTS/exec.py
+++ b/frameworks/AutoGluon/exec_ts.py
@@ -4,6 +4,7 @@
 import warnings
 import sys
 import tempfile
+import numpy as np
 warnings.simplefilter("ignore")
 
 if sys.platform == 'darwin':
@@ -27,7 +28,7 @@ def run(dataset, config):
 
     timestamp_column = dataset.timestamp_column
     id_column = dataset.id_column
-    prediction_length = dataset.prediction_length
+    prediction_length = dataset.forecast_range_in_steps
 
     eval_metric = get_eval_metric(config)
     label = dataset.target.name
@@ -76,6 +77,23 @@ def run(dataset, config):
     save_artifacts(predictor=predictor, leaderboard=leaderboard, config=config)
     shutil.rmtree(predictor.path, ignore_errors=True)
 
+    quantiles = predictions.drop(columns=['mean']).reset_index(drop=True)
+    period_length = 1 # TODO: This period length could be adapted to the Dataset, but then we need to pass this information as well. As of now this works.
+
+    # we aim to calculate the mean period error from the past for each sequence: 1/N sum_{i=1}^N |x(t_i) - x(t_i - T)|
+    # 1. retrieve item_ids for each sequence/item
+    #dataset..X /. y
+    item_ids, inverse_item_ids = np.unique(test_data.reset_index()["item_id"].squeeze().to_numpy(), return_index=False, return_inverse=True)
+    # 2. capture sequences in a list
+    y_past = [test_data[label].squeeze().to_numpy()[inverse_item_ids == i][:-prediction_length] for i in range(len(item_ids))]
+    # 3. calculate period error per sequence
+    y_past_period_error = [np.abs(y_past_item[period_length:] - y_past_item[:-period_length]).mean() for y_past_item in y_past]
+    # 4. repeat period error for each sequence, to save one for each element
+    y_past_period_error_rep = np.repeat(y_past_period_error, prediction_length)
+
+    optional_columns = quantiles
+    optional_columns = optional_columns.assign(y_past_period_error=y_past_period_error_rep)
+
     return result(output_file=config.output_predictions_file,
                   predictions=predictions_only,
                   truth=truth_only,
@@ -85,8 +103,7 @@ def run(dataset, config):
                   models_count=num_models_trained,
                   training_duration=training.duration,
                   predict_duration=predict.duration,
-                  quantiles=predictions.drop(columns=['mean']))
-
+                  optional_columns=optional_columns)
 
 def load_data(train_path, test_path, timestamp_column, id_column):
 
diff --git a/frameworks/AutoGluon/setup.sh b/frameworks/AutoGluon/setup.sh
index 6ef50ed8c..7cbccbee9 100755
--- a/frameworks/AutoGluon/setup.sh
+++ b/frameworks/AutoGluon/setup.sh
@@ -1,4 +1,5 @@
 #!/usr/bin/env bash
+
 HERE=$(dirname "$0")
 VERSION=${1:-"stable"}
 REPO=${2:-"https://github.com/awslabs/autogluon.git"}
@@ -36,4 +37,10 @@ else
     PIP install -e tabular/[skex]
 fi
 
-PY -c "from autogluon.tabular.version import __version__; print(__version__)" >> "${HERE}/.setup/installed"
+if [[ ${MODULE} == "timeseries" ]]; then  # record the version of the module this setup targets
+    PY -c "from autogluon.timeseries.version import __version__; print(__version__)" >> "${HERE}/.setup/installed"
+    # TODO: GPU version install
+    PIP install "mxnet<2.0"
+else
+    PY -c "from autogluon.tabular.version import __version__; print(__version__)" >> "${HERE}/.setup/installed"
+fi
diff --git a/frameworks/AutoGluonTS/README.md b/frameworks/AutoGluonTS/README.md
deleted file mode 100644
index 938b459c4..000000000
--- a/frameworks/AutoGluonTS/README.md
+++ /dev/null
@@ -1,16 +0,0 @@
-# AutoGluonTS
-
-AutoGluonTS stands for autogluon.timeseries. This framework handles time series problems.
-
-This code is currently a prototype, since time series support is not fully defined in AutoMLBenchmark yet.
-Consider the code a proof of concept.
-
-## Run Steps
-
-To run AutoGluonTS in AutoMLBenchmark on the covid dataset from the AutoGluon tutorial, do the following:
-
-1. Create a fresh Python environment
-2. Follow automlbenchmark install instructions
-3. Run the following command in terminal: ```python3 ../automlbenchmark/runbenchmark.py autogluonts ts test```
-
-To run mainline AutoGluonTS instead of v0.5.2: ```python3 ../automlbenchmark/runbenchmark.py autogluonts:latest ts test```
diff --git a/frameworks/AutoGluonTS/__init__.py b/frameworks/AutoGluonTS/__init__.py
deleted file mode 100644
index 70283c3e5..000000000
--- a/frameworks/AutoGluonTS/__init__.py
+++ /dev/null
@@ -1,38 +0,0 @@
-from amlb.benchmark import TaskConfig
-from amlb.data import Dataset, DatasetType
-from amlb.utils import call_script_in_same_dir
-
-
-def setup(*args, **kwargs):
-    call_script_in_same_dir(__file__, "setup.sh", *args, **kwargs)
-
-
-def run(dataset: Dataset, config: TaskConfig):
-    from frameworks.shared.caller import run_in_venv
-
-    if hasattr(dataset, 'timestamp_column') is False:
-        dataset.timestamp_column = None
-    if hasattr(dataset, 'id_column') is False:
-        dataset.id_column = None
-    if hasattr(dataset, 'prediction_length') is False:
-        raise AttributeError("Unspecified `prediction_length`.")
-    if dataset.type is not DatasetType.timeseries:
-        raise ValueError("AutoGluonTS only supports timeseries.")
-
-    data = dict(
-        # train=dict(path=dataset.train.data_path('parquet')),
-        # test=dict(path=dataset.test.data_path('parquet')),
-        train=dict(path=dataset.train.path),
-        test=dict(path=dataset.test.path),
-        target=dict(
-            name=dataset.target.name,
-            classes=dataset.target.values
-        ),
-        problem_type=dataset.type.name,  # AutoGluon problem_type is using same names as amlb.data.DatasetType
-        timestamp_column=dataset.timestamp_column,
-        id_column=dataset.id_column,
-        prediction_length=dataset.prediction_length
-    )
-
-    return run_in_venv(__file__, "exec.py",
-                       input_data=data, dataset=dataset, config=config)
diff --git a/frameworks/AutoGluonTS/setup.sh b/frameworks/AutoGluonTS/setup.sh
deleted file mode 100755
index d9fc7e8da..000000000
--- a/frameworks/AutoGluonTS/setup.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/env bash
-HERE=$(dirname "$0")
-VERSION=${1:-"stable"}
-REPO=${2:-"https://github.com/awslabs/autogluon.git"}
-PKG=${3:-"autogluon"}
-if [[ "$VERSION" == "latest" ]]; then
-    VERSION="master"
-fi
-
-# creating local venv
-. ${HERE}/../shared/setup.sh ${HERE} true
-
-PIP install --upgrade pip
-PIP install --upgrade setuptools wheel
-
-if [[ "$VERSION" == "stable" ]]; then
-    PIP install --no-cache-dir -U "${PKG}"
-    PIP install --no-cache-dir -U "${PKG}.tabular[skex]"
-elif [[ "$VERSION" =~ ^[0-9] ]]; then
-    PIP install --no-cache-dir -U "${PKG}==${VERSION}"
-    PIP install --no-cache-dir -U "${PKG}.tabular[skex]==${VERSION}"
-else
-    TARGET_DIR="${HERE}/lib/${PKG}"
-    rm -Rf ${TARGET_DIR}
-    git clone --depth 1 --single-branch --branch ${VERSION} --recurse-submodules ${REPO} ${TARGET_DIR}
-    cd ${TARGET_DIR}
-    PY_EXEC_NO_ARGS="$(cut -d' ' -f1 <<<"$py_exec")"
-    PY_EXEC_DIR=$(dirname "$PY_EXEC_NO_ARGS")
-    env PATH="$PY_EXEC_DIR:$PATH" bash -c ./full_install.sh
-    PIP install -e tabular/[skex]
-fi
-
-# TODO: GPU version install
-PIP install "mxnet<2.0"
-
-PY -c "from autogluon.timeseries.version import __version__; print(__version__)" >> "${HERE}/.setup/installed"
diff --git a/frameworks/shared/callee.py b/frameworks/shared/callee.py
index 70b5a3be0..c596e01c5 100644
--- a/frameworks/shared/callee.py
+++ b/frameworks/shared/callee.py
@@ -17,12 +17,12 @@ class FrameworkError(Exception):
 def result(output_file=None,
            predictions=None, truth=None,
            probabilities=None, probabilities_labels=None,
+           optional_columns=None,
            target_is_encoded=False,
            error_message=None,
            models_count=None,
            training_duration=None,
            predict_duration=None,
-           quantiles=None,
            **others):
     return locals()
 
@@ -70,7 +70,7 @@ def load_data(name, path, **_):
                               wait_retry_secs=10):
             result = run_fn(ds, config)
             res = dict(result)
-            for name in ['predictions', 'truth', 'probabilities', 'quantiles']:
+            for name in ['predictions', 'truth', 'probabilities', 'optional_columns']:
                 arr = result[name]
                 if arr is not None:
                     path = os.path.join(config.result_dir, '.'.join([name, 'data']))
diff --git a/frameworks/shared/caller.py b/frameworks/shared/caller.py
index 68963a820..09654dc32 100644
--- a/frameworks/shared/caller.py
+++ b/frameworks/shared/caller.py
@@ -149,7 +149,7 @@ def run_in_venv(caller_file, script_file: str, *args,
         if res.error_message is not None:
             raise NoResultError(res.error_message)
 
-        for name in ['predictions', 'truth', 'probabilities', 'quantiles']:
+        for name in ['predictions', 'truth', 'probabilities', 'optional_columns']:
             res[name] = deserialize_data(res[name], config=ser_config) if res[name] is not None else None
 
         if callable(process_results):
@@ -164,8 +164,8 @@ def run_in_venv(caller_file, script_file: str, *args,
                                     else dataset.test.y),
                              probabilities=res.probabilities,
                              probabilities_labels=res.probabilities_labels,
-                             target_is_encoded=res.target_is_encoded,
-                             quantiles=res.quantiles)
+                             optional_columns=res.optional_columns,
+                             target_is_encoded=res.target_is_encoded)
 
         return dict(
             models_count=res.models_count if res.models_count is not None else 1,
diff --git a/resources/benchmarks/timeseries.yaml b/resources/benchmarks/timeseries.yaml
new file mode 100644
index 000000000..26af06497
--- /dev/null
+++ b/resources/benchmarks/timeseries.yaml
@@ -0,0 +1,13 @@
+---
+
+- name: covid
+  dataset:
+    train: https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/train.csv
+    test: https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/test.csv
+    target: ConfirmedCases
+    type: timeseries
+    forecast_range_in_steps: 19
+    id_column: name
+    timestamp_column: Date
+
+  folds: 1
diff --git a/resources/benchmarks/ts.yaml b/resources/benchmarks/ts.yaml
deleted file mode 100644
index 0a73c81fb..000000000
--- a/resources/benchmarks/ts.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
----
-
-- name: covid
-  dataset:
-    train: https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/train.csv
-    # s3://autogluon-ts-bench/data/covid_deaths/csv/train.csv | https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/train.csv
-    test: https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/test.csv
-    # s3://autogluon-ts-bench/data/covid_deaths/csv/test.csv  |  https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/test.csv
-    target: ConfirmedCases              # target    | ConfirmedCases
-    type: timeseries
-    prediction_length: 30
-    id_column: name          # item_id   | name
-    timestamp_column: Date # timestamp | Date
-
-  folds: 1
diff --git a/resources/frameworks.yaml b/resources/frameworks.yaml
index eb59c44bf..513c99586 100644
--- a/resources/frameworks.yaml
+++ b/resources/frameworks.yaml
@@ -86,9 +86,9 @@ autoxgboost:
 flaml:
   version: 'stable'
   description: |
-    FLAML is a lightweight Python library that finds accurate machine learning models 
-    automatically, efficiently and economically. It frees users from selecting learners 
-    and hyperparameters for each learner. It is fast and cheap. 
+    FLAML is a lightweight Python library that finds accurate machine learning models
+    automatically, efficiently and economically. It frees users from selecting learners
+    and hyperparameters for each learner. It is fast and cheap.
   project: https://github.com/microsoft/FLAML
   refs: [https://arxiv.org/pdf/1911.04706.pdf]
 
@@ -139,12 +139,12 @@ mljarsupervised_compete:
   description: "MLJAR is using 'Compete' mode to provide the most accurate predictor"
   params:
     mode: Compete   # set mode for Compete, default mode is Explain
-    
+
 MLNet:
   version: 'latest'
   description: |
     MLNET.CLI is a automated machine learning tool implemented by ml.net.
-    
+
 MLPlan:
   version: 'stable'
   abstract: true
@@ -196,10 +196,12 @@ TPOT:
 ####################################
 
 AutoGluonTS:
+  extends: AutoGluon
   version: "stable"
   description: |
     AutoGluon-TimeSeries
-  project: https://auto.gluon.ai
+  setup_env:
+    MODULE: timeseries
 
 #######################################
 ### Non AutoML reference frameworks ###
@@ -242,4 +244,3 @@ TunedRandomForest:
 #    _n_jobs: 1  # cf. RandomForest
 #    _tuning:
 #      n_estimators: 500
-
diff --git a/resources/frameworks_latest.yaml b/resources/frameworks_latest.yaml
index b23bf72b0..becdc4e3e 100644
--- a/resources/frameworks_latest.yaml
+++ b/resources/frameworks_latest.yaml
@@ -85,10 +85,12 @@ TPOT:
 ####################################
 
 AutoGluonTS:
+  extends: AutoGluon
   version: "latest"
   description: |
     AutoGluon-TimeSeries
-  project: https://auto.gluon.ai
+  setup_env:
+    MODULE: timeseries
 
 #######################################
 ### Non AutoML reference frameworks ###
@@ -111,4 +113,3 @@ TunedRandomForest:
   version: 'latest'
   params:
     n_estimators: 2000
-