diff --git a/.gitignore b/.gitignore
index 4dba33db1..bc9c76adc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,6 +16,7 @@
 venv/
 .idea/
 *.iml
 *.swp
+launch.json
 
 # tmp files
 .ipynb_checkpoints/
diff --git a/amlb/benchmark.py b/amlb/benchmark.py
index b9975efdc..7c54a344c 100644
--- a/amlb/benchmark.py
+++ b/amlb/benchmark.py
@@ -489,7 +489,9 @@ def load_data(self):
             # TODO
             raise NotImplementedError("OpenML datasets without task_id are not supported yet.")
         elif hasattr(self._task_def, 'dataset'):
-            self._dataset = Benchmark.data_loader.load(DataSourceType.file, dataset=self._task_def.dataset, fold=self.fold)
+            dataset_name_and_config = copy(self._task_def.dataset)
+            dataset_name_and_config.name = self._task_def.name
+            self._dataset = Benchmark.data_loader.load(DataSourceType.file, dataset=dataset_name_and_config, fold=self.fold)
         else:
             raise ValueError("Tasks should have one property among [openml_task_id, openml_dataset_id, dataset].")
 
@@ -522,7 +524,12 @@ def run(self):
                                    predictions_dir=self.benchmark.output_dirs.predictions)
         framework_def = self.benchmark.framework_def
         task_config = copy(self.task_config)
-        task_config.type = 'regression' if self._dataset.type == DatasetType.regression else 'classification'
+        if self._dataset.type == DatasetType.regression:
+            task_config.type = 'regression'
+        elif self._dataset.type == DatasetType.timeseries:
+            task_config.type = 'timeseries'
+        else:
+            task_config.type = 'classification'
         task_config.type_ = self._dataset.type.name
         task_config.framework = self.benchmark.framework_name
         task_config.framework_params = framework_def.params
@@ -552,4 +559,3 @@ def run(self):
         finally:
             self._dataset.release()
         return results.compute_score(result=result, meta_result=meta_result)
-
diff --git a/amlb/data.py b/amlb/data.py
index 4e4cea879..acca17841 100644
--- a/amlb/data.py
+++ b/amlb/data.py
@@ -172,6 +172,7 @@ class DatasetType(Enum):
     binary = 1
     multiclass = 2
     regression = 3
+    timeseries = 4
 
 
 class Dataset(ABC):
diff --git a/amlb/datasets/file.py b/amlb/datasets/file.py
index 6ddca4042..526c131e7 100644
--- a/amlb/datasets/file.py
+++ b/amlb/datasets/file.py
@@ -16,7 +16,7 @@
 from ..utils import Namespace as ns, as_list, lazy_property, list_all_files, memoize, path_from_split, profile, repr_def, split_path
 from .fileutils import is_archive, is_valid_url, unarchive_file, get_file_handler
-
+from copy import deepcopy
 
 log = logging.getLogger(__name__)
 
@@ -33,7 +33,7 @@ def __init__(self, cache_dir=None):
     def load(self, dataset, fold=0):
         dataset = dataset if isinstance(dataset, ns) else ns(path=dataset)
         log.debug("Loading dataset %s", dataset)
-        paths = self._extract_train_test_paths(dataset.path if 'path' in dataset else dataset, fold=fold)
+        paths = self._extract_train_test_paths(dataset.path if 'path' in dataset else dataset, fold=fold, name=dataset['name'] if 'name' in dataset else None)
         assert fold < len(paths['train']), f"No training dataset available for fold {fold} among dataset files {paths['train']}"
 #        seed = rget().seed(fold)
 #        if len(paths['test']) == 0:
@@ -51,21 +51,28 @@ def load(self, dataset, fold=0):
         if ext == '.arff':
             return ArffDataset(train_path, test_path, target=target, features=features, type=type_)
         elif ext == '.csv':
-            return CsvDataset(train_path, test_path, target=target, features=features, type=type_)
+            if 'type' in dataset and DatasetType[dataset['type']] == DatasetType.timeseries \
+                    and ('timestamp_column' not in dataset or dataset['timestamp_column'] is None):
+                log.warning("For timeseries tasks, the timestamp column is undefined; defaulting to `timestamp`.")
+                dataset = deepcopy(dataset)
+                dataset['timestamp_column'] = "timestamp"
+            csv_dataset = CsvDataset(train_path, test_path, target=target, features=features, type=type_,
+                                     timestamp_column=dataset['timestamp_column'] if 'timestamp_column' in dataset else None)
+            if csv_dataset.type == DatasetType.timeseries:
+                csv_dataset = self.extend_dataset_with_timeseries_config(csv_dataset, dataset)
+            return csv_dataset
         else:
             raise ValueError(f"Unsupported file type: {ext}")
 
-    def _extract_train_test_paths(self, dataset, fold=None):
+    def _extract_train_test_paths(self, dataset, fold=None, name=None):
         if isinstance(dataset, (tuple, list)):
             assert len(dataset) % 2 == 0, "dataset list must contain an even number of paths: [train_0, test_0, train_1, test_1, ...]."
             return self._extract_train_test_paths(ns(train=[p for i, p in enumerate(dataset) if i % 2 == 0],
                                                      test=[p for i, p in enumerate(dataset) if i % 2 == 1]),
-                                                  fold=fold)
+                                                  fold=fold, name=name)
         elif isinstance(dataset, ns):
-            return dict(train=[self._extract_train_test_paths(p)['train'][0]
+            return dict(train=[self._extract_train_test_paths(p, name=name)['train'][0]
                                if i == fold else None
                                for i, p in enumerate(as_list(dataset.train))],
-                        test=[self._extract_train_test_paths(p)['train'][0]
+                        test=[self._extract_train_test_paths(p, name=name)['train'][0]
                               if i == fold else None
                               for i, p in enumerate(as_list(dataset.test))])
         else:
@@ -116,7 +123,10 @@ def _extract_train_test_paths(self, dataset, fold=None):
             assert len(paths) > 0, f"No dataset file found in {dataset}: they should follow the naming xxxx_train.ext, xxxx_test.ext or xxxx_train_0.ext, xxxx_test_0.ext, xxxx_train_1.ext, ..."
             return paths
         elif is_valid_url(dataset):
-            cached_file = os.path.join(self._cache_dir, os.path.basename(dataset))
+            if name is None:
+                cached_file = os.path.join(self._cache_dir, os.path.basename(dataset))
+            else:
+                cached_file = os.path.join(self._cache_dir, name, os.path.basename(dataset))
             if not os.path.exists(cached_file):  # don't download if previously done
                 handler = get_file_handler(dataset)
                 assert handler.exists(dataset), f"Invalid path/url: {dataset}"
@@ -129,6 +139,40 @@
     def __repr__(self):
         return repr_def(self)
 
+    def extend_dataset_with_timeseries_config(self, dataset, dataset_config):
+        dataset = deepcopy(dataset)
+        dataset_config = deepcopy(dataset_config)
+        if 'id_column' not in dataset_config or dataset_config['id_column'] is None:
+            log.warning("For timeseries tasks, `id_column` is undefined; defaulting to `item_id`.")
+            dataset_config['id_column'] = "item_id"
+        if 'forecast_range_in_steps' not in dataset_config or dataset_config['forecast_range_in_steps'] is None:
+            log.warning("For timeseries tasks, `forecast_range_in_steps` is undefined; defaulting to 1.")
+            dataset_config['forecast_range_in_steps'] = 1
+
+        dataset.timestamp_column = dataset_config['timestamp_column']
+        dataset.id_column = dataset_config['id_column']
+        dataset.forecast_range_in_steps = int(dataset_config['forecast_range_in_steps'])
+
+        train_seqs_lengths = dataset.train.X.groupby(dataset.id_column).count()
+        test_seqs_lengths = dataset.test.X.groupby(dataset.id_column).count()
+        forecast_range_in_steps_mean_diff_train_test = int((test_seqs_lengths - train_seqs_lengths).mean())
+        forecast_range_in_steps_max_min_train_test = int(min(int(test_seqs_lengths.min()), int(train_seqs_lengths.min()))) - 1
+        if dataset.forecast_range_in_steps != forecast_range_in_steps_mean_diff_train_test:
+            log.warning("Forecast range %s does not equal the mean difference between test and train sequence lengths (%s).",
+                        dataset.forecast_range_in_steps, forecast_range_in_steps_mean_diff_train_test)
+        if (test_seqs_lengths - train_seqs_lengths).var().item() != 0.:
+            raise ValueError("Not all train/test series pairs have the same sequence length difference.")
+        if dataset.forecast_range_in_steps > forecast_range_in_steps_mean_diff_train_test:
+            raise ValueError(f"Forecast range {dataset.forecast_range_in_steps} is longer than the difference "
+                             f"between test and train sequence lengths ({forecast_range_in_steps_mean_diff_train_test}).")
+        if dataset.forecast_range_in_steps > forecast_range_in_steps_max_min_train_test:
+            raise ValueError(f"Forecast range {dataset.forecast_range_in_steps} is longer than the minimum "
+                             f"sequence length minus 1 ({forecast_range_in_steps_max_min_train_test}).")
+        return dataset
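+
+    # Illustrative example (hypothetical numbers): if every train series has 100 rows and its
+    # matching test series has 119, the mean length difference is 19, so `forecast_range_in_steps: 19`
+    # passes the checks above; a larger value, or one exceeding the shortest series length minus 1,
+    # raises a ValueError, and unequal per-series length differences are rejected outright.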
+
+
 class FileDataset(Dataset):
 
     def __init__(self, train: Datasplit, test: Datasplit,
@@ -302,25 +346,26 @@ def release(self, properties=None):
 
 class CsvDataset(FileDataset):
 
     def __init__(self, train_path, test_path,
-                 target=None, features=None, type=None):
+                 target=None, features=None, type=None, timestamp_column=None):
         # todo: handle auto-split (if test_path is None): requires loading the training set, split, save
         super().__init__(None, None, target=target, features=features, type=type)
-        self._train = CsvDatasplit(self, train_path)
-        self._test = CsvDatasplit(self, test_path)
+        self._train = CsvDatasplit(self, train_path, timestamp_column=timestamp_column)
+        self._test = CsvDatasplit(self, test_path, timestamp_column=timestamp_column)
         self._dtypes = None
 
 
 class CsvDatasplit(FileDatasplit):
 
-    def __init__(self, dataset, path):
+    def __init__(self, dataset, path, timestamp_column=None):
         super().__init__(dataset, format='csv', path=path)
         self._ds = None
+        self.timestamp_column = timestamp_column
 
     def _ensure_loaded(self):
         if self._ds is None:
             if self.dataset._dtypes is None:
-                df = read_csv(self.path)
+                df = read_csv(self.path, timestamp_column=self.timestamp_column)
                 # df = df.convert_dtypes()
                 dt_conversions = {name: 'category'
                                   for name, dtype in zip(df.dtypes.index, df.dtypes.values)
@@ -336,8 +381,9 @@ def _ensure_loaded(self):
 
                 self._ds = df
                 self.dataset._dtypes = self._ds.dtypes
+
             else:
-                self._ds = read_csv(self.path, dtype=self.dataset._dtypes.to_dict())
+                self._ds = read_csv(self.path, dtype=self.dataset._dtypes.to_dict(), timestamp_column=self.timestamp_column)
 
     @profile(logger=log)
     def load_metadata(self):
@@ -348,7 +394,7 @@ def load_metadata(self):
                 else 'number' if pat.is_numeric_dtype(dt)
                 else 'category' if pat.is_categorical_dtype(dt)
                 else 'string' if pat.is_string_dtype(dt)
-                # else 'datetime' if pat.is_datetime64_dtype(dt)
+                else 'datetime' if pat.is_datetime64_dtype(dt)
                 else 'object')
         features = [Feature(i, col, to_feature_type(dtypes[i]))
                     for i, col in enumerate(self._ds.columns)]
diff --git a/amlb/datautils.py b/amlb/datautils.py
index f3eeeb2a5..7946fdc1c 100644
--- a/amlb/datautils.py
+++ b/amlb/datautils.py
@@ -26,7 +26,7 @@
 log = logging.getLogger(__name__)
 
 
-def read_csv(path, nrows=None, header=True, index=False, as_data_frame=True, dtype=None):
+def read_csv(path, nrows=None, header=True, index=False, as_data_frame=True, dtype=None, timestamp_column=None):
     """
     read csv file to DataFrame.
 
@@ -37,13 +37,19 @@ def read_csv(path, nrows=None, header=True, index=False, as_data_frame=True, dtype=None):
     :param header: if the columns header should be read.
     :param as_data_frame: if the result should be returned as a data frame (default) or a numpy array.
     :param dtype: data type for columns.
+    :param timestamp_column: column name for the timestamp, to ensure dates are correctly parsed by pandas.
     :return: a DataFrame
     """
+    if dtype is not None and timestamp_column is not None and timestamp_column in dtype:
+        dtype = dtype.copy()  # avoid mutating the caller's dtype mapping
+        del dtype[timestamp_column]
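+        # (Presumably needed because the dtypes cached from the first load store the timestamp
+        # column as datetime64, and pandas' read_csv rejects datetime64 entries in `dtype`,
+        # asking for `parse_dates` instead; dropping the entry lets `parse_dates` take effect.)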
+
     df = pd.read_csv(path,
                      nrows=nrows,
                      header=0 if header else None,
                      index_col=0 if index else None,
-                     dtype=dtype)
+                     dtype=dtype,
+                     parse_dates=[timestamp_column] if timestamp_column is not None else None)
     return df if as_data_frame else df.values
diff --git a/amlb/results.py b/amlb/results.py
index 2f547b4ec..3887203f6 100644
--- a/amlb/results.py
+++ b/amlb/results.py
@@ -228,12 +228,17 @@ def load_predictions(predictions_file):
         try:
             df = read_csv(predictions_file, dtype=object)
             log.debug("Predictions preview:\n %s\n", df.head(10).to_string())
+
             if rconfig().test_mode:
                 TaskResult.validate_predictions(df)
-            if df.shape[1] > 2:
-                return ClassificationResult(df)
+
+            if 'y_past_period_error' in df.columns:
+                return TimeSeriesResult(df)
+            elif df.shape[1] > 2:
+                return ClassificationResult(df)
             else:
                 return RegressionResult(df)
         except Exception as e:
             return ErrorResult(ResultError(e))
     else:
@@ -254,6 +259,7 @@ def load_metadata(metadata_file):
     def save_predictions(dataset: Dataset, output_file: str,
                          predictions: Union[A, DF, S] = None, truth: Union[A, DF, S] = None,
                          probabilities: Union[A, DF] = None, probabilities_labels: Union[list, A] = None,
+                         optional_columns: Union[A, DF] = None,
                          target_is_encoded: bool = False,
                          preview: bool = True):
         """ Save class probabilities and predicted labels to file in csv format.
@@ -264,6 +270,7 @@ def save_predictions(dataset: Dataset, output_file: str,
         :param predictions:
         :param truth:
         :param probabilities_labels:
+        :param optional_columns: extra columns to append as-is to the predictions file (e.g. forecast quantiles).
         :param target_is_encoded:
         :param preview:
         :return: None
@@ -308,6 +315,10 @@ def save_predictions(dataset: Dataset, output_file: str,
         df = df.assign(predictions=preds)
         df = df.assign(truth=truth)
+
+        if optional_columns is not None:
+            df = pd.concat([df, optional_columns], axis=1)
+
         if preview:
             log.info("Predictions preview:\n %s\n", df.head(20).to_string())
         backup_file(output_file)
@@ -656,6 +667,71 @@ def r2(self):
         """R^2"""
         return float(r2_score(self.truth, self.predictions))
 
+
+class TimeSeriesResult(RegressionResult):
+
+    def __init__(self, predictions_df, info=None):
+        super().__init__(predictions_df, info)
+        self.truth = self.df['truth'].values if self.df is not None else None
+        self.predictions = self.df['predictions'].values if self.df is not None else None
+        self.y_past_period_error = self.df['y_past_period_error'].values
+        self.quantiles = self.df.iloc[:, 2:-1].values
+        self.quantiles_probs = np.array([float(q) for q in self.df.columns[2:-1]])
+        self.truth = self.truth.astype(float, copy=False)
+        self.predictions = self.predictions.astype(float, copy=False)
+        self.quantiles = self.quantiles.astype(float, copy=False)
+        self.y_past_period_error = self.y_past_period_error.astype(float, copy=False)
+
+        self.target = Feature(0, 'target', 'real', is_target=True)
+        self.type = DatasetType.timeseries
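+
+    # Layout assumption (cf. `save_predictions` above and frameworks/AutoGluon/exec_ts.py):
+    # columns arrive ordered as [predictions, truth, <one column per quantile, named by its
+    # probability, e.g. "0.1" ... "0.9">, y_past_period_error], so the iloc[:, 2:-1] slices
+    # in __init__ pick out exactly the quantile columns.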
+
+    @metric(higher_is_better=False)
+    def mase(self):
+        """Mean Absolute Scaled Error"""
+        # each absolute error is scaled by its series' in-sample seasonal error (y_past_period_error)
+        return float(np.nanmean(np.abs(self.truth/self.y_past_period_error - self.predictions/self.y_past_period_error)))
+
+    @metric(higher_is_better=False)
+    def smape(self):
+        """Symmetric Mean Absolute Percentage Error"""
+        num = np.abs(self.truth - self.predictions)
+        denom = (np.abs(self.truth) + np.abs(self.predictions)) / 2
+        # If the denominator is 0, we set it to float('inf') such that any division yields 0 (this
+        # might not be fully mathematically correct, but at least we don't get NaNs)
+        denom[denom == 0] = math.inf
+        return float(np.mean(num / denom))
+
+    @metric(higher_is_better=False)
+    def mape(self):
+        """Mean Absolute Percentage Error"""
+        num = np.abs(self.truth - self.predictions)
+        denom = np.abs(self.truth)
+        # If the denominator is 0, we set it to float('inf') such that any division yields 0 (this
+        # might not be fully mathematically correct, but at least we don't get NaNs)
+        denom[denom == 0] = math.inf
+        return float(np.mean(num / denom))
+
+    @metric(higher_is_better=False)
+    def nrmse(self):
+        """Normalized Root Mean Square Error"""
+        return float(self.rmse() / np.mean(np.abs(self.truth)))
+
+    @metric(higher_is_better=False)
+    def wape(self):
+        """Weighted Average Percentage Error"""
+        return float(np.sum(np.abs(self.truth - self.predictions)) / np.sum(np.abs(self.truth)))
+
+    @metric(higher_is_better=False)
+    def ncrps(self):
+        """Normalized Continuous Ranked Probability Score"""
+        quantile_losses = 2 * np.sum(
+            np.abs(
+                (self.quantiles - self.truth[:, None])
+                * ((self.quantiles >= self.truth[:, None]) - self.quantiles_probs[None, :])
+            ),
+            axis=0,
+        )  # shape [num_quantiles]
+        denom = np.sum(np.abs(self.truth))  # scalar
+        weighted_losses = quantile_losses / denom  # shape [num_quantiles]
+        return float(weighted_losses.mean())
 
 
 _encode_predictions_and_truth_ = False
diff --git a/frameworks/AutoGluon/README.md b/frameworks/AutoGluon/README.md
new file mode 100644
index 000000000..51286533e
--- /dev/null
+++ b/frameworks/AutoGluon/README.md
@@ -0,0 +1,16 @@
+# AutoGluon
+
+To run v0.5.2: ```python3 ../automlbenchmark/runbenchmark.py autogluon ...```
+
+To run mainline: ```python3 ../automlbenchmark/runbenchmark.py autogluon:latest ...```
+
+
+# AutoGluonTS
+
+AutoGluonTS refers to the `autogluon.timeseries` module; this variant of the framework handles time series forecasting tasks.
+
+## Run Steps
+
+To run v0.5.2: ```python3 ../automlbenchmark/runbenchmark.py autogluonts timeseries ...```
+
+To run mainline: ```python3 ../automlbenchmark/runbenchmark.py autogluonts:latest timeseries ...```
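+
+For example, assuming the directory layout used above and the bundled `timeseries`
+benchmark definition, a single-fold run could look like:
+
+```python3 ../automlbenchmark/runbenchmark.py autogluonts timeseries -f 0```
+
+(`-f`/`--fold` is the standard `runbenchmark.py` option selecting which fold to run.)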
diff --git a/frameworks/AutoGluon/__init__.py b/frameworks/AutoGluon/__init__.py
index be2c15147..c8694148c 100644
--- a/frameworks/AutoGluon/__init__.py
+++ b/frameworks/AutoGluon/__init__.py
@@ -1,15 +1,23 @@
-from amlb.benchmark import TaskConfig
-from amlb.data import Dataset
+
 from amlb.utils import call_script_in_same_dir
+from amlb.benchmark import TaskConfig
+from amlb.data import Dataset, DatasetType
+from copy import deepcopy
 
 
 def setup(*args, **kwargs):
     call_script_in_same_dir(__file__, "setup.sh", *args, **kwargs)
 
-
 def run(dataset: Dataset, config: TaskConfig):
-    from frameworks.shared.caller import run_in_venv
+    if dataset.type is not DatasetType.timeseries:
+        return run_autogluon_tabular(dataset, config)
+    else:
+        return run_autogluon_timeseries(dataset, config)
+
+
+def run_autogluon_tabular(dataset: Dataset, config: TaskConfig):
+    from frameworks.shared.caller import run_in_venv
     data = dict(
         train=dict(path=dataset.train.data_path('parquet')),
         test=dict(path=dataset.test.data_path('parquet')),
@@ -23,3 +31,30 @@ def run(dataset: Dataset, config: TaskConfig):
 
     return run_in_venv(__file__, "exec.py",
                        input_data=data, dataset=dataset, config=config)
 
+
+def run_autogluon_timeseries(dataset: Dataset, config: TaskConfig):
+    from frameworks.shared.caller import run_in_venv
+    dataset = deepcopy(dataset)
+    if not hasattr(dataset, 'timestamp_column'):
+        dataset.timestamp_column = None
+    if not hasattr(dataset, 'id_column'):
+        dataset.id_column = None
+    if not hasattr(dataset, 'forecast_range_in_steps'):
+        raise AttributeError("Unspecified `forecast_range_in_steps`.")
+
+    data = dict(
+        # train=dict(path=dataset.train.data_path('parquet')),
+        # test=dict(path=dataset.test.data_path('parquet')),
+        train=dict(path=dataset.train.path),
+        test=dict(path=dataset.test.path),
+        target=dict(
+            name=dataset.target.name,
+            classes=dataset.target.values
+        ),
+        problem_type=dataset.type.name,  # AutoGluon's problem_type uses the same names as amlb.data.DatasetType
+        timestamp_column=dataset.timestamp_column,
+        id_column=dataset.id_column,
+        forecast_range_in_steps=dataset.forecast_range_in_steps
+    )
+
+    return run_in_venv(__file__, "exec_ts.py",
+                       input_data=data, dataset=dataset, config=config)
diff --git a/frameworks/AutoGluon/exec_ts.py b/frameworks/AutoGluon/exec_ts.py
new file mode 100644
index 000000000..ab7c4110f
--- /dev/null
+++ b/frameworks/AutoGluon/exec_ts.py
@@ -0,0 +1,172 @@
+import logging
+import os
+import shutil
+import warnings
+import sys
+import tempfile
+import numpy as np
+warnings.simplefilter("ignore")
+
+if sys.platform == 'darwin':
+    os.environ['OMP_NUM_THREADS'] = '1'
+
+import pandas as pd
+
+from autogluon.core.utils.savers import save_pd, save_pkl
+from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame
+from autogluon.timeseries.version import __version__
+
+from frameworks.shared.callee import call_run, result, output_subdir
+from frameworks.shared.utils import Timer, zip_path
+
+log = logging.getLogger(__name__)
+
+
+def run(dataset, config):
+    log.info(f"\n**** AutoGluon TimeSeries [v{__version__}] ****\n")
+
+    timestamp_column = dataset.timestamp_column
+    id_column = dataset.id_column
+    prediction_length = dataset.forecast_range_in_steps
+
+    eval_metric = get_eval_metric(config)
+    label = dataset.target.name
+    time_limit = config.max_runtime_seconds
+
+    training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')}
+
+    train_data, test_data = load_data(train_path=dataset.train.path,
+                                      test_path=dataset.test.path,
+                                      timestamp_column=timestamp_column,
+                                      id_column=id_column)
+    test_data_past = test_data.copy().slice_by_timestep(slice(None, -prediction_length))
+
+    predictor_path = tempfile.mkdtemp() + os.sep
+    with Timer() as training:
+        predictor = TimeSeriesPredictor(
+            target=label,
+            path=predictor_path,
+            prediction_length=prediction_length,
+            eval_metric=eval_metric,
+        )
+        predictor.fit(
+            train_data=train_data,
+            time_limit=time_limit,
+            **training_params,
+        )
+
+    with Timer() as predict:
+        predictions = predictor.predict(test_data_past)
+    log.info(predictions)
+
+    predictions_only = predictions['mean'].values
+    test_data_future = test_data.copy().slice_by_timestep(slice(-prediction_length, None))
+    truth_only = test_data_future[label].values
+
+    log.info(predictions_only)
+    log.info(truth_only)
+
+    leaderboard = predictor.leaderboard(test_data, silent=True)
+
+    with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.width', 1000):
+        log.info(leaderboard)
+
+    num_models_trained = len(leaderboard)
+
+    save_artifacts(predictor=predictor, leaderboard=leaderboard, config=config)
+    shutil.rmtree(predictor.path, ignore_errors=True)
+
+    quantiles = predictions.drop(columns=['mean']).reset_index(drop=True)
+    period_length = 1  # TODO: the period length (seasonality) should ideally depend on the dataset; that would require passing it through the task config, so a period of 1 is used for now.
+
+    # We want the mean absolute error of the in-sample seasonal naive forecast for each series,
+    # 1/N * sum_{i=1}^{N} |x(t_i) - x(t_i - T)|, which MASE later uses as its scaling factor.
+    # 1. retrieve the item_id of every row
+    item_ids, inverse_item_ids = np.unique(test_data.reset_index()["item_id"].squeeze().to_numpy(), return_inverse=True)
+    # 2. collect each series' in-sample history (everything except the forecast horizon)
+    y_past = [test_data[label].squeeze().to_numpy()[inverse_item_ids == i][:-prediction_length] for i in range(len(item_ids))]
+    # 3. compute the mean absolute seasonal error of each series
+    y_past_period_error = [np.abs(y_past_item[period_length:] - y_past_item[:-period_length]).mean() for y_past_item in y_past]
+    # 4. repeat each series' period error once per forecast step, so there is one value per prediction row
+    y_past_period_error_rep = np.repeat(y_past_period_error, prediction_length)
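+    # Worked example (hypothetical numbers): for a single series [2, 4, 7, 11] with
+    # prediction_length=2 and period_length=1, the in-sample history is [2, 4], the period
+    # error is mean(|4 - 2|) = 2.0, and np.repeat yields [2.0, 2.0], i.e. one copy of the
+    # scaling factor for each of the two forecast rows of that series.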
+
+    optional_columns = quantiles
+    optional_columns = optional_columns.assign(y_past_period_error=y_past_period_error_rep)
+
+    return result(output_file=config.output_predictions_file,
+                  predictions=predictions_only,
+                  truth=truth_only,
+                  probabilities=None,
+                  probabilities_labels=None,
+                  target_is_encoded=False,
+                  models_count=num_models_trained,
+                  training_duration=training.duration,
+                  predict_duration=predict.duration,
+                  optional_columns=optional_columns)
+
+
+def load_data(train_path, test_path, timestamp_column, id_column):
+
+    train_df = pd.read_csv(
+        train_path,
+        parse_dates=[timestamp_column],
+    )
+
+    train_data = TimeSeriesDataFrame.from_data_frame(
+        train_df,
+        id_column=id_column,
+        timestamp_column=timestamp_column,
+    )
+
+    test_df = pd.read_csv(
+        test_path,
+        parse_dates=[timestamp_column],
+    )
+
+    test_data = TimeSeriesDataFrame.from_data_frame(
+        test_df,
+        id_column=id_column,
+        timestamp_column=timestamp_column,
+    )
+
+    return train_data, test_data
+
+
+def get_eval_metric(config):
+    # TODO: Support more metrics
+    metrics_mapping = dict(
+        mape="MAPE",
+        smape="sMAPE",
+        mase="MASE",
+        mse="MSE",
+        rmse="RMSE",
+    )
+
+    eval_metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else None
+    if eval_metric is None:
+        log.warning("Performance metric %s not supported.", config.metric)
+    return eval_metric
+
+
+def save_artifacts(predictor, leaderboard, config):
+    artifacts = config.framework_params.get('_save_artifacts', ['leaderboard'])
+    try:
+        if 'leaderboard' in artifacts:
+            leaderboard_dir = output_subdir("leaderboard", config)
+            save_pd.save(path=os.path.join(leaderboard_dir, "leaderboard.csv"), df=leaderboard)
+
+        if 'info' in artifacts:
+            ag_info = predictor.info()
+            info_dir = output_subdir("info", config)
+            save_pkl.save(path=os.path.join(info_dir, "info.pkl"), object=ag_info)
+
+        if 'models' in artifacts:
+            shutil.rmtree(os.path.join(predictor.path, "utils"), ignore_errors=True)
+            models_dir = output_subdir("models", config)
+            zip_path(predictor.path, os.path.join(models_dir, "models.zip"))
+    except Exception:
+        log.warning("Error when saving artifacts.", exc_info=True)
+
+
+if __name__ == '__main__':
+    call_run(run)
diff --git a/frameworks/AutoGluon/setup.sh b/frameworks/AutoGluon/setup.sh
index 6ef50ed8c..967c9d40c 100755
--- a/frameworks/AutoGluon/setup.sh
+++ b/frameworks/AutoGluon/setup.sh
@@ -1,4 +1,5 @@
 #!/usr/bin/env bash
+
 HERE=$(dirname "$0")
 VERSION=${1:-"stable"}
 REPO=${2:-"https://github.com/awslabs/autogluon.git"}
@@ -36,4 +37,10 @@ else
     PIP install -e tabular/[skex]
 fi
 
-PY -c "from autogluon.tabular.version import __version__; print(__version__)" >> "${HERE}/.setup/installed"
+if [[ ${MODULE} == "timeseries" ]]; then
+    PY -c "from autogluon.timeseries.version import __version__; print(__version__)" >> "${HERE}/.setup/installed"
+    # TODO: GPU version install
+    PIP install "mxnet<2.0"
+else
+    PY -c "from autogluon.tabular.version import __version__; print(__version__)" >> "${HERE}/.setup/installed"
+fi
diff --git a/frameworks/shared/callee.py b/frameworks/shared/callee.py
index 3bf70dd3c..c596e01c5 100644
--- a/frameworks/shared/callee.py
+++ b/frameworks/shared/callee.py
@@ -17,6 +17,7 @@ class FrameworkError(Exception):
 def result(output_file=None,
            predictions=None, truth=None,
           probabilities=None, probabilities_labels=None,
+           optional_columns=None,
            target_is_encoded=False,
           error_message=None,
           models_count=None,
@@ -69,7 +70,7 @@ def load_data(name, path, **_):
                       wait_retry_secs=10):
         result = run_fn(ds, config)
         res = dict(result)
-        for name in ['predictions', 'truth', 'probabilities']:
+        for name in ['predictions', 'truth', 'probabilities', 'optional_columns']:
             arr = result[name]
             if arr is not None:
                 path = os.path.join(config.result_dir, '.'.join([name, 'data']))
diff --git a/frameworks/shared/caller.py b/frameworks/shared/caller.py
index da8cea0e5..09654dc32 100644
--- a/frameworks/shared/caller.py
+++ b/frameworks/shared/caller.py
@@ -149,7 +149,7 @@ def run_in_venv(caller_file, script_file: str, *args,
         if res.error_message is not None:
             raise NoResultError(res.error_message)
 
-        for name in ['predictions', 'truth', 'probabilities']:
+        for name in ['predictions', 'truth', 'probabilities', 'optional_columns']:
             res[name] = deserialize_data(res[name], config=ser_config) if res[name] is not None else None
 
         if callable(process_results):
@@ -164,6 +164,7 @@ def run_in_venv(caller_file, script_file: str, *args,
                                   else dataset.test.y),
                            probabilities=res.probabilities,
                            probabilities_labels=res.probabilities_labels,
+                           optional_columns=res.optional_columns,
                            target_is_encoded=res.target_is_encoded)
 
         return dict(
diff --git a/resources/benchmarks/timeseries.yaml b/resources/benchmarks/timeseries.yaml
new file mode 100644
index 000000000..26af06497
--- /dev/null
+++ b/resources/benchmarks/timeseries.yaml
@@ -0,0 +1,13 @@
+---
+
+- name: covid
+  dataset:
+    train: https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/train.csv
+    test: https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/test.csv
+    target: ConfirmedCases
+    type: timeseries
+    forecast_range_in_steps: 19
+    id_column: name
+    timestamp_column: Date
+
+  folds: 1
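+
+# Field notes (mirroring the loader's validation in amlb/datasets/file.py):
+# `forecast_range_in_steps` should equal the per-series length difference between test and
+# train splits (19 here), and may exceed neither that difference nor the shortest series
+# length minus 1; `id_column` and `timestamp_column` default to `item_id` and `timestamp`
+# when omitted.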
diff --git a/resources/config.yaml b/resources/config.yaml
index ba3a9f930..0e237584e 100644
--- a/resources/config.yaml
+++ b/resources/config.yaml
@@ -54,6 +54,7 @@ benchmarks:  # configuration namespace for the benchmarks definitions.
     binary: ['auc', 'logloss', 'acc', 'balacc']  # available metrics: auc (AUC), acc (Accuracy), balacc (Balanced Accuracy), pr_auc (Precision Recall AUC), logloss (Log Loss), f1, f2, f05 (F-beta scores with beta=1, 2, or 0.5), max_pce, mean_pce (Max/Mean Per-Class Error).
     multiclass: ['logloss', 'acc', 'balacc']  # available metrics: same as for binary, except auc, replaced by auc_ovo (AUC One-vs-One), auc_ovr (AUC One-vs-Rest). AUC metrics and F-beta metrics are computed with weighted average.
     regression: ['rmse', 'r2', 'mae']  # available metrics: mae (Mean Absolute Error), mse (Mean Squared Error), msle (Mean Squared Logarithmic Error), rmse (Root Mean Square Error), rmsle (Root Mean Square Logarithmic Error), r2 (R^2).
+    timeseries: ['mase', 'mape', 'smape', 'rmse', 'mse', 'nrmse', 'wape', 'ncrps']
   defaults:  # the default constraints, usually overridden by a constraint.
     folds: 10  # the amount of fold-runs executed for each dataset.
     max_runtime_seconds: 3600  # default time allocated to the framework to train a model.
diff --git a/resources/frameworks.yaml b/resources/frameworks.yaml
index 9b0f14827..513c99586 100644
--- a/resources/frameworks.yaml
+++ b/resources/frameworks.yaml
@@ -86,9 +86,9 @@ autoxgboost:
 flaml:
   version: 'stable'
   description: |
-    FLAML is a lightweight Python library that finds accurate machine learning models
-    automatically, efficiently and economically. It frees users from selecting learners
-    and hyperparameters for each learner. It is fast and cheap.
+    FLAML is a lightweight Python library that finds accurate machine learning models
+    automatically, efficiently and economically. It frees users from selecting learners
+    and hyperparameters for each learner. It is fast and cheap.
   project: https://github.com/microsoft/FLAML
   refs: [https://arxiv.org/pdf/1911.04706.pdf]
@@ -139,12 +139,12 @@ mljarsupervised_compete:
   description: "MLJAR is using 'Compete' mode to provide the most accurate predictor"
   params:
     mode: Compete  # set mode for Compete, default mode is Explain
-
+
 MLNet:
   version: 'latest'
   description: |
     MLNET.CLI is a automated machine learning tool implemented by ml.net.
-
+
 MLPlan:
   version: 'stable'
   abstract: true
@@ -191,7 +191,17 @@ TPOT:
 #    population_size: 25
 #    verbosity: 2
 
+####################################
+### TimeSeries AutoML frameworks ###
+####################################
 
+AutoGluonTS:
+  extends: AutoGluon
+  version: "stable"
+  description: |
+    AutoGluon-TimeSeries
+  setup_env:
+    MODULE: timeseries
 
 #######################################
 ### Non AutoML reference frameworks ###
@@ -234,4 +244,3 @@ TunedRandomForest:
 #    _n_jobs: 1  # cf. RandomForest
 #  _tuning:
 #    n_estimators: 500
-
diff --git a/resources/frameworks_latest.yaml b/resources/frameworks_latest.yaml
index d2e8afff3..becdc4e3e 100644
--- a/resources/frameworks_latest.yaml
+++ b/resources/frameworks_latest.yaml
@@ -80,8 +80,17 @@ oboe:
 TPOT:
   version: 'latest'
 
+####################################
+### TimeSeries AutoML frameworks ###
+####################################
 
-
+AutoGluonTS:
+  extends: AutoGluon
+  version: "latest"
+  description: |
+    AutoGluon-TimeSeries
+  setup_env:
+    MODULE: timeseries
 
 #######################################
 ### Non AutoML reference frameworks ###
@@ -104,4 +113,3 @@ TunedRandomForest:
   version: 'latest'
   params:
     n_estimators: 2000
-