[FEAT] Added TSMixerx model #921

Merged · 6 commits · Mar 11, 2024
11 changes: 10 additions & 1 deletion nbs/core.ipynb
@@ -86,7 +86,7 @@
" MLP, NHITS, NBEATS, NBEATSx, DLinear, NLinear,\n",
" TFT, VanillaTransformer,\n",
" Informer, Autoformer, FEDformer,\n",
" StemGNN, PatchTST, TimesNet, TimeLLM, TSMixer\n",
" StemGNN, PatchTST, TimesNet, TimeLLM, TSMixer, TSMixerx\n",
")"
]
},
@@ -225,6 +225,8 @@
" 'vanillatransformer': VanillaTransformer, 'autovanillatransformer': VanillaTransformer,\n",
" 'timellm': TimeLLM,\n",
" 'tsmixer': TSMixer, 'autotsmixer': TSMixer,\n",
" 'tsmixerx': TSMixerx, 'autotsmixerx': TSMixerx,\n",
"\n",
"}"
]
},
@@ -1330,6 +1332,7 @@
"\n",
"from neuralforecast.models.stemgnn import StemGNN\n",
"from neuralforecast.models.tsmixer import TSMixer\n",
"from neuralforecast.models.tsmixerx import TSMixerx\n",
"\n",
"from neuralforecast.losses.pytorch import MQLoss, MAE, MSE\n",
"from neuralforecast.utils import AirPassengersDF, AirPassengersPanel, AirPassengersStatic\n",
@@ -1775,6 +1778,8 @@
" PatchTST(h=12, input_size=24, max_steps=1),\n",
" TimesNet(h=12, input_size=24, max_steps=1),\n",
" StemGNN(h=12, input_size=24, n_series=2, max_steps=1, scaler_type='robust'),\n",
" TSMixer(h=12, input_size=24, n_series=2, max_steps=1, scaler_type='robust'),\n",
" TSMixerx(h=12, input_size=24, n_series=2, max_steps=1, scaler_type='robust'),\n",
" ],\n",
" freq='M'\n",
")\n",
@@ -1897,6 +1902,8 @@
" PatchTST(h=12, input_size=24, max_steps=1, scaler_type=None),\n",
" TimesNet(h=12, input_size=24, max_steps=1, scaler_type='standard'),\n",
" StemGNN(h=12, input_size=12, n_series=2, max_steps=1, scaler_type='robust'),\n",
" TSMixer(h=12, input_size=12, n_series=2, max_steps=1, scaler_type='robust'),\n",
" TSMixerx(h=12, input_size=12, n_series=2, max_steps=1, scaler_type='robust'),\n",
" DeepAR(h=12, input_size=24, max_steps=1,\n",
" stat_exog_list=['airline1'], futr_exog_list=['trend']),\n",
" ],\n",
@@ -1933,6 +1940,8 @@
" PatchTST(h=12, input_size=24, max_steps=1, scaler_type=None),\n",
" TimesNet(h=12, input_size=24, max_steps=1, scaler_type='standard'),\n",
" StemGNN(h=12, input_size=12, n_series=2, max_steps=1, scaler_type='robust'),\n",
" TSMixer(h=12, input_size=12, n_series=2, max_steps=1, scaler_type='robust'),\n",
" TSMixerx(h=12, input_size=12, n_series=2, max_steps=1, scaler_type='robust'),\n",
" DeepAR(h=12, input_size=24, max_steps=1,\n",
" stat_exog_list=['airline1'], futr_exog_list=['trend']),\n",
" ],\n",
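For orientation, a minimal end-to-end sketch of the newly registered model, based on the test instantiations above (the `hist_exog_list=['trend']` argument is an illustrative use of the exogenous support that distinguishes `TSMixerx` from `TSMixer`; `trend` is a column of `AirPassengersPanel`):

```python
from neuralforecast import NeuralForecast
from neuralforecast.models.tsmixerx import TSMixerx
from neuralforecast.utils import AirPassengersPanel

# Two airline series, 12-step horizon, 'trend' as a historical exogenous feature
model = TSMixerx(h=12, input_size=24, n_series=2,
                 hist_exog_list=['trend'],
                 max_steps=1, scaler_type='robust')
nf = NeuralForecast(models=[model], freq='M')
nf.fit(df=AirPassengersPanel)
forecasts = nf.predict()
```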
184 changes: 99 additions & 85 deletions nbs/examples/Multivariate_with_TSMixer.ipynb

Large diffs are not rendered by default.

Binary file added nbs/imgs_models/tsmixerx.png
103 changes: 103 additions & 0 deletions nbs/models.ipynb
@@ -63,6 +63,7 @@
"from neuralforecast.models.stemgnn import StemGNN\n",
"from neuralforecast.models.hint import HINT\n",
"from neuralforecast.models.tsmixer import TSMixer\n",
"from neuralforecast.models.tsmixerx import TSMixerx\n",
"\n",
"from neuralforecast.losses.pytorch import MAE, MQLoss, DistributionLoss"
]
@@ -2503,6 +2504,108 @@
"y_hat = model.predict(dataset=dataset)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "640fbbc2",
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"class AutoTSMixerx(BaseAuto):\n",
"\n",
" default_config = {\n",
" \"input_size_multiplier\": [1, 2, 3, 4],\n",
" \"h\": None,\n",
" \"n_series\": None,\n",
" \"n_block\": tune.choice([1, 2, 4, 6, 8]),\n",
" \"learning_rate\": tune.loguniform(1e-4, 1e-2),\n",
" \"ff_dim\": tune.choice([32, 64, 128]),\n",
" \"scaler_type\": tune.choice(['identity', 'robust', 'standard']),\n",
" \"max_steps\": tune.choice([500, 1000, 2000]),\n",
" \"batch_size\": tune.choice([32, 64, 128, 256]),\n",
" \"dropout\": tune.uniform(0.0, 0.99),\n",
" \"loss\": None,\n",
" \"random_seed\": tune.randint(1, 20),\n",
" }\n",
"\n",
" def __init__(self,\n",
" h,\n",
" n_series,\n",
" loss=MAE(),\n",
" valid_loss=None,\n",
" config=None, \n",
" search_alg=BasicVariantGenerator(random_state=1),\n",
" num_samples=10,\n",
" refit_with_val=False,\n",
" cpus=cpu_count(),\n",
" gpus=torch.cuda.device_count(),\n",
" verbose=False,\n",
" alias=None,\n",
" backend='ray',\n",
" callbacks=None):\n",
" \n",
" # Define search space, input/output sizes\n",
" if config is None:\n",
" config = self.default_config.copy() \n",
" config['input_size'] = tune.choice([h*x \\\n",
" for x in self.default_config[\"input_size_multiplier\"]])\n",
"\n",
" # Rolling windows with step_size=1 or step_size=h\n",
" # See `BaseWindows` and `BaseRNN`'s create_windows\n",
" config['step_size'] = tune.choice([1, h])\n",
" del config[\"input_size_multiplier\"]\n",
" if backend == 'optuna':\n",
" config = self._ray_config_to_optuna(config) \n",
"\n",
" # Always use n_series from parameters\n",
" config['n_series'] = n_series\n",
"\n",
" super(AutoTSMixerx, self).__init__(\n",
" cls_model=TSMixerx, \n",
" h=h,\n",
" loss=loss,\n",
" valid_loss=valid_loss,\n",
" config=config,\n",
" search_alg=search_alg,\n",
" num_samples=num_samples, \n",
" refit_with_val=refit_with_val,\n",
" cpus=cpus,\n",
" gpus=gpus,\n",
" verbose=verbose,\n",
" alias=alias,\n",
" backend=backend,\n",
" callbacks=callbacks, \n",
" )"
]
},
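In place of `default_config`, the `config` argument also accepts a user-defined search space, as the usage cell below does with plain values. A sketch with Ray Tune samplers (hypothetical values; the keys mirror those handled in `__init__` above, and `n_series` is always overwritten from the constructor argument):

```python
from ray import tune

# Illustrative custom search space for AutoTSMixerx
custom_config = {
    "input_size": tune.choice([12, 24]),
    "n_block": tune.choice([2, 4]),
    "ff_dim": tune.choice([32, 64]),
    "learning_rate": tune.loguniform(1e-4, 1e-2),
    "dropout": tune.uniform(0.0, 0.5),
    "scaler_type": "robust",
    "max_steps": 500,
    "batch_size": 32,
    "random_seed": 1,
}
model = AutoTSMixerx(h=12, n_series=2, config=custom_config, num_samples=5)
```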
{
"cell_type": "code",
"execution_count": null,
"id": "fe839643",
"metadata": {},
"outputs": [],
"source": [
"show_doc(AutoTSMixerx, title_level=3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "475c8c68",
"metadata": {},
"outputs": [],
"source": [
"%%capture\n",
"# Use your own config or AutoTSMixer.default_config\n",
"config = dict(max_steps=1, val_check_steps=1, input_size=12)\n",
"model = AutoTSMixerx(h=12, n_series=1, config=config, num_samples=1, cpus=1)\n",
"\n",
"# Fit and predict\n",
"model.fit(dataset=dataset)\n",
"y_hat = model.predict(dataset=dataset)"
]
},
{
"attachments": {},
"cell_type": "markdown",
109 changes: 72 additions & 37 deletions nbs/models.tsmixer.ipynb
@@ -25,7 +25,8 @@
"metadata": {},
"source": [
"# TSMixer\n",
"> Time-Series Mixer (`TSMixer`) is a MLP-based multivariate time-series forecasting model. `TSMixer` jointly learns temporal and cross-sectional representations of the time-series by repeatedly combining time- and feature information using stacked mixing layers. A mixing layer consists of a sequential time- and feature Multi Layer Perceptron (`MLP`).\n",
"> Time-Series Mixer (`TSMixer`) is a MLP-based multivariate time-series forecasting model. `TSMixer` jointly learns temporal and cross-sectional representations of the time-series by repeatedly combining time- and feature information using stacked mixing layers. A mixing layer consists of a sequential time- and feature Multi Layer Perceptron (`MLP`). Note: this model cannot handle exogenous inputs. If you want to use additional exogenous inputs, use `TSMixerx`.\n",
"\n",
"<br><br>**References**<br>-[Chen, Si-An, Chun-Liang Li, Nate Yoder, Sercan O. Arik, and Tomas Pfister (2023). \"TSMixer: An All-MLP Architecture for Time Series Forecasting.\"](http://arxiv.org/abs/2303.06053)<br>"
]
},
@@ -84,49 +85,67 @@
"outputs": [],
"source": [
"#| exporti\n",
"class MixingLayer(nn.Module):\n",
" def __init__(self, n_series, input_size, dropout, ff_dim):\n",
"class TemporalMixing(nn.Module):\n",
" def __init__(self, n_series, input_size, dropout):\n",
" super().__init__()\n",
" # Normalization layers\n",
" self.temporal_norm = nn.BatchNorm1d(num_features=n_series * input_size, eps=0.001, momentum=0.01)\n",
" self.feature_norm = nn.BatchNorm1d(num_features=n_series * input_size, eps=0.001, momentum=0.01)\n",
" \n",
" # Linear layers\n",
" self.temporal_lin = nn.Linear(input_size, input_size)\n",
" self.temporal_drop = nn.Dropout(dropout)\n",
"\n",
" def forward(self, input):\n",
" # Get shapes\n",
" batch_size = input.shape[0]\n",
" input_size = input.shape[1]\n",
" n_series = input.shape[2]\n",
"\n",
" # Temporal MLP\n",
" x = input.permute(0, 2, 1) # [B, L, N] -> [B, N, L]\n",
" x = x.reshape(batch_size, -1) # [B, N, L] -> [B, N * L]\n",
" x = self.temporal_norm(x) # [B, N * L] -> [B, N * L]\n",
" x = x.reshape(batch_size, n_series, input_size) # [B, N * L] -> [B, N, L]\n",
" x = F.relu(self.temporal_lin(x)) # [B, N, L] -> [B, N, L]\n",
" x = x.permute(0, 2, 1) # [B, N, L] -> [B, L, N]\n",
" x = self.temporal_drop(x) # [B, L, N] -> [B, L, N]\n",
"\n",
" return x + input \n",
"\n",
"class FeatureMixing(nn.Module):\n",
" def __init__(self, n_series, input_size, dropout, ff_dim):\n",
" super().__init__()\n",
" self.feature_norm = nn.BatchNorm1d(num_features=n_series * input_size, eps=0.001, momentum=0.01)\n",
" self.feature_lin_1 = nn.Linear(n_series, ff_dim)\n",
" self.feature_lin_2 = nn.Linear(ff_dim, n_series)\n",
"\n",
" # Drop out layers\n",
" self.temporal_drop = nn.Dropout(dropout)\n",
" self.feature_drop_1 = nn.Dropout(dropout)\n",
" self.feature_drop_2 = nn.Dropout(dropout)\n",
"\n",
" def forward(self, input):\n",
" # Get shapes\n",
" batch_size = input.shape[0]\n",
" n_series = input.shape[1]\n",
" input_size = input.shape[2]\n",
"\n",
" # Temporal MLP\n",
" x = input.reshape(batch_size, -1)\n",
" x = self.temporal_norm(x)\n",
" x = x.reshape(batch_size, input_size, n_series)\n",
" x = F.relu(self.temporal_lin(x))\n",
" x = x.permute(0, 2, 1)\n",
" x = self.temporal_drop(x)\n",
" res = x + input\n",
" input_size = input.shape[1]\n",
" n_series = input.shape[2]\n",
"\n",
" # Feature MLP\n",
" x = res.reshape(batch_size, -1)\n",
" x = self.feature_norm(x)\n",
" x = x.reshape(batch_size, input_size, n_series)\n",
" x = x.permute(0, 2, 1)\n",
" x = F.relu(self.feature_lin_1(x))\n",
" x = self.feature_drop_1(x)\n",
" x = self.feature_lin_2(x)\n",
" x = self.feature_drop_2(x)\n",
" x = input.reshape(batch_size, -1) # [B, L, N] -> [B, L * N]\n",
" x = self.feature_norm(x) # [B, L * N] -> [B, L * N]\n",
" x = x.reshape(batch_size, input_size, n_series) # [B, L * N] -> [B, L, N]\n",
" x = F.relu(self.feature_lin_1(x)) # [B, L, N] -> [B, L, ff_dim]\n",
" x = self.feature_drop_1(x) # [B, L, ff_dim] -> [B, L, ff_dim]\n",
" x = self.feature_lin_2(x) # [B, L, ff_dim] -> [B, L, N]\n",
" x = self.feature_drop_2(x) # [B, L, N] -> [B, L, N]\n",
"\n",
" return x + input \n",
"\n",
" return x + res"
"class MixingLayer(nn.Module):\n",
" def __init__(self, n_series, input_size, dropout, ff_dim):\n",
" super().__init__()\n",
" # Mixing layer consists of a temporal and feature mixer\n",
" self.temporal_mixer = TemporalMixing(n_series, input_size, dropout)\n",
" self.feature_mixer = FeatureMixing(n_series, input_size, dropout, ff_dim)\n",
"\n",
" def forward(self, input):\n",
" x = self.temporal_mixer(input)\n",
" x = self.feature_mixer(x)\n",
" return x"
]
},
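To make the refactor concrete, a quick shape check one could run against the blocks above (a sketch; assumes `TemporalMixing` and `FeatureMixing` are in scope, with a batch size large enough for `BatchNorm1d` to train on):

```python
import torch

B, L, N = 32, 24, 2                    # batch, input_size, n_series
x = torch.randn(B, L, N)               # multivariate input window [B, L, N]

temporal = TemporalMixing(n_series=N, input_size=L, dropout=0.1)
feature = FeatureMixing(n_series=N, input_size=L, dropout=0.1, ff_dim=64)

# MixingLayer composes the two; both are residual, so the shape is preserved
out = feature(temporal(x))
assert out.shape == (B, L, N)
```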
{
@@ -145,10 +164,11 @@
"source": [
"#| exporti\n",
"class ReversibleInstanceNorm1d(nn.Module):\n",
" def __init__(self, num_features, eps=1e-5):\n",
" def __init__(self, n_series, eps=1e-5):\n",
" super().__init__()\n",
" self.weight = nn.Parameter(torch.ones(num_features))\n",
" self.bias = nn.Parameter(torch.zeros(num_features))\n",
" self.weight = nn.Parameter(torch.ones((1, 1, n_series)))\n",
" self.bias = nn.Parameter(torch.zeros((1, 1, n_series)))\n",
"\n",
" self.eps = eps\n",
"\n",
" def forward(self, x):\n",
Expand All @@ -174,6 +194,13 @@
" return x"
]
},
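The forward and reverse bodies are collapsed in this diff. For reference, a standard reversible instance-normalization sketch consistent with the `(1, 1, n_series)` parameter shapes above (an assumption about the collapsed code, not the PR's exact implementation):

```python
import torch
import torch.nn as nn

class RevINSketch(nn.Module):
    def __init__(self, n_series, eps=1e-5):
        super().__init__()
        self.weight = nn.Parameter(torch.ones((1, 1, n_series)))
        self.bias = nn.Parameter(torch.zeros((1, 1, n_series)))
        self.eps = eps

    def forward(self, x):  # x: [B, L, N]
        # Normalize each series within the window; keep stats for reversal
        self.mean = x.mean(dim=1, keepdim=True).detach()
        self.stdev = torch.sqrt(x.var(dim=1, keepdim=True, unbiased=False) + self.eps).detach()
        return (x - self.mean) / self.stdev * self.weight + self.bias

    def reverse(self, x):
        # Undo the affine transform, then restore the original scale and level
        return (x - self.bias) / self.weight * self.stdev + self.mean
```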
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2. Model"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -267,10 +294,18 @@
" num_workers_loader=num_workers_loader,\n",
" drop_last_loader=drop_last_loader,\n",
" **trainer_kwargs)\n",
" # Asserts\n",
" if stat_exog_list is not None:\n",
" raise Exception(\"TSMixer does not support static exogenous variables. Use TSMixerx if you want to use static exogenous variables.\")\n",
" if futr_exog_list is not None:\n",
" raise Exception(\"TSMixer does not support future exogenous variables. Use TSMixerx if you want to use future exogenous variables.\")\n",
" if hist_exog_list is not None:\n",
" raise Exception(\"TSMixer does not support historical exogenous variables. Use TSMixerx if you want to use historical exogenous variables.\") \n",
"\n",
" # Reversible InstanceNormalization layer\n",
" self.revin = revin\n",
" if self.revin:\n",
" self.norm = ReversibleInstanceNorm1d(num_features = n_series)\n",
" self.norm = ReversibleInstanceNorm1d(n_series = n_series)\n",
"\n",
" # Mixing layers\n",
" mixing_layers = [MixingLayer(n_series=n_series, \n",
Expand Down Expand Up @@ -313,7 +348,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Usage Examples"
"## 3. Usage Examples"
]
},
{
@@ -349,9 +384,9 @@
" dropout=0,\n",
" revin=True,\n",
" scaler_type='standard',\n",
" max_steps=80,\n",
" max_steps=200,\n",
" early_stop_patience_steps=-1,\n",
" val_check_steps=10,\n",
" val_check_steps=5,\n",
" learning_rate=1e-3,\n",
" loss=MAE(),\n",
" valid_loss=MAE(),\n",