-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Update docs, use ipy format to write documents and add dataset module…
… with example synthetic datasets
- Loading branch information
1 parent
a1b3815
commit 927613b
Showing
37 changed files
with
2,690 additions
and
283 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,351 @@ | ||
# %% [markdown] | ||
# | ||
# # Composition of effects | ||
# | ||
# In previous examples, we saw how to create a simple custom effect, | ||
# which applies a simple transformation to the input data. However, the effect's | ||
# interface allows us to apply more complex transformations, such as using the output | ||
# of previous components as input for the current component, or creating a composite | ||
# effect that wraps an effect and applies some sort of transformation. This example | ||
# will cover these topics. | ||
# | ||
# ## Creating a custom effect | ||
# | ||
# The idea here is to create an effect that | ||
# 1. First, uses another effect and computes its output | ||
# 2. Scales the output of 1 by another effect and returns it | ||
# | ||
# One classic use-case for this would be using campaign or seasonality to scale | ||
# the effect of another input, that might be proportional to these effects. | ||
# Marketing investments are a good example of this. We will implement such a composite | ||
# effect in this section. | ||
# | ||
# ### Example dataset | ||
# | ||
# The dataset we use is synthetic, and the relation between the exogenous variable | ||
# and the target is known. However, let's pretend we don't know this relation, and | ||
# analyze the data to find some insights that motivate the creation of a custom | ||
# effect. | ||
|
||
|
||
# %% | ||
|
||
from matplotlib import pyplot as plt | ||
from sktime.split import temporal_train_test_split | ||
from sktime.utils.plotting import plot_series | ||
|
||
from prophetverse.datasets.synthetic import load_composite_effect_example | ||
|
||
# Load the synthetic example: `y` is the target, `X` the exogenous features.
y, X = load_composite_effect_example()

# Reserve 365 observations as the test set.
y_train, y_test, X_train, X_test = temporal_train_test_split(y, X, test_size=365)

# Show the first rows of the training target and of the training features.
for frame in (y_train, X_train):
    display(frame.head())

plot_series(y_train, y_test, title="Target series", labels=["Train", "Test"])

# Plot the investment feature over the whole period, without point markers.
plot_series(
    X["investment"],
    title="Features",
    labels=["investment"],
    markers=[None],
)
plt.show()
|
||
# %% [markdown] | ||
# | ||
# By plotting the years together, we can see that the target has a clear | ||
# yearly seasonality. | ||
|
||
# %% | ||
import matplotlib.dates as mdates | ||
|
||
fig, ax = plt.subplots()

# Draw one line per calendar year. Every timestamp is moved onto the year 2024
# so that the years overlap on a common January-December axis.
for year, yearly_values in y_train.groupby(y_train.index.year):
    timestamps = yearly_values.index.to_timestamp()
    aligned_index = timestamps.map(lambda stamp: stamp.replace(year=2024))
    ax.plot(aligned_index, yearly_values, alpha=0.7, label=year)

ax.legend()

# Tick labels show the abbreviated month name only.
month_formatter = mdates.DateFormatter("%b")
ax.xaxis.set_major_formatter(month_formatter)
ax.set_title("Target series (grouped by year)")
fig.show()
|
||
|
||
# %% [markdown] | ||
# In addition, we also see a lot of peaks and oscillations that seem to be related | ||
# to the investment variable and to the campaign variable. Below, we detrend and | ||
# deseasonalize the target to see if we can have an intuition of the relation between | ||
# the exogenous variables and these oscillations. We highlight dates where we have | ||
# a campaign dummy. | ||
# %% | ||
|
||
from sktime.forecasting.trend import PolynomialTrendForecaster | ||
from sktime.transformations.compose import TransformerPipeline | ||
from sktime.transformations.series.detrend import Deseasonalizer, Detrender | ||
|
||
# Remove a second-degree polynomial trend, then a seasonal component with
# period 365, so that what is left can be compared against the features.
detrend_step = ("detrend", Detrender(PolynomialTrendForecaster(degree=2)))
deseasonalize_step = ("deseasonalize", Deseasonalizer(sp=365))
transformer = TransformerPipeline(steps=[detrend_step, deseasonalize_step])

cleaned = transformer.fit_transform(y_train)

# Boolean mask selecting rows where the campaign column is positive.
campaign_mask = X_train["campaign"] > 0

fig, ax = plt.subplots()

# All training points first, then the campaign days on top with smaller markers.
ax.scatter(X_train["investment"], cleaned, label="Data point")
ax.scatter(
    X_train["investment"][campaign_mask],
    cleaned[campaign_mask],
    label="Day with campaign",
    s=4,
)

ax.set_xlabel("investment")
ax.set_ylabel("Detrended and deseasonalized target")
ax.set_title("Scatter plot of the investment vs target (wo trend and seasonality)")
ax.legend()
fig.show()
|
||
|
||
# %% [markdown] | ||
# As we can see, the target seems to be proportional to the investment, and dates | ||
# with campaign seem to have a higher slope than the dates without campaign. | ||
# This is a good motivation to try to capture the interaction between two variables. | ||
# We first fit a simple Prophetverse model to the data | ||
|
||
|
||
# %% | ||
from prophetverse.effects import LinearEffect | ||
from prophetverse.effects.fourier import LinearFourierSeasonality | ||
from prophetverse.effects.trend import PiecewiseLinearTrend | ||
from prophetverse.engine import MAPInferenceEngine | ||
from prophetverse.sktime import Prophetverse | ||
from prophetverse.utils.regex import exact, no_input_columns | ||
|
||
# Baseline model: trend + yearly seasonality + independent linear effects for
# the campaign and investment columns (no interaction between them).
baseline_trend = PiecewiseLinearTrend(
    changepoint_interval=500,
    changepoint_prior_scale=0.00001,
    changepoint_range=-500,
)

baseline_seasonality = LinearFourierSeasonality(
    freq="D",
    sp_list=[365.25],
    fourier_terms_list=[5],
    prior_scale=0.1,
    effect_mode="multiplicative",
)

# Each entry is (effect name, effect instance, column selector).
baseline_effects = [
    ("seasonality", baseline_seasonality, None),
    ("campaign", LinearEffect("additive"), exact("campaign")),
    ("investment", LinearEffect("additive"), exact("investment")),
]

model = Prophetverse(
    trend=baseline_trend,
    exogenous_effects=baseline_effects,
    default_effect=LinearEffect("additive"),
    inference_engine=MAPInferenceEngine(num_steps=50_000),
)

model.fit(y=y_train, X=X_train)

# Forecast over the held-out test index.
y_pred = model.predict(X=X_test, fh=y_test.index)
|
||
# %% | ||
|
||
# Train, test and baseline forecast on a single axis.
plot_series(
    y_train,
    y_test,
    y_pred,
    title="Target series",
    labels=["Train", "Test", "Pred"],
)
plt.show()
# %% | ||
from typing import Any, Dict, List | ||
|
||
import jax.numpy as jnp | ||
import pandas as pd | ||
|
||
from prophetverse.effects.base import BaseEffect | ||
|
||
|
||
class WrapEffectAndScaleByAnother(BaseEffect):
    """Evaluate a wrapped effect and multiply its output by another effect.

    Fitting, transforming and predicting are delegated to the wrapped
    ``effect``. The wrapped prediction is then multiplied by the predicted
    output stored under ``base_effect_name`` in ``predicted_effects``.

    Parameters
    ----------
    effect : BaseEffect
        The inner effect whose output is scaled.
    base_effect_name : str
        Key of the scaling effect inside the ``predicted_effects`` dictionary
        received by ``_predict``.
    """

    _tags = {"skip_predict_if_no_match": False, "supports_multivariate": False}

    def __init__(self, effect: BaseEffect, base_effect_name: str):
        self.effect = effect
        self.base_effect_name = base_effect_name

        super().__init__()

        # Copy the wrapped effect's tags onto this wrapper.
        self.clone_tags(effect)

    def _fit(self, y: pd.DataFrame, X: pd.DataFrame, scale: float = 1):
        """Fit the wrapped effect.

        All arguments are forwarded unchanged to ``self.effect.fit``.

        Parameters
        ----------
        y : pd.DataFrame
            The timeseries dataframe.

        X : pd.DataFrame
            The exogenous variables dataframe.

        scale : float, optional
            The scale of the timeseries, by default 1.

        Returns
        -------
        None
        """
        wrapped = self.effect
        wrapped.fit(X=X, y=y, scale=scale)

    def _transform(self, X: pd.DataFrame, fh: pd.Index) -> Dict[str, Any]:
        """Prepare the input data by delegating to the wrapped effect.

        Parameters
        ----------
        X : pd.DataFrame
            The input DataFrame containing the exogenous variables.

        fh : pd.Index
            The forecasting horizon as a pandas Index.

        Returns
        -------
        Dict[str, Any]
            Whatever ``self.effect.transform`` returns for ``X`` and ``fh``.
        """
        wrapped_data = self.effect.transform(X=X, fh=fh)
        return wrapped_data

    def _predict(
        self, data: Dict, predicted_effects: Dict[str, jnp.ndarray]
    ) -> jnp.ndarray:
        """Return the wrapped effect's prediction scaled by the base effect.

        Parameters
        ----------
        data : Any
            Data produced by ``_transform``.

        predicted_effects : Dict[str, jnp.ndarray]
            Predicted effects computed so far, keyed by effect name. It must
            contain an entry for ``self.base_effect_name``.

        Returns
        -------
        jnp.ndarray
            Product of the base effect and the wrapped effect's prediction.
        """
        wrapped_output = self.effect.predict(
            data=data,
            predicted_effects=predicted_effects,
        )

        # The scaling factor is the output of an effect that was already evaluated.
        scaling_effect = predicted_effects[self.base_effect_name]
        return scaling_effect * wrapped_output

    @property
    def input_feature_column_names(self) -> List[str]:
        """Return the input feature column names of the wrapped effect."""
        wrapped = self.effect
        return wrapped.input_feature_column_names
|
||
|
||
# %% | ||
|
||
|
||
import numpyro.distributions as dist | ||
|
||
from prophetverse.engine.optimizer import AdamOptimizer | ||
|
||
# Same trend and seasonality settings as the previous model, plus an
# interaction term between investment and campaign.
composite_trend = PiecewiseLinearTrend(
    changepoint_interval=500,
    changepoint_prior_scale=0.00001,
    changepoint_range=-500,
)

composite_seasonality = LinearFourierSeasonality(
    freq="D",
    sp_list=[365.25],
    fourier_terms_list=[5],
    prior_scale=0.1,
    effect_mode="multiplicative",
)

# Linear effect of investment, multiplied by the predicted "campaign" effect.
investment_campaign_effect = WrapEffectAndScaleByAnother(
    effect=LinearEffect("additive", prior=dist.HalfNormal(10)),
    base_effect_name="campaign",
)

# "investment_campaign" is listed after "campaign" because its prediction
# reads the "campaign" entry from the previously predicted effects.
composite_effects = [
    ("seasonality", composite_seasonality, no_input_columns),
    ("campaign", LinearEffect("additive"), exact("campaign")),
    ("investment", LinearEffect("additive"), exact("investment")),
    ("investment_campaign", investment_campaign_effect, exact("investment")),
]

model = Prophetverse(
    trend=composite_trend,
    exogenous_effects=composite_effects,
    inference_engine=MAPInferenceEngine(num_steps=50_000),
)

model.fit(y=y_train, X=X_train)
y_pred_composite = model.predict(X=X_test, fh=y_test.index)
|
||
# %% | ||
# Plot the forecast of the composite model that was just fitted. The original
# cell re-plotted `y_pred`, the stale forecast of the first model, which made
# it an exact duplicate of the earlier baseline plot.
plot_series(
    y_train,
    y_test,
    y_pred_composite,
    labels=["Train", "Test", "Pred composite"],
    title="Target series",
)
|
||
# %% | ||
|
||
# Compare both forecasts against the held-out test data.
plot_series(
    y_test,
    y_pred,
    y_pred_composite,
    title="Target series",
    labels=["Test", "Pred", "Pred composite"],
)
Oops, something went wrong.