Skip to content

Commit

Permalink
Update docs, use ipy format to write documents and add dataset module…
Browse files Browse the repository at this point in the history
… with example synthetic datasets
  • Loading branch information
felipeangelimvieira committed Dec 3, 2024
1 parent a1b3815 commit 927613b
Show file tree
Hide file tree
Showing 37 changed files with 2,690 additions and 283 deletions.
351 changes: 351 additions & 0 deletions docs/howto/composite-exogenous-effects/index copy.ipy
Original file line number Diff line number Diff line change
@@ -0,0 +1,351 @@
# %% [markdown]
#
# # Composition of effects
#
# In previous examples, we saw how to create a simple custom effect,
# which applies a simple transformation to the input data. However, the effect's
# interface allows us to apply more complex transformations, such as using the output
# of previous components as input for the current component, or creating a composite
# effect that wraps an effect and applies some sort of transformation. This example
# will cover these topics.
#
# ## Creating a custom effect
#
# The idea here is to create an effect that
# 1. First, uses another effect and computes its output
# 2. Scales the output of 1 by another effect and returns it
#
# One classic use-case for this would be using campaign or seasonality to scale
# the effect of another input, that might be proportional to these effects.
# Marketing investments are a good example of this. We will implement such a composite
# effect in this section.
#
# ### Example dataset
#
# The dataset we use is synthetic, and the relation between the exogenous variable
# and the target is known. However, let's pretend we don't know this relation, and
# analyze the data to find some insights that motivate the creation of a custom
# effect.


# %%

from matplotlib import pyplot as plt
from sktime.split import temporal_train_test_split
from sktime.utils.plotting import plot_series

from prophetverse.datasets.synthetic import load_composite_effect_example

# Load the synthetic target (y) and the exogenous features (X).
y, X = load_composite_effect_example()

# Reserve 365 time points for the test split.
y_train, y_test, X_train, X_test = temporal_train_test_split(
    y, X, test_size=365
)

# Preview the first rows of the training target and features.
for frame in (y_train, X_train):
    display(frame.head())

plot_series(
    y_train,
    y_test,
    labels=["Train", "Test"],
    title="Target series",
)

# Plot the investment feature over the full (train + test) range.
investment_series = X["investment"]
plot_series(
    investment_series, markers=[None], labels=["investment"], title="Features"
)
plt.show()

# %% [markdown]
#
# By plotting the years together, we can see that the target has a clear
# yearly seasonality.

# %%
import matplotlib.dates as mdates

fig, ax = plt.subplots()
# Overlay each training year on a shared calendar axis so that a repeating
# within-year pattern becomes visible.
for year, g in y_train.groupby(y_train.index.year):
    # Re-anchor every observation to the same reference year so all groups
    # share one x-axis. 2024 is a leap year, so Feb 29 observations can be
    # re-anchored without raising.
    idx = g.index.to_timestamp().map(lambda t: t.replace(year=2024))
    ax.plot(idx, g, label=year, alpha=0.7)
ax.legend()
# Format by month name
ax.xaxis.set_major_formatter(mdates.DateFormatter("%b"))
ax.set(
    title="Target series (grouped by year)",
)
fig.show()


# %% [markdown]
# In addition, we also see a lot of peaks and oscillations that seem to be related
# to the investment variable and to the campaign variable. Below, we detrend and
# deseasonalize the target to see if we can have an intuition of the relation between
# the exogenous variables and these oscillations. We highlight dates where we have
# a campaign dummy.
# %%

from sktime.forecasting.trend import PolynomialTrendForecaster
from sktime.transformations.compose import TransformerPipeline
from sktime.transformations.series.detrend import Deseasonalizer, Detrender

# Pipeline steps: fit a degree-2 polynomial trend and remove it, then remove a
# seasonal component with a period of 365 steps.
detrend_step = ("detrend", Detrender(PolynomialTrendForecaster(degree=2)))
deseasonalize_step = ("deseasonalize", Deseasonalizer(sp=365))
transformer = TransformerPipeline(steps=[detrend_step, deseasonalize_step])

cleaned = transformer.fit_transform(y_train)

# Boolean mask of the training rows whose campaign value is positive.
campaign_mask = X_train["campaign"] > 0

train_investment = X_train["investment"]

fig, ax = plt.subplots()
ax.scatter(train_investment, cleaned, label="Data point")
# Draw the campaign rows on top with smaller markers so they stand out.
ax.scatter(
    train_investment[campaign_mask],
    cleaned[campaign_mask],
    s=4,
    label="Day with campaign",
)
ax.set_xlabel("investment")
ax.set_ylabel("Detrended and deseasonalized target")
ax.set_title(
    "Scatter plot of the investment vs target (wo trend and seasonality)"
)
ax.legend()
fig.show()


# %% [markdown]
# As we can see, the target seems to be proportional to the investment, and dates
# with a campaign seem to have a higher slope than the dates without a campaign.
# This is a good motivation to try to capture the interaction between two variables.
# We first fit a simple Prophetverse model to the data


# %%
from prophetverse.effects import LinearEffect
from prophetverse.effects.fourier import LinearFourierSeasonality
from prophetverse.effects.trend import PiecewiseLinearTrend
from prophetverse.engine import MAPInferenceEngine
from prophetverse.sktime import Prophetverse
from prophetverse.utils.regex import exact, no_input_columns

# Baseline model: a piecewise-linear trend, a multiplicative Fourier
# seasonality, and two independent additive linear effects (campaign and
# investment). No interaction between campaign and investment is modelled here.
model = Prophetverse(
    trend=PiecewiseLinearTrend(
        changepoint_interval=500,
        changepoint_prior_scale=0.00001,
        changepoint_range=-500,
    ),
    exogenous_effects=[
        (
            "seasonality",
            LinearFourierSeasonality(
                freq="D",
                sp_list=[365.25],
                fourier_terms_list=[5],
                prior_scale=0.1,
                effect_mode="multiplicative",
            ),
            # The seasonality effect is given no column of X. Use the explicit
            # selector imported in this cell, which is also what the composite
            # model later in this file uses for the same effect.
            no_input_columns,
        ),
        (
            "campaign",
            LinearEffect("additive"),
            exact("campaign"),
        ),
        (
            "investment",
            LinearEffect("additive"),
            exact("investment"),
        ),
    ],
    default_effect=LinearEffect("additive"),
    inference_engine=MAPInferenceEngine(
        num_steps=50_000,
    ),
)

model.fit(y=y_train, X=X_train)

# Forecast over the test index; kept in ``y_pred`` for later comparison.
y_pred = model.predict(X=X_test, fh=y_test.index)

# %%

# Show the baseline forecast next to the train and test series.
baseline_labels = ["Train", "Test", "Pred"]
plot_series(
    y_train,
    y_test,
    y_pred,
    labels=baseline_labels,
    title="Target series",
)
plt.show()
# %%
from typing import Any, Dict, List

import jax.numpy as jnp
import pandas as pd

from prophetverse.effects.base import BaseEffect


class WrapEffectAndScaleByAnother(BaseEffect):
    """Wrap an effect and scale its output by another effect.

    Fitting, data preparation and prediction are delegated to the wrapped
    effect. The wrapped effect's prediction is then multiplied by the output
    found under ``base_effect_name`` in the ``predicted_effects`` dictionary.

    Parameters
    ----------
    effect : BaseEffect
        The effect to wrap.

    base_effect_name : str
        Key used to look up the scaling effect in the ``predicted_effects``
        dictionary received by ``_predict``.
    """

    _tags = {"skip_predict_if_no_match": False, "supports_multivariate": False}

    def __init__(
        self,
        effect: BaseEffect,
        base_effect_name: str,
    ):

        self.effect = effect
        self.base_effect_name = base_effect_name

        super().__init__()

        # Copy the wrapped effect's tags onto this instance.
        self.clone_tags(effect)

    def _fit(self, y: pd.DataFrame, X: pd.DataFrame, scale: float = 1):
        """Fit the wrapped effect.

        This method is called during `fit()` of the forecasting model. It
        forwards its arguments unchanged to ``self.effect.fit``.

        Parameters
        ----------
        y : pd.DataFrame
            The timeseries dataframe

        X : pd.DataFrame
            The DataFrame to initialize the effect.

        scale : float, optional
            The scale of the timeseries. For multivariate timeseries, this is
            a dataframe. For univariate, it is a simple float.

        Returns
        -------
        None
        """
        self.effect.fit(X=X, y=y, scale=scale)

    def _transform(self, X: pd.DataFrame, fh: pd.Index) -> Dict[str, Any]:
        """Prepare input data to be passed to numpyro model.

        Returns whatever the wrapped effect's ``transform`` produces for the
        same inputs; no additional data is attached.

        Parameters
        ----------
        X : pd.DataFrame
            The input DataFrame containing the exogenous variables for the training
            time indexes, if passed during fit, or for the forecasting time indexes, if
            passed during predict.

        fh : pd.Index
            The forecasting horizon as a pandas Index.

        Returns
        -------
        Dict[str, Any]
            Data prepared by the wrapped effect.
        """
        return self.effect.transform(X=X, fh=fh)

    def _predict(
        self, data: Dict, predicted_effects: Dict[str, jnp.ndarray]
    ) -> jnp.ndarray:
        """Apply the wrapped effect and scale it by the base effect.

        Parameters
        ----------
        data : Dict
            Data obtained from the transform method.

        predicted_effects : Dict[str, jnp.ndarray]
            A dictionary containing the predicted effects. It must contain
            an entry for ``base_effect_name``.

        Returns
        -------
        jnp.ndarray
            The wrapped effect's prediction multiplied by the base effect.
            Expected to have shape (T, 1) for univariate timeseries.

        Raises
        ------
        KeyError
            If ``base_effect_name`` is not a key of ``predicted_effects``.
        """
        out = self.effect.predict(
            data=data, predicted_effects=predicted_effects
        )

        try:
            base_effect = predicted_effects[self.base_effect_name]
        except KeyError as err:
            # Re-raise with context: a bare key name gives no hint about
            # which effects are actually available.
            raise KeyError(
                f"Base effect {self.base_effect_name!r} not found in "
                f"predicted_effects (available: {list(predicted_effects)})."
            ) from err
        return base_effect * out

    @property
    def input_feature_column_names(self) -> List[str]:
        """Return the input feature column names of the wrapped effect."""
        return self.effect.input_feature_column_names


# %%


import numpyro.distributions as dist

from prophetverse.engine.optimizer import AdamOptimizer

# Build the components individually before assembling the model.
trend = PiecewiseLinearTrend(
    changepoint_interval=500,
    changepoint_prior_scale=0.00001,
    changepoint_range=-500,
)
seasonality = LinearFourierSeasonality(
    freq="D",
    sp_list=[365.25],
    fourier_terms_list=[5],
    prior_scale=0.1,
    effect_mode="multiplicative",
)
# Linear effect with a HalfNormal(10) prior whose output is multiplied by the
# output of the effect named "campaign".
investment_campaign = WrapEffectAndScaleByAnother(
    effect=LinearEffect("additive", prior=dist.HalfNormal(10)),
    base_effect_name="campaign",
)

model = Prophetverse(
    trend=trend,
    exogenous_effects=[
        ("seasonality", seasonality, no_input_columns),
        ("campaign", LinearEffect("additive"), exact("campaign")),
        ("investment", LinearEffect("additive"), exact("investment")),
        # Listed after "campaign", the effect it is scaled by.
        ("investment_campaign", investment_campaign, exact("investment")),
    ],
    inference_engine=MAPInferenceEngine(num_steps=50_000),
)

model.fit(y=y_train, X=X_train)
# Stored under a separate name so the baseline ``y_pred`` stays available.
y_pred_composite = model.predict(X=X_test, fh=y_test.index)

# %%
# NOTE(review): this plots ``y_pred`` (the earlier model's forecast), not
# ``y_pred_composite`` -- confirm that repeating the baseline plot is intended.
series_by_label = {"Train": y_train, "Test": y_test, "Pred": y_pred}
plot_series(
    *series_by_label.values(),
    labels=list(series_by_label),
    title="Target series",
)

# %%

# Compare both forecasts against the held-out test series.
comparison = {
    "Test": y_test,
    "Pred": y_pred,
    "Pred composite": y_pred_composite,
}
plot_series(
    *comparison.values(),
    labels=list(comparison),
    title="Target series",
)
Loading

0 comments on commit 927613b

Please sign in to comment.