diff --git a/docs/howto/composite-exogenous-effects/index copy.ipy b/docs/howto/composite-exogenous-effects/index copy.ipy new file mode 100644 index 0000000..31638dc --- /dev/null +++ b/docs/howto/composite-exogenous-effects/index copy.ipy @@ -0,0 +1,351 @@ +# %% [markdown] +# +# # Composition of effects +# +# In previous examples, we saw how to create a simple custom effect, +# which applies a simple transformation to the input data. However, the effect's +# interface allows us to apply more complex transformations, such as using the output +# of previous components as input for the current component, or creating a composite +# effect that wraps an effect and applies some sort of transformation. This example +# will cover these topics. +# +# ## Creating a custom effect +# +# The idea here is to create an effect that +# 1. First, uses another effect and computes its output +# 2. Scales the output of 1 by another effect and returns it +# +# One classic use-case for this would be using campaign or seasonality to scale +# the effect of another input, that might be proportional to these effects. +# Marketing investments are a good example of this. We will implement such a composite +# effect in this section. +# +# ### Example dataset +# +# The dataset we use is synthetic, and the relation between the exogenous variable +# and the target is known. However, let's pretend we don't know this relation, and +# analyze the data to find some insights that motivate the creation of a custom +# effect. 
+ + +# %% + +from matplotlib import pyplot as plt +from sktime.split import temporal_train_test_split +from sktime.utils.plotting import plot_series + +from prophetverse.datasets.synthetic import load_composite_effect_example + +y, X = load_composite_effect_example() + +y_train, y_test, X_train, X_test = temporal_train_test_split(y, X, test_size=365) + +display(y_train.head()) +display(X_train.head()) + +plot_series(y_train, y_test, labels=["Train", "Test"], title="Target series") + +plot_series( + X["investment"], + markers=[None], + labels=["investment"], + title="Features", +) +plt.show() + +# %% [markdown] +# +# By plotting the years together, we can see that the target has a clear +# yearly seasonality. + +# %% +import matplotlib.dates as mdates + +fig, ax = plt.subplots() +for year, g in y_train.groupby(y_train.index.year): + idx = g.index.to_timestamp().map(lambda t: t.replace(year=2024)) + ax.plot(idx, g, label=year, alpha=0.7) +ax.legend() +# Format by month name +ax.xaxis.set_major_formatter(mdates.DateFormatter("%b")) +ax.set( + title="Target series (grouped by year)", +) +fig.show() + + +# %% [markdown] +# In addition, we also see a lot of peaks and oscillations that seem to be related +# to the investment variable and to the campaign variable. Below, we detrend and +# deseasonalize the target to see if we can have an intuition of the relation between +# the exogenous variables and these oscillations. We highlight dates where we have +# a campaign dummy. 
+# %% + +from sktime.forecasting.trend import PolynomialTrendForecaster +from sktime.transformations.compose import TransformerPipeline +from sktime.transformations.series.detrend import Deseasonalizer, Detrender + +transformer = TransformerPipeline( + steps=[ + ("detrend", Detrender(PolynomialTrendForecaster(degree=2))), + ("deseasonalize", Deseasonalizer(sp=365)), + ] +) + +cleaned = transformer.fit_transform(y_train) + +campaign_mask = X_train["campaign"] > 0 + +fig, ax = plt.subplots() +ax.scatter(X_train["investment"], cleaned, label="Data point") +ax.scatter( + X_train["investment"][campaign_mask], + cleaned[campaign_mask], + s=4, + label="Day with campaign", +) +ax.set( + xlabel="investment", + ylabel="Detrended and deseasonalized target", + title="Scatter plot of the investment vs target (wo trend and seasonality)", +) +ax.legend() +fig.show() + + +# %% [markdown] +# As we can see, the target seems to be proportional to the investment, and dates +# with campaign seem to have a higher slope than the dates without campaign. +# This is a good motivation to try to capture the interaction between two variables. 
+# We first fit a simple Prophetverse model to the data + + +# %% +from prophetverse.effects import LinearEffect +from prophetverse.effects.fourier import LinearFourierSeasonality +from prophetverse.effects.trend import PiecewiseLinearTrend +from prophetverse.engine import MAPInferenceEngine +from prophetverse.sktime import Prophetverse +from prophetverse.utils.regex import exact, no_input_columns + +model = Prophetverse( + trend=PiecewiseLinearTrend( + changepoint_interval=500, + changepoint_prior_scale=0.00001, + changepoint_range=-500, + ), + exogenous_effects=[ + ( + "seasonality", + LinearFourierSeasonality( + freq="D", + sp_list=[365.25], + fourier_terms_list=[5], + prior_scale=0.1, + effect_mode="multiplicative", + ), + None, + ), + ( + "campaign", + LinearEffect("additive"), + exact("campaign"), + ), + ( + "investment", + LinearEffect("additive"), + exact("investment"), + ), + ], + default_effect=LinearEffect("additive"), + inference_engine=MAPInferenceEngine( + num_steps=50_000, + ), +) + +model.fit(y=y_train, X=X_train) + +y_pred = model.predict(X=X_test, fh=y_test.index) + +# %% + +plot_series(y_train, y_test, y_pred, labels=["Train", "Test", "Pred"], + title="Target series") +plt.show() +# %% +from typing import Any, Dict, List + +import jax.numpy as jnp +import pandas as pd + +from prophetverse.effects.base import BaseEffect + + +class WrapEffectAndScaleByAnother(BaseEffect): + """Wrap an effect and scale it by another effect. + + Parameters + ---------- + effect : BaseEffect + The effect to wrap. + + """ + + _tags = {"skip_predict_if_no_match": False, "supports_multivariate": False} + + def __init__( + self, + effect: BaseEffect, + base_effect_name: str, + ): + + self.effect = effect + self.base_effect_name = base_effect_name + + super().__init__() + + self.clone_tags(effect) + + def _fit(self, y: pd.DataFrame, X: pd.DataFrame, scale: float = 1): + """Initialize the effect. + + This method is called during `fit()` of the forecasting model. 
+ It receives the Exogenous variables DataFrame and should be used to initialize + any necessary parameters or data structures, such as detecting the columns that + match the regex pattern. + + This method MUST set _input_feature_columns_names to a list of column names + + Parameters + ---------- + y : pd.DataFrame + The timeseries dataframe + + X : pd.DataFrame + The DataFrame to initialize the effect. + + scale : float, optional + The scale of the timeseries. For multivariate timeseries, this is + a dataframe. For univariate, it is a simple float. + + Returns + ------- + None + """ + self.effect.fit(X=X, y=y, scale=scale) + + def _transform(self, X: pd.DataFrame, fh: pd.Index) -> Dict[str, Any]: + """Prepare input data to be passed to numpyro model. + + Returns a dictionary with the data for the lift and for the inner effect. + + Parameters + ---------- + X : pd.DataFrame + The input DataFrame containing the exogenous variables for the training + time indexes, if passed during fit, or for the forecasting time indexes, if + passed during predict. + + fh : pd.Index + The forecasting horizon as a pandas Index. + + Returns + ------- + Dict[str, Any] + Dictionary with data for the lift and for the inner effect + """ + return self.effect.transform(X=X, fh=fh) + + def _predict( + self, data: Dict, predicted_effects: Dict[str, jnp.ndarray] + ) -> jnp.ndarray: + """Apply and return the effect values. + + Parameters + ---------- + data : Any + Data obtained from the transformed method. + + predicted_effects : Dict[str, jnp.ndarray], optional + A dictionary containing the predicted effects, by default None. + + Returns + ------- + jnp.ndarray + An array with shape (T,1) for univariate timeseries. 
+ """ + out = self.effect.predict( + data=data, predicted_effects=predicted_effects + ) + + base_effect = predicted_effects[self.base_effect_name] + return base_effect * out + + @property + def input_feature_column_names(self) -> List[str]: + """Return the input feature columns names.""" + return self.effect.input_feature_column_names + + +# %% + + +import numpyro.distributions as dist + +from prophetverse.engine.optimizer import AdamOptimizer + +model = Prophetverse( + trend=PiecewiseLinearTrend( + changepoint_interval=500, + changepoint_prior_scale=0.00001, + changepoint_range=-500, + ), + exogenous_effects=[ + ( + "seasonality", + LinearFourierSeasonality( + freq="D", + sp_list=[365.25], + fourier_terms_list=[5], + prior_scale=0.1, + effect_mode="multiplicative", + ), + no_input_columns, + ), + ( + "campaign", + LinearEffect("additive"), + exact("campaign"), + ), + ( + "investment", + LinearEffect("additive"), + exact("investment"), + ), + ( + "investment_campaign", + WrapEffectAndScaleByAnother( + effect=LinearEffect("additive", prior=dist.HalfNormal(10)), + base_effect_name="campaign", + ), + exact("investment"), + ), + ], + inference_engine=MAPInferenceEngine( + num_steps=50_000, + ), +) + +model.fit(y=y_train, X=X_train) +y_pred_composite = model.predict(X=X_test, fh=y_test.index) + +# %% +plot_series( + y_train, y_test, y_pred, labels=["Train", "Test", "Pred"], title="Target series" +) + +# %% + +plot_series( + y_test, y_pred, y_pred_composite, labels=["Test", "Pred", "Pred composite"], title="Target series") diff --git a/docs/howto/composite-exogenous-effects/index.ipy b/docs/howto/composite-exogenous-effects/index.ipy index fa35d56..b4326a7 100644 --- a/docs/howto/composite-exogenous-effects/index.ipy +++ b/docs/howto/composite-exogenous-effects/index.ipy @@ -1,20 +1,159 @@ # %% [markdown] -# +# # # Composition of effects -# -# In previous examples, we saw how to create a custom effect, particularly de `LogEffect`, -# which scales the impact of a variable 
by a logarithm. # -# What if we want to use the effect of another variable as a multiplicative factor of the -# current effect? One classic example would be using holidays or seasonality to scale -# the effect of another input, that might be proportional to these effects. Probably, -# marketing investments are a good example of this. We will implement such a composite +# In previous examples, we saw how to create a simple custom effect, +# which applies a simple transformation to the input data. However, the effect's +# interface allows us to apply more complex transformations, such as using the output +# of previous components as input for the current component, or creating a composite +# effect that wraps an effect and applies some sort of transformation. This example +# will cover these topics. +# +# ## Creating a custom effect +# +# The idea here is to create an effect that uses another predicted component +# to scale the impact of an exogenous variable. +# +# One classic use-case for this would be using seasonality to scale +# the effect of investment, that might be proportional to it. +# Marketing investments are a good example of this. We will implement such a composite # effect in this section. # +# ### Example dataset +# +# The dataset we use is synthetic, and the relation between the exogenous variable +# and the target is known. However, let's pretend we don't know this relation, and +# analyze the data to find some insights that motivate the creation of a custom +# effect. The dataset has a target variable, which is a time series, and an exogenous +# variable, which is the investment made for each date. 
+ + +# %% + +import numpyro.distributions as dist +from matplotlib import pyplot as plt +from sktime.split import temporal_train_test_split +from sktime.utils.plotting import plot_series + +from prophetverse.datasets.synthetic import load_composite_effect_example + +y, X = load_composite_effect_example() + +y_train, y_test, X_train, X_test = temporal_train_test_split(y, X, test_size=365) + +display(y_train.head()) +display(X_train.head()) + +plot_series(y_train, y_test, labels=["Train", "Test"], title="Target series") + +plot_series( + X["investment"], + labels=["investment"], + title="Features", +) +plt.show() + +# %% [markdown] +# +# The timeseries has a yearly seasonality, and it seems that some oscillations are +# proportional to +# the investment. Below, we model the timeseries with a simple linear effect between +# the investment and the target, and a yearly seasonality based on fourier terms. +# Then, we will analyze the residuals to see if there is any pattern that we can +# capture with a custom effect. 
+ + +# %% +from prophetverse.effects import LinearEffect +from prophetverse.effects.fourier import LinearFourierSeasonality +from prophetverse.effects.trend import PiecewiseLinearTrend +from prophetverse.engine import MAPInferenceEngine +from prophetverse.engine.optimizer import AdamOptimizer +from prophetverse.sktime import Prophetverse +from prophetverse.utils.regex import exact, no_input_columns + +model = Prophetverse( + trend=PiecewiseLinearTrend( + changepoint_interval=500, + changepoint_prior_scale=0.00001, + changepoint_range=-500, + ), + exogenous_effects=[ + ( + "seasonality", + LinearFourierSeasonality( + freq="D", + sp_list=[365.25], + fourier_terms_list=[5], + prior_scale=1, + effect_mode="multiplicative", + ), + no_input_columns, + ), + ( + "investment", + LinearEffect("multiplicative", prior=dist.Normal(0, 1)), + exact("investment"), + ), + ], + inference_engine=MAPInferenceEngine( + optimizer=AdamOptimizer(1e-4), + num_steps=100_000, + ), +) + +model.fit(y=y_train, X=X_train) +model + +# %% [markdown] +# We plot the predictions on training set to see if the model performs well. +# %% +y_pred = model.predict(X=X_train, fh=y_train.index) +plot_series(y_train, y_pred, labels=["Train", "Pred"], + title="Target series") +plt.show() + +# %% [markdown] +# We can see that some peaks are not captured by the model. +# Our hypothesis to explain this phenomenon +# is that the investment has more impact on the target when it is +# done during the positive seasonality periods. To test this, we plot the residuals +# of the model against the investment, and color the points based on the seasonality +# component. We can see that slopes are different for positive and negative +# seasonality, which indicates that our hypothesis is possibly correct. 
+ # %% +components = model.predict_components(X=X_train, fh=y_train.index) + +residual = y_train["target"] - components["mean"] +fig, ax = plt.subplots() +ax.scatter( + X_train["investment"], + residual, + c=components["seasonality"] < 0, + cmap="Accent", + alpha=0.9 +) +# Create legend manually +colors = plt.cm.get_cmap("Accent").colors +ax.scatter([], [], color=colors[0], label="Positive seasonality") +ax.scatter([], [], color=colors[1], label="Negative seasonality") +ax.legend() +ax.set(xlabel="Investment", ylabel="Residual", title="Residuals vs Investment") +fig.show() + +# %% [markdown] +# ## Creating the composite effect +# To model this behaviour with Prophetverse, we will create a custom effect, that +# scales a new effect by the output of a previous component. +# The `_fit` and `_transform` methods call the inner effect's methods, and the +# predict method multiplies the inner effect's predictions by the seasonality, which +# is passed as `base_effect_name`. +# %% from typing import Any, Dict, List + import jax.numpy as jnp import pandas as pd @@ -43,11 +182,8 @@ class WrapEffectAndScaleByAnother(BaseEffect): self.base_effect_name = base_effect_name super().__init__() - - self.set_tags({"supports_multivariate": self.effect.supports_multivariate, - "skip_predict_if_no_match": self.effect.skip_predict_if_no_match - }) - + + self.clone_tags(effect) def _fit(self, y: pd.DataFrame, X: pd.DataFrame, scale: float = 1): """Initialize the effect. @@ -76,7 +212,7 @@ class WrapEffectAndScaleByAnother(BaseEffect): None """ self.effect.fit(X=X, y=y, scale=scale) - + def _transform(self, X: pd.DataFrame, fh: pd.Index) -> Dict[str, Any]: """Prepare input data to be passed to numpyro model. 
@@ -99,7 +235,6 @@ class WrapEffectAndScaleByAnother(BaseEffect): """ return self.effect.transform(X=X, fh=fh) - def _predict( self, data: Dict, predicted_effects: Dict[str, jnp.ndarray] ) -> jnp.ndarray: @@ -123,7 +258,7 @@ class WrapEffectAndScaleByAnother(BaseEffect): ) base_effect = predicted_effects[self.base_effect_name] - return base_effect * out + return base_effect * out @property def input_feature_column_names(self) -> List[str]: @@ -131,4 +266,70 @@ class WrapEffectAndScaleByAnother(BaseEffect): return self.effect.input_feature_column_names +# %% [markdown] +# ### Instantiating the model with the composite effect +# To create the model, we use the model instance we have, and the rshift operator to +# append the composite effect to the model. +# %% {"tags" : ["remove_output"]} +import numpyro.distributions as dist +from prophetverse.engine.optimizer import AdamOptimizer + +composite_effect_tuple = ( + + "investment_seasonality", # The effect ID, can be what you want + # Now the effect object + WrapEffectAndScaleByAnother( + # The effect to wrap + effect=LinearEffect("multiplicative", prior=dist.HalfNormal(1)), + # The previous effect to use as scale. It is important + # That this base_effect is passed before this effect in + # exogenous_effect parameter! + base_effect_name="seasonality", + ), + # The columns to pass to the effect. In this case, we only pass + # the investment column + exact("investment"), +) + +# We use the rshift operator to append an effect to the model +model_composite = model >> composite_effect_tuple + +model_composite.fit(y=y_train, X=X_train) +y_pred_composite = model_composite.predict(X=X_train, fh=y_train.index) + + +# %% [markdown] +# We can see below how these oscillations are captured by the model correctly +# when adding this joint effect. 
+ +plot_series( + y_train, y_pred_composite, labels=["Train", "Pred"], title="Target series" +) + +# %% [markdown] +# ### Evaluating the model on test set +# We compare to the previous model to see if the new effect improved the predictions on +# test set: +# %% + +y_pred_composite = model_composite.predict(X=X_test, fh=y_test.index) +y_pred = model.predict(X=X_test, fh=y_test.index) + +plot_series( + y_test, y_pred, y_pred_composite, + labels=["Test", "Pred", "Pred composite"], + title="Target series") + +plt.show() + + +# %% [markdown] +# ### Extracting the components +# The components can be extracted as usual, with the `predict_components` method. # %% +components = model_composite.predict_components(fh=y_test.index, X=X_test) + +fig, ax = plt.subplots(figsize=(10,5)) +components.plot.line(ax=ax) +ax.set_title("Predicted Components") +fig.show() \ No newline at end of file diff --git a/docs/howto/composite-exogenous-effects/index.md b/docs/howto/composite-exogenous-effects/index.md index c760b99..2a12254 100644 --- a/docs/howto/composite-exogenous-effects/index.md +++ b/docs/howto/composite-exogenous-effects/index.md @@ -1,225 +1,681 @@ -# Customizing exogenous effects +# Composition of effects -The exogenous effect API allows you to create custom exogenous components for the Prophetverse model. This is useful when we want to model specific patterns or relationships between the exogenous variables and the target variable. For example, enforcing a positive effect of a variable on the mean, or modeling a non-linear relationship. +In previous examples, we saw how to create a simple custom effect, +which applies a simple transformation to the input data. However, the effect's +interface allows us to apply more complex transformations, such as using the output +of previous components as input for the current component, or creating a composite +effect that wraps an effect and applies some sort of transformation. This example +will cover these topics. 
-If you have read the [theory section](https://prophetverse.com/the-theory/), by effect we mean each function $f_i$. You can implement those custom -functions by subclassing the `BaseEffect` class, and then use them in the `Prophetverse` model. Some effects are already implemented in the library, and you can find them in the `prophetverse.effects` module. +## Creating a custom effect -When creating a model instance, effects can be specified through `exogenous_effects` parameter of the `Prophetverse` model. This parameter is a list of tuples of three values: the name, the effect object, and a regex to filter -columns related to that effect. The regex is what defines $x_i$ in the previous section. The `prophetverse.utils.regex` module provides some useful functions to create regex patterns for common use cases, include `starts_with`, `ends_with`, `contains`, and `no_input_columns`. +The idea here is to create an effect that uses another predicted component +to scale the impact of an exogenous variable. + +One classic use-case for this would be using seasonality to scale +the effect of investment, that might be proportional to it. +Marketing investments are a good example of this. We will implement such a composite +effect in this section. + +### Example dataset + +The dataset we use is synthetic, and the relation between the exogenous variable +and the target is known. However, let's pretend we don't know this relation, and +analyze the data to find some insights that motivate the creation of a custom +effect. The dataset has a target variable, which is a time series, and an exogenous +variable, which is the investment made for each date. 
-For example: ```python -from prophetverse.sktime import Prophetverse -from prophetverse.effects import LinearFourierSeasonality, HillEffect -from prophetverse.utils.regex import starts_with, no_input_columns +import numpyro.distributions as dist +from matplotlib import pyplot as plt +from sktime.split import temporal_train_test_split +from sktime.utils.plotting import plot_series -exogenous_effects = [ - - ( - "seasonality", # The name of the effect - LinearFourierSeasonality( # The object - freq="D", - sp_list=[7, 365.25], - fourier_terms_list=[3, 10], - prior_scale=0.1, - effect_mode="multiplicative", - ), - no_input_columns, # The regex - ), - ( - "exog", - HillEffect(effect_mode="additive"), - starts_with("exog") - ) -] +from prophetverse.datasets.synthetic import load_composite_effect_example + +y, X = load_composite_effect_example() + +y_train, y_test, X_train, X_test = temporal_train_test_split(y, X, test_size=365) -model = Prophetverse(exogenous_effects=exogenous_effects) +display(y_train.head()) +display(X_train.head()) + +plot_series(y_train, y_test, labels=["Train", "Test"], title="Target series") + +plot_series( + X["investment"], + labels=["investment"], + title="Features", +) +plt.show() ```
Output: [1]
-The effects can be any object that implements the `BaseEffect` interface, and you can -create your own effects by subclassing `BaseEffect` and implementing `_fit`, `_transform` and -`_predict` methods. - -* `_fit` (optional): This method is called during fit() of the forecasting and should be used to initialize any necessary parameters or data structures. -It receives the exogenous variables dataframe X, the series `y`, and the scale factor `scale` that was used to scale the timeseries. ++ | target | +
---|---|
time | ++ |
2010-01-01 | +29.375431 | +
2010-01-02 | +30.268786 | +
2010-01-03 | +29.128912 | +
2010-01-04 | +31.014165 | +
2010-01-05 | +31.890928 | +
+ | investment | +
---|---|
time | ++ |
2010-01-01 | +0.198274 | +
2010-01-02 | +0.198274 | +
2010-01-03 | +0.198274 | +
2010-01-04 | +0.198274 | +
2010-01-05 | +0.207695 | +
Output: [2]
+ + + + +Prophetverse(exogenous_effects=[('seasonality', + LinearFourierSeasonality(effect_mode='multiplicative', + fourier_terms_list=[5], + freq='D', + prior_scale=1, + sp_list=[365.25]), + '^$'), + ('investment', + LinearEffect(prior=<numpyro.distributions.continuous.Normal object at 0x166605810>), + '^investment$')], + inference_engine=MAPInferenceEngine(num_steps=100000, + optimizer=AdamOptimizer(step_size=0.0001)), + trend=PiecewiseLinearTrend(changepoint_interval=500, + changepoint_prior_scale=1e-05, + changepoint_range=-500))Please rerun this cell to show the HTML repr or trust the notebook.
Prophetverse(exogenous_effects=[('seasonality', + LinearFourierSeasonality(effect_mode='multiplicative', + fourier_terms_list=[5], + freq='D', + prior_scale=1, + sp_list=[365.25]), + '^$'), + ('investment', + LinearEffect(prior=<numpyro.distributions.continuous.Normal object at 0x166605810>), + '^investment$')], + inference_engine=MAPInferenceEngine(num_steps=100000, + optimizer=AdamOptimizer(step_size=0.0001)), + trend=PiecewiseLinearTrend(changepoint_interval=500, + changepoint_prior_scale=1e-05, + changepoint_range=-500))
PiecewiseLinearTrend(changepoint_interval=500, changepoint_prior_scale=1e-05, + changepoint_range=-500)
PiecewiseLinearTrend(changepoint_interval=500, changepoint_prior_scale=1e-05, + changepoint_range=-500)
Output: [3]
-class LogEffect(BaseAdditiveOrMultiplicativeEffect): - """Represents a log effect as effect = scale * log(rate * data + 1). - Parameters - ---------- - scale_prior : Optional[Distribution], optional - The prior distribution for the scale parameter., by default Gamma - rate_prior : Optional[Distribution], optional - The prior distribution for the rate parameter., by default Gamma - effect_mode : effects_application, optional - Either "additive" or "multiplicative", by default "multiplicative" - """ + +![png](index_files/output_5_0.png) + - def __init__( - self, - effect_mode: EFFECT_APPLICATION_TYPE = "multiplicative", - scale_prior: Optional[Distribution] = None, - rate_prior: Optional[Distribution] = None, - ): - self.scale_prior = scale_prior or dist.Gamma(1, 1) - self.rate_prior = rate_prior or dist.Gamma(1, 1) - super().__init__(effect_mode=effect_mode) - def _predict( # type: ignore[override] - self, - data: jnp.ndarray, - predicted_effects: Optional[Dict[str, jnp.ndarray]] = None, - ) -> jnp.ndarray: - """Apply and return the effect values. +We can see that some peaks are not captured by the model. +Our hypothesis to explain this phenomenon +is that the investment has more impact on the target when it is +done during the positive seasonality periods. To test this, we plot the residuals +of the model against the investment, and color the points based on the seasonality +component. We can see that slopes are different for positive and negative +seasonality, which indicates that our hypothesis is possibly correct. - Parameters - ---------- - data : Any - Data obtained from the transformed method. - predicted_effects : Dict[str, jnp.ndarray], optional - A dictionary containing the predicted effects, by default None. - Returns - ------- - jnp.ndarray - An array with shape (T,1) for univariate timeseries, or (N, T, 1) for - multivariate timeseries, where T is the number of timepoints and N is the - number of series. 
- """ - scale = numpyro.sample("log_scale", self.scale_prior) - rate = numpyro.sample("log_rate", self.rate_prior) - effect = scale * jnp.log(jnp.clip(rate * data + 1, 1e-8, None)) - return effect +```python +components = model.predict_components(X=X_train, fh=y_train.index) +residual = y_train["target"] - components["mean"] -``` +fig, ax = plt.subplots() +ax.scatter( + X_train["investment"], + residual, + c=components["seasonality"] < 0, + cmap="Accent", + alpha=0.9 +) +# Create legend manually +colors = plt.cm.get_cmap("Accent").colors +ax.scatter([], [], color=colors[0], label="Positive seasonality") +ax.scatter([], [], color=colors[1], label="Negative seasonality") +ax.legend() +ax.set(xlabel="Investment", ylabel="Residual", title="Residuals vs Investment") +fig.show() +``` +Output: [4]
-The `_fit` and `_transform` methods are not implemented, and the default behaviour is -preserved (the columns of the dataframe that match the regex pattern are selected, and the result is converted to a `jnp.ndarray` with key "data"). + +![png](index_files/output_7_1.png) + -### Composition of effects -We can go further and create a custom effect that __adds a likelihood term to the model__. -The `LiftExperimentLikelihood` tackles the use case of having a lift experiment, and -wanting to incorporate it to guide the exogenous effect. The likelihood term is added -in the `_predict` method, and the observed lift preprocessed in `_transform` method. -The attribute `input_feature_column_names` is also overriden to return the input feature -columns of the inner effect. +## Creating the composite effect +To model this behaviour with Prophetverse, we will create a custom effect, that +scales a new effect by the output of a previous component. +The `_fit` and `_transform` methods call the inner effect's methods, and the +predict method multiplies the inner effect's predictions by the seasonality, which +is passed as `base_effect_name`. ```python -"""Composition of effects (Effects that wrap other effects).""" - from typing import Any, Dict, List import jax.numpy as jnp -import numpyro -import numpyro.distributions as dist import pandas as pd -from prophetverse.utils.frame_to_array import series_to_tensor_or_array - from prophetverse.effects.base import BaseEffect -__all__ = ["LiftExperimentLikelihood"] - - -class LiftExperimentLikelihood(BaseEffect): - """Wrap an effect and applies a normal likelihood to its output. - This class uses an input as a reference for the effect, and applies a normal - likelihood to the output of the effect. +class WrapEffectAndScaleByAnother(BaseEffect): + """Wrap an effect and scale it by another effect. Parameters ---------- effect : BaseEffect The effect to wrap. - lift_test_results : pd.DataFrame - A dataframe with the lift test results. 
Should be in sktime format, and must - have the same index as the input data. - prior_scale : float - The scale of the prior distribution for the likelihood. + """ _tags = {"skip_predict_if_no_match": False, "supports_multivariate": False} @@ -227,19 +683,17 @@ class LiftExperimentLikelihood(BaseEffect): def __init__( self, effect: BaseEffect, - lift_test_results: pd.DataFrame, - prior_scale: float, + base_effect_name: str, ): self.effect = effect - self.lift_test_results = lift_test_results - self.prior_scale = prior_scale - - assert self.prior_scale > 0, "prior_scale must be greater than 0" + self.base_effect_name = base_effect_name super().__init__() - def fit(self, y: pd.DataFrame, X: pd.DataFrame, scale: float = 1): + self.clone_tags(effect) + + def _fit(self, y: pd.DataFrame, X: pd.DataFrame, scale: float = 1): """Initialize the effect. This method is called during `fit()` of the forecasting model. @@ -266,8 +720,6 @@ class LiftExperimentLikelihood(BaseEffect): None """ self.effect.fit(X=X, y=y, scale=scale) - self.timeseries_scale = scale - super().fit(X=X, y=y, scale=scale) def _transform(self, X: pd.DataFrame, fh: pd.Index) -> Dict[str, Any]: """Prepare input data to be passed to numpyro model. @@ -289,15 +741,7 @@ class LiftExperimentLikelihood(BaseEffect): Dict[str, Any] Dictionary with data for the lift and for the inner effect """ - data_dict = {} - data_dict["inner_effect_data"] = self.effect._transform(X, fh=fh) - - X_lift = self.lift_test_results.reindex(fh, fill_value=jnp.nan) - lift_array = series_to_tensor_or_array(X_lift) - data_dict["observed_lift"] = lift_array / self.timeseries_scale - data_dict["obs_mask"] = ~jnp.isnan(data_dict["observed_lift"]) - - return data_dict + return self.effect.transform(X=X, fh=fh) def _predict( self, data: Dict, predicted_effects: Dict[str, jnp.ndarray] @@ -317,27 +761,114 @@ class LiftExperimentLikelihood(BaseEffect): jnp.ndarray An array with shape (T,1) for univariate timeseries. 
""" - observed_lift = data["observed_lift"] - obs_mask = data["obs_mask"] - - x = self.effect.predict( - data=data["inner_effect_data"], predicted_effects=predicted_effects - ) - - numpyro.sample( - "lift_experiment", - dist.Normal(x, self.prior_scale), - obs=observed_lift, - obs_mask=obs_mask, + out = self.effect.predict( + data=data, predicted_effects=predicted_effects ) - return x + base_effect = predicted_effects[self.base_effect_name] + return base_effect * out @property def input_feature_column_names(self) -> List[str]: """Return the input feature columns names.""" - return self.effect._input_feature_column_names + return self.effect.input_feature_column_names + + + +``` + +### Instantiating the model with the composite effect +To create the model, we use the model instance we have, and the rshift operator to +append the composite effect to the model. + + + +```python +import numpyro.distributions as dist +from prophetverse.engine.optimizer import AdamOptimizer + +composite_effect_tuple = ( + + "investment_seasonality", # The effect ID, can be what you want + # Now the effect object + WrapEffectAndScaleByAnother( + # The effect to wrap + effect=LinearEffect("multiplicative", prior=dist.HalfNormal(1)), + # The previous effect to use as scale. It is important + # That this base_effect is passed before this effect in + # exogenous_effect parameter! + base_effect_name="seasonality", + ), + # The columns to pass to the effect. In this case, we only pass + # the investment column + exact("investment"), +) + +# We use the rshift operator to append an effect to the model +model_composite = model >> composite_effect_tuple + +model_composite.fit(y=y_train, X=X_train) +y_pred_composite = model_composite.predict(X=X_train, fh=y_train.index) + + + +``` + +We can see below how these oscilations are captured by the model correctly +when adding this joint effect. 
+ +plot_series( + y_train, y_pred_composite, labels=["Train", "Pred"], title="Target series" +) + + + +### Evaluating the model on test set +We compare to the previous model to see if the new effect improved the predictions on +test set: + + + +```python + +y_pred_composite = model_composite.predict(X=X_test, fh=y_test.index) +y_pred = model.predict(X=X_test, fh=y_test.index) + +plot_series( + y_test, y_pred, y_pred_composite, + labels=["Test", "Pred", "Pred composite"], + title="Target series") + +plt.show() ``` +Output: [7]
+ + + +![png](index_files/output_14_0.png) + + + +### Extracting the components +The components can be extracted as usual, with the `predict_components` method. + + + +```python +components = model_composite.predict_components(fh=y_test.index, X=X_test) + +fig, ax = plt.subplots(figsize=(10,5)) +components.plot.line(ax=ax) +ax.set_title("Predicted Components") +fig.show() +``` +Output: [8]
+ + + +![png](index_files/output_16_1.png) + + diff --git a/docs/howto/composite-exogenous-effects/index_files/output_11_100.png b/docs/howto/composite-exogenous-effects/index_files/output_11_100.png new file mode 100644 index 0000000..697f82b Binary files /dev/null and b/docs/howto/composite-exogenous-effects/index_files/output_11_100.png differ diff --git a/docs/howto/composite-exogenous-effects/index_files/output_11_104.png b/docs/howto/composite-exogenous-effects/index_files/output_11_104.png new file mode 100644 index 0000000..697f82b Binary files /dev/null and b/docs/howto/composite-exogenous-effects/index_files/output_11_104.png differ diff --git a/docs/howto/composite-exogenous-effects/index_files/output_13_1.png b/docs/howto/composite-exogenous-effects/index_files/output_13_1.png new file mode 100644 index 0000000..95b28ba Binary files /dev/null and b/docs/howto/composite-exogenous-effects/index_files/output_13_1.png differ diff --git a/docs/howto/composite-exogenous-effects/index_files/output_14_0.png b/docs/howto/composite-exogenous-effects/index_files/output_14_0.png new file mode 100644 index 0000000..95b28ba Binary files /dev/null and b/docs/howto/composite-exogenous-effects/index_files/output_14_0.png differ diff --git a/docs/howto/composite-exogenous-effects/index_files/output_14_1.png b/docs/howto/composite-exogenous-effects/index_files/output_14_1.png new file mode 100644 index 0000000..95b28ba Binary files /dev/null and b/docs/howto/composite-exogenous-effects/index_files/output_14_1.png differ diff --git a/docs/howto/composite-exogenous-effects/index_files/output_15_1.png b/docs/howto/composite-exogenous-effects/index_files/output_15_1.png new file mode 100644 index 0000000..505e25f Binary files /dev/null and b/docs/howto/composite-exogenous-effects/index_files/output_15_1.png differ diff --git a/docs/howto/composite-exogenous-effects/index_files/output_16_1.png b/docs/howto/composite-exogenous-effects/index_files/output_16_1.png new file mode 
100644 index 0000000..505e25f Binary files /dev/null and b/docs/howto/composite-exogenous-effects/index_files/output_16_1.png differ diff --git a/docs/howto/composite-exogenous-effects/index_files/output_1_3.png b/docs/howto/composite-exogenous-effects/index_files/output_1_3.png new file mode 100644 index 0000000..f101a66 Binary files /dev/null and b/docs/howto/composite-exogenous-effects/index_files/output_1_3.png differ diff --git a/docs/howto/composite-exogenous-effects/index_files/output_1_4.png b/docs/howto/composite-exogenous-effects/index_files/output_1_4.png new file mode 100644 index 0000000..091064b Binary files /dev/null and b/docs/howto/composite-exogenous-effects/index_files/output_1_4.png differ diff --git a/docs/howto/composite-exogenous-effects/index_files/output_3_1.png b/docs/howto/composite-exogenous-effects/index_files/output_3_1.png new file mode 100644 index 0000000..309c6d7 Binary files /dev/null and b/docs/howto/composite-exogenous-effects/index_files/output_3_1.png differ diff --git a/docs/howto/composite-exogenous-effects/index_files/output_5_0.png b/docs/howto/composite-exogenous-effects/index_files/output_5_0.png new file mode 100644 index 0000000..337c9f7 Binary files /dev/null and b/docs/howto/composite-exogenous-effects/index_files/output_5_0.png differ diff --git a/docs/howto/composite-exogenous-effects/index_files/output_7_0.png b/docs/howto/composite-exogenous-effects/index_files/output_7_0.png new file mode 100644 index 0000000..56cc8d4 Binary files /dev/null and b/docs/howto/composite-exogenous-effects/index_files/output_7_0.png differ diff --git a/docs/howto/composite-exogenous-effects/index_files/output_7_1.png b/docs/howto/composite-exogenous-effects/index_files/output_7_1.png new file mode 100644 index 0000000..c192a1a Binary files /dev/null and b/docs/howto/composite-exogenous-effects/index_files/output_7_1.png differ diff --git a/docs/howto/composite-exogenous-effects/index_files/output_9_1.png 
b/docs/howto/composite-exogenous-effects/index_files/output_9_1.png new file mode 100644 index 0000000..c192a1a Binary files /dev/null and b/docs/howto/composite-exogenous-effects/index_files/output_9_1.png differ diff --git a/docs/howto/custom-effects/index.ipy b/docs/howto/custom-effects/index.ipy index f717964..cd05232 100644 --- a/docs/howto/custom-effects/index.ipy +++ b/docs/howto/custom-effects/index.ipy @@ -1,27 +1,43 @@ # %% [markdown] # # Customizing exogenous effects +# This section explains how to create custom exogenous effects in Prophetverse. +# We will start by explaining what is an exogenous effect, and then we will show +# a practical example where we create an effect that uses a squared function to +# model the relationship between the exogenous variable and the target variable. # -# The exogenous effect API allows you to create custom exogenous components for the Prophetverse model. This is useful when we want to model specific patterns or relationships between the exogenous variables and the target variable. For example, enforcing a positive effect of a variable on the mean, or modeling a non-linear relationship. +# ## The effects API # -# If you have read the [theory section](https://prophetverse.com/the-theory/), +# The exogenous effect API allows you to create custom exogenous components for the +# Prophetverse model. This is useful when we want to model specific patterns or +# relationships between the exogenous variables and the target variable. For example, +# enforcing a positive effect of a variable on the mean, or modeling a non-linear +# relationship. +# +# If you have read the [theory section](https://prophetverse.com/the-theory/), # by effect we mean each function $f_i$. You can implement those custom # functions by subclassing the `BaseEffect` class, and then use them in the -# `Prophetverse` model. Some effects are already implemented in the library, +# `Prophetverse` model. 
Some effects are already implemented in the library, # and you can find them in the `prophetverse.effects` module. # -# When creating a model instance, effects can be specified through `exogenous_effects` -# parameter of the `Prophetverse` model. This parameter is a list of tuples of three +# When creating a model instance, effects can be specified through `exogenous_effects` +# parameter of the `Prophetverse` model. This parameter is a list of tuples of three # values: the name, the effect object, and a regex to filter -# columns related to that effect. The regex is what defines $x_i$ in the previous section. The `prophetverse.utils.regex` module provides some useful functions to create regex patterns for common use cases, include `starts_with`, `ends_with`, `contains`, and `no_input_columns`. +# columns related to that effect. The regex is what defines $x_i$ in the previous section. +# The `prophetverse.utils.regex` module provides some useful functions to create +# regex patterns for common use cases, include `starts_with`, `ends_with`, `contains`, +# and `no_input_columns`. # -# For example: +# Consider the example below, where we create a model with a linear seasonality effect +# and a custom effect that uses the feature `channel1_investment` as input and transforms +# it with a [hill curve](https://en.wikipedia.org/wiki/Hill_equation_(biochemistry)), +# which is a common curve for capturing diminishing returns. 
-# %% -from prophetverse.sktime import Prophetverse -from prophetverse.effects import LinearFourierSeasonality, HillEffect -from prophetverse.utils.regex import starts_with, no_input_columns, exact +# %% {"tags" : ["remove_output"]} +from prophetverse.effects import HillEffect, LinearFourierSeasonality +from prophetverse.sktime import Prophetverse +from prophetverse.utils.regex import exact, no_input_columns, starts_with exogenous_effects = [ ( @@ -48,9 +64,15 @@ model = Prophetverse(exogenous_effects=exogenous_effects) # %% [markdown] # +# Creating such models in Prophetverse is like creating buildings from lego blocks. +# You define how your model should work, and then you can leverage the whole interface to +# carry out the forecasting and inference tasks. +# +# ## Creating a custom effect +# # The effects can be any object that implements the `BaseEffect` interface, and you can -# create your own effects by subclassing `BaseEffect` and implementing `_fit`, `_transform` and -# `_predict` methods. +# create your own effects by subclassing `BaseEffect` and implementing `_fit`, +# `_transform` and `_predict` methods. # # * `_fit` (optional): This method is called during fit() of the forecasting and should # be used to initialize any necessary parameters or data structures. @@ -58,33 +80,33 @@ model = Prophetverse(exogenous_effects=exogenous_effects) # `scale` that was used to scale the timeseries. # # * `_transform` (optional): This method receives the exogenous variables dataframe, -# and should return an object containing the data needed for the effect. This object -# will be passed to the predict method as `data`. By default the columns of the +# and should return an object containing the data needed for the effect. This object +# will be passed to the predict method as `data`. By default the columns of the # dataframe that match the regex pattern are selected, and the result is converted to # a `jnp.ndarray`.
# -# * `_predict` (mandatory): This method receives the output of `_transform` and all +# * `_predict` (mandatory): This method receives the output of `_transform` and all # previously computed effects. It should return the effect values as a `jnp.ndarray` # # In many cases, the `_fit` and `_transform` steps are not needed to be implemented, # since the default behaviour may be the desired one. In the example below, we implement -# a really simple `LogEffect` class, which leverages the default behaviour of the +# a really simple `SquaredEffect` class, which leverages the default behaviour of the # `BaseEffect` class. # -# ## Example -# -# ### Log Effect -# -# The `BaseAdditiveOrMultiplicativeEffect` provides an init argument `effect_mode` that -# allows you to specify if the effect is additive or multiplicative. Let's take as an -# example the `LogEffect`: -# +# ### Squared Effect class # +# The `SquaredEffect` class receives two +# hyperparameters: the prior distribution for the scale parameter, and the prior +# distribution for the offset parameter. If no prior is provided, it uses a +# `Gamma(1, 1)` for the scale and a `Normal(0, 1)` for the offset. Note that here +# we already see an interesting feature of Prophetverse: by adopting a Gamma Prior, +# we force the effect to be positive. Any other prior with positive support would +# work as well. If no such constraint is needed, we can use a `Normal(0, 1)` prior or +# any other distribution with support in the real line. # %% -# prophetverse/effects/log.py from typing import Dict, Optional @@ -93,30 +115,27 @@ import numpyro from numpyro import distributions as dist from numpyro.distributions import Distribution -from prophetverse.effects.base import ( - BaseEffect, -) +from prophetverse.effects.base import BaseEffect + -class LogEffect(BaseEffect): - """Represents a log effect as effect = scale * log(rate * data + 1). 
+class SquaredEffect(BaseEffect): + """Represents a squared effect as effect = scale * (data - offset)^2. Parameters ---------- scale_prior : Optional[Distribution], optional The prior distribution for the scale parameter., by default Gamma - rate_prior : Optional[Distribution], optional - The prior distribution for the rate parameter., by default Gamma - effect_mode : effects_application, optional - Either "additive" or "multiplicative", by default "multiplicative" + offset_prior : Optional[Distribution], optional + The prior distribution for the offset parameter, by default Normal(0, 1) """ def __init__( self, scale_prior: Optional[Distribution] = None, - rate_prior: Optional[Distribution] = None, + offset_prior : Optional[Distribution] = None, ): self.scale_prior = scale_prior or dist.Gamma(1, 1) - self.rate_prior = rate_prior or dist.Gamma(1, 1) + self.offset_prior = offset_prior or dist.Normal(0, 1) super().__init__() def _predict( # type: ignore[override] @@ -142,8 +161,8 @@ class LogEffect(BaseEffect): number of series. """ scale = numpyro.sample("log_scale", self.scale_prior) - rate = numpyro.sample("log_rate", self.rate_prior) - effect = scale * jnp.log(jnp.clip(rate * data + 1, 1e-8, None)) + offset = numpyro.sample("offset", self.offset_prior) + effect = scale * (data - offset) ** 2 return effect @@ -151,7 +170,115 @@ class LogEffect(BaseEffect): # # # The `_fit` and `_transform` methods are not implemented, and the default behaviour is -# preserved (the columns of the dataframe that match the regex pattern are selected, and the result is converted to a `jnp.ndarray` with key "data"). +# preserved (the columns of the dataframe that match the regex pattern are selected, +# and the result is converted to a `jnp.ndarray` with key "data"). +# +# ## Practical example +# +# The example below is, of course, a toy example, but I hope it illustrates the +# process of creating a custom effect.
+# We load a synthetic dataset with a squared relationship between the exogenous +# variable and the target variable, and then we fit a model with the `SquaredEffect`. +# The true relationship is 2 * (x - 5) ** 2, and we will see if the model is able to +# recover it. +# +# ### Loading the series + +# %% +import matplotlib.pyplot as plt +from sktime.split import temporal_train_test_split +from sktime.utils.plotting import plot_series + +from prophetverse.datasets import load_synthetic_squared_exogenous + +y, X = load_synthetic_squared_exogenous() +y_train, y_test, X_train, X_test = temporal_train_test_split( + y, + X, + test_size=0.2, +) + +display(y.head()) +display(X.head()) + + +fig, ax = plot_series( + y_train, y_test, labels=["Train", "Test"], title="Target variable" +) +fig.show() +fig, ax = plot_series( + X_train, X_test, labels=["Train", "Test"], title="Exogenous variable" +) +fig.show() + +# %% [markdown] + +# #### Creating the model + +# %% + +from prophetverse.effects.trend import PiecewiseLinearTrend +from prophetverse.engine import MAPInferenceEngine +from prophetverse.engine.optimizer import AdamOptimizer, BFGSOptimizer +from prophetverse.sktime import Prophetverse +from prophetverse.utils.regex import exact + +model = ( + Prophetverse() + >> PiecewiseLinearTrend( + changepoint_interval=100, + changepoint_prior_scale=.1, + changepoint_range=-100, + + ) + >> MAPInferenceEngine(num_steps=50_000) +) >> ( + "exog_effect", + SquaredEffect( + scale_prior=dist.Normal(0, 10), + offset_prior=dist.Normal(0, 10), + ), + exact("exogenous"), +) +model +# %% [markdown] +# To fit and plot, we use always the same interface, from `sktime` library. 
+# %% +model.fit(y=y_train, X=X_train) +y_pred = model.predict(fh=y_test.index, X=X) +y_pred.head() +# %% +plot_series(y, y_pred, labels=["True", "Predicted"], title="True vs Predicted") +plt.show() -# %% \ No newline at end of file +# %% [markdown] +# #### Recovering the predicted effect and components +# +# This library adds extra methods to the sktime interface, such as `predict_components`, +# which behaves similarly to `predict`, but returns the components of the forecast as +# components of the output. +# +# The name of the effect in the output dataframe is equal to the one we have passed +# as first item in the tuple when creating the model. In this case, the name is +# "exog_effect". +# %% + +components = model.predict_components(fh=y.index, X=X) +components.head() + +# %% [markdown] +# Now, let's compare it with the true effect. We will plot the true effect and the +# predicted effect in the same plot. +# %% +fig, ax = plt.subplots() +ax.scatter(X["exogenous"], 2 * (X["exogenous"] - 5) ** 2, + color="black", label="True effect") +ax.scatter(X["exogenous"], components["exog_effect"], + marker="x", color="red", + s=10, label="Predicted effect") +ax.set(xlabel="Exogenous variable", + ylabel="Effect", + title="True effect vs Predicted effect") +ax.legend() +fig.show() diff --git a/docs/howto/custom-effects/index.md b/docs/howto/custom-effects/index.md index ffe5afa..fd0e811 100644 --- a/docs/howto/custom-effects/index.md +++ b/docs/howto/custom-effects/index.md @@ -1,28 +1,44 @@ # Customizing exogenous effects +This section explains how to create custom exogenous effects in Prophetverse. +We will start by explaining what is an exogenous effect, and then we will show +a practical example where we create an effect that uses a squared function to +model the relationship between the exogenous variable and the target variable. -The exogenous effect API allows you to create custom exogenous components for the Prophetverse model. 
This is useful when we want to model specific patterns or relationships between the exogenous variables and the target variable. For example, enforcing a positive effect of a variable on the mean, or modeling a non-linear relationship. +## The effects API -If you have read the [theory section](https://prophetverse.com/the-theory/), +The exogenous effect API allows you to create custom exogenous components for the +Prophetverse model. This is useful when we want to model specific patterns or +relationships between the exogenous variables and the target variable. For example, +enforcing a positive effect of a variable on the mean, or modeling a non-linear +relationship. + +If you have read the [theory section](https://prophetverse.com/the-theory/), by effect we mean each function $f_i$. You can implement those custom functions by subclassing the `BaseEffect` class, and then use them in the -`Prophetverse` model. Some effects are already implemented in the library, +`Prophetverse` model. Some effects are already implemented in the library, and you can find them in the `prophetverse.effects` module. -When creating a model instance, effects can be specified through `exogenous_effects` -parameter of the `Prophetverse` model. This parameter is a list of tuples of three +When creating a model instance, effects can be specified through `exogenous_effects` +parameter of the `Prophetverse` model. This parameter is a list of tuples of three values: the name, the effect object, and a regex to filter -columns related to that effect. The regex is what defines $x_i$ in the previous section. The `prophetverse.utils.regex` module provides some useful functions to create regex patterns for common use cases, include `starts_with`, `ends_with`, `contains`, and `no_input_columns`. +columns related to that effect. The regex is what defines $x_i$ in the previous section. 
+The `prophetverse.utils.regex` module provides some useful functions to create +regex patterns for common use cases, include `starts_with`, `ends_with`, `contains`, +and `no_input_columns`. + +Consider the example below, where we create a model with a linear seasonality effect +and a custom effect that uses the feature `channel1_investment` as input and transforms +it with a [hill curve](https://en.wikipedia.org/wiki/Hill_equation_(biochemistry)), +which is a common curve for capturing diminishing returns. -For example: ```python +from prophetverse.effects import HillEffect, LinearFourierSeasonality from prophetverse.sktime import Prophetverse -from prophetverse.effects import LinearFourierSeasonality, HillEffect -from prophetverse.utils.regex import starts_with, no_input_columns, exact - +from prophetverse.utils.regex import exact, no_input_columns, starts_with exogenous_effects = [ ( @@ -49,12 +65,17 @@ model = Prophetverse(exogenous_effects=exogenous_effects) ``` -Output: [1]
+Creating such models in Prophetverse is like creating buildings from lego blocks. +You define how your model should work, and then you can leverage the whole interface to +carry out the forecasting and inference tasks. + +## Creating a custom effect + The effects can be any object that implements the `BaseEffect` interface, and you can -create your own effects by subclassing `BaseEffect` and implementing `_fit`, `_transform` and -`_predict` methods. +create your own effects by subclassing `BaseEffect` and implementing `_fit`, +`_transform` and `_predict` methods. * `_fit` (optional): This method is called during fit() of the forecasting and should be used to initialize any necessary parameters or data structures. @@ -62,28 +83,29 @@ It receives the exogenous variables dataframe X, the series `y`, and the scale f `scale` that was used to scale the timeseries. * `_transform` (optional): This method receives the exogenous variables dataframe, -and should return an object containing the data needed for the effect. This object -will be passed to the predict method as `data`. By default the columns of the +and should return an object containing the data needed for the effect. This object +will be passed to the predict method as `data`. By default the columns of the dataframe that match the regex pattern are selected, and the result is converted to a `jnp.ndarray`. -* `_predict` (mandatory): This method receives the output of `_transform` and all +* `_predict` (mandatory): This method receives the output of `_transform` and all previously computed effects. It should return the effect values as a `jnp.ndarray` In many cases, the `_fit` and `_transform` steps are not needed to be implemented, since the default behaviour may be the desired one. In the example below, we implement -a really simple `LogEffect` class, which leverages the default behaviour of the +a really simple `SquaredEffect` class, which leverages the default behaviour of the `BaseEffect` class.
-## Example - -### Log Effect - -The `BaseAdditiveOrMultiplicativeEffect` provides an init argument `effect_mode` that -allows you to specify if the effect is additive or multiplicative. Let's take as an -example the `LogEffect`: - +### Squared Effect class +The `SquaredEffect` class receives two +hyperparameters: the prior distribution for the scale parameter, and the prior +distribution for the offset parameter. If no prior is provided, it uses a +`Gamma(1, 1)` for the scale and a `Normal(0, 1)` for the offset. Note that here +we already see an interesting feature of Prophetverse: by adopting a Gamma Prior, +we force the effect to be positive. Any other prior with positive support would +work as well. If no such constraint is needed, we can use a `Normal(0, 1)` prior or +any other distribution with support in the real line. @@ -91,7 +113,6 @@ example the `LogEffect`: ```python -# prophetverse/effects/log.py from typing import Dict, Optional @@ -100,30 +121,27 @@ import numpyro from numpyro import distributions as dist from numpyro.distributions import Distribution -from prophetverse.effects.base import ( - BaseEffect, -) +from prophetverse.effects.base import BaseEffect + -class LogEffect(BaseEffect): - """Represents a log effect as effect = scale * log(rate * data + 1). +class SquaredEffect(BaseEffect): + """Represents a squared effect as effect = scale * (data - offset)^2. 
Parameters ---------- scale_prior : Optional[Distribution], optional The prior distribution for the scale parameter., by default Gamma - rate_prior : Optional[Distribution], optional - The prior distribution for the rate parameter., by default Gamma - effect_mode : effects_application, optional - Either "additive" or "multiplicative", by default "multiplicative" + offset_prior : Optional[Distribution], optional + The prior distribution for the offset parameter, by default Normal(0, 1) """ def __init__( self, scale_prior: Optional[Distribution] = None, - rate_prior: Optional[Distribution] = None, + offset_prior : Optional[Distribution] = None, ): self.scale_prior = scale_prior or dist.Gamma(1, 1) - self.rate_prior = rate_prior or dist.Gamma(1, 1) + self.offset_prior = offset_prior or dist.Normal(0, 1) super().__init__() def _predict( # type: ignore[override] @@ -149,8 +167,8 @@ class LogEffect(BaseEffect): number of series. """ scale = numpyro.sample("log_scale", self.scale_prior) - rate = numpyro.sample("log_rate", self.rate_prior) - effect = scale * jnp.log(jnp.clip(rate * data + 1, 1e-8, None)) + offset = numpyro.sample("offset", self.offset_prior) + effect = scale * (data - offset) ** 2 return effect @@ -160,7 +178,739 @@ class LogEffect(BaseEffect): The `_fit` and `_transform` methods are not implemented, and the default behaviour is -preserved (the columns of the dataframe that match the regex pattern are selected, and the result is converted to a `jnp.ndarray` with key "data"). +preserved (the columns of the dataframe that match the regex pattern are selected, +and the result is converted to a `jnp.ndarray` with key "data"). + +## Practical example + +The example below is, of course, a toy example, but I hope it illustrates the +process of creating a custom effect. +We load a synthetic dataset with a squared relationship between the exogenous +variable and the target variable, and then we fit a model with the `SquaredEffect`.
+The true relationship is 2 * (x - 5) ** 2, and we will see if the model is able to +recover it. + +### Loading the series + + + + +```python +import matplotlib.pyplot as plt +from sktime.split import temporal_train_test_split +from sktime.utils.plotting import plot_series + +from prophetverse.datasets import load_synthetic_squared_exogenous + +y, X = load_synthetic_squared_exogenous() +y_train, y_test, X_train, X_test = temporal_train_test_split( + y, + X, + test_size=0.2, +) + +display(y.head()) +display(X.head()) + + +fig, ax = plot_series( + y_train, y_test, labels=["Train", "Test"], title="Target variable" +) +fig.show() +fig, ax = plot_series( + X_train, X_test, labels=["Train", "Test"], title="Exogenous variable" +) +fig.show() + + +``` +Output: [3]
+ + ++ | target | +
---|---|
time | ++ |
2010-01-01 | +-0.511580 | +
2010-01-02 | +41.634559 | +
2010-01-03 | +11.708982 | +
2010-01-04 | +3.135654 | +
2010-01-05 | +15.558547 | +
+ | exogenous | +
---|---|
time | ++ |
2010-01-01 | +4.370861 | +
2010-01-02 | +9.556429 | +
2010-01-03 | +7.587945 | +
2010-01-04 | +6.387926 | +
2010-01-05 | +2.404168 | +
Output: [4]
+ + + + +Prophetverse(exogenous_effects=[('exog_effect', + SquaredEffect(offset_prior=<numpyro.distributions.continuous.Normal object at 0x163dd1910>, + scale_prior=<numpyro.distributions.continuous.Normal object at 0x163d95ad0>), + '^exogenous$')], + inference_engine=MAPInferenceEngine(num_steps=50000), + trend=PiecewiseLinearTrend(changepoint_interval=100, + changepoint_prior_scale=0.1, + changepoint_range=-100))Please rerun this cell to show the HTML repr or trust the notebook.
Prophetverse(exogenous_effects=[('exog_effect', + SquaredEffect(offset_prior=<numpyro.distributions.continuous.Normal object at 0x163dd1910>, + scale_prior=<numpyro.distributions.continuous.Normal object at 0x163d95ad0>), + '^exogenous$')], + inference_engine=MAPInferenceEngine(num_steps=50000), + trend=PiecewiseLinearTrend(changepoint_interval=100, + changepoint_prior_scale=0.1, + changepoint_range=-100))
PiecewiseLinearTrend(changepoint_interval=100, changepoint_prior_scale=0.1, + changepoint_range=-100)
PiecewiseLinearTrend(changepoint_interval=100, changepoint_prior_scale=0.1, + changepoint_range=-100)
Output: [5]
+ + + + ++ | target | +
---|---|
2011-07-15 | +21.782337 | +
2011-07-16 | +19.701607 | +
2011-07-17 | +26.434742 | +
2011-07-18 | +41.059937 | +
2011-07-19 | +12.419967 | +
Output: [6]
+ + + +![png](index_files/output_10_0.png) + + + +#### Recovering the predicted effect and components + +This library adds extra methods to the sktime interface, such as `predict_components`, +which behaves similarly to `predict`, but returns the components of the forecast as +components of the output. + +The name of the effect in the output dataframe is equal to the one we have passed +as first item in the tuple when creating the model. In this case, the name is +"exog_effect". + + + +```python + +components = model.predict_components(fh=y.index, X=X) +components.head() + + +``` +Output: [7]
+ + + + ++ | exog_effect | +mean | +obs | +trend | +
---|---|---|---|---|
2010-01-01 | +0.814534 | +0.975764 | +0.930191 | +0.161231 | +
2010-01-02 | +41.405857 | +41.578205 | +41.576435 | +0.172360 | +
2010-01-03 | +13.318528 | +13.502026 | +13.603917 | +0.183490 | +
2010-01-04 | +3.808372 | +4.002995 | +4.021019 | +0.194620 | +
2010-01-05 | +13.581171 | +13.786910 | +13.910775 | +0.205750 | +
Output: [8]
+ +![png](index_files/output_14_1.png) + diff --git a/docs/howto/custom-effects/index_files/output_10_0.png b/docs/howto/custom-effects/index_files/output_10_0.png new file mode 100644 index 0000000..9a513e2 Binary files /dev/null and b/docs/howto/custom-effects/index_files/output_10_0.png differ diff --git a/docs/howto/custom-effects/index_files/output_10_1.png b/docs/howto/custom-effects/index_files/output_10_1.png new file mode 100644 index 0000000..9a513e2 Binary files /dev/null and b/docs/howto/custom-effects/index_files/output_10_1.png differ diff --git a/docs/howto/custom-effects/index_files/output_11_1.png b/docs/howto/custom-effects/index_files/output_11_1.png new file mode 100644 index 0000000..9a513e2 Binary files /dev/null and b/docs/howto/custom-effects/index_files/output_11_1.png differ diff --git a/docs/howto/custom-effects/index_files/output_14_1.png b/docs/howto/custom-effects/index_files/output_14_1.png new file mode 100644 index 0000000..671c708 Binary files /dev/null and b/docs/howto/custom-effects/index_files/output_14_1.png differ diff --git a/docs/howto/custom-effects/index_files/output_15_1.png b/docs/howto/custom-effects/index_files/output_15_1.png new file mode 100644 index 0000000..671c708 Binary files /dev/null and b/docs/howto/custom-effects/index_files/output_15_1.png differ diff --git a/docs/howto/custom-effects/index_files/output_5_3.png b/docs/howto/custom-effects/index_files/output_5_3.png new file mode 100644 index 0000000..a30a97a Binary files /dev/null and b/docs/howto/custom-effects/index_files/output_5_3.png differ diff --git a/docs/howto/custom-effects/index_files/output_5_4.png b/docs/howto/custom-effects/index_files/output_5_4.png new file mode 100644 index 0000000..1b95112 Binary files /dev/null and b/docs/howto/custom-effects/index_files/output_5_4.png differ diff --git a/docs/howto/custom-effects/index_files/output_6_1.png b/docs/howto/custom-effects/index_files/output_6_1.png new file mode 100644 index 0000000..a30a97a 
Binary files /dev/null and b/docs/howto/custom-effects/index_files/output_6_1.png differ diff --git a/docs/howto/custom-effects/index_files/output_6_2.png b/docs/howto/custom-effects/index_files/output_6_2.png new file mode 100644 index 0000000..1b95112 Binary files /dev/null and b/docs/howto/custom-effects/index_files/output_6_2.png differ diff --git a/docs/mmm/lifttest.ipy b/docs/mmm/lifttest.ipy new file mode 100644 index 0000000..9be5a0d --- /dev/null +++ b/docs/mmm/lifttest.ipy @@ -0,0 +1,237 @@ +# %% [markdown] +# # Univariate timeseries and exogenous effects + +# %% +# Disable warnings +import warnings + +warnings.simplefilter(action="ignore") +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +import matplotlib.pyplot as plt +from sktime.transformations.series.fourier import FourierFeatures +from sktime.forecasting.compose import ForecastingPipeline +from numpyro import distributions as dist +import numpyro + +numpyro.enable_x64() + +# %% [markdown] +# # Import dataset +# +# We import a dataset from Prophet's original repository. We then put it into an sktime-friendly format, where the index is a `pd.PeriodIndex` and the columns are the time series.
class HillEffect(BaseAdditiveOrMultiplicativeEffect):
    """
    Saturating Hill-curve effect of an exogenous input.

    The effect is ``max_effect / (1 + (data / half_max) ** -slope)``, so it
    equals ``max_effect / 2`` when ``data == half_max``.

    Parameters
    ----------
    half_max_prior : numpyro distribution, optional
        Prior for ``half_max``, the input value at which half of the maximum
        effect is reached. Defaults to ``dist.Gamma(2, 1)``.
    slope_prior : numpyro distribution, optional
        Prior for the slope exponent. Defaults to ``dist.HalfNormal(2)``.
    max_effect_prior : numpyro distribution, optional
        Prior for ``max_effect``, the value the curve saturates at.
        Defaults to ``dist.Gamma(2, 1)``.
    min_slope : float, default=0
        Constant offset added to the sampled slope.
    effect_mode : str, default="multiplicative"
        Either "additive" or "multiplicative". It is only stored here;
        presumably the base class consumes it -- confirm against
        ``BaseAdditiveOrMultiplicativeEffect``.
    """

    def __init__(
        self,
        half_max_prior=None,
        slope_prior=None,
        max_effect_prior=None,
        min_slope=0,
        effect_mode="multiplicative",
    ):

        # Distributions are created per instance instead of being used as
        # default argument values.
        if half_max_prior is None:
            half_max_prior = dist.Gamma(2, 1)
        if slope_prior is None:
            slope_prior = dist.HalfNormal(2)
        if max_effect_prior is None:
            max_effect_prior = dist.Gamma(2, 1)

        self.half_max_prior = half_max_prior
        self.slope_prior = slope_prior
        self.min_slope = min_slope
        self.max_effect_prior = max_effect_prior
        self.effect_mode = effect_mode
        super().__init__()

    def _predict(self, data, previous_effects=None):
        """
        Sample the Hill parameters and evaluate the saturation curve.

        Parameters
        ----------
        data : array-like
            Values of the exogenous input the curve is applied to.
        previous_effects : optional
            Accepted but not used by this effect.

        Returns
        -------
        array-like
            The Hill effect, clipped to the interval ``[0, max_effect]``.
        """

        half_max = numpyro.sample("half_max", self.half_max_prior)
        # The offset is applied after sampling, so the "slope" site holds the
        # raw sample and the exponent used below is ``sample + min_slope``.
        slope = numpyro.sample("slope", self.slope_prior) + self.min_slope
        max_effect = numpyro.sample("max_effect", self.max_effect_prior)

        effect = max_effect * (1 / (1 + (data / half_max) ** -slope))
        effect = jnp.clip(effect, 0, max_effect)
        return effect
X=X_train) + +# %% +sites = model.predict_all_sites(fh=X.index, X=X) + +fig, ax = plt.subplots(figsize=(4, 4)) + +ax.scatter(sites["exog"], true_exog_effect, s=2) +# 45 degree line +ax.plot([0, 1], [0, 1], "k--") +ax.set_xlabel("Predicted effect") +ax.set_ylabel("True effect") +ax.set_title("Effect estimation") +fig.show() + +# %% [markdown] +# ## Leveraging A/B tests results to better detect the effect +# +# In many cases, such as Marketing Mix Modeling, we have access to A/B tests or other experiments that allow us to estimate the effect of a given intervention. We can use this information to tune the output of our variable's effect. In Prophetverse, this can be achieved with `prophetverse.effects.LiftExperimentLikelihood`, that adds a likelihood term using the expected effect for a given date. + +# %% +mocked_lift_test_experiment = pd.DataFrame( + data=np.random.normal(true_exog_effect, 0.1), + columns=["lift_results"], + index=y.index, +) +mocked_lift_test_experiment = mocked_lift_test_experiment.loc[y.index] +# Remove some samples, since we may only have A/B tests for a subset of the data + +mocked_lift_test_experiment = mocked_lift_test_experiment.loc[ + np.random.choice(mocked_lift_test_experiment.index, 100, replace=False) +].reindex(mocked_lift_test_experiment.index) + +display(mocked_lift_test_experiment.head(), mocked_lift_test_experiment.dropna().head()) + +# %% +from prophetverse.effects import LiftExperimentLikelihood + +model_with_lift = model.clone() + + +model_with_lift.set_params( + exogenous_effects=[ + ( + "seasonality", + LinearEffect( + prior=dist.Normal(0, 0.1), + effect_mode="multiplicative", + ), + starts_with(["sin", "cos"]), + ), + ( + "exog", + LiftExperimentLikelihood( + HillEffect(effect_mode="additive"), + lift_test_results=mocked_lift_test_experiment, + prior_scale=0.001, + ), + starts_with("exog"), + ), + ] +) + +model_with_lift.fit(y=y_train, X=X_train) + +# %% +sites_with_lift = model_with_lift.predict_all_sites(fh=X.index, X=X) 
+sites_with_lift.head() + +# %% +fig, ax = plt.subplots(figsize=(4, 4)) + +ax.scatter( + X["exog"], sites["exog"], s=2, label="Predicted effect without A/B test data" +) +ax.scatter(X["exog"], true_exog_effect, s=5, label="True effect") +ax.scatter( + X["exog"], + sites_with_lift["exog"], + s=2, + label="Predicted effect with A/B test data", +) +ax.set_xlabel("Input value") +ax.set_ylabel("Predicted effect") +fig.legend() +fig.show() diff --git a/mkdocs.yml b/mkdocs.yml index cc394f8..6c0816d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -41,12 +41,12 @@ nav: - Hierarchical Time Series: tutorial/hierarchical/file.md - Nonnegative timeseries: tutorial/count-data/file.md - How-to 🛠️: - - Custom Exogenous Effects: howto/custom-effects/index.md - - Custom Trend: howto/custom-trend/index.md + - Custom Exogenous Effects ✨ : howto/custom-effects/index.md + - Custom Trend 🌟: howto/custom-trend/index.md - Composite Exogenous Effects: howto/composite-exogenous-effects/index.md - - Marketing Mix Modeling 🚀: - - Lift test calibration: examples/custom-effect.ipynb + #- Marketing Mix Modeling 🚀: + # - Lift test calibration: examples/custom-effect.ipynb - Understand Prophetverse: - Theory: the-theory.md diff --git a/pyproject.toml b/pyproject.toml index 177e6ea..c5be292 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,8 @@ python-markdown-math = {version = "^0.8", optional = true} tabulate = {version = "^0.9.0", optional = true} mike = {version = "^2.1.3", optional = true} mkdocs-ipymd = "^0.0.4" +seaborn = {version = "^0.13.2", optional = true} +statsmodels = {version = "^0.14.4", optional = true} [tool.poetry.extras] @@ -61,6 +63,8 @@ dev = [ "pylint", "mkdocstrings", "mike", + "seaborn", + "statsmodels", ] [tool.pytest.ini_options] diff --git a/src/prophetverse/datasets/__init__.py b/src/prophetverse/datasets/__init__.py new file mode 100644 index 0000000..f033c0b --- /dev/null +++ b/src/prophetverse/datasets/__init__.py @@ -0,0 +1,19 @@ +"""Datasets for 
def load_composite_effect_example():
    """
    Load a synthetic daily series with a composite exogenous effect.

    The target is a linear trend multiplied by the sum of an investment
    effect, a seasonal component and gaussian noise, plus a second term in
    which the investment is scaled by the (shifted) seasonality. The random
    generator is seeded with 0, so every call returns the same data.

    Returns
    -------
    y : pd.DataFrame
        Single column ``"target"``, indexed by a daily ``pd.PeriodIndex``
        named ``"time"`` that starts on 2010-01-01 and spans ``365 * 7``
        periods.
    X : pd.DataFrame
        Single column ``"investment"`` on the same index.
    """
    rng = np.random.default_rng(0)
    timeindex = pd.period_range(
        start="2010-01-01", freq="D", periods=365 * 7, name="time"
    )

    t = np.arange(len(timeindex))

    trend = np.ones(len(t)) * t / 20 + 10

    # First two yearly harmonics, rescaled and shifted by one.
    seasonality = (
        np.sin(2 * np.pi * t / 365.25) * 0.7
        + np.sin(2 * np.pi * t / 365.25 * 2) * 1
    ) * 0.8 + 1

    # Non-negative raw investment, smoothed with a 15-day rolling mean and
    # shifted so that its minimum is exactly zero.
    exog = np.clip(rng.normal(0.1, 1, size=len(t)), 0, None)
    w = np.ones(15) / 15
    exog = np.convolve(exog, w, mode="same")
    exog -= np.min(exog)
    exog_effect = exog * 0.5

    # Drawn after the investment so the generator's stream order is kept.
    noise = rng.normal(0, 0.1, size=len(t))

    y = pd.DataFrame(
        data={
            "target": trend * (1 + exog_effect + seasonality + noise)
            + trend * exog * (seasonality - seasonality.min() + 1) * 2
        },
        index=timeindex,
    )

    X = pd.DataFrame(
        {
            "investment": exog,
        },
        index=timeindex,
    )
    return y, X
def load_synthetic_squared_exogenous():
    """Load the synthetic dataset with a squared exogenous effect.

    This dataset is just for documentation purposes. It is produced by
    ``_generate_dataset`` with fixed arguments (700 daily periods, seed 42),
    so every call returns the same data.

    Returns
    -------
    y : pd.DataFrame
        The synthetic target variable, in a single ``"target"`` column.
    X : pd.DataFrame
        The synthetic exogenous variable, in a single ``"exogenous"`` column.

    """
    return _generate_dataset(
        n_periods=700,
        seasonality_period=365.25,
        trend_slope=10,
        exogenous_range=(1, 10),
        noise_std=2,
        seed=42,
    )
""" predictive = numpyro.infer.Predictive( @@ -181,8 +186,8 @@ def _predict(self, **kwargs): # posterior_samples=self.posterior_samples_, num_samples=self.num_samples, ) - numpyro.samples_ = predictive(rng_key=self._rng_key, **kwargs) - return numpyro.samples_ + self.samples_ = predictive(rng_key=self._rng_key, **kwargs) + return self.samples_ class MAPInferenceEngineError(Exception): diff --git a/src/prophetverse/sktime/base.py b/src/prophetverse/sktime/base.py index 9810f23..34ec785 100644 --- a/src/prophetverse/sktime/base.py +++ b/src/prophetverse/sktime/base.py @@ -605,7 +605,7 @@ def _scale_y(self, y: pd.DataFrame) -> pd.DataFrame: if self._likelihood_is_discrete: return y - if isinstance(self._scale, float): + if isinstance(self._scale, (int, float)): return y / self._scale scale_for_each_obs = self._scale.loc[y.index.droplevel(-1)].values