Skip to content

Commit

Permalink
Quantile transform for AR models. Optional. An attempt to stabilize lags.
Browse files Browse the repository at this point in the history
  • Loading branch information
antoinecarme committed Jan 19, 2025
1 parent 5b5161f commit 0079c30
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 36 deletions.
22 changes: 8 additions & 14 deletions pyaf/TS/Keras_Models.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,6 @@ def dumpCoefficients(self, iMax=10):
def build_RNN_Architecture(self, iARInputs, iARTarget):
assert(0);

def fit_inputs_and_targets_scalers(self, iARInputs, iARTarget):
    """Fit standard scalers on the AR inputs and the AR target.

    The two fitted ``StandardScaler`` objects are stored on ``self``
    (``mStandardScaler_Input`` / ``mStandardScaler_Target``) so the
    prediction path can reuse the same transform and later invert it.
    Returns the scaled ``(inputs, target)`` pair; the target is scaled
    as a single ``(N, 1)`` column.
    """
    from sklearn.preprocessing import StandardScaler

    lInputScaler = StandardScaler()
    lTargetScaler = StandardScaler()
    lScaledInputs = lInputScaler.fit_transform(iARInputs)
    lScaledTarget = lTargetScaler.fit_transform(iARTarget.reshape(-1, 1))
    self.mStandardScaler_Input = lInputScaler
    self.mStandardScaler_Target = lTargetScaler
    return (lScaledInputs, lScaledTarget)

def get_default_keras_options(self):
lDict = {}
return lDict
Expand All @@ -61,11 +52,9 @@ def fit_keras_model(self, iARInputs, iARTarget):

# NOTE(review): this span comes from a flattened GitHub diff rendering -- it
# appears to interleave the pre-change lines (the StandardScaler transform,
# reshape_inputs, and inverse_transform calls) with the post-change line
# `lARInputs = iARInputs`.  As written, the assignments contradict each other
# (the raw-input assignment clobbers the scaled/reshaped value).  Confirm the
# real method body against pyaf/TS/Keras_Models.py before reusing this text.
def predict_keras_model(self, iARInputs):
# Timer that logs the duration of Keras prediction for this output column.
lTimer = tsutil.cTimer(("PREDICTING_KERAS_MODEL", self.mOutName))
lARInputs = self.mStandardScaler_Input.transform(iARInputs)
lARInputs = self.reshape_inputs(lARInputs)
# Post-change path: feed the AR inputs to the model unscaled -- presumably the
# commit removed the scaler in favour of the quantile lag encoder; TODO confirm.
lARInputs = iARInputs
lPredicted = self.mModel.predict(lARInputs);
# Force predictions into a single (N, 1) column.
lPredicted = np.reshape(lPredicted, (-1, 1))
lPredicted = self.mStandardScaler_Target.inverse_transform(lPredicted)
return lPredicted

def fit(self):
Expand All @@ -78,8 +67,8 @@ def fit(self):

lARInputs = lAREstimFrame[self.mInputNames].values
lARTarget = lAREstimFrame[series].values

(lARInputs, lARTarget) = self.fit_inputs_and_targets_scalers(lARInputs, lARTarget)
if(self.mLagEncoder is not None):
lARTarget = self.mLagEncoder.transform(lARTarget.reshape(-1, 1)).flatten()

self.build_RNN_Architecture(lARInputs, lARTarget);

Expand All @@ -95,6 +84,9 @@ def fit(self):
lPredicted = self.predict_keras_model(lFullARInputs);

self.mARFrame[self.mOutName] = lPredicted
if(self.mLagEncoder is not None):
self.mARFrame[self.mOutName] = self.mLagEncoder.inverse_transform(self.mARFrame[self.mOutName].values.reshape(-1, 1)).flatten()

self.compute_ar_residue(self.mARFrame)

def transformDataset(self, df, horizon_index = 1):
Expand All @@ -107,6 +99,8 @@ def transformDataset(self, df, horizon_index = 1):
lPredicted = self.predict_keras_model(inputs)

df[self.mOutName] = lPredicted;
if(self.mLagEncoder is not None):
df[self.mOutName] = self.mLagEncoder.inverse_transform(df[self.mOutName].values.reshape(-1, 1)).flatten()
self.compute_ar_residue(df)
return df;

Expand Down
22 changes: 7 additions & 15 deletions pyaf/TS/Pytorch_Models.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,6 @@ def dumpCoefficients(self, iMax=10):
def build_RNN_Architecture(self, iARInputs, iARTarget):
assert(0);

def fit_inputs_and_targets_scalers(self, iARInputs, iARTarget):
    """Fit standard scalers for the AR inputs and the AR target.

    Stores the fitted scalers on ``self`` (``mStandardScaler_Input`` /
    ``mStandardScaler_Target``) so prediction can apply the same
    transform and invert it afterwards, and returns the scaled
    ``(inputs, target)`` pair with the target as an ``(N, 1)`` column.
    """
    from sklearn.preprocessing import StandardScaler

    self.mStandardScaler_Input = StandardScaler()
    self.mStandardScaler_Target = StandardScaler()
    lARInputs = self.mStandardScaler_Input.fit_transform(iARInputs)
    # reshape(-1, 1) yields the (N, 1) column directly; the original code
    # reshaped to (N, 1) twice in a row -- the second reshape was a no-op.
    # This also matches the Keras implementation of the same method.
    lARTarget = self.mStandardScaler_Target.fit_transform(iARTarget.reshape(-1, 1))
    return (lARInputs, lARTarget)

def get_default_pytorch_options(self):
lDict = {}
return lDict
Expand All @@ -70,12 +60,10 @@ def predict_pytorch_model(self, iARInputs):
lTimer = None
if(self.mOptions.mDebug):
lTimer = tsutil.cTimer(("PREDICTING_PYTORCH_MODEL", self.mOutName))
lARInputs = self.mStandardScaler_Input.transform(iARInputs)
lARInputs = self.reshape_inputs(lARInputs)
lARInputs = iARInputs
lARInputs = lARInputs.astype(np.float32)
lPredicted = self.mModel.predict(lARInputs);
lPredicted = np.reshape(lPredicted, (-1, 1))
lPredicted = self.mStandardScaler_Target.inverse_transform(lPredicted)
return lPredicted


Expand All @@ -89,8 +77,8 @@ def fit(self):

lARInputs = lAREstimFrame[self.mInputNames].values
lARTarget = lAREstimFrame[series].values

(lARInputs, lARTarget) = self.fit_inputs_and_targets_scalers(lARInputs, lARTarget)
if(self.mLagEncoder is not None):
lARTarget = self.mLagEncoder.transform(lARTarget.reshape(-1, 1))

self.build_RNN_Architecture(lARInputs, lARTarget);

Expand All @@ -105,6 +93,8 @@ def fit(self):
lPredicted = self.predict_pytorch_model(lFullARInputs)
self.mARFrame[self.mOutName] = lPredicted

if(self.mLagEncoder is not None):
self.mARFrame[self.mOutName] = self.mLagEncoder.inverse_transform(self.mARFrame[self.mOutName].values.reshape(-1, 1)).flatten()
self.compute_ar_residue(self.mARFrame)

def transformDataset(self, df, horizon_index = 1):
Expand All @@ -117,6 +107,8 @@ def transformDataset(self, df, horizon_index = 1):
lPredicted = self.predict_pytorch_model(inputs)

df[self.mOutName] = lPredicted;
if(self.mLagEncoder is not None):
df[self.mOutName] = self.mLagEncoder.inverse_transform(df[self.mOutName].values.reshape(-1, 1)).flatten()
self.compute_ar_residue(df)
return df;

Expand Down
9 changes: 9 additions & 0 deletions pyaf/TS/Scikit_Models.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ def fit(self):
lARInputs = lAREstimFrame[self.mInputNames].values

lARTarget = lAREstimFrame[series].values
if(self.mLagEncoder is not None):
lARTarget = self.mLagEncoder.transform(lARTarget.reshape(-1, 1)).flatten()
# tsutil.print_pyaf_detailed_info(len(self.mInputNames), lARInputs.shape , lARTarget.shape)
assert(lARInputs.shape[1] > 0);
assert(lARTarget.shape[0] > 0);
Expand Down Expand Up @@ -110,6 +112,8 @@ def fit(self):
if(self.mDecompositionType in ['TSR']):
self.mARFrame[self.mOutName] = 1.0

if(self.mLagEncoder is not None):
self.mARFrame[self.mOutName] = self.mLagEncoder.inverse_transform(self.mARFrame[self.mOutName].values.reshape(-1, 1)).flatten()

self.compute_ar_residue(self.mARFrame)

Expand All @@ -131,6 +135,8 @@ def transformDataset(self, df, horizon_index = 1):
if(self.mDecompositionType in ['TSR']):
df[self.mOutName] = 1.0

if(self.mLagEncoder is not None):
df[self.mOutName] = self.mLagEncoder.inverse_transform(df[self.mOutName].values.reshape(-1, 1)).flatten()

self.compute_ar_residue(df)
return df;
Expand All @@ -144,6 +150,9 @@ def __init__(self , cycle_residue_name, P , iExogenousInfo = None):

def dumpCoefficients(self, iMax=10):
logger = tsutil.get_pyaf_logger();
if(self.mLagEncoder is not None):
logger.info("AR_MODEL_LAG_ENCODNG_QUANTILES " + str(self.mLagEncoder.quantiles_.flatten().tolist()));

lDict = dict(zip(self.mInputNamesAfterSelection , self.mScikitModel.coef_.round(6)));
lDict1 = dict(zip(self.mInputNamesAfterSelection , abs(self.mScikitModel.coef_.round(6))));
i = 1;
Expand Down
28 changes: 21 additions & 7 deletions pyaf/TS/SignalDecomposition_AR.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def __init__(self , cycle_residue_name, iExogenousInfo = None):
self.mInputNames = [];
self.mExogenousInfo = iExogenousInfo;
self.mLagsForSeries = {cycle_residue_name : []}
self.mLagEncoder = None

def compute_ar_residue(self, df):
target = df[self.mCycleResidueName].values
Expand Down Expand Up @@ -86,9 +87,11 @@ def generateLagsForForecast(self, df, selection = None):
lDict = {}
# lDict[self.mCycleResidueName] = df[self.mCycleResidueName]
series = self.mCycleResidueName
lSeries = df[self.mCycleResidueName]
lSeries = df[self.mCycleResidueName].values
if(self.mLagEncoder is not None):
lSeries = self.mLagEncoder.transform(lSeries.reshape(-1, 1)).flatten()
# Investigate Large Horizon Models #213 : The model can produce overflows in its inputs when iterated.
lSeries = lSeries.values.clip(-1e+10, +1e10)
lSeries = lSeries.clip(-1e+10, +1e10)
for p in self.mLagsForSeries[self.mCycleResidueName]:
name = series +'_Lag' + str(p);
if(selection is None or name in selection):
Expand Down Expand Up @@ -149,6 +152,7 @@ def __init__(self):
self.mARFrame = None
self.mARList = {}
self.mExogenousInfo = None;
self.mLagEncoders = {}

def plotAR(self):
for trend in self.mTrendList:
Expand All @@ -169,15 +173,24 @@ def shift_series(self, series, p):
new_values = np.append([ series[0] ]*p, series[0:N-p])
return new_values

def generateLagsForTraining(self, df, series, pMinMax):
def generateLagsForTraining(self, df, series, pMinMax, iEncode = False):
lSeries = df[series].values;
lCanApplyQuantileTransform = iEncode and (self.mOptions.mLagEncoding is not None)
if(lCanApplyQuantileTransform):
from sklearn.preprocessing import QuantileTransformer
df_Estim = self.mSplit.getEstimPart(df)
NQ = int(min(20, np.sqrt(df_Estim.shape[0]))) # optimal quantiles number heuristics : sqrt(N)
qt = QuantileTransformer(n_quantiles=NQ, random_state=self.mOptions.mSeed)
qt.fit(df_Estim[series].values.reshape(-1, 1))
self.mLagEncoders[series] = qt
lSeries = qt.transform(lSeries.reshape(-1, 1))
(pmin, pmax) = pMinMax
lSeries = df[series];
self.mDefaultValues[series] = lSeries.values[0];
self.mDefaultValues[series] = lSeries[0];
lDict = {}
lags = []
for p in range(pmin, pmax+1):
name = series+'_Lag' + str(p)
lShiftedSeries = self.shift_series(lSeries.values, p)
lShiftedSeries = self.shift_series(lSeries, p)
lShiftedEstim = self.mSplit.getEstimPart(lShiftedSeries);
lAcceptable = self.is_not_constant(lShiftedEstim);
if(lAcceptable):
Expand Down Expand Up @@ -206,12 +219,13 @@ def preselect_exog_vars(self, df, cycle_residue):

def addLagsForTraining(self, df, cycle_residue):
P = self.get_nb_lags();
lag_df, lags = self.generateLagsForTraining(df, cycle_residue, (1, P));
lag_df, lags = self.generateLagsForTraining(df, cycle_residue, (1, P), iEncode = True);
lag_dfs = [lag_df]
for autoreg in self.mARList[cycle_residue]:
for lag in lags:
(name , p) = lag
autoreg.register_lag(name, p)
autoreg.mLagEncoder = self.mLagEncoders.get(cycle_residue)

# Exogenous variables lags
lUseExog = False # Exog variables can be configured but not used ("AR" activated and "ARX" disabled).
Expand Down

0 comments on commit 0079c30

Please sign in to comment.