# pipeline.py
# Plotting packages
import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
# Date wrangling
from datetime import datetime, timedelta
# Data wrangling
import pandas as pd
# The deep learning class
from deep_model import DeepModelTS
# Reading the configuration file
import yaml
# Directory management
import os
# Reading the hyperparameters for the pipeline
with open(os.path.join(os.getcwd(), 'conf.yml')) as file:
    conf = yaml.load(file, Loader=yaml.FullLoader)
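# An illustrative conf.yml layout (the keys are the ones read below; the values are
# placeholders, not the project's defaults):
#   lag: 24
#   LSTM_layer_depth: 64
#   epochs: 10
#   train_test_split: 0.15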
# Reading the data
d = pd.read_csv('input/DAYTON_hourly.csv')
d['Datetime'] = [datetime.strptime(x, '%Y-%m-%d %H:%M:%S') for x in d['Datetime']]
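# Equivalently, the vectorized pd.to_datetime(d['Datetime'], format='%Y-%m-%d %H:%M:%S')
# could be used here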
# Making sure there are no duplicated timestamps
# If there are duplicates, we average the values recorded for those timestamps
d = d.groupby('Datetime', as_index=False)['DAYTON_MW'].mean()
# Sorting the values
d.sort_values('Datetime', inplace=True)
# Initializing the class
deep_learner = DeepModelTS(
    data=d,
    Y_var='DAYTON_MW',
    lag=conf.get('lag'),
    LSTM_layer_depth=conf.get('LSTM_layer_depth'),
    epochs=conf.get('epochs'),
    train_test_split=conf.get('train_test_split')  # The share of data that will be used for validation
)
# Fitting the model
model = deep_learner.LSTModel()
# Making the prediction on the validation set
# Only applicable if train_test_split in conf.yml is greater than 0
yhat = deep_learner.predict()
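# predict() is expected to return an empty list when no validation split was made,
# hence the length check below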
if len(yhat) > 0:

    # Constructing the forecast dataframe
    fc = d.tail(len(yhat)).copy()
    fc.reset_index(inplace=True)
    fc['forecast'] = yhat

    # Plotting the forecasts
    plt.figure(figsize=(12, 8))
    for dtype in ['DAYTON_MW', 'forecast']:
        plt.plot(
            'Datetime',
            dtype,
            data=fc,
            label=dtype,
            alpha=0.8
        )
    plt.legend()
    plt.grid()
    plt.show()
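    # Optional sanity check: mean absolute error on the validation window
    # (a minimal sketch; it assumes the forecast aligns one-to-one with the
    # last len(yhat) observations, which is how fc was built above)
    mae = (fc['DAYTON_MW'] - fc['forecast']).abs().mean()
    print(f'Validation MAE: {mae:.2f} MW')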
# Forecasting n steps ahead
# Creating the model using full data and forecasting n steps ahead
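# (Hyperparameters are hardcoded below for the full-data model; they could equally be
# read from conf.yml as above)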
deep_learner = DeepModelTS(
    data=d,
    Y_var='DAYTON_MW',
    lag=24,
    LSTM_layer_depth=64,
    epochs=10,
    train_test_split=0
)
# Fitting the model
deep_learner.LSTModel()
# Forecasting n steps ahead
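# (168 hours = one week ahead at hourly resolution)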
n_ahead = 168
yhat = deep_learner.predict_n_ahead(n_ahead)
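# Each prediction comes back as a nested array, so flatten to plain floats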
yhat = [y[0][0] for y in yhat]
# Constructing the forecast dataframe
fc = d.tail(400).copy()
fc['type'] = 'original'
last_date = max(fc['Datetime'])
hat_frame = pd.DataFrame({
    'Datetime': [last_date + timedelta(hours=x + 1) for x in range(n_ahead)],
    'DAYTON_MW': yhat,
    'type': 'forecast'
})
fc = pd.concat([fc, hat_frame])  # DataFrame.append was removed in pandas 2.x
fc.reset_index(inplace=True, drop=True)
# Plotting the forecasts
plt.figure(figsize=(12, 8))
for col_type in ['original', 'forecast']:
    plt.plot(
        'Datetime',
        'DAYTON_MW',
        data=fc[fc['type'] == col_type],
        label=col_type
    )
plt.legend()
plt.grid()
plt.show()
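# Optionally, persist the combined history-plus-forecast frame for downstream use
# (the file name is illustrative, not part of the original pipeline)
fc.to_csv('forecast.csv', index=False)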