-
Notifications
You must be signed in to change notification settings - Fork 1
/
Python-Forecasting-03-Holt.py
129 lines (102 loc) · 5.3 KB
/
Python-Forecasting-03-Holt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
############################################################################
# Created by: Prof. Valdecy Pereira, D.Sc.
# UFF - Universidade Federal Fluminense (Brazil)
# email: valdecy.pereira@gmail.com
# Course: Forecasting
# Lesson: Holt
# Citation:
# PEREIRA, V. (2018). Project: Forecasting, File: Python-Forecasting-03-Holt.py, GitHub repository: <https://github.com/Valdecy/Forecasting-03-Holt>
############################################################################
# Importing Required Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import style
from sklearn.metrics import mean_squared_error
from math import sqrt
################ Part 1 - Holt's Method #############################
# Function: Holt
def holt(timeseries, alpha = 0.2, beta = 0.1, graph = True, horizon = 0, trend = "multiplicative"):
timeseries = pd.DataFrame(timeseries.values, index = timeseries.index, columns = [timeseries.name])/1.0
holt = pd.DataFrame(np.nan, index = timeseries.index, columns = ['Holt'])
holt_A = pd.DataFrame(np.nan, index = timeseries.index, columns = ['A'])
holt_T = pd.DataFrame(np.nan, index = timeseries.index, columns = ['T'])
n = 1
for i in range(0, len(timeseries) - n):
if (i == 0 and trend == "none"):
holt_A.iloc[i, 0] = float(timeseries.iloc[0,:])
holt.iloc[i + n, 0] = holt_A.iloc[i, 0]
elif (i == 0 and trend == "additive"):
holt_A.iloc[i, 0] = float(timeseries.iloc[0,:])
holt_T.iloc[i, 0] = 0.0
holt.iloc[i + n, 0] = holt_A.iloc[i, 0] + n*holt_T.iloc[i, 0]
elif (i == 0 and trend == "multiplicative"):
holt_A.iloc[i, 0] = float(timeseries.iloc[0,:])
holt_T.iloc[i, 0] = 1.0
holt.iloc[i + n, 0] = holt_A.iloc[i, 0] + n*holt_T.iloc[i, 0]
elif (i > 0 and trend == "none"):
holt_A.iloc[i, 0] = alpha*(float(timeseries.iloc[i,:])) + (1 - alpha)*(holt_A.iloc[i - 1, 0])
holt.iloc[i + n, 0] = holt_A.iloc[i, 0]
last = float(holt.iloc[i,0])
elif (i > 0 and trend == "additive"):
holt_A.iloc[i, 0] = alpha*(float(timeseries.iloc[i,:])) + (1 - alpha)*(holt_A.iloc[i - 1, 0] + holt_T.iloc[i - 1, 0])
holt_T.iloc[i, 0] = beta*(holt_A.iloc[i, 0] - holt_A.iloc[i - 1, 0]) + (1 - beta)*holt_T.iloc[i - 1, 0]
holt.iloc[i + n, 0] = holt_A.iloc[i, 0] + n*holt_T.iloc[i, 0]
last = float(holt.iloc[i,0])
elif (i > 0 and trend == "multiplicative"):
holt_A.iloc[i, 0] = alpha*(float(timeseries.iloc[i,:])) + (1 - alpha)*(holt_A.iloc[i - 1, 0] * holt_T.iloc[i - 1, 0])
holt_T.iloc[i, 0] = beta*(holt_A.iloc[i, 0] / holt_A.iloc[i - 1, 0]) + (1 - beta)*holt_T.iloc[i - 1, 0]
holt.iloc[i + n, 0] = holt_A.iloc[i, 0] * n*holt_T.iloc[i, 0]
last = float(holt.iloc[i,0])
if horizon > 0:
time_horizon = len(timeseries) + horizon
time_horizon_index = pd.date_range(timeseries.index[0], periods = time_horizon, freq = timeseries.index.inferred_freq)
pred = pd.DataFrame(np.nan, index = time_horizon_index, columns = ["Prediction"])
for i in range(0, horizon):
pred.iloc[len(timeseries) + i] = last
pred = pred.iloc[:,0]
rms = sqrt(mean_squared_error(timeseries.iloc[(n+1):,0], holt.iloc[(n+1):,0]))
timeseries = timeseries.iloc[:,0]
holt = holt.iloc[:,0]
if graph == True and horizon <= 0:
style.use('ggplot')
plt.plot(timeseries)
plt.plot(holt)
plt.title(timeseries.name)
plt.ylabel('')
plt.xticks(rotation = 90)
plt.show()
elif graph == True and horizon > 0:
style.use('ggplot')
plt.plot(timeseries)
plt.plot(holt)
plt.plot(pred)
plt.title(timeseries.name)
plt.ylabel('')
plt.xticks(rotation = 90)
plt.show()
return holt, last, rms
############### End of Function ##############
# Brute Force Optimization
def optimize_holt(timeseries, trend = "multiplicative"):
    """
    Grid-search alpha and beta for holt() over 0.00, 0.01, ..., 1.00.

    Every (alpha, beta) pair is fitted with plotting disabled and its RMSE is
    recorded; each pair is also printed as it is evaluated.

    Parameters
    ----------
    timeseries : pandas.Series
        Series passed through unchanged to holt().
    trend : str
        Trend form passed through unchanged to holt().

    Returns
    -------
    error : pandas.DataFrame
        One row per evaluated pair, with columns 'alpha', 'beta' and 'rmse'.
    best : pandas.Series
        The row of `error` holding the smallest RMSE.

    Raises
    ------
    ValueError
        If no pair produced a non-NaN RMSE.
    """
    records = []
    for alpha_step in range(0, 101):
        alpha = alpha_step/100
        for beta_step in range(0, 101):
            beta = beta_step/100
            print("alpha = ", alpha, " beta = ", beta)
            _, _, rms = holt(timeseries, alpha = alpha, beta = beta, graph = False, horizon = 0, trend = trend)
            records.append((alpha, beta, rms))
    # Build the table once: growing a DataFrame row by row reallocates it on
    # every insert and leaves the column dtypes to inference.
    error = pd.DataFrame(records, columns = ['alpha', 'beta', 'rmse'], dtype = float)
    if error['rmse'].isna().all():
        raise ValueError("no (alpha, beta) pair produced a valid RMSE")
    return error, error.loc[error['rmse'].idxmin()]
############### End of Function ##############
######################## Part 2 - Usage ####################################
# Read the tab-separated dataset
df = pd.read_csv('Python-Forecasting-03-Dataset.txt', sep = '\t')
# Build the time series: index the rows by the dates in the first column,
# then keep the second column as the observed values
X = df.set_index(pd.DatetimeIndex(df.iloc[:,0])).iloc[:,1]
# Fit once with fixed smoothing weights, then run the grid search
holt(X, trend = "multiplicative", alpha = 0.61, beta = 0.55, horizon = 0, graph = True)
optimize_holt(X, trend = "multiplicative")
########################## End of Code #####################################