-
Notifications
You must be signed in to change notification settings - Fork 0
/
forecast.py
149 lines (119 loc) · 5.46 KB
/
forecast.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick

from models import train_model
from preprocess import (
    rename_columns,
    swap_missing_data,
    interpolate_missing,
    split_data,
    date_and_hour,
    calculate_mape,
    add_holiday_variable,
)
# Initialize models
# Model identifiers resolved by models.train_model — presumably a linear
# regression, XGBoost, and random forest; confirm against models.py.
models = ["LinearModel", "XGBoostModel", "rfModel"]
#rename column headers
# Maps raw CSV headers (weather-underground station IDs: San Francisco,
# San Jose, Bakersfield, El Segundo, Riverside) to short feature names.
column_mapping = {
'Datetime': 'datetime',
'Current demand': 'caiso_load_actuals',
'KCASANFR698_Temperature': 'SF_temp',
'KCASANFR698_Dew_Point':'SF_dew',
'KCASANFR698_Humidity':'SF_humidity',
'KCASANFR698_Speed':'SF_windspeed',
'KCASANFR698_Gust':'SF_windgust',
'KCASANFR698_Pressure':'SF_pressure',
'KCASANJO17_Temperature': 'SJ_temp',
'KCASANJO17_Dew_Point':'SJ_dew',
'KCASANJO17_Humidity':'SJ_humidity',
'KCASANJO17_Speed':'SJ_windspeed',
'KCASANJO17_Gust':'SJ_windgust',
'KCASANJO17_Pressure':'SJ_pressure',
# NOTE(review): the Bakersfield station maps no Dew_Point/Gust columns —
# presumably not present in the source data; confirm against data.csv.
'KCABAKER271_Temperature': 'BAKE_temp',
'KCABAKER271_Humidity':'BAKE_humidity',
'KCABAKER271_Speed':'BAKE_windspeed',
'KCABAKER271_Pressure':'BAKE_pressure',
'KCAELSEG23_Temperature': 'EL_temp',
'KCAELSEG23_Dew_Point':'EL_dew',
'KCAELSEG23_Humidity':'EL_humidity',
'KCAELSEG23_Speed':'EL_windspeed',
'KCAELSEG23_Gust':'EL_windgust',
'KCAELSEG23_Pressure':'EL_pressure',
'KCARIVER117_Temperature': 'RIV_temp',
'KCARIVER117_Dew_Point':'RIV_dew',
'KCARIVER117_Humidity':'RIV_humidity',
'KCARIVER117_Speed':'RIV_windspeed',
'KCARIVER117_Gust':'RIV_windgust',
'KCARIVER117_Pressure':'RIV_pressure'
}
# Columns for swap missing NaN data between SF and SJ
# (the two stations are close enough that each can backfill the other)
sf_columns = [
'KCASANFR698_Temperature', 'KCASANFR698_Dew_Point', 'KCASANFR698_Humidity',
'KCASANFR698_Speed', 'KCASANFR698_Gust', 'KCASANFR698_Pressure'
]
sj_columns = [
'KCASANJO17_Temperature', 'KCASANJO17_Dew_Point', 'KCASANJO17_Humidity',
'KCASANJO17_Speed', 'KCASANJO17_Gust', 'KCASANJO17_Pressure'
]
# Specify the date ranges
# Training window (Timestamps, used against the 'datetime' column).
train_start_date = pd.to_datetime('2021-01-02')
train_end_date = pd.to_datetime('2023-10-03')
# Day whose 24 hourly prices are forecast and compared to actuals.
predict_date = pd.to_datetime('2023-10-04')
# Assuming you want date objects
# Plain datetime.date copies, used when filtering the 'date' column below.
train_start_date_date = train_start_date.date()
train_end_date_date = train_end_date.date()
predict_date_date = predict_date.date()
# Directory containing the .csv files — change to your local path.
path = 'C:/Users/~/~/'

# Load the merged weather + CAISO dataset.
merged_df = pd.read_csv(path + 'data.csv')
# Backfill NaN weather readings between the SF and SJ stations, then
# interpolate whatever gaps remain.
merged_df = swap_missing_data(merged_df, sf_columns, sj_columns)
merged_df = interpolate_missing(merged_df)
# Rename the raw station-prefixed headers to short feature names.
data = rename_columns(merged_df, column_mapping)
# Adds 'date' and 'he' (hour-ending) columns in place — presumably derived
# from 'datetime'; both are used by the training loop below.
date_and_hour(data)
data = add_holiday_variable(data, 'datetime', train_start_date, train_end_date)  # Add holiday variable

# Filter train/test data for start/end dates (+1 day so the final day's
# hours are included).
df = data[(data['datetime'] >= train_start_date)
          & (data['datetime'] <= train_end_date + pd.Timedelta(days=1))]

# Split data into train/test sets. split_data receives the full frame and
# handles the feature/target separation itself; the previous hand-rolled
# X/y arrays built here were never used and have been removed.
X_train, X_test, y_train, y_test = split_data(df)
# Forecast store: model name -> list of per-hour predictions (he 1..24,
# appended in order).
hourly_forecasts = {model_name: [] for model_name in models}

##Model fit##
# Train one model per (model_name, hour-ending) pair on that hour's rows
# only, then predict the same hour of the target day.
# Exogenous features for the prediction day — loop-invariant, so filtered
# once here instead of once per model/hour (the original also duplicated
# this filter statement verbatim inside the loop).
forecast_day = data[data['date'] == predict_date_date]
for model_name in models:
    for hour in range(1, 25):
        # Rows for this hour-ending; compute the mask once for X and y.
        hour_mask = X_train['he'] == hour
        X_train_hour = X_train[hour_mask].drop(['he', 'date'], axis=1)
        y_train_hour = y_train[hour_mask]
        model = train_model(model_name, X_train_hour, y_train_hour)

        # Features for this hour of the prediction day, with identifier and
        # target columns removed to match the training feature set.
        X_forecast = forecast_day[forecast_day['he'] == hour].drop(
            ['datetime', 'TH_SP15_GEN-APND', 'date', 'he'], axis=1)
        y_forecast = model.predict(X_forecast)
        hourly_forecasts[model_name].append(y_forecast)
##Compare Performance##
# Subset the frame for the prediction day. Compare against a datetime.date,
# matching the filters used above — the original compared this column to a
# Timestamp here, which (assuming 'date' holds date objects) matches nothing.
actuals = data[data['date'] == predict_date_date]
actuals = actuals[['date', 'he', 'TH_SP15_GEN-APND']].reset_index(drop=True)
actual_price = actuals['TH_SP15_GEN-APND']  # actual SP15 prices, one row per hour

# Prediction vs Actuals
plt.figure(figsize=(15, 6))  # 15 x 6 figure
plt.plot(range(24), actual_price, color='red', marker='o', label='Actual SP15 Price')
# Plot each model's 24 hourly forecasts (each entry is the raw
# model.predict output for one hour — presumably length 1; confirm).
for model_name, forecasts in hourly_forecasts.items():
    plt.plot(range(24), forecasts[:24], label=f'{model_name} Forecast', linestyle='dotted')
# Labels and legend
plt.title("Model Predictions vs. Actual Load")
plt.xlabel("Hour")
plt.ylabel("Price ($/MWh)")
plt.legend(loc="upper left")
# Format y-axis ticks as currency (mtick = matplotlib.ticker, imported at
# the top of the file; the original referenced it without importing it).
plt.gca().yaxis.set_major_formatter(mtick.FormatStrFormatter('$%.0f'))
# Annotate the figure with the predicted date, top-right corner.
predicted_date_str = pd.to_datetime(predict_date_date).strftime("%Y-%m-%d")
plt.text(
    0.95,  # X-position, right-aligned in figure coordinates
    0.95,  # Y-position, near the top
    f"Predicted Date: {predicted_date_str}",
    ha="right",
    va="top",
    transform=plt.gcf().transFigure,
    fontsize=10,
    bbox=dict(facecolor='none', edgecolor='black', pad=10.0),
)
plt.show()