# -*- coding: utf-8 -*-
# Final Notebook for the FYP Project (fyp21001)
# Student Name: JOSHI Swapnil
# UID: 3035435773
# Version: 7.3
# Development Platform: Python 3.8 on macOS + Python Jupyter Notebooks on Google Colab
# Latest date of modification: April 17 2022, 10:33am
"""# **INSTALLING THE REQUIRED LIBRARIES**
"""
# Adjust the pip commands below to install any additional libraries
!pip install tvdatafeed #for the datafeed
!pip install vectorbt #for main backtesting engine
#Importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import vectorbt as vbt
import warnings
warnings.filterwarnings('ignore')
from tvDatafeed import TvDatafeed, Interval #ensure that '!pip install tvdatafeed' has been executed
from sklearn.preprocessing import MinMaxScaler
from pandas import DataFrame
from selenium import webdriver
from datetime import datetime
from statsmodels.tsa.stattools import adfuller
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import auc
"""# **DATA COLLECTION - E-mini S&P 500 Futures**
##### Collecting the data for E-mini S&P 500 futures to be used for building our model
"""
#Fetching the data for E-mini S&P 500 Futures:
tv = TvDatafeed()
es = tv.get_hist(symbol='ES',exchange='CME_MINI',interval=Interval.in_4_hour,n_bars=5000,fut_contract=1) #fetch the data for front-month futures
es_px = es.drop(columns=['symbol', 'volume']) # drop the symbol and volume columns (not needed in the analysis)
es_px = es_px.dropna() # drop any NaN rows (reassignment is required; dropna() does not modify in place)
"""# **Heikin-Ashi Candle Conversion**
##### Converting normal candlesticks to Heikin-Ashi candlesticks by writing a new function. The price adjustment will be made later during backtesting.
"""
#Define a function to calculate the Heikin-Ashi candle values (OHLC):
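# Standard Heikin-Ashi definitions implemented below:
#   HA_close[t] = (open[t] + high[t] + low[t] + close[t]) / 4
#   HA_open[t]  = (HA_open[t-1] + HA_close[t-1]) / 2, seeded with the first raw open
#   HA_high[t]  = max(high[t], HA_open[t], HA_close[t])
#   HA_low[t]   = min(low[t],  HA_open[t], HA_close[t])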
def heikin_ashi(df):
    heikin_ashi_df = pd.DataFrame(index=df.index.values, columns=['open', 'high', 'low', 'close'])
    heikin_ashi_df['close'] = (df['open'] + df['high'] + df['low'] + df['close']) / 4
    for i in range(len(df)):
        if i == 0:
            heikin_ashi_df.iat[0, 0] = df['open'].iloc[0]
        else:
            heikin_ashi_df.iat[i, 0] = (heikin_ashi_df.iat[i-1, 0] + heikin_ashi_df.iat[i-1, 3]) / 2
    heikin_ashi_df['high'] = heikin_ashi_df.loc[:, ['open', 'close']].join(df['high']).max(axis=1)
    heikin_ashi_df['low'] = heikin_ashi_df.loc[:, ['open', 'close']].join(df['low']).min(axis=1)
    return heikin_ashi_df
#Converting candlesticks to Heikin Ashi
hadf = heikin_ashi(es_px)
#Why use open prices instead of closing prices?
'''
- The strategy always executes at the next bar's open, so the open is the price
  actually available at the moment a trade is placed.
- This is especially important for Heikin-Ashi candles: the HA close of a bar is
  computed from that bar's full OHLC, so trading at the HA close would mean filling
  at a price that was not yet known at execution time, which would bias the backtest.
'''
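# A minimal sanity check of the point above (illustration only; assumes hadf and
# es_px share the same index order): the HA close of bar t is built from bar t's
# own OHLC and is therefore unknown at bar t's open, while the HA open depends
# only on bar t-1 values and is tradable.
_t = 10  # arbitrary bar index for the check
assert np.isclose(hadf['close'].iloc[_t],
                  (es_px['open'].iloc[_t] + es_px['high'].iloc[_t]
                   + es_px['low'].iloc[_t] + es_px['close'].iloc[_t]) / 4)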
data_open = pd.DataFrame(hadf['open'])
data_open.columns = ['open_hadf']
data_open['open'] = pd.DataFrame(es_px['open'])
#EMAs (EMA is better because it assigns more weight to recent values and less to the old ones)
data_open['EMA_1'] = data_open['open_hadf'].ewm(span=1, adjust=False).mean()
data_open['EMA_5'] = data_open['open_hadf'].ewm(span=5, adjust=False).mean()
data_open
# Creating copies of the dataset:
# Copy-1 : to be used at a later stage for backtesting of the momentum model
# since the data_open DF will be modified for classification purposes,
# it is best to create another dataframe that can be used for vectorized backtesting using VectorBT
momentum_backtest_data = data_open.copy(deep=True)
"""# **BASIC FEATURE ADDITION**"""
# Feature addition - pct change and log returns (these will also be used for the Monte Carlo simulation)
data_open['hadf_pct_change'] = data_open['open_hadf'].pct_change()
data_open['hadf_log_return'] = np.log(1 + data_open.hadf_pct_change)
data_open = data_open.dropna()
data_open
# Adding a new column for the direction (based on the returns)
data_open['direction'] = np.where(data_open['hadf_pct_change'] > 0, 'UP', 'DOWN')
data_open
#END OF DATA PREPROCESSING
"""# **MONTE CARLO SIMULATION**
##### --> Implementing a Monte Carlo simulation that uses the statistical properties of the time series to generate a simulated series (with the same properties).
##### --> This simulated data will be used later in the testing of our ML models
"""
# MONTE CARLO SIMULATION - generating a random time series using the statistical properties of the data
# This time series will be used later to test the ML models
data_copy = data_open.copy(deep=True)
simulation = pd.DataFrame()
mu, sigma = data_copy['hadf_log_return'].mean(), data_copy['hadf_log_return'].std() #Mean and Stdev for the new timeseries
sim_rets = np.random.normal(mu, sigma, 5000)
initial = data_copy['open_hadf'].iloc[-1]
simulation['open_hadf'] = initial * (sim_rets + 1).cumprod()
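# Note: sim_rets are drawn from the log-return distribution but compounded as
# simple returns above; for small 4-hour returns the two are nearly identical
# (log(1+r) ~ r). Exact log compounding would be initial * np.exp(sim_rets).cumprod().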
simulation['EMA_5'] = simulation['open_hadf'].ewm(span=5, adjust=False).mean()
simulation['hadf_pct_change'] = simulation['open_hadf'].pct_change()
simulation['hadf_log_return'] = np.log(1 + simulation.hadf_pct_change)
simulation = simulation.dropna()
# Adding a new column for the direction (based on the returns)
simulation['direction'] = np.where(simulation['hadf_pct_change'] > 0, 'UP', 'DOWN')
simulation
"""# **START OF EDA**
##### Now that we have done the preliminary data preprocessing, we can start working on some basic tests and explore the data further - especially testing its stationarity
"""
#1 Simple describe() function [in-built]
print(data_open.describe())
# We can see from the results above that the price series itself is clearly not stationary
# The percentage and log returns, however, are likely stationary: their mean returns are small and have remained stable over the sample so far
# Let us now perform the Augmented Dickey-Fuller (ADF) test to check the stationarity of each return series formally
def adf_test(timeseries):
    print('Results of Dickey-Fuller Test:')
    dftest = adfuller(timeseries, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used'])
    for key, value in dftest[4].items():
        dfoutput['Critical Value (%s)' % key] = value
    print(dfoutput)
#ADF on % returns:
print("")
print("ADF on RETURNS:")
print("")
adf_test(data_open['hadf_pct_change'])
print("")
#ADF on log returns:
print("ADF on LOG RETURNS:")
print("")
adf_test(data_open['hadf_log_return'])
print("")
# Plotting the log returns to double check the stationarity
plt.figure(figsize=(16,8))
plt.plot(data_open['hadf_log_return'])
plt.show()
"""##### **Null Hypothesis:** Series is non-stationary or series has a unit root.
##### **Alternate Hypothesis:** Series is stationary or series has no unit root.
* From the test above, we can see that the Test Statistic < (all the critical values) and p-value < 0.05 --> So we reject the Null Hypothesis
* The p-value is extremely small, which indicates that there is strong evidence in favour of the Alternative Hypothesis
* The return distribution looks roughly normal, though the sharp peak around the center indicates some positive (excess) kurtosis in the data
#### **NEXT STEPS:**
* Development of a Classification Model -- Logistic Regression & Random Forest Classifier
* We build both classifiers and compare which one performs better
* We also need to consider which of the classifiers can be integrated well into the strategy
# **BINARY CLASSIFICATION APPROACH using LOGISTIC REGRESSION**
"""
# CONVERTING THE DATA FOR A BINARY CLASSIFICATION PROBLEM:
# Re-deriving the direction label (based on the sign of the returns)
data_open['direction'] = np.where(data_open['hadf_pct_change'] > 0, 'UP', 'DOWN')
data_open
# Splitting the data into test and train:
X_train, X_test, y_train, y_test = train_test_split(data_open[['open_hadf','EMA_5']], data_open.direction, test_size=0.25)
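# Note: train_test_split shuffles by default (shuffle=True); for a stricter
# walk-forward evaluation on time-series data one could pass shuffle=False here.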
# Building a model class
model = LogisticRegression()
classifier = model.fit(X_train, y_train)
# Testing the model on X_test (part of the original data):
y_pred = model.predict(X_test)
print("Model's Accuracy:",model.score(X_test, y_test))
print(classification_report(y_test,y_pred))
# Plotting a confusion matrix
ConfusionMatrixDisplay.from_estimator(model, X_test, y_test)
plt.show()
# Testing the model on the 'simulation' dataframe (random simulated data):
simX_test = simulation.copy(deep=True)
simY_test = simulation.copy(deep=True)
simX_test = simX_test.drop(columns=['direction', 'hadf_pct_change', 'hadf_log_return'])
simY_test = simY_test.drop(columns=['open_hadf', 'EMA_5', 'hadf_pct_change', 'hadf_log_return'])
simulation_pred = model.predict(simX_test)
print("Model's Accuracy:",model.score(simX_test, simY_test))
print(classification_report(simY_test,simulation_pred))
# Plotting a confusion matrix
ConfusionMatrixDisplay.from_estimator(model, simX_test, simY_test)
plt.show()
"""# **BINARY CLASSIFICATION APPROACH using a RANDOM FOREST CLASSIFIER**
"""
# Building and fitting the model
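# max_samples=2000 caps the bootstrap sample drawn for each tree, and
# max_depth=900 is large enough that in practice trees grow until nearly pure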
rf = RandomForestClassifier(max_depth=900, max_samples=2000, n_estimators = 50)
rf.fit(X_train, y_train)
# Testing the model on X_test (part of the original data):
y_preds = rf.predict(X_test)
print("Model's Accuracy:",rf.score(X_test, y_test))
print(classification_report(y_test,y_preds))
print("")
# Plotting a confusion matrix
ConfusionMatrixDisplay.from_estimator(rf, X_test, y_test)
plt.show()
# Testing the model on the 'simulation' dataframe (random simulated data):
simX_test = simulation.copy(deep=True)
simY_test = simulation.copy(deep=True)
simX_test = simX_test.drop(columns=['direction', 'hadf_pct_change', 'hadf_log_return'])
simY_test = simY_test.drop(columns=['open_hadf', 'EMA_5', 'hadf_pct_change', 'hadf_log_return'])
simulation_preds = rf.predict(simX_test)
print("Model's Accuracy:",rf.score(simX_test, simY_test))
print(classification_report(simY_test,simulation_preds))
# Plotting a confusion matrix
ConfusionMatrixDisplay.from_estimator(rf, simX_test, simY_test)
plt.show()
"""The results from classification models show us that in our case, Logistic Regression would be a better choice
"""
# Let us now add another column for the momentum overlay:
#pd.set_option('display.max_rows', 100)
data_open['long'] = np.where(data_open.EMA_1 > data_open.EMA_5, 1,0)
data_open['Signal'] = data_open['long'].diff()
# Shift the signals one step down so that each trade executes at the NEXT bar's open:
# e.g. a signal generated on Jan 6th, 2022 is traded at the open price of Jan 7th, 2022
data_open['Signal'] = data_open['Signal'].shift(1)
data_open
#Creating another dataframe to do some testing with signals
# Here we are integrating the moving average signals along with the direction signals
opens = data_open.copy(deep=True)
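# Truth table for the combined signal: +1 (long) only when a bullish crossover
# (Signal == 1) coincides with an 'UP' direction, -1 (short) only when a bearish
# crossover (Signal == -1) coincides with 'DOWN'; all other combinations map to 0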
condition = [((opens['Signal'] == 1) & (opens['direction'] == 'UP')),
             ((opens['Signal'] == 1) & (opens['direction'] == 'DOWN')),
             ((opens['Signal'] == 0) & (opens['direction'] == 'UP')),
             ((opens['Signal'] == 0) & (opens['direction'] == 'DOWN')),
             ((opens['Signal'] == -1) & (opens['direction'] == 'UP')),
             ((opens['Signal'] == -1) & (opens['direction'] == 'DOWN'))]
values = [1,0,0,0,0,-1]
opens['momentum_signal'] = np.select(condition,values)
opens
"""# **Integration: Applying ML Model on the 'opens' dataframe and predicting values**
* These predicted values will be added to another column -- 'ml_predict'
"""
# actual price data for the ML model
live_px_data = opens.copy(deep=True)
# removing all columns except 'open_hadf' and 'EMA_5'
live_px_data = live_px_data.drop(columns=['open', 'EMA_1', 'direction', 'hadf_pct_change', 'hadf_log_return', 'long', 'Signal', 'momentum_signal'])
# using the ML model that we created earlier to make predictions
ml_pred = model.predict(live_px_data)
# converting the numpy array to a pandas dataframe
ml_dataframe = pd.DataFrame(ml_pred, columns=['ml_predict'])
# assigning the same index to the predictions dataframe
ml_dataframe.index = opens.index
#concatenating the ml_dataframe to opens dataframe
opens = pd.concat([opens,ml_dataframe],axis=1)
opens
"""# **FINAL INTEGRATION OF ML MODEL WITH MOMENTUM FILTER**"""
fin_mod = opens.copy(deep=True)
conditions = [((fin_mod['momentum_signal'] == 1) & (fin_mod['ml_predict'] == 'UP')),
              ((fin_mod['momentum_signal'] == 1) & (fin_mod['ml_predict'] == 'DOWN')),
              ((fin_mod['momentum_signal'] == 0) & (fin_mod['ml_predict'] == 'UP')),
              ((fin_mod['momentum_signal'] == 0) & (fin_mod['ml_predict'] == 'DOWN')),
              ((fin_mod['momentum_signal'] == -1) & (fin_mod['ml_predict'] == 'UP')),
              ((fin_mod['momentum_signal'] == -1) & (fin_mod['ml_predict'] == 'DOWN'))]
values_fin = [1,0,0,0,0,-1]
fin_mod['final_signal'] = np.select(conditions,values_fin)
final_data = fin_mod.drop(columns=['EMA_1', 'hadf_pct_change', 'hadf_log_return', 'direction', 'long', 'Signal'])
# displaying the final data with only the necessary columns:
final_data
"""# **BACKTESTING**
* Implementation of a backtesting engine for the combined model
* Utilizes the VectorBT library
"""
# Initializing the portfolio settings for VectorBT
# 1. We will be using the HADF data because it generates signals with little noise
px = pd.to_numeric(final_data['open_hadf'],errors='coerce')
# 2. Setting the initial portfolio cash to 100,000 USD and Trade Size = 1 contract
vbt.settings.portfolio['init_cash'] = 100000.
vbt.settings.portfolio['size'] = 1
# Initializing moving averages for VectorBT
fast_ma = vbt.MA.run(px, 1, short_name='fast')
slow_ma = vbt.MA.run(px, 5, short_name='slow')
# Generate LONG crossover signals
dmac_entries_long = fast_ma.ma_crossed_above(slow_ma)
dmac_exits_long = fast_ma.ma_crossed_below(slow_ma)
# Build portfolio, which internally calculates the equity curve
dmac_long_pf = vbt.Portfolio.from_signals(px, dmac_entries_long, dmac_exits_long)
# Storing the trades/trade log in a DataFrame
df_test = pd.DataFrame(dmac_long_pf.trades.records)
# Modifying the trades in the DataFrame to adjust for Heikin Ashi Factor and Slippage
df_mod = df_test.copy(deep=True)
slippage = 8
df_mod['entry_price'] = df_mod['entry_price'] + slippage
df_mod['exit_price'] = df_mod['exit_price'] - slippage
df_mod['pnl'] = df_mod['exit_price']-df_mod['entry_price']
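# With the ES multiplier of $50 per index point, 8 points of slippage on each
# side amounts to 16 points, i.e. roughly $800 per round-trip contract -- a
# conservative haircut applied to every trade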
"""# **PnL Statistics - LONG Signals on E-mini S&P 500 Futures**
* Profit/Loss analysis of LONG signal trades and overall model on the E-mini S&P 500 futures
"""
# Equity curve for LONG Trade Signals
df_mod['cumulative_pnl_points'] = df_mod['pnl'].cumsum()
df_mod['cumulative_pnl_usd'] = df_mod['cumulative_pnl_points']*50.0
df_mod['cumulative_pnl_usd'].plot(figsize=(10,4),xlabel='Trades',ylabel='Cumulative Profit (in USD)',title='Cumulative Profit/Loss in USD | LONG Trades | E-mini S&P 500 Futures | HYBRID MODEL')
# Distribution of Returns for LONG Trade Signals
df_results = df_mod.drop(columns=['col', 'entry_idx', 'entry_fees', 'exit_fees', 'direction', 'parent_id'])
x = df_results['return']
plt.figure(figsize=(10,8))
plt.hist(x, bins=40)
plt.xlabel('Returns (per each trade)')
plt.ylabel('Number of Trades')
plt.ylim(0,150)
plt.title('Distribution of Trade Returns | LONG Trades | E-mini S&P 500 Futures | HYBRID MODEL')
plt.show()
# Calculating the total POINTS the strategy made (not in dollars, since we are using futures)
total_long_pts = df_mod['pnl'].sum()
# Calculating the starting and ending price of the time series to compute the Buy-and-Hold return
starting_price = int(data_open['open'].iloc[0])
current_price = int(data_open['open'].iloc[-1])
# PRINT FINAL TRADE AND PORTFOLIO STATISTICS
# size of each trade and other stats
trade_size = 1
initial_capital = 100000
long_pnl = total_long_pts*50*trade_size
buyhold_pnl = (current_price-starting_price)*50*trade_size
strat_pct_return = float(100 * long_pnl / initial_capital)
buyhold_pct_return = float(100 * buyhold_pnl / initial_capital)
print("Trade Size: 1 contract")
print("Total Number of Completed Trades: ",df_test['status'].sum(axis=0,skipna=True))
print("Total LONG Pts: ",total_long_pts)
print("")
# PnL Statistics -- Strategy vs Buy-and-Hold the Underlying
print("***** PnL Since Inception *****")
print("")
print("LONG Trades PnL: $",long_pnl)
print("Portfolio Value at the End of Hybrid Strategy: $",int(initial_capital+long_pnl))
print("Model Return: ",strat_pct_return,"%")
print("")
print("***** Benchmark Buy & Hold PnL *****")
print("")
print("Buy & Hold PnL: $",buyhold_pnl)
print("Portfolio Value at the End of Buy and Hold Strategy: $",int(initial_capital+buyhold_pnl))
print("Buy and Hold Strategy Return: ",buyhold_pct_return,"%")
print("")
# LONG Trade Signal PnL Statistics
print("LONG Signals PnL Stats (in $)")
print("")
print((50*trade_size*df_mod['pnl']).describe())
print("")
ymin = -4000
ymax = 11000
plt.figure(figsize=(20,8))
plt.plot(df_mod['pnl']*50)
plt.axhline(y=0, linestyle='dashed', color='red')
x1,x2,y1,y2 = plt.axis()
plt.axis((x1,x2,ymin,ymax))
plt.title("Individual Trade PnL - LONG TRADES | E-mini S&P 500 Futures")
plt.show()
"""# **PnL Statistics - SHORT Signals on E-mini S&P 500 Futures**
* Profit/Loss analysis of SHORT signal trades and overall model on the E-mini S&P 500 futures
"""
# SHORT Trade Signals
fast_ma_short = vbt.MA.run(px, 1, short_name='fast')
slow_ma_short = vbt.MA.run(px, 5, short_name='slow')
dmac_entries_short = fast_ma_short.ma_crossed_below(slow_ma_short)
dmac_exits_short = fast_ma_short.ma_crossed_above(slow_ma_short)
# Build portfolio, which internally calculates the equity curve
dmac_short_pf = vbt.Portfolio.from_signals(px, dmac_entries_short, dmac_exits_short)
# Storing the trades/trade log in a DataFrame
df_test_short = pd.DataFrame(dmac_short_pf.trades.records)
# Modifying the trades in the DataFrame to adjust for Heikin Ashi Factor and Slippage
df_mod_short = df_test_short.copy(deep=True)
slippage_short = 7
df_mod_short['entry_price'] = df_mod_short['entry_price'] - slippage_short
df_mod_short['exit_price'] = df_mod_short['exit_price'] + slippage_short
df_mod_short['pnl'] = df_mod_short['exit_price']-df_mod_short['entry_price']
df_mod_short['pnl'] = -1*df_mod_short['pnl']
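# Note: from_signals() books these trades as longs by default (buy on the
# down-cross, sell on the up-cross), so the recorded pnl carries the opposite
# sign of a true short position; negating it converts the log to short-trade PnL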
# Calculating the total POINTS the strategy made (not in dollars, since we are using futures)
total_short_pts = df_mod_short['pnl'].sum()
short_trade_size = 1
short_pnl = total_short_pts*50*short_trade_size
# Plotting the Equity Curve for SHORT Trade Signals
df_mod_short['cumulative_pnl_points'] = df_mod_short['pnl'].cumsum()
df_mod_short['cumulative_pnl_usd'] = df_mod_short['cumulative_pnl_points']*50.0
df_mod_short['cumulative_pnl_usd'].plot(figsize=(10,4),xlabel='Trades',ylabel='Cumulative Profit (in USD)',title='Cumulative Profit/Loss in USD | SHORT Trades | E-mini S&P 500 Futures | HYBRID MODEL')
# Distribution of Returns for SHORT Trade Signals
y = df_mod_short['return']
plt.figure(figsize=(10,8))
plt.hist(y, bins=70)
plt.xlabel('Returns (per each trade)')
plt.ylabel('Number of Trades')
plt.ylim(0,150)
plt.title('Distribution of Trade Returns | SHORT Trades | E-mini S&P 500 Futures | HYBRID MODEL')
plt.show()
# SHORT Trade Signal PnL Statistics
print("SHORT Signals PnL Stats (in $)")
print("")
print((50*trade_size*df_mod_short['pnl']).describe())
print("")
ymin = -4000
ymax = 25000
plt.figure(figsize=(20,8))
plt.plot(df_mod_short['pnl']*50)
#plt.axhline(y=100, linestyle='dashed', color='black')
plt.axhline(y=0, linestyle='dashed', color='red') #y=150 for NQ
#plt.axhline(y=300, linestyle='dashed', color='blue')
x1,x2,y1,y2 = plt.axis()
plt.axis((x1,x2,ymin,ymax))
plt.title("Individual Trade PnL - SHORT TRADES | E-mini S&P 500 Futures")
plt.show()
"""# **MODEL RESULTS ON EURODOLLAR FUTURES**
##### Now we will test our final model and its robustness on the Eurodollar Futures market. This market has a completely different microstructure and moves very slowly
"""
ge = tv.get_hist(symbol='GE',exchange='CME',interval=Interval.in_weekly,n_bars=5000,fut_contract=1)
ge_px = ge.drop(columns=['symbol', 'volume']) # drop the symbol and volume columns (not needed in the analysis)
#Converting candlesticks to Heikin Ashi
hadf = heikin_ashi(ge_px)
data_open_ge = pd.DataFrame(hadf['open'])
data_open_ge.columns = ['open_hadf']
data_open_ge['open'] = pd.DataFrame(ge_px['open'])
# BACKTESTING SETUP
# Portfolio settings for VectorBT
px_ge = pd.to_numeric(data_open_ge['open_hadf'],errors='coerce')
vbt.settings.portfolio['init_cash'] = 100000. # $100000
vbt.settings.portfolio['size'] = 1
# Moving averages for VectorBT
fast_ma_long_ge = vbt.MA.run(px_ge, 1, short_name='fast')
slow_ma_long_ge = vbt.MA.run(px_ge, 5, short_name='slow')
# Generate LONG crossover signals
dmac_entries_long_ge = fast_ma_long_ge.ma_crossed_above(slow_ma_long_ge)
dmac_exits_long_ge = fast_ma_long_ge.ma_crossed_below(slow_ma_long_ge)
# Build portfolio, which internally calculates the equity curve
# Trade size follows the portfolio settings above (1 contract per signal)
# No freq argument is needed here because the data is already time-indexed
dmac_long_pf_ge = vbt.Portfolio.from_signals(px_ge, dmac_entries_long_ge, dmac_exits_long_ge)
# Storing the trades/trade log in a DataFrame
df_test_ge = pd.DataFrame(dmac_long_pf_ge.trades.records)
# Modifying the trades in the DataFrame to adjust for Heikin Ashi Factor and Slippage
df_mod_ge = df_test_ge.copy(deep=True)
slippage_long_ge = 0
df_mod_ge['entry_price'] = df_mod_ge['entry_price'] + slippage_long_ge
df_mod_ge['exit_price'] = df_mod_ge['exit_price'] - slippage_long_ge
df_mod_ge['pnl'] = df_mod_ge['exit_price']-df_mod_ge['entry_price']
# Calculating the total POINTS the strategy made (not in dollars, since we are using futures)
total_long_pts_ge = df_mod_ge['pnl'].sum()
trade_size_ge = 1
long_pnl_ge = total_long_pts_ge*2500*trade_size_ge
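# Eurodollar (GE) futures are quoted as 100 minus the 3-month rate on a
# $1,000,000 notional, so one basis point is worth $25 and one full price
# point is 100 bp x $25 = $2,500 per contract -- hence the 2500 multiplier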
"""# **PnL Statistics - LONG Signals on Eurodollar Futures**
* Profit/Loss analysis of LONG signal trades and overall model on the Eurodollar futures
"""
# Plotting the Equity Curve for LONG Trade Signals - Eurodollar futures
df_mod_ge['cumulative_pnl_points'] = df_mod_ge['pnl'].cumsum()
df_mod_ge['cumulative_pnl_usd'] = df_mod_ge['cumulative_pnl_points']*2500.0
df_mod_ge['cumulative_pnl_usd'].plot(figsize=(10,4),xlabel='Trades',ylabel='Cumulative Profit (in USD)',title='Cumulative Profit/Loss in USD - LONG TRADES - HYBRID MODEL | Eurodollar Futures')
# Plotting the distribution of returns for LONG Trade Signals - Eurodollar Futures
x_ge = df_mod_ge['return']
plt.figure(figsize=(10,8))
plt.hist(x_ge, bins=40)
plt.xlabel('Returns (per each trade)')
plt.ylabel('Number of Trades')
plt.ylim(0,100)
plt.title('Distribution of LONG Trade Returns -- HYBRID MODEL | Eurodollar Futures')
plt.show()
# LONG Trade Signal PnL Statistics - Eurodollar futures
print("Trade Size:",trade_size_ge, "contract")
print("")
# total points captured using LONG trades (adjusted for commissions and slippage)
print("Total LONG Pts: ",total_long_pts_ge)
print("LONG Trades Pnl: ", long_pnl_ge,"USD")
print("")
# general trade analysis
print("PnL Stats for LONG Eurodollar (in $)")
print("")
print((2500*trade_size_ge*df_mod_ge['pnl']).describe())
print("")
ymin = -4000
ymax = 11000
plt.figure(figsize=(20,8))
plt.plot(df_mod_ge['pnl']*2500)
plt.axhline(y=0, linestyle='dashed', color='red')
x1,x2,y1,y2 = plt.axis()
plt.axis((x1,x2,ymin,ymax))
plt.title("Individual Trade PnL - LONG TRADES | Eurodollar Futures")
plt.show()
"""# **PnL Statistics - SHORT Signals on Eurodollar Futures**
* Profit/Loss analysis of SHORT signal trades and overall model on the Eurodollar futures
"""
# Moving averages for VectorBT
fast_ma_short_ge = vbt.MA.run(px_ge, 1, short_name='fast')
slow_ma_short_ge = vbt.MA.run(px_ge, 5, short_name='slow')
# Generate SHORT crossover signals
dmac_entries_short_ge = fast_ma_short_ge.ma_crossed_below(slow_ma_short_ge)
dmac_exits_short_ge = fast_ma_short_ge.ma_crossed_above(slow_ma_short_ge)
# Build portfolio, which internally calculates the equity curve
dmac_short_pf_ge = vbt.Portfolio.from_signals(px_ge, dmac_entries_short_ge, dmac_exits_short_ge)
# Storing the trades/trade log in a DataFrame
df_test_short_ge = pd.DataFrame(dmac_short_pf_ge.trades.records)
# Modifying the trades in the DataFrame to adjust for Heikin Ashi Factor and Slippage
df_mod_short_ge = df_test_short_ge.copy(deep=True)
slippage_short_ge = 0
df_mod_short_ge['entry_price'] = df_mod_short_ge['entry_price'] - slippage_short_ge
df_mod_short_ge['exit_price'] = df_mod_short_ge['exit_price'] + slippage_short_ge
df_mod_short_ge['pnl'] = df_mod_short_ge['exit_price']-df_mod_short_ge['entry_price']
df_mod_short_ge['pnl'] = -1*df_mod_short_ge['pnl']
# Calculating the total POINTS the strategy made (not in dollars, since we are using futures)
total_short_pts_ge = df_mod_short_ge['pnl'].sum()
short_trade_size_ge = 1
short_pnl_ge = total_short_pts_ge*2500*short_trade_size_ge
# Plotting the Equity Curve for SHORT Trade Signals - Eurodollar futures
df_mod_short_ge['cumulative_pnl_points'] = df_mod_short_ge['pnl'].cumsum()
df_mod_short_ge['cumulative_pnl_usd'] = df_mod_short_ge['cumulative_pnl_points']*2500.0
df_mod_short_ge['cumulative_pnl_usd'].plot(figsize=(10,4),xlabel='Trades',ylabel='Cumulative Profit (in USD)',title='Cumulative Profit/Loss in USD - SHORT TRADES - HYBRID MODEL | Eurodollar Futures')
# SHORT Trade Signal PnL Statistics - Eurodollar futures
print("Trade Size:",short_trade_size_ge, "contract")
print("")
# total points captured using SHORT trades (adjusted for commissions and slippage)
print("Total SHORT Pts: ", total_short_pts_ge)
print("SHORT Trades PnL: ", short_pnl_ge, "USD")
print("")
# general trade analysis
print("PnL Stats for SHORT Eurodollar Signals (in $)")
print("")
print((2500*short_trade_size_ge*df_mod_short_ge['pnl']).describe())
print("")
ymin = -4000
ymax = 6000
plt.figure(figsize=(20,8))
plt.plot(df_mod_short_ge['pnl']*2500)
plt.axhline(y=0, linestyle='dashed', color='red') #y=150 for NQ
plt.title("Individual Trade PnL - SHORT TRADES | Eurodollar Futures")
x1,x2,y1,y2 = plt.axis()
plt.axis((x1,x2,ymin,ymax))
plt.show()
# Plotting the distribution of returns for SHORT Trade Signals - Eurodollar Futures
y_ge = df_mod_short_ge['return']
plt.figure(figsize=(10,8))
plt.hist(y_ge, bins=40)
plt.xlabel('Returns (per each trade)')
plt.ylabel('Number of Trades')
plt.ylim(0,100)
plt.title('Distribution of SHORT Trade Returns | Eurodollar Futures | HYBRID MODEL')
plt.show()