Skip to content

Commit

Permalink
Fixed backtest.py to not rely on .results() to be ran. Added dynamica…
Browse files Browse the repository at this point in the history
…lly setting rows and columns for machine learning (wrapper.py and interface.py) for ease of use in implementing into outside methods. Updated sample.py and README.md to show how to use the new functionality.
  • Loading branch information
MaxwellMendenhall committed Apr 7, 2024
1 parent 26bcc48 commit f3b3c30
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 47 deletions.
10 changes: 6 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,17 @@ After you have your data-frame prepped you can make an instance of the strategy
```python
strategy = InvertedHammer()
backtest = Backtest(df, strategy)
print(backtest.results())
print(backtest.get_results())
```

After that, we have all the data we need for machine learning to take place. Just declare an instance of the machine learning class and pass the needed information into it. The machine learning takes place when the `run` function is called on the class. We can dump the model (saving it to a standalone file for later use) with the `dump_model` function.

```python
ml = MachineLearning(ml_class=RandomForestRegressorTrainer,
df=df,
results=backtest.get_trades())
df=df,
results=backtest.get_trades(),
rows=10,
columns=['EMA_Diff', 'SMA_Diff', 'MACD_hist'])
ml.run(dp_pattern=CandleStickDataProcessing.calculate_inverted_hammer_features)
ml.dump_model(filename='YOUR FILE NAME')
```
Expand All @@ -69,7 +71,7 @@ model, columns, rows = ml.get_util()
data = ml.get_data()

ml_backtest = Backtest(data, strategy, model=model, columns=columns, rows=rows, cs_pattern=True)
print(ml_backtest.results())
print(ml_backtest.get_results())
```

That's it! You should see output similar to the examples provided above. A more in-depth _**how to use**_ guide for customizing your machine learning and strategy can be found below.
Expand Down
55 changes: 31 additions & 24 deletions ml_backtest/backtest/backtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ def __init__(self, data: pd.DataFrame, strategy: Strategy,
self.__columns = columns
self.__rows = rows
self.__cs_pattern = cs_pattern
self.__backtest = None

self.__run()
self.__results()

def __run(self):
dates = self.__data['date'].values
Expand All @@ -55,7 +59,7 @@ def __run(self):
for index in tqdm(range(len(dates)), total=len(dates)):
# Assuming strategy.on_data can be adapted or is simplified for demonstration
self.__strategy.on_data(index, lows[:index + 1], highs[:index + 1],
close[:index + 1], open[:index + 1], dates[:index + 1])
close[:index + 1], open[:index + 1], dates[:index + 1])

for position in list(self.__strategy.positions): # Iterate over a shallow copy if positions are modified
exit_price = None
Expand Down Expand Up @@ -129,33 +133,36 @@ def __close_position(self, position, current_price):
self.__completed_trades.append(position)
self.__strategy.in_position = False

def __results(self) -> pd.DataFrame:
    """Build and cache the summary statistics table for the backtest.

    Combines the win/loss counters and profit figures stored on the
    instance into a single-column DataFrame whose index holds the metric
    names. The table is stored on the instance (it is what
    ``get_results`` hands back) and is also returned.

    The two ratio metrics are guarded against a zero denominator so that
    a run with no decided trades, or with no losing trades, still yields
    a table instead of raising ``ZeroDivisionError``:

    * ``win rate`` is reported as ``0.0%`` when there are no wins or
      losses to compare.
    * ``profit factor`` is ``inf`` when there is profit but no loss, and
      ``0.0`` when there is neither.

    :return: transposed one-row DataFrame of backtest metrics.
    """
    wins = self.__short_wins + self.__long_wins
    loses = self.__short_loses + self.__long_loses

    # Trades that ended even are not part of the win-rate denominator.
    decided = wins + loses
    win_rate = (wins / decided) * 100 if decided else 0.0

    # Gross loss is accumulated as a signed value, hence the abs().
    loss_magnitude = abs(self.__gross_loss)
    if loss_magnitude:
        profit_factor = self.__gross_profit / loss_magnitude
    elif self.__gross_profit > 0:
        profit_factor = float('inf')
    else:
        profit_factor = 0.0

    self.__backtest_result = [{
        'start time': self.__start_time,
        'end time': self.__end_time,
        '# of trades': self.__trade_counter,
        '# of wins': wins,
        '# of loses': loses,
        'win rate': f'{np.around(win_rate, decimals=2)}%',
        '# of long wins': self.__long_wins,
        '# of long loses': self.__long_loses,
        '# of long evens': self.__long_evens,
        '# of short wins': self.__short_wins,
        '# of short loses': self.__short_loses,
        '# of short evens': self.__short_evens,
        'net profit': np.around(self.__gross_profit + self.__gross_loss, decimals=2),
        'max drawdown': f'-{np.around(self.__max_drawdown, decimals=2)}',
        'gross profit': np.around(self.__gross_profit, decimals=2),
        'gross loss': np.around(self.__gross_loss, decimals=2),
        'profit factor': np.around(profit_factor, decimals=2),
    }]

    # One dict -> one row; transpose so each metric becomes its own row.
    self.__backtest_df = pd.DataFrame(self.__backtest_result).T
    return self.__backtest_df

def get_trades(self) -> pd.DataFrame:
    """Return the trades completed during the backtest as a DataFrame.

    A fresh DataFrame is constructed from the instance's list of
    completed trades on every call.

    :return: DataFrame built from the completed-trades list.
    """
    return pd.DataFrame(self.__completed_trades)

def get_results(self) -> pd.DataFrame:
    """Return the summary statistics table computed for this backtest.

    The table is the one cached on the instance when the results were
    built; the same object is returned on every call (no copy is made).

    :return: the cached backtest summary DataFrame.
    """
    summary = self.__backtest_df
    return summary
11 changes: 7 additions & 4 deletions ml_backtest/interfaces/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@
from datetime import datetime
from typing import final
from sklearn.base import BaseEstimator
from typing import Optional
from typing import Optional, List
import numpy as np


class MachineLearningInterface:

def __init__(self, data: pd.DataFrame):
def __init__(self, data: pd.DataFrame,
rows: Optional[int] = None,
columns: Optional[List[str]] = None):

if type(self).get_model != MachineLearningInterface.get_model:
raise TypeError("get_model method should not be overridden")
Expand All @@ -18,8 +20,8 @@ def __init__(self, data: pd.DataFrame):
self.data = data
self.model = None
self.predictions = None
self.get_rows = 10
self.get_columns = ['Close']
self.get_rows = rows if rows is not None else 10
self.get_columns = columns if columns is not None else ['Close']

else:
print('Data being passed into MachineLearningWorker is not a list of type DataContainer.')
Expand Down Expand Up @@ -59,6 +61,7 @@ def predict(self, x_train, y_train, x_test, y_test):
"""
raise NotImplementedError("This method must be implemented by a subclass")

@final
def get_model(self):
"""
Returns the model used in training.
Expand Down
12 changes: 9 additions & 3 deletions ml_backtest/machine_learning/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from ml_backtest.interfaces import MachineLearningInterface, TargetInterface
from sklearn.model_selection import train_test_split
from ml_backtest.machine_learning import DataProcessing
from typing import Type
from typing import Type, List, Optional
import pandas as pd
import os

Expand All @@ -12,14 +12,20 @@ class MachineLearning:

def __init__(self, ml_class: Type[MachineLearningInterface],
             df: pd.DataFrame, results: pd.DataFrame,
             target_class: Type[TargetInterface] = None,
             rows: Optional[int] = None,
             columns: Optional[List[str]] = None):
    """Wire a trainer class to the price data and the backtest trades.

    :param ml_class: trainer class to instantiate; it is called with
        ``data=df`` plus whichever of ``rows`` / ``columns`` were given.
    :param df: price/feature data handed to the trainer.
    :param results: trades produced by a backtest.
    :param target_class: optional target-engineering class. When given it
        is built with ``trades`` and ``data``, ``target_engineer()`` is
        run, and its ``trades`` attribute replaces ``results``.
    :param rows: optional override for the number of rows the trainer
        uses; omitted from the trainer call when ``None``.
    :param columns: optional override for the columns the trainer uses;
        omitted from the trainer call when ``None``.
    """
    self.__df = df
    self.__results = results

    if target_class is not None:
        self.__target_class = target_class(trades=self.__results, data=self.__df)
        self.__target_class.target_engineer()
        self.__results = self.__target_class.trades

    # Forward each override independently so that supplying only one of
    # them is honoured rather than silently ignored. Overrides left as
    # None are not passed at all, which keeps trainer classes whose
    # constructor only accepts ``data`` working.
    overrides = {}
    if rows is not None:
        overrides['rows'] = rows
    if columns is not None:
        overrides['columns'] = columns
    self.__ml = ml_class(data=self.__df, **overrides)

def run(self, dp_pattern=None) -> None:
self.__ml.feature_engineer()
Expand Down
19 changes: 10 additions & 9 deletions ml_backtest/models/rfr.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,21 @@
from sklearn.metrics import mean_squared_error
import pandas as pd
import talib
from typing import Optional, List


class RandomForestRegressorTrainer(MachineLearningInterface):

def __init__(self, data: pd.DataFrame, rows: Optional[int] = None,
             columns: Optional[List[str]] = None):
    """Create the trainer by delegating all setup to the base interface.

    :param data: DataFrame forwarded unchanged to the base constructor.
    :param rows: optional number of rows before each target to train on;
        forwarded unchanged to the base constructor.
    :param columns: optional list of column names to train on; forwarded
        unchanged to the base constructor.
    """
    # Nothing model-specific is configured here; any extra setup unique
    # to the random-forest trainer would go after this call.
    super().__init__(data=data, rows=rows, columns=columns)

def feature_engineer(self):
# here is where you can add additional feature columns that you want to be used in training
Expand Down
8 changes: 5 additions & 3 deletions sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,18 @@
strategy = InvertedHammer()

backtest = Backtest(Data.data(), strategy)
print(backtest.results())
print(backtest.get_results())

ml = MachineLearning(ml_class=RandomForestRegressorTrainer,
df=Data.data(),
results=backtest.get_trades())
results=backtest.get_trades(),
rows=10,
columns=['EMA_Diff', 'SMA_Diff', 'MACD_hist'])
ml.run(dp_pattern=CandleStickDataProcessing.calculate_inverted_hammer_features)
ml.dump_model(filename='YOUR MODEL FILE NAME')
model, columns, rows = ml.get_util()
data = ml.get_data()

# make sure to pass ml.get_data() in as the data for ml backtesting
ml_backtest = Backtest(data, strategy, model=model, columns=columns, rows=rows, cs_pattern=True)
print(ml_backtest.results())
print(ml_backtest.get_results())

0 comments on commit f3b3c30

Please sign in to comment.