Fixed backtest.py so it no longer relies on .results() being run. Added dynamic setting of rows and columns for machine learning (wrapper.py and interface.py) for ease of use when integrating into outside methods. Updated sample.py and README.md to show how to use the new functionality.
MaxwellMendenhall · Apr 7, 2024 · f3b3c30 · f3b3c30
1 parent 26bcc48
commit f3b3c30
Show file tree

Hide file tree

Showing 6 changed files with 68 additions and 47 deletions.
diff --git a/README.md b/README.md
@@ -49,15 +49,17 @@ After you have your data-frame prepped you can make an instance of the strategy
 ```python
 strategy = InvertedHammer()
 backtest = Backtest(df, strategy)
-print(backtest.results())
+print(backtest.get_results())
 ```
 
 After that, we have all the data we need for machine learning to take place. Just declare an instance of the machine learning class and pass the needed info into it. The machine learning takes place when the `run` function is called on the class. We can dump the model (saving the model to be used as a standalone file) with the `dump_model` function.
 
 ```python
 ml = MachineLearning(ml_class=RandomForestRegressorTrainer,
-                         df=df,
-                         results=backtest.get_trades())
+                    df=df,
+                    results=backtest.get_trades(),
+                    rows=10,
+                    columns=['EMA_Diff', 'SMA_Diff', 'MACD_hist'])
 ml.run(dp_pattern=CandleStickDataProcessing.calculate_inverted_hammer_features)
 ml.dump_model(filename='YOUR FILE NAME')
 ```
@@ -69,7 +71,7 @@ model, columns, rows = ml.get_util()
 data = ml.get_data()
 
 ml_backtest = Backtest(data, strategy, model=model, columns=columns, rows=rows, cs_pattern=True)
-print(ml_backtest.results())
+print(ml_backtest.get_results())
 ```
 
 That's it! You should see output similar to the outputs provided above. A more in-depth _**how to use**_ guide to customize your machine learning and strategy can be found below.

diff --git a/ml_backtest/backtest/backtest.py b/ml_backtest/backtest/backtest.py
@@ -34,6 +34,10 @@ def __init__(self, data: pd.DataFrame, strategy: Strategy,
         self.__columns = columns
         self.__rows = rows
         self.__cs_pattern = cs_pattern
+        self.__backtest = None
+
+        self.__run()
+        self.__results()
 
     def __run(self):
         dates = self.__data['date'].values
@@ -55,7 +59,7 @@ def __run(self):
         for index in tqdm(range(len(dates)), total=len(dates)):
             # Assuming strategy.on_data can be adapted or is simplified for demonstration
             self.__strategy.on_data(index, lows[:index + 1], highs[:index + 1],
-                                  close[:index + 1], open[:index + 1], dates[:index + 1])
+                                    close[:index + 1], open[:index + 1], dates[:index + 1])
 
             for position in list(self.__strategy.positions):  # Iterate over a shallow copy if positions are modified
                 exit_price = None
@@ -129,33 +133,36 @@ def __close_position(self, position, current_price):
         self.__completed_trades.append(position)
         self.__strategy.in_position = False
 
-    def results(self) -> pd.DataFrame:
-        self.__run()
+    def __results(self) -> pd.DataFrame:
 
         wins = self.__short_wins + self.__long_wins
         loses = self.__short_loses + self.__long_loses
-        backtest_result = [{'start time': self.__start_time,
-                            'end time': self.__end_time,
-                            '# of trades': self.__trade_counter,
-                            '# of wins': wins,
-                            '# of loses': loses,
-                            'win rate': f'{np.around((wins / (wins + loses)) * 100, decimals=2)}%',
-                            '# of long wins': self.__long_wins,
-                            '# of long loses': self.__long_loses,
-                            '# of long evens': self.__long_evens,
-                            '# of short wins': self.__short_wins,
-                            '# of short loses': self.__short_loses,
-                            '# of short evens': self.__short_evens,
-                            'net profit': np.around(self.__gross_profit + self.__gross_loss, decimals=2),
-                            'max drawdown': f'-{np.around(self.__max_drawdown, decimals=2)}',
-                            'gross profit': np.around(self.__gross_profit, decimals=2),
-                            'gross loss': np.around(self.__gross_loss, decimals=2),
-                            'profit factor': np.around(self.__gross_profit / abs(self.__gross_loss), decimals=2)}]
-
-        backtest_df = pd.DataFrame(backtest_result)
-        backtest_df = backtest_df.T
-        return backtest_df
+        self.__backtest_result = [{'start time': self.__start_time,
+                                   'end time': self.__end_time,
+                                   '# of trades': self.__trade_counter,
+                                   '# of wins': wins,
+                                   '# of loses': loses,
+                                   'win rate': f'{np.around((wins / (wins + loses)) * 100, decimals=2)}%',
+                                   '# of long wins': self.__long_wins,
+                                   '# of long loses': self.__long_loses,
+                                   '# of long evens': self.__long_evens,
+                                   '# of short wins': self.__short_wins,
+                                   '# of short loses': self.__short_loses,
+                                   '# of short evens': self.__short_evens,
+                                   'net profit': np.around(self.__gross_profit + self.__gross_loss, decimals=2),
+                                   'max drawdown': f'-{np.around(self.__max_drawdown, decimals=2)}',
+                                   'gross profit': np.around(self.__gross_profit, decimals=2),
+                                   'gross loss': np.around(self.__gross_loss, decimals=2),
+                                   'profit factor': np.around(self.__gross_profit / abs(self.__gross_loss),
+                                                              decimals=2)}]
+
+        self.__backtest_df = pd.DataFrame(self.__backtest_result)
+        self.__backtest_df = self.__backtest_df.T
+        return self.__backtest_df
 
     def get_trades(self) -> pd.DataFrame:
         completed_trades_df = pd.DataFrame(self.__completed_trades)
         return completed_trades_df
+
+    def get_results(self) -> pd.DataFrame:
+        return self.__backtest_df
diff --git a/ml_backtest/interfaces/interface.py b/ml_backtest/interfaces/interface.py
@@ -3,13 +3,15 @@
 from datetime import datetime
 from typing import final
 from sklearn.base import BaseEstimator
-from typing import Optional
+from typing import Optional, List
 import numpy as np
 
 
 class MachineLearningInterface:
 
-    def __init__(self, data: pd.DataFrame):
+    def __init__(self, data: pd.DataFrame,
+                 rows: Optional[int] = None,
+                 columns: Optional[List[str]] = None):
 
         if type(self).get_model != MachineLearningInterface.get_model:
             raise TypeError("get_model method should not be overridden")
@@ -18,8 +20,8 @@ def __init__(self, data: pd.DataFrame):
             self.data = data
             self.model = None
             self.predictions = None
-            self.get_rows = 10
-            self.get_columns = ['Close']
+            self.get_rows = rows if rows is not None else 10
+            self.get_columns = columns if columns is not None else ['Close']
 
         else:
             print('Data being passed into MachineLearningWorker is not a list of type DataContainer.')
@@ -59,6 +61,7 @@ def predict(self, x_train, y_train, x_test, y_test):
         """
         raise NotImplementedError("This method must be implemented by a subclass")
 
+    @final
     def get_model(self):
         """
         Returns the model used in training.

diff --git a/ml_backtest/machine_learning/wrapper.py b/ml_backtest/machine_learning/wrapper.py
@@ -3,7 +3,7 @@
 from ml_backtest.interfaces import MachineLearningInterface, TargetInterface
 from sklearn.model_selection import train_test_split
 from ml_backtest.machine_learning import DataProcessing
-from typing import Type
+from typing import Type, List, Optional
 import pandas as pd
 import os
 
@@ -12,14 +12,20 @@ class MachineLearning:
 
     def __init__(self, ml_class: Type[MachineLearningInterface],
                  df: pd.DataFrame, results: pd.DataFrame,
-                 target_class: Type[TargetInterface] = None):
+                 target_class: Type[TargetInterface] = None,
+                 rows: Optional[int] = None,
+                 columns: Optional[List[str]] = None):
         self.__df = df
         self.__results = results
+
         if target_class is not None:
             self.__target_class = target_class(trades=self.__results, data=self.__df)
             self.__target_class.target_engineer()
             self.__results = self.__target_class.trades
-        self.__ml = ml_class(self.__df)
+        if rows is not None and columns is not None:
+            self.__ml = ml_class(data=self.__df, rows=rows, columns=columns)
+        else:
+            self.__ml = ml_class(data=self.__df)
 
     def run(self, dp_pattern=None) -> None:
         self.__ml.feature_engineer()

diff --git a/ml_backtest/models/rfr.py b/ml_backtest/models/rfr.py
@@ -3,20 +3,21 @@
 from sklearn.metrics import mean_squared_error
 import pandas as pd
 import talib
+from typing import Optional, List
 
 
 class RandomForestRegressorTrainer(MachineLearningInterface):
 
-    def __init__(self, data: pd.DataFrame):
-        super().__init__(data)
-        # this is the number of rows before each target you want trained
-        self.get_rows = 10
-        # these are the columns you want trained, in my case I want the column
-        # that is already there and a column I am adding in the feature_engineer()
-        # method
+    def __init__(self, data: pd.DataFrame, rows: Optional[int] = None,
+                 columns: Optional[List[str]] = None):
+        super().__init__(data,  rows, columns)
+        # Additional initialization specific to RandomForestRegressorTrainer can go here.
+        # For example, setting up model-specific parameters or preprocessing steps.
+        # self.model_specific_param = some_value
 
-        # 'EMA_Diff', 'SMA_Diff', 'RSI', 'MACD', 'MACD_signal', 'MACD_hist'
-        self.get_columns = ['EMA_Diff', 'SMA_Diff', 'MACD_hist']
+        # If there's additional setup required for the RandomForest model,
+        # that doesn't fit the pattern provided by MachineLearningInterface,
+        # it can be performed here.
 
     def feature_engineer(self):
         # here is where you can add addition columns of features you want to be used in training

diff --git a/sample.py b/sample.py
@@ -15,16 +15,18 @@
     strategy = InvertedHammer()
 
     backtest = Backtest(Data.data(), strategy)
-    print(backtest.results())
+    print(backtest.get_results())
 
     ml = MachineLearning(ml_class=RandomForestRegressorTrainer,
                          df=Data.data(),
-                         results=backtest.get_trades())
+                         results=backtest.get_trades(),
+                         rows=10,
+                         columns=['EMA_Diff', 'SMA_Diff', 'MACD_hist'])
     ml.run(dp_pattern=CandleStickDataProcessing.calculate_inverted_hammer_features)
     ml.dump_model(filename='YOUR MODEL FILE NAME')
     model, columns, rows = ml.get_util()
     data = ml.get_data()
 
     # make sure to pass ml.get_data() in as the data for ml backtesting
     ml_backtest = Backtest(data, strategy, model=model, columns=columns, rows=rows, cs_pattern=True)
-    print(ml_backtest.results())
+    print(ml_backtest.get_results())