# ===== Jerry's tests/ASQ_Backtest_Jerry.ipynb (code cells rendered as plain Python) =====
import os
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from tqdm import tqdm  # progress bar for the backtest loop
import scipy
import scipy.optimize  # explicit: a bare `import scipy` does not reliably expose the submodule
import seaborn as sns


def load_data(target='LDOBUSD', month='2023-06'):
    """Load one month of bookTicker quotes for `target` and normalise the column names.

    Args:
        target: Symbol used in the CSV file name, e.g. 'LDOBUSD'.
        month: 'YYYY-MM' part of the file name. JF's version used 2023-05; this
            notebook defaults to the downloaded 2023-06 file. Other months (or a
            more recent week) can be tested by passing a different value.

    Returns:
        DataFrame with columns renamed to bp/bv (best bid price/qty), ap/av
        (best ask price/qty) and time, plus a 'trade_date' column holding the
        calendar day derived from `time` (treated as milliseconds since epoch).
    """
    data = pd.read_csv(f'../bookticker/{target}-bookTicker-{month}.csv')
    data = data.rename(columns={
        'best_bid_price': 'bp',
        'best_bid_qty': 'bv',
        'best_ask_price': 'ap',
        'best_ask_qty': 'av',
        'event_time': 'time',
    })
    # ms -> s -> min -> h -> whole days since epoch
    data['trade_date'] = pd.to_datetime(data['time'] // 1000 // 60 // 60 // 24, unit='d')
    return data


# Cell note kept verbatim below. Translation: "Five fields are used: best bid/ask
# price and quantity, and time."
"""
用到5种数据,买一/卖一的价格和数量,时间
"""


def get_market_speed(data: pd.DataFrame, price_int: float):
    """Fit the market (taker) order arrival rate lambda(delta) = A * exp(-k * delta).

    `data` must be the per-bucket frame built inside `get_params`: its
    ('ap'|'bp', 'last'|'max'|'min') columns come from a groupby/agg and are not
    present in the raw quotes, so compute them first when calling this directly.

    For each distance delta in {1, ..., 10} * price_int, a quote placed delta
    beyond the bucket's last ask (bid) counts as hit when the next bucket's
    highest ask (lowest bid) moves past it. The mean gap between hits gives one
    lambda estimate per delta, and an exponential is fitted through them.

    Returns:
        (A, k) from the least-squares fit.
    """
    deltalist = np.linspace(price_int, price_int * 10, 10)
    deltadict = {}

    for delta in deltalist:
        ask_limit_order_hit = data[('ap', 'max')].shift(-1) > (data[('ap', 'last')] + delta)
        bid_limit_order_hit = data[('bp', 'min')].shift(-1) < (data[('bp', 'last')] - delta)
        limit_order_hit = ask_limit_order_hit | bid_limit_order_hit
        # Gaps between consecutive hit buckets, in index units, scaled by 1/10.
        # NOTE(review): the unit implied by the /10 factor is not evident here - confirm.
        deltadict[delta] = pd.Series(limit_order_hit[limit_order_hit].index).diff() / 10

    lambdas = pd.DataFrame(
        [[delta, 1 / gaps.mean()] for delta, gaps in deltadict.items()],
        columns=['delta', 'lambda_delta'],
    ).set_index('delta')

    def exp_fit(x, a, b):
        return a * np.exp(-b * x)

    # curve_fit(function, xdata, ydata) returns the fitted parameters of `function`
    # (and their covariance, unused here).
    fitted, _ = scipy.optimize.curve_fit(
        exp_fit, np.array(lambdas.index), np.array(lambdas['lambda_delta'].values)
    )
    A, k = fitted
    return A, k


# Cell note kept verbatim below. Translation: "This function is called in the next
# cell to compute market_speed."
"""
该函数在下一个cell中被调用,用于计算market_speed
"""


def get_params(data: pd.DataFrame, price_int: float, time_step: int):
    """Calibrate the model inputs (sigma, A, k) from raw quotes.

    Args:
        data: Quotes as returned by `load_data` (needs 'time', 'ap', 'bp').
        price_int: Price increment used as the spacing of the delta grid.
        time_step: Bucket width, in the same unit as `time`.

    Returns:
        (sigma, A, k): volatility of bucketed log mid-price changes scaled to one
        day (assuming `time` is in milliseconds), and the arrival-rate fit from
        `get_market_speed`.
    """
    data = data.copy(deep=True)

    # 1. Resample to time_step buckets (quotes cannot be refreshed arbitrarily fast):
    #    floor division merges all rows that fall in the same bucket.
    data['ms-index'] = data['time'] // time_step
    prices = data.groupby('ms-index').agg({
        'ap': ['last', 'max', 'min'],
        'bp': ['last', 'max', 'min'],
    })  # note the distinction between `prices` (bucketed) and raw prices from here on

    # 2. Average elapsed time per bucket; buckets without quotes are missing, so this
    #    can exceed time_step.
    min_index, max_index = prices.index[0], prices.index[-1]
    ave_time = time_step * (max_index - min_index) / prices.shape[0]

    # 3. Volatility of log returns of the mid price. The mid could be replaced by a
    #    custom price model that accounts for imbalance, traded volume, etc.
    prices['mid'] = (prices[('ap', 'last')] + prices[('bp', 'last')]) / 2
    sigma = np.log(prices['mid']).diff().std() * np.sqrt(24 * 60 * 60 * 1000 / ave_time)

    # 4. Market-order arrival-rate parameters, fitted on the bucketed frame built above.
    A, k = get_market_speed(prices, price_int)

    return sigma, A, k


def get_bid_spread(sigma, A, k, gamma, q):
    """Return the distance below mid at which to quote the bid for inventory `q`.

    NOTE(review): the expression looks like the Gueant-Lehalle-Fernandez-Tapia
    asymptotic approximation - confirm against the intended reference.
    """
    var1 = (1 / gamma) * np.log(1 + gamma / k)
    var2 = (2 * q + 1) / 2 * np.sqrt(sigma**2 * gamma / (2 * k * A) * (1 + gamma/k)**(1+k/gamma))
    return var1 + var2


def get_ask_spread(sigma, A, k, gamma, q):
    """Return the distance above mid at which to quote the ask for inventory `q`.

    Mirror image of `get_bid_spread`: the inventory term enters with the opposite sign.
    """
    var1 = (1 / gamma) * np.log(1 + gamma / k)
    var2 = (2 * q - 1) / 2 * np.sqrt(sigma**2 * gamma / (2 * k * A) * (1 + gamma/k)**(1+k/gamma))
    return var1 - var2


def evaluation(data: pd.DataFrame, price_int: float, time_step: int, params: dict):
    """Backtest the quoting strategy over one stretch of historical quotes.

    No inventory-limit trading rules are applied, and only the best bid/ask are
    used: the position of our order in the book's queue is not modelled.

    Args:
        data: Quotes as returned by `load_data` (needs 'time', 'ap', 'bp', 'trade_date').
        price_int: Price grid the quotes are rounded to.
        time_step: Bucket width, in the same unit as `time`.
        params: Carries the state left by the previous run ('q', 'x', 'pnl',
            'fees') together with 'sigma', 'A', 'k', 'gamma' and 'fee_rate'.

    Returns:
        (pnl, x, q, fees): NumPy arrays with one entry per bucket holding the
        mark-to-mid PnL, cash, net position and cumulative fee amount.
    """
    data = data.copy(deep=True)  # `data` is the processed CSV produced by load_data
    date = data['trade_date'].iloc[0]

    # 1. Merge quotes into time_step buckets.
    data['ms-index'] = data['time'] // time_step
    prices = data.groupby('ms-index').agg({
        'ap': ['last', 'max', 'min'],
        'bp': ['last', 'max', 'min'],
    })

    # Pull the columns out as NumPy arrays once: per-row pandas .iloc lookups inside
    # the loop dominate the run time without changing any value.
    ask_last = prices[('ap', 'last')].to_numpy()
    bid_last = prices[('bp', 'last')].to_numpy()
    ask_max = prices[('ap', 'max')].to_numpy()
    bid_min = prices[('bp', 'min')].to_numpy()
    # Reference price; could be swapped for a custom price model (dmp, etc.).
    mid = (ask_last + bid_last) / 2

    # 2. Backtest state, seeded with the result of the previous run.
    N = prices.shape[0]
    q = np.array([params['q']] + [0] * (N-1))  # net position: q > 0 long, q < 0 short
    x = np.array([params['x']] + [0] * (N-1), dtype=float)
    pnl = np.array([params['pnl']] + [0] * (N-1), dtype=float)
    fees = np.array([params['fees']] + [0] * (N-1), dtype=float)
    ra = np.zeros(N)
    rb = np.zeros(N)

    sigma = params['sigma']
    A = params['A']
    k = params['k']
    gamma = params['gamma']
    fee_rate = params['fee_rate']
    ticks_per_unit = 1 / price_int

    for i in tqdm(range(N-1), desc=f'[evaluation {date}] '):
        ask_quote = mid[i] + get_ask_spread(sigma, A, k, gamma, q[i])
        bid_quote = mid[i] - get_bid_spread(sigma, A, k, gamma, q[i])

        # Snap to the price grid and never cross the touch, so the quotes always
        # rest as maker orders and cannot turn into taker orders.
        ra[i] = max(ask_last[i], np.floor(ask_quote * ticks_per_unit) * price_int)
        rb[i] = min(bid_last[i], np.ceil(bid_quote * ticks_per_unit) * price_int)

        # A quote is treated as filled when the next bucket trades through it.
        sell = 1 if ask_max[i+1] > ra[i] else 0
        buy = 1 if bid_min[i+1] < rb[i] else 0

        # Only the net position change is recorded, not open/close signals.
        q[i+1] = q[i] + buy - sell
        x[i+1] = x[i] + sell*ra[i] - buy*rb[i]
        pnl[i+1] = x[i+1] + q[i+1] * mid[i+1]
        fees[i+1] = fees[i] + sell*ra[i] * fee_rate + buy*rb[i] * fee_rate
    return pnl, x, q, fees


"""
This cell is to set up the parameters for the backtest
"""

target = 'LDOBUSD'
fee_rate = 1.4e-4  # 1.4 / 10000; treated as a maker rebate rate here, because pnl + fees is what gets evaluated later

trade_interval = 1000  # passed to evaluation() as time_step; timestamps are in ms, so 1000 gives 1-second buckets
gamma = 0.01  # risk-aversion level; how to optimise it is still an open question
"price_int_dict = {'LDOBUSD': 0.0001, 'LTCBUSD': 0.01, 'BTCBUSD': 0.10, 'ETHBUSD': 0.01}\n", + "# price_int_dict中的是每个品种的价格变动最小单位吗?暂时认为是\n", + "price_int = price_int_dict[target]\n", + "data = load_data(target=target)\n", + "trade_dates = sorted(list(data['trade_date'].unique()))" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[evaluation 2023-05-16 00:00:00] : 100%|██████████| 33370/33370 [00:50<00:00, 655.06it/s]\n", + "[evaluation 2023-05-17 00:00:00] : 100%|██████████| 49993/49993 [01:36<00:00, 518.24it/s]\n", + "[evaluation 2023-05-18 00:00:00] : 100%|██████████| 75498/75498 [02:10<00:00, 579.07it/s]\n", + "[evaluation 2023-05-19 00:00:00] : 100%|██████████| 70253/70253 [01:52<00:00, 623.25it/s]\n", + "[evaluation 2023-05-20 00:00:00] : 100%|██████████| 56879/56879 [01:49<00:00, 519.86it/s]\n", + "[evaluation 2023-05-21 00:00:00] : 100%|██████████| 56274/56274 [01:04<00:00, 878.07it/s]\n", + "[evaluation 2023-05-22 00:00:00] : 100%|██████████| 60805/60805 [01:30<00:00, 670.07it/s]\n", + "[evaluation 2023-05-23 00:00:00] : 100%|██████████| 61767/61767 [01:46<00:00, 577.44it/s]\n", + "[evaluation 2023-05-24 00:00:00] : 100%|██████████| 67487/67487 [01:16<00:00, 882.60it/s]\n", + "[evaluation 2023-05-25 00:00:00] : 100%|██████████| 66971/66971 [01:51<00:00, 599.55it/s]\n", + "[evaluation 2023-05-26 00:00:00] : 100%|██████████| 67846/67846 [02:09<00:00, 521.94it/s]\n", + "[evaluation 2023-05-27 00:00:00] : 100%|██████████| 59954/59954 [01:55<00:00, 517.70it/s]\n", + "[evaluation 2023-05-28 00:00:00] : 100%|██████████| 66805/66805 [02:07<00:00, 522.33it/s]\n", + "[evaluation 2023-05-29 00:00:00] : 100%|██████████| 69053/69053 [02:12<00:00, 520.86it/s]\n", + "[evaluation 2023-05-30 00:00:00] : 100%|██████████| 64900/64900 [01:42<00:00, 632.71it/s]\n" + ] + } + ], + "source": [ + "last_q = 0\n", + "last_x = 0\n", + "last_pnl = 0\n", + "last_fees = 0\n", + 
"\n", + "results = [[], [], [], []]\n", + "for i in range(len(trade_dates) - 1):\n", + " valid_data = data[data['trade_date'] == trade_dates[i]]\n", + " test_data = data[data['trade_date'] == trade_dates[i]]\n", + "\n", + " sigma, A, k = get_params(valid_data, time_step=trade_interval, price_int=price_int)\n", + " params = {'q': last_q, 'x': last_x, 'pnl': last_pnl, 'sigma': sigma, 'A': A, 'k': k, 'gamma': gamma, 'fees': last_fees, 'fee_rate': fee_rate}\n", + " pnl, x, q, fees = evaluation(test_data, price_int=price_int, time_step=trade_interval, params=params)\n", + " # 这里的time_step传入的值是在上一个cell中的trade_interval, 代表每次回测的时间间隔,单位是毫秒,trade_interval=1000时,代表1000毫秒/1秒\n", + " # 这里的price_int传入的值是在上一个cell中的price_int_dict[target],代表每个品种的价格变动最小单位,单位是美元,price_int_dict[target] = 0.0001时,代表0.0001美元\n", + " # 这里的params传入的值是在上一个cell中的params,代表上一次回测的结果,包括q, x, pnl, sigma, A, k, gamma, fees, fee_rate\n", + " # evaluation返回的结果是pnl, x, q, fees的numpy array数组,分别代表每个时间点的pnl, x, q, fees\n", + " results[0].append(pnl)\n", + " results[1].append(x)\n", + " results[2].append(q)\n", + " results[3].append(fees)\n", + "\n", + " last_q = q[-1]\n", + " last_x = x[-1]\n", + " last_pnl = pnl[-1]\n", + " last_fees = fees[-1]" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig, axes = plt.subplots(2, 1, figsize=(20, 8))\n", + "ax1, ax2 = axes\n", + "\n", + "dates_str = [pd.to_datetime(str(item)).strftime('%Y.%m.%d') for item in trade_dates][1:]\n", + "# 去掉第一个,因为第一天没有回测,只用来确定参数\n", + "\n", + "xticks = [0]\n", + "for item in results[0][:-1]:\n", + " xticks.append(xticks[-1] + item.shape[0])\n", + "\n", + "ax1.plot(np.concatenate(results[0], axis=0), label='pnl')\n", + "ax1.plot(np.concatenate(results[3], axis=0), label='fees')\n", + "ax1.plot(np.concatenate(results[0], axis=0) + np.concatenate(results[3], axis=0) , label='pnl+fees')\n", + "ax1.grid()\n", + "ax1.legend()\n", + "ax1.set_xticks(xticks)\n", + "ax1.set_xticklabels(dates_str)\n", + "ax1.set_xlabel('Time')\n", + "ax1.set_ylabel('Value')\n", + "\n", + "ax2.plot(np.concatenate(results[2], axis=0), label='pos')\n", + "ax2.grid()\n", + "ax2.legend()\n", + "ax2.set_xticks(xticks)\n", + "ax2.set_xticklabels(dates_str)\n", + "ax2.set_xlabel('Time')\n", + "ax2.set_ylabel('Volume')\n", + "plt.suptitle(target)\n", + "fig.tight_layout()\n", + "plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "finance", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.9" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Jerry's tests/CSV_Reader_test.py b/Jerry's tests/CSV_Reader_test.py new file mode 100644 index 000000000..852ecdeeb --- /dev/null +++ b/Jerry's tests/CSV_Reader_test.py @@ -0,0 +1,36 @@ +import pandas as pd +from nautilus_trader.core.datetime import dt_to_unix_nanos + +# timestamp_ns = pd.Timestamp("2022-01-01").value +# print(timestamp_ns) +# start = 
dt_to_unix_nanos(pd.Timestamp('2022-01-01', tz='UTC')) +# print(start) +# end_time=pd.Timestamp("2022-01-01 23:00:00").value +# print(end_time) +# 设置显示选项 +pd.set_option('display.max_columns', None) # 显示所有列 +pd.set_option('display.width', None) # 确保每一列都能完全显示 +pd.set_option('display.max_rows', None) # 显示所有行 +pd.set_option('display.max_colwidth', None) # 显示每一列的完整内容 + +# filename = r"D:\下载\DAT_ASCII_EURUSD_T_202308.csv" +# filename = r"D:\下载\BTC-USDT-220107.OK.csv" +# filename = r"D:\下载\LDOBUSD-bookTicker-2023-06\LDOBUSD-bookTicker-2023-06.csv" +filename = r"D:\下载\BTCUSDT-bookTicker-2023-09-17\BTCUSDT-bookTicker-2023-09-17.csv" +data = pd.read_csv(filename, nrows=10000) +data['spread'] = data['best_ask_price'] - data['best_bid_price'] +data['midprice'] = (data['best_ask_price'] + data['best_bid_price']) / 2 +data['midprice_change'] = data['midprice'].diff() +data['imbalance_-1to1'] = -data['best_ask_qty'] + data['best_bid_qty'] / data['best_ask_qty'] + data['best_bid_qty'] # 取值是-1到1之间,-1表示完全由卖单构成,1表示完全由买单构成 +data['imbalance_0to1'] = data['best_ask_qty'] / data['best_ask_qty'] + data['best_bid_qty'] # 取值是0到1之间,0表示完全由卖单构成,1表示完全由买单构成 +data['wmp'] = data['midprice'] + data['spread'] * data['imbalance_-1to1']/2 +data['wmp_change'] = data['wmp'].diff() +data['dmp'] = data['midprice'] + data['spread'] * data['imbalance_-1to1'] * (data['imbalance_-1to1'] * data['imbalance_-1to1'] + 1) / 2 # 尚未完成,需要结合DMP公式考虑fee + +print(data) +print(data.dtypes) # 查看每一列的数据类型 +print(data.head()) # 查看head +print(data.columns) # 查看列名 +# +# # pd.reset_option('display.max_columns') + diff --git a/Jerry's tests/External_Data_Loader.py b/Jerry's tests/External_Data_Loader.py new file mode 100644 index 000000000..e96264b6c --- /dev/null +++ b/Jerry's tests/External_Data_Loader.py @@ -0,0 +1,72 @@ +import os, shutil +from datetime import date +import pandas as pd +from decimal import Decimal +from nautilus_trader.model.data.tick import QuoteTick +from nautilus_trader.model.objects import Price, Quantity 
# ===== Jerry's tests/External_Data_Loader.py (continued) =====
from nautilus_trader.core.datetime import dt_to_unix_nanos
from nautilus_trader.persistence.catalog import ParquetDataCatalog
from nautilus_trader.persistence.external.core import process_files, write_objects
from nautilus_trader.persistence.external.readers import CSVReader
from nautilus_trader.test_kit.providers import TestInstrumentProvider
from nautilus_trader.backtest.node import BacktestNode, BacktestVenueConfig, BacktestDataConfig, BacktestRunConfig, BacktestEngineConfig
from nautilus_trader.config import ImportableStrategyConfig


def parser(data, instrument_id):
    """Yield one QuoteTick built from a row of OKX tick data (for use with CSVReader).

    Before reusing this for another file, check (1) the file's column layout and
    (2) whether the reader runs with `as_dataframe` true or false. Use `from_str`
    for string-like values and `from_int` for integer values.

    NOTE(review): sizes are read from the bidCnt1/askCnt1 columns here, while the
    OKX-specific loader reads bidSz1/askSz1 - confirm which columns are intended.
    """
    dt = pd.Timestamp(data['exchTimeMs'], unit='ms', tz='UTC')
    yield QuoteTick(
        instrument_id=instrument_id,
        bid=Price.from_str(str(data['bidPx1'])),
        ask=Price.from_str(str(data['askPx1'])),
        bid_size=Quantity.from_int(data['bidCnt1']),
        ask_size=Quantity.from_int(data['askCnt1']),
        ts_event=dt_to_unix_nanos(dt),
        ts_init=dt_to_unix_nanos(dt),
    )

# Raw string: the Windows path contains backslashes that are not valid escape sequences.
input_files = r"D:\下载\BTC-USDT-220107.OK.csv"  # "your_path_to_file"
CATALOG_PATH = "D:/backtest/backtest1/catalog02"  # "your_path_to_catalog"
# Clear if it already exists, then create fresh
if os.path.exists(CATALOG_PATH):
    shutil.rmtree(CATALOG_PATH)
os.mkdir(CATALOG_PATH)
catalog = ParquetDataCatalog(CATALOG_PATH)  # Create a new ParquetDataCatalog instance

# Use nautilus test helpers to create a BTC/USDT Crypto instrument for our purposes.
# Fee rates are built from strings so the Decimal holds exactly the written value
# rather than the binary-float approximation of it.
maker1 = Decimal("-0.000001")
taker1 = Decimal("0.0000143")
instrument = TestInstrumentProvider.btcusdt_future_OKX(expiry=date(2022, 1, 7), maker=maker1, taker=taker1)

# Add our new instrument to the ParquetDataCatalog and check its existence
write_objects(catalog, [instrument])
catalog.instruments()

# Loading the files (the header can be customized)
process_files(
    glob_path=input_files,
    reader=CSVReader(
        block_parser=lambda x: parser(x, instrument_id=instrument.id),
        header=None,
        chunked=False,
        as_dataframe=True,
    ),
    catalog=catalog,
)

# Also manually write the instrument to the catalog
write_objects(catalog, [instrument])

# Using the data Catalog
start = dt_to_unix_nanos(pd.Timestamp('2022-01-01', tz='UTC'))
end = dt_to_unix_nanos(pd.Timestamp('2022-01-01 23:00:00', tz='UTC'))

catalog.quote_ticks(start=start, end=end)


# ===== Jerry's tests/External_Data_Loader_OKX.py =====
import os
import shutil
import pandas as pd
from decimal import Decimal
from nautilus_trader.model.data.tick import QuoteTick
from nautilus_trader.model.objects import Price, Quantity
from nautilus_trader.core.datetime import dt_to_unix_nanos
from nautilus_trader.persistence.catalog import ParquetDataCatalog
from nautilus_trader.persistence.external.core import process_files, write_objects
from nautilus_trader.persistence.external.readers import CSVReader
from nautilus_trader.test_kit.providers import TestInstrumentProvider


def parser(data, instrument_id):
    """Yield one QuoteTick built from a row of OKX tick data (for use with CSVReader).

    Before reusing this for another file, check (1) the file's column layout and
    (2) whether the reader runs with `as_dataframe` true or false. Use `from_str`
    for string-like values and `from_int` for integer values. Sizes are taken from
    the bidSz1/askSz1 columns and parsed through their string form.
    """
    dt = pd.Timestamp(data['exchTimeMs'], unit='ms', tz='UTC')
    yield QuoteTick(
        instrument_id=instrument_id,
        bid=Price.from_str(str(data['bidPx1'])),
        ask=Price.from_str(str(data['askPx1'])),
        bid_size=Quantity.from_str(str(data['bidSz1'])),
        ask_size=Quantity.from_str(str(data['askSz1'])),
        ts_event=dt_to_unix_nanos(dt),
        ts_init=dt_to_unix_nanos(dt),
    )

input_files = r"D:\下载\BTC-USDT-220107.OK.csv"  # "your_path_to_file"
CATALOG_PATH = r"D:/backtest/backtest1/catalog_Sz_01"  # "your_path_to_catalog"
# Clear if it already exists, then create fresh
if os.path.exists(CATALOG_PATH):
    shutil.rmtree(CATALOG_PATH)
os.mkdir(CATALOG_PATH)
catalog = ParquetDataCatalog(CATALOG_PATH)  # Create a new ParquetDataCatalog instance

# # For DEFAULT nautilus-trader with default function btcusdt_future_binance in TestInstrumentProvider
# instrument = TestInstrumentProvider.btcusdt_future_binance()

# For EDITABLE nautilus-trader with customized function btcusdt_future_OKX in TestInstrumentProvider
# Use nautilus test helpers to create a BTC/USDT Crypto instrument for our purposes.
# Fee rates are built from strings so the Decimal holds exactly the written value
# rather than the binary-float approximation of it.
maker1 = Decimal("-0.000001")
taker1 = Decimal("0.0000143")
instrument = TestInstrumentProvider.btcusdt_future_OKX(maker=maker1, taker=taker1)

# Add our new instrument to the ParquetDataCatalog and check its existence
write_objects(catalog, [instrument])
catalog.instruments()

# Loading the files (the header can be customized)
process_files(
    glob_path=input_files,
    reader=CSVReader(
        block_parser=lambda x: parser(x, instrument_id=instrument.id),
        header=None,
        chunked=False,
        as_dataframe=True,
    ),
    catalog=catalog,
)

# Also manually write the instrument to the catalog
write_objects(catalog, [instrument])

# Using the data Catalog
start = dt_to_unix_nanos(pd.Timestamp('2022-01-01', tz='UTC'))
end = dt_to_unix_nanos(pd.Timestamp('2022-01-01 23:00:00', tz='UTC'))

catalog.quote_ticks(start=start, end=end)


# ===== Jerry's tests/OKX_BTCUSDT_Level2_Test.py =====
#!/usr/bin/env python3
# -------------------------------------------------------------------------------------------------
# This file is a backtest file with imported catalog quote tick
# data of OKX
# This file is using BacktestNode to run the backtest
# Customized by Jerry based on exmaples\notebooks\external_data_backtest.ipynb
# -------------------------------------------------------------------------------------------------

import time
from decimal import Decimal

import pandas as pd

from nautilus_trader.backtest.node import BacktestNode
from nautilus_trader.core.datetime import dt_to_unix_nanos
from nautilus_trader.config import BacktestRunConfig, BacktestVenueConfig, BacktestDataConfig, BacktestEngineConfig
from nautilus_trader.config import ImportableStrategyConfig
from nautilus_trader.config import LoggingConfig
from nautilus_trader.config.common import ImportableStrategyConfig  # rebinds the same name imported above
from nautilus_trader.model.data.tick import QuoteTick
from nautilus_trader.persistence.catalog import ParquetDataCatalog


if __name__ == "__main__":

    # Using the data Catalog
    # catalog = ParquetDataCatalog.from_env()  # Create a new ParquetDataCatalog instance from the environment
    # ran on 08/28/23 Mon 14:50:00 but failed with no [Nautilus_Path]
    CATALOG_PATH = "D:/backtest/backtest1/catalog_Sz_01"  # "your_path_to_catalog"
    catalog = ParquetDataCatalog(CATALOG_PATH)  # Create a new ParquetDataCatalog instance
    catalog.instruments()  # List all instruments in the catalog

    start = dt_to_unix_nanos(pd.Timestamp('2022-01-01', tz='UTC'))
    end = dt_to_unix_nanos(pd.Timestamp('2022-01-01 23:00:00', tz='UTC'))

    catalog.quote_ticks(start=start, end=end)

    # Add instruments
    instrument = catalog.instruments(as_nautilus=True)[0]

    # Add a trading venue (multiple venues possible)
    venues_config = [
        BacktestVenueConfig(
            name="OKX",
            oms_type="HEDGING",
            account_type="MARGIN",
            base_currency="USDT",
            starting_balances=["1000000 USDT"],
        )
    ]

    # Add data (only the first 30 seconds of the day are replayed)
    data_config = [
        BacktestDataConfig(
            catalog_path=str(catalog.path),
            data_cls=QuoteTick,
            instrument_id=instrument.id.value,
            start_time=pd.Timestamp("2022-01-01").value,
            end_time=pd.Timestamp("2022-01-01 00:00:30").value,
        )
    ]

    # Configure your strategy
    strategies = [
        ImportableStrategyConfig(
            strategy_path="nautilus_trader.examples.strategies.orderbook_imbalance:OrderBookImbalance",
            config_path="nautilus_trader.examples.strategies.orderbook_imbalance:OrderBookImbalanceConfig",
            config=dict(
                instrument_id=instrument.id.value,
                max_trade_size=Decimal(1000),
                use_quote_ticks=True,
                trigger_min_size=0.5,
                trigger_imbalance_ratio=0.4,
                book_type="L1_TBBO",
                # order_id_tag=instrument.selection_id,
                # AttributeError: 'nautilus_trader.model.instruments.crypto_future.Cr' object has no attribute 'selection_id'
            ),
        ),
    ]

    config = BacktestRunConfig(
        engine=BacktestEngineConfig(
            strategies=strategies,
            logging=LoggingConfig(log_level="ERROR"),
        ),
        data=data_config,
        venues=venues_config,
    )

    node = BacktestNode(configs=[config])  # successfully ran but no reports

    results = node.run()
    print(results)


# ===== Jerry's tests/OKX_backtest_example.ipynb (code cells rendered as plain Python) =====
from decimal import Decimal

import pandas as pd

from nautilus_trader.backtest.node import BacktestNode
from nautilus_trader.core.datetime import dt_to_unix_nanos
from nautilus_trader.config import BacktestRunConfig, BacktestVenueConfig, BacktestDataConfig, BacktestEngineConfig
from nautilus_trader.config import ImportableStrategyConfig
from nautilus_trader.config import LoggingConfig
from nautilus_trader.examples.strategies.orderbook_imbalance import OrderBookImbalance
from nautilus_trader.examples.strategies.orderbook_imbalance import OrderBookImbalanceConfig
from nautilus_trader.model.data.tick import QuoteTick
from nautilus_trader.persistence.catalog import ParquetDataCatalog

catalog = ParquetDataCatalog.from_env()

catalog.instruments()
start = dt_to_unix_nanos(pd.Timestamp('2022-01-01', tz='UTC'))
end = dt_to_unix_nanos(pd.Timestamp('2022-01-01 23:00:00', tz='UTC'))

catalog.quote_ticks(start=start, end=end)

instrument = catalog.instruments(as_nautilus=True)[0]

data_config=[
    BacktestDataConfig(
        # Reuse the catalog opened above instead of building a second one from the environment.
        catalog_path=str(catalog.path),
        data_cls=QuoteTick,
        instrument_id=instrument.id.value,
        start_time=pd.Timestamp("2022-01-01").value,
        end_time=pd.Timestamp("2022-01-01 23:00:00").value,
    )
]

# NOTE(review): the script version of this backtest uses USDT for the venue's base
# currency and starting balance - confirm whether USD is intended here.
venues_config=[
    BacktestVenueConfig(
        name="OKX",
        oms_type="HEDGING",
        account_type="MARGIN",
        base_currency="USD",
        starting_balances=["1000000 USD"],
    )
]

strategies = [
    ImportableStrategyConfig(
        strategy_path="nautilus_trader.examples.strategies.orderbook_imbalance:OrderBookImbalance",
        config_path="nautilus_trader.examples.strategies.orderbook_imbalance:OrderBookImbalanceConfig",
        config=dict(
            instrument_id=instrument.id.value,
            max_trade_size=Decimal(10),
            # order_id_tag=instrument.selection_id is omitted: OKX_BTCUSDT_Level2_Test.py
            # records that the crypto-future instrument has no `selection_id`
            # attribute (AttributeError), and runs without the tag.
        ),
    ),
]

# NautilusTrader currently exceeds the rate limit for Jupyter notebook logging (stdout output),
# this is why the `log_level` is set to "ERROR". If you lower this level to see
# more logging then the notebook will hang during cell execution. A fix is currently
# being investigated which involves either raising the configured rate limits for
# Jupyter, or throttling the log flushing from Nautilus.
# https://github.com/jupyterlab/jupyterlab/issues/12845
# https://github.com/deshaw/jupyterlab-limit-output
config = BacktestRunConfig(
    engine=BacktestEngineConfig(
        strategies=strategies,
        logging=LoggingConfig(log_level="ERROR"),
    ),
    data=data_config,
    venues=venues_config,
)

config

# ## Run the backtest!
node = BacktestNode(configs=[config])

result = node.run()

result


# ===== Jerry's tests/Parquet_Reader.py =====
import pandas as pd

pd.set_option('display.max_columns', None)  # show every column
pd.set_option('display.width', None)  # do not wrap, so each column is fully visible
pd.set_option('display.max_rows', None)  # show every row
pd.set_option('display.max_colwidth', None)  # show the full content of each cell

filename = r"D:\backtest\backtest1\catalog_Sz_01\data\quote_tick.parquet\instrument_id=BTCUSDT_220325.OKX\1640995200008000000-1640997071853000000-0.parquet"
# Update the file extension to .parquet
data = pd.read_parquet(filename, engine='pyarrow')  # Use read_parquet instead of read_csv

print(data)
print(data.dtypes)  # dtype of every column
print(data.head())  # first rows
print(data.columns)  # column names
"collapsed": true + }, + "outputs": [], + "source": [ + "from datetime import date,datetime\n", + "import time\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import ast\n", + "%matplotlib inline\n", + "\n", + "\"\"\"\n", + "数据源:YGGUSDT, BINANCE, 08072023\n", + "文章来源:https://www.fmz.com/bbs-topic/10198\n", + "作者:小草\n", + "\n", + "订单流数据和十档的深度数据,来自于实盘收集,更新频率都是100ms。实盘的只包含买一卖一盘口数据是实时更新的,为了简洁,暂时不用。考虑到数据太大,只保留了10万行深度数据,并且把逐档行情也独立为单独的列。\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "tick_size = 0.0001\n", + "trades = pd.read_csv('YGGUSDT_aggTrade.csv',names=['type','event_time', 'agg_trade_id','symbol', 'price', 'quantity', 'first_trade_id', 'last_trade_id',\n", + " 'transact_time', 'is_buyer_maker'])\n", + "# 导入订单流/归集成交数据(大概率是作者自己录制的,频率是100ms)" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/Jerry's tests/backtest_of_asq.ipynb b/Jerry's tests/backtest_of_asq.ipynb new file mode 100644 index 000000000..e36418da7 --- /dev/null +++ b/Jerry's tests/backtest_of_asq.ipynb @@ -0,0 +1,312 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "import matplotlib.pyplot as plt\n", + "from tqdm import tqdm\n", + "import scipy\n", + "import seaborn as sns" + ] + }, + { + 
"cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "def load_data(target='LDOBUSD'):\n", + " data = pd.read_csv(f'../bookticker/{target}-bookTicker-2023-05.csv')\n", + " data = data.rename(columns={\n", + " 'best_bid_price': 'bp', \n", + " 'best_bid_qty': 'bv',\n", + " 'best_ask_price': 'ap', \n", + " 'best_ask_qty': 'av',\n", + " 'event_time': 'time' \n", + " })\n", + " data['trade_date'] = pd.to_datetime(data['time'] // 1000 // 60 // 60 // 24, unit='d')\n", + " return data\n" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "def get_market_speed(data: pd.DataFrame, price_int: float):\n", + " deltalist = np.linspace(price_int, price_int * 10, 10)\n", + " deltadict = {}\n", + "\n", + " for delta in deltalist:\n", + " price_interval = delta\n", + " ask_limit_order_hit = data[('ap', 'max')].shift(-1) > (data[('ap', 'last')] + price_interval) \n", + " bid_limit_order_hit = data[('bp', 'min')].shift(-1) < (data[('bp', 'last')] - price_interval)\n", + " limit_order_hit = (ask_limit_order_hit | bid_limit_order_hit).astype(int)\n", + " deltas = pd.Series(limit_order_hit[limit_order_hit == 1].index).diff().apply(lambda x: x / 10)\n", + " deltadict[delta] = deltas\n", + " \n", + " lambdas = pd.DataFrame([[key,1/deltadict[key].mean()] for key in deltadict.keys()],\n", + " columns=['delta','lambda_delta']).set_index('delta')\n", + " \n", + " def exp_fit(x,a,b):\n", + " y = a*np.exp(-b*x)\n", + " return y\n", + "\n", + " paramsB, cv = scipy.optimize.curve_fit(exp_fit, np.array(lambdas.index), np.array(lambdas['lambda_delta'].values))\n", + " A, k = paramsB\n", + " return A, k" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "def get_params(data: pd.DataFrame, price_int: float, time_step: int):\n", + " data = data.copy(deep=True) \n", + "\n", + " # 1. 
按照time_step的频率把数据采样起来(相当于订单的更新的频率不能太高)\n", + " data['ms-index'] = data['time'] // time_step \n", + " prices = data.groupby('ms-index').agg({\n", + " 'ap': ['last', 'max', 'min'],\n", + " 'bp': ['last', 'max', 'min']\n", + " })\n", + " \n", + " # 2. 估计当前每条数据平均多少时间,用于估计交易时间\n", + " min_index, max_index = prices.index[0], prices.index[prices.shape[0]-1]\n", + " ave_time = time_step * (max_index - min_index) / prices.shape[0]\n", + "\n", + " # 2. 波动率\n", + " prices['mid'] = (prices[('ap', 'last')] + prices[('bp', 'last')]) / 2\n", + " sigma = np.log(prices['mid']).diff().std() * np.sqrt(24 * 60 * 60 * 1000 / ave_time)\n", + " \n", + " # 5. 市价单到达速率参数\n", + " A, k = get_market_speed(prices, price_int)\n", + "\n", + " return sigma, A, k\n", + "\n", + "def get_bid_spread(sigma, A, k, gamma, q):\n", + " var1 = (1 / gamma) * np.log(1 + gamma / k)\n", + " var2 = (2 * q + 1) / 2 * np.sqrt(sigma**2 * gamma / (2 * k * A) * (1 + gamma/k)**(1+k/gamma))\n", + " return var1 + var2\n", + "\n", + "def get_ask_spread(sigma, A, k, gamma, q):\n", + " var1 = (1 / gamma) * np.log(1 + gamma / k)\n", + " var2 = (2 * q - 1) / 2 * np.sqrt(sigma**2 * gamma / (2 * k * A) * (1 + gamma/k)**(1+k/gamma))\n", + " return var1 - var2" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "def evaluation(data: pd.DataFrame, price_int: float, time_step: int, params: dict):\n", + " \"\"\" 输入是一段历史成交的数据,来计算波动率\n", + " \"\"\" \n", + " data = data.copy(deep=True) \n", + " date = data['trade_date'].iloc[0]\n", + " \n", + " # 1. 按照time_step的频率把数据合并起来\n", + " data['ms-index'] = data['time'] // time_step \n", + " prices = data.groupby('ms-index').agg({\n", + " 'ap': ['last', 'max', 'min'],\n", + " 'bp': ['last', 'max', 'min']\n", + " })\n", + " \n", + " prices['mid'] = (prices[('ap', 'last')] + prices[('bp', 'last')]) / 2\n", + " \n", + " # 4. 
回测\n", + " N = prices.shape[0]\n", + " q = np.array([params['q']] + [0] * (N-1)) # 补上上一轮回测的结果\n", + " x = np.array([params['x']] + [0] * (N-1), dtype=float)\n", + " pnl = np.array([params['pnl']] + [0] * (N-1), dtype=float)\n", + " fees = np.array([params['fees']] + [0] * (N-1), dtype=float)\n", + " ra = np.zeros(N)\n", + " rb = np.zeros(N)\n", + "\n", + " sigma = params['sigma']\n", + " A = params['A']\n", + " k = params['k']\n", + " gamma = params['gamma']\n", + " fee_rate = params['fee_rate']\n", + "\n", + " for i in tqdm(range(N-1), desc=f'[evaluation {date}] '):\n", + " \n", + " ra[i] = prices['mid'].iloc[i] + get_ask_spread(sigma, A, k, gamma, q[i])\n", + " rb[i] = prices['mid'].iloc[i] - get_bid_spread(sigma, A, k, gamma, q[i])\n", + "\n", + " # 这里限制了价格一定得是挂单方的价格,不会变成吃单\n", + " ra[i] = max(prices[('ap', 'last')].iloc[i], np.floor(ra[i] * (1 / price_int)) * price_int)\n", + " rb[i] = min(prices[('bp', 'last')].iloc[i], np.ceil(rb[i] * (1 / price_int)) * price_int)\n", + "\n", + " buy = 0\n", + " sell = 0\n", + "\n", + " if prices[('ap', 'max')].iloc[i+1] > ra[i]:\n", + " sell = 1\n", + " \n", + " if prices[('bp', 'min')].iloc[i+1] < rb[i]:\n", + " buy = 1\n", + "\n", + " q[i+1] = q[i] + buy - sell\n", + " x[i+1] = x[i] + sell*ra[i] - buy*rb[i]\n", + " pnl[i+1] = x[i+1] + q[i+1] * prices['mid'].iloc[i+1]\n", + " fees[i+1] = fees[i] + sell*ra[i] * fee_rate + buy*rb[i] * fee_rate\n", + " return pnl, x, q, fees" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "target = 'LDOBUSD'\n", + "fee_rate = 1.4e-4\n", + "\n", + "trade_interval = 1000 \n", + "gamma = 0.01\n", + "\n", + "price_int_dict = {'LDOBUSD': 0.0001, 'LTCBUSD': 0.01, 'BTCBUSD': 0.10, 'ETHBUSD': 0.01}\n", + "price_int = price_int_dict[target]\n", + "data = load_data(target=target)\n", + "trade_dates = sorted(list(data['trade_date'].unique()))" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { 
# Walk-forward backtest: calibrate (sigma, A, k) on day i, then trade day i + 1
# with those parameters. Inventory, cash, PnL and fees carry over between days.
last_q = 0
last_x = 0
last_pnl = 0
last_fees = 0

# Per-day result arrays, in order: pnl, cash (x), inventory (q), cumulative fees.
results = [[], [], [], []]
for i in range(len(trade_dates) - 1):
    valid_data = data[data['trade_date'] == trade_dates[i]]
    # Evaluate out-of-sample on the following day. Selecting trade_dates[i] here,
    # as before, fitted and tested on the same data (look-ahead) and left the
    # per-day results one day out of step with the plot labels, which skip the
    # first date.
    test_data = data[data['trade_date'] == trade_dates[i + 1]]

    sigma, A, k = get_params(valid_data, time_step=trade_interval, price_int=price_int)
    params = {
        'q': last_q,
        'x': last_x,
        'pnl': last_pnl,
        'sigma': sigma,
        'A': A,
        'k': k,
        'gamma': gamma,
        'fees': last_fees,
        'fee_rate': fee_rate,
    }
    pnl, x, q, fees = evaluation(test_data, price_int=price_int, time_step=trade_interval, params=params)
    results[0].append(pnl)
    results[1].append(x)
    results[2].append(q)
    results[3].append(fees)

    # The next day's simulation starts from today's closing state.
    last_q = q[-1]
    last_x = x[-1]
    last_pnl = pnl[-1]
    last_fees = fees[-1]
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig, axes = plt.subplots(2, 1, figsize=(20, 8))\n", + "ax1, ax2 = axes\n", + "\n", + "dates_str = [pd.to_datetime(str(item)).strftime('%Y.%m.%d') for item in trade_dates][1:] # 去掉第一个,因为第一天没有回测,只用来确定参数\n", + "\n", + "xticks = [0]\n", + "for item in results[0][:-1]:\n", + " xticks.append(xticks[-1] + item.shape[0])\n", + "\n", + "ax1.plot(np.concatenate(results[0], axis=0), label='pnl')\n", + "ax1.plot(np.concatenate(results[3], axis=0), label='fees')\n", + "ax1.plot(np.concatenate(results[0], axis=0) + np.concatenate(results[3], axis=0) , label='pnl+fees')\n", + "ax1.grid()\n", + "ax1.legend()\n", + "ax1.set_xticks(xticks)\n", + "ax1.set_xticklabels(dates_str)\n", + "ax1.set_xlabel('Time')\n", + "ax1.set_ylabel('Value')\n", + "\n", + "ax2.plot(np.concatenate(results[2], axis=0), label='pos')\n", + "ax2.grid()\n", + "ax2.legend()\n", + "ax2.set_xticks(xticks)\n", + "ax2.set_xticklabels(dates_str)\n", + "ax2.set_xlabel('Time')\n", + "ax2.set_ylabel('Volume')\n", + "plt.suptitle(target)\n", + "fig.tight_layout()\n", + "plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "finance", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.9" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Pair_Trading_Brad_JerryModified b/Pair_Trading_Brad_JerryModified new file mode 160000 index 000000000..fb630f339 --- /dev/null +++ b/Pair_Trading_Brad_JerryModified @@ -0,0 +1 @@ +Subproject commit fb630f3393231bf3ef549e30d3bc0db1ad503e34 diff --git a/examples/live/binance_futures_market_maker.py b/examples/live/binance_futures_market_maker.py index 77202742a..5e9178cce 
100644 --- a/examples/live/binance_futures_market_maker.py +++ b/examples/live/binance_futures_market_maker.py @@ -73,7 +73,7 @@ # Configure your strategy strat_config = VolatilityMarketMakerConfig( - instrument_id="ETHUSDT-PERP.BINANCE", + instrument_id="ETHUSDT-PERP.BINANCE", # Q to be solved: is this defined in a file of NT or just defined here by the user? bar_type="ETHUSDT-PERP.BINANCE-1-MINUTE-LAST-EXTERNAL", atr_period=20, atr_multiple=6.0, diff --git a/examples/notebooks/backtest_example.ipynb b/examples/notebooks/backtest_example.ipynb index b286d3b31..908f2cd58 100644 --- a/examples/notebooks/backtest_example.ipynb +++ b/examples/notebooks/backtest_example.ipynb @@ -39,8 +39,8 @@ "outputs": [], "source": [ "catalog.instruments()\n", - "start = dt_to_unix_nanos(pd.Timestamp('2020-01-01', tz='UTC'))\n", - "end = dt_to_unix_nanos(pd.Timestamp('2020-01-02', tz='UTC'))\n", + "start = dt_to_unix_nanos(pd.Timestamp('2022-01-01', tz='UTC'))\n", + "end = dt_to_unix_nanos(pd.Timestamp('2022-01-01 23:00:00', tz='UTC'))\n", "\n", "catalog.quote_ticks(start=start, end=end)" ] diff --git a/nautilus_trader/adapters/OKX/__init__.py b/nautilus_trader/adapters/OKX/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/nautilus_trader/adapters/binance/common/enums.py b/nautilus_trader/adapters/binance/common/enums.py index b5372702a..5faf9cb64 100644 --- a/nautilus_trader/adapters/binance/common/enums.py +++ b/nautilus_trader/adapters/binance/common/enums.py @@ -112,8 +112,8 @@ class BinanceAccountType(Enum): SPOT = "SPOT" MARGIN_CROSS = "MARGIN_CROSS" MARGIN_ISOLATED = "MARGIN_ISOLATED" - FUTURES_USDT = "FUTURES_USDT" - FUTURES_COIN = "FUTURES_COIN" + FUTURES_USDT = "FUTURES_USDT" # USDT-M futures + FUTURES_COIN = "FUTURES_COIN" # COIN-M futures @property def is_spot(self): diff --git a/nautilus_trader/analysis/statistics/sharpe_ratio.py b/nautilus_trader/analysis/statistics/sharpe_ratio.py index a1512594d..802f15bc8 100644 --- 
a/nautilus_trader/analysis/statistics/sharpe_ratio.py +++ b/nautilus_trader/analysis/statistics/sharpe_ratio.py @@ -29,7 +29,7 @@ class SharpeRatio(PortfolioStatistic): Parameters ---------- - period : int, default 252 + period : int, default 252 # 252 trading days in a year in general markets, can be changed to 365 for crypto The trading period in days. """ diff --git a/nautilus_trader/examples/strategies/AS_Market_Maker.py b/nautilus_trader/examples/strategies/AS_Market_Maker.py new file mode 100644 index 000000000..ef9e850a5 --- /dev/null +++ b/nautilus_trader/examples/strategies/AS_Market_Maker.py @@ -0,0 +1,214 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright 2023 Jerry Li @ Positive Venture Group +# +# +# This file is a template for Q-quant market making strategies as Avelleneda-Stoikov (AS) market maker. +# This strategy is based on tick level data and is designed for HFT market making. +# ------------------------------------------------------------------------------------------------- + +from decimal import Decimal +from typing import Optional + +from nautilus_trader.config import StrategyConfig +from nautilus_trader.model.data.tick import QuoteTick +from nautilus_trader.model.enums import BookType +from nautilus_trader.model.enums import OrderSide +from nautilus_trader.model.enums import TimeInForce +from nautilus_trader.model.enums import book_type_from_str +from nautilus_trader.model.identifiers import InstrumentId +from nautilus_trader.model.instruments.base import Instrument +from nautilus_trader.model.orderbook.book import OrderBook +from nautilus_trader.model.orderbook.data import BookOrder +from nautilus_trader.model.orderbook.data import OrderBookData +from nautilus_trader.trading.strategy import Strategy + + +# *** Avelleneda-Stoikov (AS) Market making model. *** +# *** In the initial model, it loses from trades themselves and profits from the maker transaction fee rebate. 
*** + + +class OrderBookImbalanceConfig(StrategyConfig): + """ + Configuration for ``OrderBookImbalance`` instances. + + Parameters + ---------- + instrument_id : InstrumentId + The instrument ID for the strategy. + max_trade_size : str + The max position size per trade (volume on the level can be less). + trigger_min_size : float + The minimum size on the larger side to trigger an order. + order_id_tag : str + The unique order ID tag for the strategy. Must be unique + amongst all running strategies for a particular trader ID. + oms_type : OmsType + The order management system type for the strategy. This will determine + how the `ExecutionEngine` handles position IDs (see docs). + book_type : BookType {``L1_TBBO``, ``L2_MBP``, ``L3_MBO``} + The order book type to use for the strategy. + use_quote_ticks : bool + Whether to use quote ticks instead of order book data. When set to True, + the strategy will use the "L1_TBBO" orderbook and subscribe to quote tick data + no matter what the book_type is. When set to False, the strategy will use + the book_type orderbook and subscribe to orderbook delta data. + subscribe_ticker : bool + Whether to subscribe to ticker data. When set to True, the strategy will + subscribe to ticker data in addition to the data source specified by book_type and use_quote_ticks. + gamma : float + The risk aversion parameter. + Q: float + The maximum position size. + A: float + The scale parameter A of the exponential market speed function (y = A*np.exp(-k*x)). + k: float + The decay-rate parameter k (the exponent coefficient) of the exponential market speed function. + sigma: float + The volatility (std) of the target price. + trade_interval: int + The frequency of trading. 
The unit is ms + fee_rate: float + The transaction fee rate + """ + + instrument_id: str + max_trade_size: Decimal + trigger_min_size: float = 0.5 + trigger_imbalance_ratio: float = 0.4 + book_type: str = "L2_MBP" + use_quote_ticks: bool = False + subscribe_ticker: bool = False + + +class OrderBookImbalance(Strategy): + """ + A simple strategy that sends FOK limit orders when there is a bid/ask + imbalance in the order book. + + Cancels all orders and closes all positions on stop. + + Parameters + ---------- + config : OrderbookImbalanceConfig + The configuration for the instance. + """ + + def __init__(self, config: OrderBookImbalanceConfig): + assert 0 < config.trigger_imbalance_ratio < 1 + super().__init__(config) + + # Configuration + self.instrument_id = InstrumentId.from_str(config.instrument_id) + self.max_trade_size = Decimal(config.max_trade_size) + self.trigger_min_size = config.trigger_min_size + self.trigger_imbalance_ratio = config.trigger_imbalance_ratio + self.instrument: Optional[Instrument] = None + if self.config.use_quote_ticks: + assert self.config.book_type == "L1_TBBO" + self.book_type: BookType = book_type_from_str(self.config.book_type) + self._book = None # type: Optional[OrderBook] + + def on_start(self): + """Actions to be performed on strategy start.""" + self.instrument = self.cache.instrument(self.instrument_id) + if self.instrument is None: + self.log.error(f"Could not find instrument for {self.instrument_id}") + self.stop() + return + + if self.config.use_quote_ticks: + book_type = BookType.L1_TBBO + self.subscribe_quote_ticks(self.instrument.id) + else: + book_type = book_type_from_str(self.config.book_type) + self.subscribe_order_book_deltas(self.instrument.id, book_type) + if self.config.subscribe_ticker: + self.subscribe_ticker(self.instrument.id) + self._book = OrderBook.create(instrument=self.instrument, book_type=book_type) + + def on_order_book_delta(self, data: OrderBookData): + """Actions to be performed when a delta is 
received.""" + if not self._book: + self.log.error("No book being maintained.") + return + + self._book.apply(data) + if self._book.spread(): + self.check_trigger() + + def on_quote_tick(self, tick: QuoteTick): + """Actions to be performed when a delta is received.""" + bid = BookOrder( + price=tick.bid.as_double(), + size=tick.bid_size.as_double(), + side=OrderSide.BUY, + ) + ask = BookOrder( + price=tick.ask.as_double(), + size=tick.ask_size.as_double(), + side=OrderSide.SELL, + ) + + self._book.clear() + self._book.update(bid) + self._book.update(ask) + if self._book.spread(): + self.check_trigger() + + def on_order_book(self, order_book: OrderBook): + """Actions to be performed when an order book update is received.""" + self._book = order_book + if self._book.spread(): + self.check_trigger() + + def check_trigger(self): + """Check for trigger conditions.""" + if not self._book: + self.log.error("No book being maintained.") + return + + if not self.instrument: + self.log.error("No instrument loaded.") + return + + bid_size = self._book.best_bid_qty() + ask_size = self._book.best_ask_qty() + if not (bid_size and ask_size): + return + + smaller = min(bid_size, ask_size) + larger = max(bid_size, ask_size) + ratio = smaller / larger + self.log.info( + f"Book: {self._book.best_bid_price()} @ {self._book.best_ask_price()} ({ratio=:0.4f})", # ratio was 0.2 initially + ) + if larger > self.trigger_min_size and ratio < self.trigger_imbalance_ratio: + if len(self.cache.orders_inflight(strategy_id=self.id)) > 0: + pass + elif bid_size > ask_size: + order = self.order_factory.limit( + instrument_id=self.instrument.id, + price=self.instrument.make_price(self._book.best_ask_price()), + order_side=OrderSide.BUY, + quantity=self.instrument.make_qty(ask_size), + post_only=False, + time_in_force=TimeInForce.FOK, + ) + self.submit_order(order) + else: + order = self.order_factory.limit( + instrument_id=self.instrument.id, + 
price=self.instrument.make_price(self._book.best_bid_price()), + order_side=OrderSide.SELL, + quantity=self.instrument.make_qty(bid_size), + post_only=False, + time_in_force=TimeInForce.FOK, + ) + self.submit_order(order) + + def on_stop(self): + """Actions to be performed when the strategy is stopped.""" + if self.instrument is None: + return + self.cancel_all_orders(self.instrument.id) + self.close_all_positions(self.instrument.id) diff --git a/nautilus_trader/examples/strategies/orderbook_imbalance.py b/nautilus_trader/examples/strategies/orderbook_imbalance.py index 178f5b645..85c7e527f 100644 --- a/nautilus_trader/examples/strategies/orderbook_imbalance.py +++ b/nautilus_trader/examples/strategies/orderbook_imbalance.py @@ -61,8 +61,8 @@ class OrderBookImbalanceConfig(StrategyConfig): instrument_id: str max_trade_size: Decimal - trigger_min_size: float = 100.0 - trigger_imbalance_ratio: float = 0.20 + trigger_min_size: float = 0.5 + trigger_imbalance_ratio: float = 0.4 book_type: str = "L2_MBP" use_quote_ticks: bool = False subscribe_ticker: bool = False @@ -168,7 +168,7 @@ def check_trigger(self): larger = max(bid_size, ask_size) ratio = smaller / larger self.log.info( - f"Book: {self._book.best_bid_price()} @ {self._book.best_ask_price()} ({ratio=:0.2f})", + f"Book: {self._book.best_bid_price()} @ {self._book.best_ask_price()} ({ratio=:0.4f})", # ratio was 0.2 initially ) if larger > self.trigger_min_size and ratio < self.trigger_imbalance_ratio: if len(self.cache.orders_inflight(strategy_id=self.id)) > 0: diff --git a/nautilus_trader/test_kit/providers.py b/nautilus_trader/test_kit/providers.py index aa4b4a616..8c9fd7fb4 100644 --- a/nautilus_trader/test_kit/providers.py +++ b/nautilus_trader/test_kit/providers.py @@ -395,6 +395,53 @@ def default_fx_ccy(symbol: str, venue: Venue = None) -> CurrencyPair: ) @staticmethod + def btcusdt_future_OKX(expiry: Optional[date] = None, maker: Decimal = 0.00001, + taker: Decimal = 0.0000143) -> CryptoFuture: + """ + 
Return the OKX BTCUSDT instrument for backtesting. + + Parameters + ---------- + expiry : date, optional + The expiry date for the contract. + maker : Decimal,`optional`, maker transaction fee + taker : Decimal,`optional`, taker transaction fee + + Returns + ------- + CryptoFuture + + """ + if expiry is None: + expiry = date(2022, 3, 25) + return CryptoFuture( + instrument_id=InstrumentId( + symbol=Symbol(f"BTCUSDT_{expiry.strftime('%y%m%d')}"), + venue=Venue("OKX"), + ), + native_symbol=Symbol("BTCUSDT"), + underlying=BTC, + quote_currency=USDT, + settlement_currency=USDT, + expiry_date=expiry, + price_precision=2, + size_precision=6, + price_increment=Price(1e-02, precision=2), + size_increment=Quantity(1e-06, precision=6), + max_quantity=Quantity(9000, precision=6), + min_quantity=Quantity(1e-06, precision=6), + max_notional=None, + min_notional=Money(10.00000000, USDT), + max_price=Price(1000000, precision=2), + min_price=Price(0.01, precision=2), + margin_init=Decimal(0), + margin_maint=Decimal(0), + maker_fee=maker, + taker_fee=taker, + ts_event=0, + ts_init=0, + ) + @staticmethod def equity(symbol: str = "AAPL", venue: str = "NASDAQ"): return Equity( instrument_id=InstrumentId(symbol=Symbol(symbol), venue=Venue(venue)),