microsoft · lihuoran · Aug 24, 2022 · Jul 29, 2022 · Jul 29, 2022 · Aug 3, 2022
diff --git a/qlib/backtest/__init__.py b/qlib/backtest/__init__.py
@@ -345,4 +345,4 @@ def format_decisions(
     return res
 
 
-__all__ = ["Order", "backtest"]
+__all__ = ["Order", "backtest", "get_strategy_executor"]
diff --git a/qlib/backtest/decision.py b/qlib/backtest/decision.py
@@ -135,6 +135,11 @@ def parse_dir(direction: Union[str, int, np.integer, OrderDir, np.ndarray]) -> U
         else:
             raise NotImplementedError(f"This type of input is not supported")
 
+    @property
+    def key(self) -> tuple:
+        """A hashable & unique key to identify this order. Usually used as the key in a dict."""
+        return self.stock_id, self.start_time.replace(hour=0, minute=0, second=0), self.direction
+
 
 class OrderHelper:
     """

diff --git a/qlib/backtest/executor.py b/qlib/backtest/executor.py
@@ -114,7 +114,7 @@ def __init__(
         self.track_data = track_data
         self._trade_exchange = trade_exchange
         self.level_infra = LevelInfrastructure()
-        self.level_infra.reset_infra(common_infra=common_infra)
+        self.level_infra.reset_infra(common_infra=common_infra, executor=self)
         self._settle_type = settle_type
         self.reset(start_time=start_time, end_time=end_time, common_infra=common_infra)
         if common_infra is None:
@@ -124,6 +124,9 @@ def __init__(
         self.dealt_order_amount: Dict[str, float] = defaultdict(float)
         self.deal_day = None
 
+        # whether the current executor is collecting data
+        self.is_collecting = False
+
     def reset_common_infra(self, common_infra: CommonInfrastructure, copy_trade_account: bool = False) -> None:
         """
         reset infrastructure for trading
@@ -256,6 +259,8 @@ def collect_data(
         object
             trade decision
         """
+        self.is_collecting = True
+
         if self.track_data:
             yield trade_decision
 
@@ -296,6 +301,8 @@ def collect_data(
 
         if return_value is not None:
             return_value.update({"execute_result": res})
+
+        self.is_collecting = False
         return res
 
     def get_all_executors(self) -> List[BaseExecutor]:
@@ -473,6 +480,9 @@ def _collect_data(
                 # do nothing and just step forward
                 sub_cal.step()
 
+        # Let the inner strategy know that the outer level execution is done.
+        self.inner_strategy.post_upper_level_exe_step()
+
         return execute_result, {"inner_order_indicators": inner_order_indicators, "decision_list": decision_list}
 
     def post_inner_exe_step(self, inner_exe_res: List[object]) -> None:

diff --git a/qlib/backtest/utils.py b/qlib/backtest/utils.py
@@ -21,6 +21,9 @@
 from ..data.data import Cal
 
 
+SAOE_DATA_KEY = "saoe_data"
+
+
 class TradeCalendarManager:
     """
     Manager for trading calendar
@@ -235,7 +238,9 @@ def update(self, other: BaseInfrastructure) -> None:
 
 class CommonInfrastructure(BaseInfrastructure):
     def get_support_infra(self) -> Set[str]:
-        return {"trade_account", "trade_exchange"}
+        # SAOE_DATA_KEY is used to store SAOE (single asset order execution) information that should be shared by
+        # all strategies. It should be a dict.
+        return {"trade_account", "trade_exchange", SAOE_DATA_KEY}
 
 
 class LevelInfrastructure(BaseInfrastructure):
@@ -248,7 +253,7 @@ def get_support_infra(self) -> Set[str]:
         sub_level_infra:
         - **NOTE**: this will only work after _init_sub_trading !!!
         """
-        return {"trade_calendar", "sub_level_infra", "common_infra"}
+        return {"trade_calendar", "sub_level_infra", "common_infra", "executor"}
 
     def reset_cal(
         self,

diff --git a/qlib/data/dataset/__init__.py b/qlib/data/dataset/__init__.py
@@ -615,4 +615,4 @@ def _prepare_seg(self, slc: slice, **kwargs) -> TSDataSampler:
         return tsds
 
 
-__all__ = ["Optional"]
+__all__ = ["Optional", "Dataset", "DatasetH"]
diff --git a/qlib/rl/aux_info.py b/qlib/rl/aux_info.py
@@ -3,7 +3,7 @@
 
 from __future__ import annotations
 
-from typing import Optional, TYPE_CHECKING, Generic, TypeVar
+from typing import TYPE_CHECKING, Generic, Optional, TypeVar
 
 from qlib.typehint import final
 

diff --git a/qlib/rl/data/exchange_wrapper.py b/qlib/rl/data/exchange_wrapper.py
@@ -6,6 +6,7 @@
 import pandas as pd
 
 from qlib.backtest import Exchange, Order
+
 from .pickle_styled import IntradayBacktestData
 
 

diff --git a/qlib/rl/from_neutrader/config.py b/qlib/rl/from_neutrader/config.py
diff --git a/qlib/rl/from_neutrader/feature.py b/qlib/rl/from_neutrader/feature.py
diff --git a/qlib/rl/order_execution/constants.py b/qlib/rl/order_execution/constants.py
@@ -0,0 +1,12 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+from typing import TypeVar
+
+import numpy as np
+import pandas as pd
+
+FINEST_GRANULARITY = "1min"
+COARSEST_GRANULARITY = "1day"
+ONE_SEC = pd.Timedelta("1s")  # use 1 second to exclude the right interval point
+float_or_ndarray = TypeVar("float_or_ndarray", float, np.ndarray)
diff --git a/qlib/rl/order_execution/interpreter.py b/qlib/rl/order_execution/interpreter.py
@@ -14,15 +14,15 @@
 from qlib.constant import EPS
 from qlib.rl.data import pickle_styled
 from qlib.rl.interpreter import ActionInterpreter, StateInterpreter
+from qlib.rl.order_execution.state import SAOEState
 from qlib.typehint import TypedDict
 
-from .simulator_simple import SAOEState
-
 __all__ = [
     "FullHistoryStateInterpreter",
     "CurrentStepStateInterpreter",
     "CategoricalActionInterpreter",
     "TwapRelativeActionInterpreter",
+    "FullHistoryObs",
 ]
 
 

diff --git a/qlib/rl/order_execution/policy.py b/qlib/rl/order_execution/policy.py
@@ -1,5 +1,6 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
+
 from __future__ import annotations
 
 from pathlib import Path

diff --git a/qlib/rl/order_execution/reward.py b/qlib/rl/order_execution/reward.py
@@ -7,10 +7,9 @@
 
 import numpy as np
 
+from qlib.rl.order_execution.state import SAOEMetrics, SAOEState
 from qlib.rl.reward import Reward
 
-from .simulator_simple import SAOEMetrics, SAOEState
-
 __all__ = ["PAPenaltyReward"]
Original file line number	Diff line number	Diff line change
Expand Up		@@ -345,4 +345,4 @@ def format_decisions(
		return res


		__all__ = ["Order", "backtest"]
		__all__ = ["Order", "backtest", "get_strategy_executor"]
Original file line number	Diff line number	Diff line change
Expand Up		@@ -615,4 +615,4 @@ def _prepare_seg(self, slc: slice, **kwargs) -> TSDataSampler:
		return tsds


		__all__ = ["Optional"]
		__all__ = ["Optional", "Dataset", "DatasetH"]
Original file line number	Diff line number	Diff line change
Expand Up		@@ -6,6 +6,7 @@
		import pandas as pd

		from qlib.backtest import Exchange, Order

		from .pickle_styled import IntradayBacktestData


Expand Down