From 1016121b58ca0a818ecd8645977594475a8072d9 Mon Sep 17 00:00:00 2001 From: Marcus Read Date: Tue, 28 Sep 2021 22:24:22 +0100 Subject: [PATCH 1/2] Add `ExchangeCalendar.trading_index`. Adds: - `ExchangeCalendar.trading_index`. - `_TradingIndex` to `calendar_helpers.py`. - `TestTradingIndex` to `test_calendar_helpers.py` - `ExchangeCalendarTestBase.test_trading_index`. Adds following classes to `errors.py`: - `IndexOverlapError` - `IntervalsOverlapError` - `IndicesOverlapError` Also: - Changes `ExchangeCalendar` properties `default_side` and `valid_sides` to class methods. - Adds `Answers.get_session_minutes`. --- exchange_calendars/calendar_helpers.py | 238 +++++++++ exchange_calendars/errors.py | 35 ++ exchange_calendars/exchange_calendar.py | 229 +++++++- tests/test_calendar_helpers.py | 671 +++++++++++++++++++++++- tests/test_exchange_calendar.py | 136 ++++- 5 files changed, 1278 insertions(+), 31 deletions(-) diff --git a/exchange_calendars/calendar_helpers.py b/exchange_calendars/calendar_helpers.py index 57c29120..b62561ec 100644 --- a/exchange_calendars/calendar_helpers.py +++ b/exchange_calendars/calendar_helpers.py @@ -1,6 +1,7 @@ from __future__ import annotations import typing import datetime +import contextlib import numpy as np import pandas as pd @@ -357,3 +358,240 @@ def parse_session( if ts not in calendar.schedule.index: raise errors.NotSessionError(calendar, ts, param_name) return ts + + +class _TradingIndex: + """Create a trading index. + + Credit to @Stryder-Git at pandas_market_calendars for showing the way + with a vectorised solution to creating trading indices. 
+ + Parameters + ---------- + All parameters as ExchangeCalendar.trading_index + """ + + def __init__( + self, + calendar: ExchangeCalendar, + start: Date, + end: Date, + period: pd.Timedelta, + closed: str, # Literal["left", "right", "both", "neither"] when min python 3.8 + force_close: bool, + force_break_close: bool, + curtail_overlaps: bool, + ): + self.closed = closed + self.force_break_close = force_break_close + self.force_close = force_close + self.curtail_overlaps = curtail_overlaps + + # get session bound values over requested range + slice_start = calendar.all_sessions.searchsorted(start) + slice_end = calendar.all_sessions.searchsorted(end, side="right") + slce = slice(slice_start, slice_end) + + self.interval_nanos = period.value + self.dtype = np.int64 if self.interval_nanos < 3000000000 else np.int32 + + self.opens = calendar.market_opens_nanos[slce] + self.closes = calendar.market_closes_nanos[slce] + self.break_starts = calendar.market_break_starts_nanos[slce] + self.break_ends = calendar.market_break_ends_nanos[slce] + + self.mask = self.break_starts != pd.NaT.value # break mask + self.has_break = self.mask.any() + + self.defaults = { + "closed": self.closed, + "force_close": self.force_close, + "force_break_close": self.force_break_close, + } + + @property + def closed_right(self) -> bool: + return self.closed in ["right", "both"] + + @property + def closed_left(self) -> bool: + return self.closed in ["left", "both"] + + def verify_non_overlapping(self): + """Raise IndicesOverlapError if indices will overlap.""" + if not self.closed_right: + return + + def _check( + start_nanos: np.ndarray, end_nanos: np.ndarray, next_start_nanos: np.ndarray + ): + """Raise IndicesOverlap Error if indices would overlap. + + `next_start_nanos` describe start of (sub)session that follows and could + overlap with (sub)session described by `start_nanos` and `end_nanos`. + + All inputs should be of same length. 
+ """ + num_intervals = np.ceil((end_nanos - start_nanos) / self.interval_nanos) + right = start_nanos + num_intervals * self.interval_nanos + if self.closed == "right" and (right > next_start_nanos).any(): + raise errors.IndicesOverlapError() + if self.closed == "both" and (right >= next_start_nanos).any(): + raise errors.IndicesOverlapError() + + if self.has_break: + if not self.force_break_close: + _check( + self.opens[self.mask], + self.break_starts[self.mask], + self.break_ends[self.mask], + ) + + if not self.force_close: + opens, closes, next_opens = ( + self.opens[:-1], + self.closes[:-1], + self.opens[1:], + ) + _check(opens, closes, next_opens) + if self.has_break: + mask = self.mask[:-1] + _check(self.break_ends[:-1][mask], closes[mask], next_opens[mask]) + + def _create_index_for_sessions( + self, + start_nanos: np.ndarray, + end_nanos: np.ndarray, + force_close: bool, + ) -> np.ndarray: + """Create nano array of indices for sessions of given bounds.""" + if start_nanos.size == 0: + return start_nanos + + # evaluate number of indices for each session + num_intervals = (end_nanos - start_nanos) / self.interval_nanos + num_indices = np.ceil(num_intervals).astype("int") + + if force_close: + if self.closed_right: + on_freq = (num_intervals == num_indices).all() + if not on_freq: + num_indices -= 1 # add the close later + else: + on_freq = False + + if self.closed == "both": + num_indices += 1 + elif self.closed == "neither": + num_indices -= 1 + + # by session, evaluate a range of int such that indices of a session + # could be evaluted from [ session_open + (freq * i) for i in range ] + start = 0 if self.closed_left else 1 + func = np.vectorize(lambda stop: np.arange(start, stop), otypes=[np.ndarray]) + stop = num_indices if self.closed_left else num_indices + 1 + ranges = np.concatenate(func(stop), axis=0, dtype=self.dtype) + + # evaluate index as nano array + base = start_nanos.repeat(num_indices) + index = base + ranges * self.interval_nanos + + if 
force_close and not on_freq: + index = np.concatenate((index, end_nanos)) + index.sort() + + return index + + def _trading_index(self) -> np.ndarray: + """Create trading index as nano array. + + Notes + ----- + If `self.has_break` then index is returned UNSORTED. Why? + Returning unsorted allows `trading_index_intervals` to create + indices for the left and right sides and then arrange the right + in the same order as the sorted left. Although as required, there + are rare circumstances in which the resulting right side will not + be in ascending order (it will later be curtailed or an error + raised). This can happen when, for example, a calendar has breaks, + `force_break_close` is False although `force_close` is True and the + period is sufficiently long that the right side of the last + interval of a morning subsession exceeds the day close, i.e. + exceeds the right side of the subsequent interval. In these cases, + sorting the right index by value would result in the indices + becoming unsynced with the corresponding left indices. 
+ """ + if self.has_break: + + # sessions with breaks + index_am = self._create_index_for_sessions( + self.opens[self.mask], + self.break_starts[self.mask], + self.force_break_close, + ) + + index_pm = self._create_index_for_sessions( + self.break_ends[self.mask], self.closes[self.mask], self.force_close + ) + + # sessions without a break + index_day = self._create_index_for_sessions( + self.opens[~self.mask], self.closes[~self.mask], self.force_close + ) + + # put it all together + index = np.concatenate((index_am, index_pm, index_day)) + + else: + index = self._create_index_for_sessions( + self.opens, self.closes, self.force_close + ) + + return index + + def trading_index(self) -> pd.DatetimeIndex: + """Create trading index as a DatetimeIndex.""" + self.verify_non_overlapping() + index = self._trading_index() + if self.has_break: + index.sort() + return pd.DatetimeIndex(index, tz="UTC") + + @contextlib.contextmanager + def _override_defaults(self, **kwargs): + for k, v in kwargs.items(): + setattr(self, k, v) + yield + for k, v in self.defaults.items(): + setattr(self, k, v) + + def trading_index_intervals(self) -> pd.IntervalIndex: + """Create trading index as a pd.IntervalIndex.""" + with self._override_defaults( + closed="left", force_close=False, force_break_close=False + ): + left = self._trading_index() + + if not (self.force_close or self.force_break_close): + if self.has_break: + left.sort() + right = left + self.interval_nanos + else: + with self._override_defaults(closed="right"): + right = self._trading_index() + if self.has_break: + # See _trading_index.__doc__ for note on what's going on here. 
+ indices = left.argsort() + left.sort() + right = right[indices] + + overlaps_next = right[:-1] > left[1:] + if overlaps_next.any(): + if self.curtail_overlaps: + right[:-1][overlaps_next] = left[1:][overlaps_next] + else: + raise errors.IntervalsOverlapError() + + left = pd.DatetimeIndex(left, tz="UTC") + right = pd.DatetimeIndex(right, tz="UTC") + return pd.IntervalIndex.from_arrays(left, right, self.closed) diff --git a/exchange_calendars/errors.py b/exchange_calendars/errors.py index 2cf537f1..94b865e1 100644 --- a/exchange_calendars/errors.py +++ b/exchange_calendars/errors.py @@ -276,3 +276,38 @@ def __str__(self) -> str: or self.minute > self.calendar.last_trading_minute ) return msg + + +class IndexOverlapError(ValueError): + """Periods implied by indices overlap.""" + + +class IntervalsOverlapError(IndexOverlapError): + """Intervals of requested trading index would overlap.""" + + # pylint: disable=missing-return-type-doc + def __str__(self): # noqa: D105 + return ( + "Unable to create trading index as intervals would overlap." + " This can occur when the frequency is longer than a break or" + " the period between one session's close and the next" + " session's open. To shorten intervals that would otherwise" + " overlap either pass `curtail_overlaps` as True or pass" + " `force_close` and/or `force_break_close` as True." + ) + + +class IndicesOverlapError(IndexOverlapError): + """Indices of requested trading index would overlap.""" + + # pylint: disable=missing-return-type-doc + def __str__(self): # noqa: D105 + return ( + "Unable to create trading index as an indice would fall to the" + " right of (later than) the subsequent indice. This can occur" + " when the frequency is longer than a break or the frequency" + " is longer than the period between one session's close and" + " the next session's open. Consider passing `closed` as" + " 'left' or passing `force_close` and/or `force_break_close`" + " as True." 
+ ) diff --git a/exchange_calendars/exchange_calendar.py b/exchange_calendars/exchange_calendar.py index 4904d244..9dd4dd0c 100644 --- a/exchange_calendars/exchange_calendar.py +++ b/exchange_calendars/exchange_calendar.py @@ -44,6 +44,7 @@ parse_trading_minute, parse_session, parse_date, + _TradingIndex, ) from .utils.memoize import lazyval from .utils.pandas_utils import days_at_time @@ -216,10 +217,10 @@ def __init__( end: Date | None = None, side: str | None = None, ): - side = side if side is not None else self.default_side - if side not in self.valid_sides: + side = side if side is not None else self.default_side() + if side not in self.valid_sides(): raise ValueError( - f"`side` must be in {self.valid_sides} although received as {side}." + f"`side` must be in {self.valid_sides()} although received as {side}." ) self._side = side @@ -590,18 +591,18 @@ def day(self): weekmask=self.weekmask, ) - @property - def valid_sides(self) -> list[str]: + @classmethod + def valid_sides(cls) -> list[str]: """List of valid `side` options.""" - if self.close_times == self.open_times: + if cls.close_times == cls.open_times: return ["left", "right"] else: return ["both", "left", "right", "neither"] - @property - def default_side(self) -> str: + @classmethod + def default_side(cls) -> str: """Default `side` option.""" - if self.close_times == self.open_times: + if cls.close_times == cls.open_times: return "right" else: return "both" @@ -1950,6 +1951,216 @@ def minutes_count_for_sessions_in_range( nanos = session_diff - break_diff return (nanos // NANOSECONDS_PER_MINUTE).sum() + def trading_index( + self, + start: Date, + end: Date, + period: pd.Timedelta | str, + intervals: bool = True, + closed: str = "left", # when move to min 3.8 Literal["left", "right", "both", "neither"] + force_close: bool = False, + force_break_close: bool = False, + curtail_overlaps: bool = False, + parse: bool = True, + ) -> pd.DatetimeIndex | pd.IntervalIndex: + """Create a trading index. 
+ + Create a trading index of given `period` over a given range of + dates. + + NB. Which minutes the calendar treats as trading minutes, according + to `self.side`, is irrelevant in the evaluation of the trading + index. + + NB. Execution time is related to the number of indices created. The + longer the range of dates covered and/or the shorter the period + (i.e. higher the frequency), the longer the execution. Whilst an + index with 4000 indices might be created in a couple of + milliseconds, a high frequency index with 2 million indices might + take a second or two. + + Parameters + ---------- + start + Start of session range over which to create index. + + end + End of session range over which to create index. + + period + If `intervals` is True, the length of each interval. If + `intervals` is False, the distance between indices. Period + should be passed as a pd.Timedelta or a str that's acceptable + as a single input to pd.Timedelta. `period` cannot be greater + than 1 day. + + Examples of valid `period` input: + pd.Timedelta(minutes=15), pd.Timedelta(minutes=15, hours=2) + '15min', '15T', '1H', '4h', '1d', '30s', '2s', '500ms'. + Examples of invalid `period` input: + '15minutes', '2d'. + + intervals : default: True + True to return trading index as a pd.IntervalIndex with indices + representing explicit intervals. + + False to return trading index as a pd.DatetimeIndex with + indices that implicitly represent a period according to + `closed`. + + If `period` is '1d' then trading index will be returned as a + pd.DatetimeIndex. + + closed : {"left", "right", "both", "neither"} + (ignored if `period` is '1d'.) + + If `intervals` is True, the side that intervals should be + closed on. Must be either "left" or "right" (any time during a + session must belong to one interval and one interval only). + + If `intervals` is False, the side of each period that an + indice should be defined.
The first and last indices of each + (sub)session will be defined according to: + "left" - include left side of first period, do not include + right side of last period. + "right" - do not include left side of first period, include + right side of last period. + "both" - include both left side of first period and right + side of last period. + "neither" - do not include either left side of first period + or right side of last period. + NB if `period` is not a factor of the (sub)session length then + "right" or "both" will result in an indice being defined after + the (sub)session close. See `force_close` and + `force_break_close`. + + force_close : default: False + (ignored if `period` is '1d') + (irrelevant if `intervals` is False and `closed` is "left" or + "neither") + + Defines behaviour if right side of a session's last period + falls after the session close. + + If True, defines right side of this period as session close. + + If False, defines right side of this period after the session + close. In this case the represented period will include a + non-trading period. + + force_break_close : default: False + (ignored if `period` is '1d'.) + (irrelevant if `intervals` is False and `closed` is "left" or + "neither".) + + Defines behaviour if right side of last pre-break period falls + after the start of the break. + + If True, defines right side of this period as break start. + + If False, defines right side of this period after the break + start. In this case the represented period will include a + non-trading period. + + curtail_overlaps : default: False + (ignored if `period` is '1d') + (irrelevant if (`intervals` is False) or (`force_close` and + `force_break_close` are both True).) + + Defines action to take if a period ends after the start of the + next period. (This can occur if `period` is longer + than a break or the gap between one session's close and the + next session's open.)
+ + If True, the right of the earlier of two overlapping + periods will be curtailed to the left of the latter period. + (NB consequently the period length will not be constant for + all periods.) + + If False, will raise IntervalsOverlapError. + + parse : default: True + Determines if `start` and `end` values are parsed. If these + arguments are passed as pd.Timestamp with no time component + and tz as UTC then can pass `parse` as False to save around + 500µs on the execution. + + Returns + ------- + pd.IntervalIndex or pd.DatetimeIndex + Trading index. + + If `intervals` is False or `period` is '1d' then returned as a + pd.DatetimeIndex. + If `intervals` is True (default) returned as pd.IntervalIndex. + + Raises + ------ + exchange_calendars.errors.IntervalsOverlapError + If `intervals` is True and right side of one or more indices + would fall after the left of the subsequent indice. This can + occur if `period` is longer than a break or the gap between one + session's close and the next session's open. + + exchange_calendars.errors.IndicesOverlapError + If `intervals` is False and an indice would otherwise fall to + the right of the subsequent indice. This can occur if `period` + is longer than a break or the gap between one session's close + and the next session's open. + + Credit to @Stryder-Git at pandas_market_calendars for showing the + way with a vectorised solution to creating trading indices (a + variation of which is employed within the underlying _TradingIndex + class). + """ + if parse: + start = self._parse_session_range_end(start) + end = self._parse_session_range_end(end) + + if not isinstance(period, pd.Timedelta): + try: + period = pd.Timedelta(period) + except ValueError: + msg = ( + f"`period` receieved as '{period}' although takes type" + " 'pd.Timedelta' or a type 'str' that is valid as a single input" + " to 'pd.Timedelta'. Examples of valid input: pd.Timestamp('15T')," + " '15min', '15T', '1H', '4h', '1d', '5s', 500ms'."
+ ) + raise ValueError(msg) from None + + if period > pd.Timedelta(1, "D"): + msg = ( + "`period` cannot be greater than one day although received as" + f" '{period}'." + ) + raise ValueError(msg) + + if period == pd.Timedelta(1, "D"): + return self.sessions_in_range(start, end) + + if intervals and closed in ["both", "neither"]: + raise ValueError( + f"If `intervals` is True then `closed` cannot be '{closed}'." + ) + + # method exposes public methods of _TradingIndex. + _trading_index = _TradingIndex( + self, + start, + end, + period, + closed, + force_close, + force_break_close, + curtail_overlaps, + ) + + if not intervals: + return _trading_index.trading_index() + else: + return _trading_index.trading_index_intervals() + # Internal methods called by constructor. def _special_dates(self, calendars, ad_hoc_dates, start_date, end_date): diff --git a/tests/test_calendar_helpers.py b/tests/test_calendar_helpers.py index e9db030e..62080d02 100644 --- a/tests/test_calendar_helpers.py +++ b/tests/test_calendar_helpers.py @@ -4,23 +4,24 @@ from collections import abc import re +import itertools +import operator -import pandas as pd import pytest +import pandas as pd +import numpy as np +from hypothesis import given, settings, assume, strategies as st -from exchange_calendars import ( - ExchangeCalendar, - get_calendar, - errors, -) +from exchange_calendars import calendar_utils, errors, ExchangeCalendar from exchange_calendars import calendar_helpers as m +from .test_exchange_calendar import Answers # TODO tests for next_divider_idx, previous_divider_idx, compute_all_minutes (#15) -@pytest.fixture -def one_minute() -> abc.Iterator[pd.DateOffset]: - yield pd.DateOffset(minutes=1) +@pytest.fixture(scope="class") +def one_minute() -> abc.Iterator[pd.Timedelta]: + yield pd.Timedelta(1, "T") @pytest.fixture @@ -30,8 +31,8 @@ def one_day() -> abc.Iterator[pd.DateOffset]: # all fixtures with respect to XHKG @pytest.fixture -def calendar() -> abc.Iterator[ExchangeCalendar]: - 
yield get_calendar("XHKG") +def calendar() -> abc.Iterator[calendar_utils.XHKGExchangeCalendar]: + yield calendar_utils.XHKGExchangeCalendar() @pytest.fixture @@ -289,3 +290,651 @@ def test_parse_trading_minute( match="is later than the last trading minute of calendar", ): m.parse_trading_minute(calendar, minute_too_late, param_name) + + +class TestTradingIndex: + """Tests for _TradingIndex. + + Subjects selected calendars (each of a unique behaviour) to fuzz tests + verifying expected behaviour / no unexpected errors. These tests cover + all date ranges, periods (from 1 minute to 1 day) and options. + + Also includes: + - concrete tests to verify overlap handling. + - parsing tests for ExchangeCalendar.trading_index. + + NOTE: `_TradingIndex` is also tested via + `ExchangeCalendarTestBase.test_trading_index` which tests a multitude + of concrete cases (options as default values). + """ + + calendar_names = ["XLON", "XHKG", "CMES", "24/7"] + """Selection of calendars with a particular behaviour: + "XLON" - calendars without breaks. + "XHKG" - calendars with breaks. + "CMES" - 24 hour calendar, not 7 days a week. + "24/7" - 24 hour calendar. 
+ """ + + # Fixtures + + @pytest.fixture(scope="class") + def answers(self) -> abc.Iterator[dict[str, Answers]]: + """Dict of answers for tested calendars, key as name, value as Answers.""" + d = {} + for name in self.calendar_names: + cal_cls = calendar_utils._default_calendar_factories[name] + d[name] = Answers(name, cal_cls.default_side()) + return d + + @pytest.fixture(scope="class") + def calendars(self, answers) -> abc.Iterator[dict[str, ExchangeCalendar]]: + """Dict of tested calendars, key as name, value as calendar.""" + d = {} + for name, ans in answers.items(): + cls = calendar_utils._default_calendar_factories[name] + d[name] = cls(start=ans.first_session, end=ans.last_session) + return d + + @pytest.fixture(scope="class", params=calendar_names) + def calendars_with_answers( + self, request, calendars, answers + ) -> abc.Iterator[tuple[ExchangeCalendar, Answers]]: + """Parameterized fixture.""" + yield (calendars[request.param], answers[request.param]) + + # Helper strategies + + @staticmethod + @st.composite + def _st_times_different( + draw, ans + ) -> st.SearchStrategy[tuple[pd.Timestamp, pd.Timestamp]]: + """SearchStrategy for two consecutive sessions with different times.""" + session = draw(st.sampled_from(ans.sessions_next_time_different.to_list())) + next_session = ans.get_next_session(session) + return (session, next_session) + + @staticmethod + @st.composite + def _st_start_end( + draw, ans + ) -> st.SearchStrategy[tuple[pd.Timestamp, pd.Timestamp]]: + """SearchStrategy for start and end dates in calendar range and + a calendar specific maximum distance.""" + first = ans.first_session.tz_convert(None) + last = ans.last_session.tz_convert(None) + + one_day = pd.Timedelta(1, "D") + # reasonable to quicken test by limiting 24/7 as rules for 24/7 are unchanging. 
+ distance = ( + pd.DateOffset(weeks=2) if ans.name == "24/7" else pd.DateOffset(years=1) + ) + + end = draw(st.datetimes(min(first + distance, last), last)) + end = pd.Timestamp(end).floor("D") + start = draw(st.datetimes(max(end - distance, first), end - one_day)) + start = pd.Timestamp(start).floor("D") + start, end = start.tz_localize("UTC"), end.tz_localize("UTC") + assume(not ans.answers[start:end].empty) + return start, end + + def st_start_end( + self, ans: Answers + ) -> st.SearchStrategy[tuple[pd.Timestamp, pd.Timestamp]]: + """SearchStrategy for trading index start and end dates.""" + st_startend = self._st_start_end(ans) + if not ans.sessions_next_time_different.empty: + st_times_differ = self._st_times_different(ans) + st_startend = st.one_of(st_startend, st_times_differ) + return st_startend + + @staticmethod + @st.composite + def st_periods( + draw, + minimum: pd.Timedelta = pd.Timedelta(1, "T"), + maximum: pd.Timedelta = pd.Timedelta(1, "D") - pd.Timedelta(1, "T"), + ) -> st.SearchStrategy[pd.Timedelta]: + """SearchStrategy for a period between a `minimum` and `maximum`.""" + period = draw(st.integers(minimum.seconds // 60, maximum.seconds // 60)) + return pd.Timedelta(period, "T") + + # Helper methods + + @staticmethod + def could_overlap(ans: Answers, slc: slice, has_break) -> bool: + """Query if there's at least one period at which intervals overlap. + + Can right side of last interval of any session/subsession of a + slice of Answers fall later than the left side of the first + interval of the next session/subsession? 
+ """ + can_overlap = False + if has_break: + duration = ans.break_starts[slc] - ans.opens[slc] + gap = ans.break_ends[slc] - ans.break_starts[slc] + can_overlap = (gap < duration).any() + if not can_overlap: + duration = ans.closes[slc] - ans.opens[slc] + gap = ans.opens.shift(-1)[slc] - ans.closes[slc] + can_overlap = (gap < duration).any() + return can_overlap + + @staticmethod + def evaluate_overrun( + starts: pd.Series, + ends: pd.Series, + period: pd.Timedelta, + ) -> pd.Series: + """Evaluate extent that right side of last interval exceeds end. + + For session/subsessions with `starts` and `ends` evaluate the + distance beyond the `ends` that the right of the last interval will + fall (where interval length is `period`). + """ + duration = ends - starts + shortfall = duration % period + on_end_mask = shortfall == pd.Timedelta(0) + overrun = period - shortfall + overrun[on_end_mask] = pd.Timedelta(0) + return overrun + + @staticmethod + def sessions_bounds( + ans: Answers, + slc: slice, + period: pd.Timedelta, + closed: str | None, + force_break_close: bool, + force_close: bool, + curtail: bool = False, + ) -> tuple[pd.Series, pd.Series]: + """First and last trading indice of each session/subsession. + + `closed` should be passed as None if evaluating bounds for an + intervals index. + """ + closed_left = closed in [None, "left", "both"] + closed_right = closed in [None, "right", "both"] + + opens = ans.opens[slc] + closes = ans.closes[slc] + has_break = ans.break_starts[slc].notna().any() + + def bounds(start: pd.Series, end: pd.Series, force: bool): + """Evaluate bounds of trading index by session/subsession. + + Parameters + ---------- + start + Index: pd.DatetimeIndex + session. Must be the same as `end.index`. + Value: pd.DatetimeIndex + Start time of session or a subsession of session (where + session is index value). + + end + As for `start` albeit indicating end times. 
+ """ + lower_bounds = start if closed_left else start + period + if force: + if (lower_bounds > end).any(): + # period longer than session/subsession duration + lower_bounds[lower_bounds > end] = end + return lower_bounds, end + + duration = end - start + func = np.ceil if closed_right else np.floor + num_periods = func(duration / period) + if not closed_right and closed is not None: + num_periods[duration % period == pd.Timedelta(0)] -= 1 + + upper_bounds = start + (num_periods * period) + + if closed == "neither" and (num_periods == 0).any: # edge case + # lose bounds where session/subsession has no indice. + upper_bounds = upper_bounds[num_periods != 0] + lower_bounds = lower_bounds[upper_bounds.index] + + return lower_bounds, upper_bounds + + if has_break: + break_starts = ans.break_starts[slc] + break_ends = ans.break_ends[slc] + mask = break_starts.notna() # which sessions have a break + + # am sessions bounds + am_lower, am_upper = bounds( + opens[mask], break_starts[mask], force_break_close + ) + + # pm sessions bounds + pm_lower, pm_upper = bounds(break_ends[mask], closes[mask], force_close) + + # sessions without breaks + if (~mask).any(): + day_lower, day_upper = bounds(opens[~mask], closes[~mask], force_close) + else: + day_upper = day_lower = pd.Series([], dtype="datetime64[ns, UTC]") + + lower_bounds = pd.concat((am_lower, pm_lower, day_lower)) + upper_bounds = pd.concat((am_upper, pm_upper, day_upper)) + + else: + lower_bounds, upper_bounds = bounds(opens, closes, force_close) + + if curtail and not (force_close and force_break_close): + indices = lower_bounds.argsort() + lower_bounds.sort_values(inplace=True) + upper_bounds = upper_bounds[indices] + curtail_mask = upper_bounds > lower_bounds.shift(-1) + if curtail_mask.any(): + upper_bounds[curtail_mask] = lower_bounds.shift(-1)[curtail_mask] + + return lower_bounds, upper_bounds + + # Fuzz tests for unexpected errors and return behaviour. 
+ + @given( + data=st.data(), + force_close=st.booleans(), + force_break_close=st.booleans(), + ) + @settings(deadline=None) + def test_indices_fuzz( + self, + data, + calendars_with_answers, + force_close: bool, + force_break_close: bool, + one_minute, + ): + """Fuzz for unexpected errors and options behaviour. + + Expected errors tested for separately. + + 'period' limited to avoid IndicesOverlapError. + + 'start' and 'end' set to: + be within calendar bounds. + dates covering at least one session. + """ + cal, ans = calendars_with_answers + start, end = data.draw(self.st_start_end(ans)) + slc = ans.sessions.slice_indexer(start, end) + has_break = ans.break_starts[slc].notna().any() + + closed_options = ["left", "neither", "right", "both"] + if ans.sessions[slc].isin(ans.sessions_without_gap_after).any(): + closed_options = closed_options[:-1] # lose "both" option + closed = data.draw(st.sampled_from(closed_options)) + closed_right = closed in ["right", "both"] + + max_period = pd.Timedelta(1, "D") - one_minute + + params_allow_overlap = closed_right and not (force_break_close and force_close) + if params_allow_overlap: + can_overlap = self.could_overlap(ans, slc, has_break) + else: + can_overlap = False + + if has_break and can_overlap: + # filter out periods that will definitely overlap. + max_period = (ans.break_ends[slc] - ans.opens[slc]).min() + + # guard against "neither" returning empty. Tested for under seprate test. + if closed == "neither": + if has_break and not force_break_close: + am_length = (ans.break_starts[slc] - ans.opens[slc]).min() - one_minute + pm_length = (ans.closes[slc] - ans.break_ends[slc]).min() - one_minute + max_period = min(max_period, am_length, pm_length) + elif not force_close: + min_length = (ans.closes[slc] - ans.opens[slc]).min() - one_minute + max_period = min(max_period, min_length) + + period = data.draw(self.st_periods(maximum=max_period)) + + if can_overlap: + # assume no overlaps (i.e. 
reject test parameters if would overlap). + op = operator.ge if closed == "both" else operator.gt + if has_break: + mask = ans.break_starts[slc].notna() + overrun = self.evaluate_overrun( + ans.opens[slc][mask], ans.break_starts[slc][mask], period + ) + break_duration = (ans.break_ends[slc] - ans.break_starts[slc]).dropna() + assume(not op(overrun, break_duration).any()) + overrun = self.evaluate_overrun(ans.opens[slc], ans.closes[slc], period) + sessions_gap = ans.opens[slc].shift(-1) - ans.closes[slc] + assume(not op(overrun, sessions_gap).any()) + + ti = m._TradingIndex( + cal, start, end, period, closed, force_close, force_break_close, False + ) + index = ti.trading_index() + + # Assertions + + assert isinstance(index, pd.DatetimeIndex) + assert not index.empty + + lower_bounds, upper_bounds = self.sessions_bounds( + ans, slc, period, closed, force_break_close, force_close, False + ) + + assert lower_bounds.isin(index).all() + assert upper_bounds.isin(index).all() + + # verify that all indices are within bounds of a session or subsession. + bv = pd.Series(False, index) + for lower_bound, upper_bound in zip(lower_bounds, upper_bounds): + bv = bv | ((index >= lower_bound) & (index <= upper_bound)) + assert bv.all() + + @given( + data=st.data(), + force_break_close=st.booleans(), + curtail=st.booleans(), + ) + @settings(deadline=None) + def test_intervals_fuzz( + self, + data, + calendars_with_answers, + force_break_close: bool, + curtail: bool, + one_minute, + ): + """Fuzz for unexpected errors and options behaviour. + + Expected errors tested for separately. + + `period` limited to avoid IntervalsOverlapError. + + 'start' and 'end' set to: + be within calendar bounds. + dates covering at least one session. 
+ """ + cal, ans = calendars_with_answers + start, end = data.draw(self.st_start_end(ans)) + slc = ans.sessions.slice_indexer(start, end) + has_break = ans.break_starts[slc].notna().any() + + force_close = data.draw(st.booleans()) + closed = data.draw(st.sampled_from(["left", "right"])) + max_period = pd.Timedelta(1, "D") - one_minute + + params_allow_overlap = not curtail and not (force_break_close and force_close) + if params_allow_overlap: + can_overlap = self.could_overlap(ans, slc, has_break) + else: + can_overlap = False + + if has_break and can_overlap: + # filter out periods that will definitely overlap. + max_period = (ans.break_ends[slc] - ans.opens[slc]).min() + + period = data.draw(self.st_periods(maximum=max_period)) + + if can_overlap: + # assume no overlaps + if has_break: + mask = ans.break_starts[slc].notna() + overrun = self.evaluate_overrun( + ans.opens[slc][mask], ans.break_starts[slc][mask], period + ) + break_duration = (ans.break_ends[slc] - ans.break_starts[slc]).dropna() + assume(not (overrun > break_duration).any()) + overrun = self.evaluate_overrun(ans.opens[slc], ans.closes[slc], period) + sessions_gap = ans.opens[slc].shift(-1) - ans.closes[slc] + assume(not (overrun > sessions_gap).any()) + + ti = m._TradingIndex( + cal, start, end, period, closed, force_close, force_break_close, curtail + ) + index = ti.trading_index_intervals() + + # assertions + + assert isinstance(index, pd.IntervalIndex) + assert not index.empty + + lower_bounds, upper_bounds = self.sessions_bounds( + ans, slc, period, None, force_break_close, force_close, curtail + ) + + assert lower_bounds.isin(index.left).all() + assert upper_bounds.isin(index.right).all() + + # verify that all intervals are within bounds of a session or subsession + bv = pd.Series(False, index) + for lower_bound, upper_bound in zip(lower_bounds, upper_bounds): + bv = bv | ((index.left >= lower_bound) & (index.right <= upper_bound)) + + @given(data=st.data(), 
calendar_name=st.sampled_from(["XLON", "XHKG"])) + @settings(deadline=None) + def test_for_empty_with_neither_fuzz(self, data, calendars, answers, calendar_name): + """Fuzz for specific condition that returns empty DatetimeIndex. + + Fuzz for expected empty DatetimeIndex when closed "neither" and + period is longer than any session/subsession. + """ + cal, ans = calendars[calendar_name], answers[calendar_name] + start, end = data.draw(self.st_start_end(ans)) + slc = ans.sessions.slice_indexer(start, end) + has_break = ans.break_starts[slc].notna().any() + + if has_break: + max_am_length = (ans.break_starts[slc] - ans.opens[slc]).max() + max_pm_length = (ans.closes[slc] - ans.break_ends[slc]).max() + min_period = max(max_am_length, max_pm_length) + else: + min_period = (ans.closes[slc] - ans.opens[slc]).max() + + period = data.draw(self.st_periods(minimum=min_period)) + + closed = "neither" + forces = [False, False] + + ti = m._TradingIndex(cal, start, end, period, closed, *forces, False) + index = ti.trading_index() + assert index.empty + + @given( + data=st.data(), + intervals=st.booleans(), + force_close=st.booleans(), + force_break_close=st.booleans(), + curtail_overlaps=st.booleans(), + ) + @settings(deadline=None) + def test_daily_fuzz( + self, + data, + calendars_with_answers, + intervals: bool, + force_close: bool, + force_break_close: bool, + curtail_overlaps: bool, + ): + """Fuzz for unexpected errors and return behaviour.""" + cal, ans = calendars_with_answers + + if intervals: + closed_options = ["left", "right"] + else: + closed_options = ["left", "right", "neither", "both"] + closed = data.draw(st.sampled_from(closed_options)) + + start, end = data.draw(self.st_start_end(ans)) + period = pd.Timedelta(1, "D") + forces_and_curtails = [force_close, force_break_close, curtail_overlaps] + + index = cal.trading_index( + start, end, period, intervals, closed, *forces_and_curtails, parse=False + ) + assert isinstance(index, pd.DatetimeIndex) + assert not 
index.empty + pd.testing.assert_index_equal(index.normalize(), index) + + # Tests for expected errors. + + @pytest.mark.parametrize("name", ["XHKG", "24/7", "CMES"]) + @given(data=st.data(), closed=st.sampled_from(["right", "both"])) + @settings(deadline=None) + def test_overlap_error_fuzz( + self, data, name, calendars, answers, closed, one_minute + ): + """Fuzz for expected IndicesOverlapError. + + NB. Test should exclude calendars, such as "XLON", for which + indices cannot overlap. These are calendars where a + session/subsession duration is less than the subsequent gap + between that session/subsession and the next. Passing any slice of + the answers for such a calendar to `could_overlap` would return + False. That such calendars cannot have overlapping indices is + verified by `test_indices_fuzz` and `test_intervals_fuzz` which + place no restraints on the period that these calendars can be + tested against (at least between 0 minutes and 1 day exclusive). + """ + cal, ans = calendars[name], answers[name] + start, end = data.draw(self.st_start_end(ans)) + slc = ans.sessions.slice_indexer(start, end) + has_break = ans.break_starts[slc].notna().any() + + # filter out periods that will definitely not cause an overlap. + if has_break: + min_period = (ans.break_ends[slc] - ans.break_starts[slc]).min() + else: + min_period = (ans.opens.shift(-1)[slc] - ans.closes[slc]).min() + + period = data.draw(self.st_periods(minimum=max(one_minute, min_period))) + + # assume overlaps (i.e. 
reject test parameters if does not overlap) + op = operator.ge if closed == "both" else operator.gt + if has_break: + mask = ans.break_starts[slc].notna() + overrun = self.evaluate_overrun( + ans.opens[slc][mask], ans.break_starts[slc][mask], period + ) + break_duration = (ans.break_ends[slc] - ans.break_starts[slc]).dropna() + assume(op(overrun, break_duration).any()) + else: + overrun = self.evaluate_overrun(ans.opens[slc], ans.closes[slc], period) + sessions_gap = ans.opens[slc].shift(-1) - ans.closes[slc] + assume(op(overrun, sessions_gap).any()) + + forces_and_curtails = [False, False, False] + + ti = m._TradingIndex(cal, start, end, period, closed, *forces_and_curtails) + with pytest.raises(errors.IndicesOverlapError): + ti.trading_index() + + if closed == "right": + with pytest.raises(errors.IntervalsOverlapError): + ti.trading_index_intervals() + + @pytest.fixture(params=[True, False]) + def curtail_all(self, request) -> abc.Iterator[bool]: + """Parameterized fixture of all values for 'curtail_overlaps'.""" + yield request.param + + @pytest.fixture(scope="class") + def cal_start_end( + self, calendars + ) -> abc.Iterator[tuple[ExchangeCalendar], pd.Timestamp, pd.Timestamp]: + """(calendar, start, end) parameters for parsing and concrete overlap tests.""" + yield ( + calendars["XHKG"], + pd.Timestamp("2018-01-01", tz="UTC"), + pd.Timestamp("2018-12-31", tz="UTC"), + ) + + @pytest.fixture(params=itertools.product(("105T", "106T"), ("right", "both"))) + def ti_for_overlap( + self, request, cal_start_end, curtail_all + ) -> abc.Iterator[m._TradingIndex]: + """_TradingIndex fixture against which to test for overlaps. + + '105T' is the edge case where last right indice of am subsession would + coincide with first left indice of pm subsession. + '106T' is one minute to the right of this. 
+ """ + cal, start, end = cal_start_end + period, closed = request.param + period = pd.Timedelta(period) + forces_and_curtails = [False, False, curtail_all] + yield m._TradingIndex(cal, start, end, period, closed, *forces_and_curtails) + + def test_overlaps(self, ti_for_overlap, answers): + """Test 'curtail_overlaps' and for overlaps against concrete parameters.""" + ti = ti_for_overlap + period = pd.Timedelta(ti.interval_nanos) + period_106 = period == pd.Timedelta("106T") + + if period_106 or ti.closed == "both": + with pytest.raises(errors.IndicesOverlapError): + ti.trading_index() + + if ti.closed == "both": + return + + if not ti.curtail_overlaps and period_106: + # won't raise on "105T" as right side of last interval of am + # session won't clash on coinciding with left side of first + # interval of pm session as one of these sides will always be + # open (in this case the left side). NB can't close on both + # sides. + with pytest.raises(errors.IntervalsOverlapError): + ti.trading_index_intervals() + else: + index = ti.trading_index_intervals() + assert index.is_non_overlapping_monotonic + assert index.right.isin(answers["XHKG"].break_ends).any() + + @pytest.fixture(params=("right", "both")) + def ti_for_overlap_error_negative_case( + self, request, cal_start_end, curtail_all + ) -> abc.Iterator[m._TradingIndex]: + """_TradingIndex fixture against which to test for no overlaps. + + 104T is the edge case, one minute short of coinciding with pm subsession open. 
+ """ + cal, start, end = cal_start_end + period, closed = pd.Timedelta("104T"), request.param + forces_and_curtails = [False, False, curtail_all] + yield m._TradingIndex(cal, start, end, period, closed, *forces_and_curtails) + + def test_overlaps_2(self, ti_for_overlap_error_negative_case): + """Test for no overlaps against concrete edge case.""" + ti = ti_for_overlap_error_negative_case + index = ti.trading_index() + assert isinstance(index, pd.DatetimeIndex) + if ti.closed == "right": + index = ti.trading_index_intervals() + assert isinstance(index, pd.IntervalIndex) + assert index.is_non_overlapping_monotonic + + # PARSING TESTS + + def test_parsing_errors(self, cal_start_end): + cal, start, end = cal_start_end + error_msg = ( + "`period` cannot be greater than one day although received as" + f" '{pd.Timedelta('2d')}'." + ) + with pytest.raises(ValueError, match=error_msg): + cal.trading_index(start, end, "2d", parse=False) + + error_msg = "If `intervals` is True then `closed` cannot be 'neither'." + with pytest.raises(ValueError, match=re.escape(error_msg)): + cal.trading_index( + start, end, "20T", intervals=True, closed="neither", parse=False + ) + + error_msg = "If `intervals` is True then `closed` cannot be 'both'." 
+ with pytest.raises(ValueError, match=re.escape(error_msg)): + cal.trading_index( + start, end, "20T", intervals=True, closed="both", parse=False + ) diff --git a/tests/test_exchange_calendar.py b/tests/test_exchange_calendar.py index e7221268..a98bd27a 100644 --- a/tests/test_exchange_calendar.py +++ b/tests/test_exchange_calendar.py @@ -30,11 +30,7 @@ _default_calendar_aliases, _default_calendar_factories, ) -from exchange_calendars.errors import ( - CalendarNameCollision, - InvalidCalendarName, - NoSessionsError, -) +from exchange_calendars import errors from exchange_calendars.exchange_calendar import ExchangeCalendar, days_at_time from .test_utils import T @@ -68,12 +64,12 @@ def test_register_calendar(self, dispatcher, dummy_cal): assert dummy_cal == retr_cal # Try to register again, expecting a name collision - with pytest.raises(CalendarNameCollision): + with pytest.raises(errors.CalendarNameCollision): dispatcher.register_calendar("DMY", dummy_cal) # Deregister the calendar and ensure that it is removed dispatcher.deregister_calendar("DMY") - with pytest.raises(InvalidCalendarName): + with pytest.raises(errors.InvalidCalendarName): dispatcher.get_calendar("DMY") def test_register_calendar_type(self, dispatcher, dummy_cal_type): @@ -84,7 +80,7 @@ def test_register_calendar_type(self, dispatcher, dummy_cal_type): def test_both_places_are_checked(self, dispatcher, dummy_cal): # if instance is registered, can't register type with same name dispatcher.register_calendar("DMY", dummy_cal) - with pytest.raises(CalendarNameCollision): + with pytest.raises(errors.CalendarNameCollision): dispatcher.register_calendar_type("DMY", type(dummy_cal)) dispatcher.deregister_calendar("DMY") @@ -92,7 +88,7 @@ def test_both_places_are_checked(self, dispatcher, dummy_cal): # if type is registered, can't register instance with same name dispatcher.register_calendar_type("DMY", type(dummy_cal)) - with pytest.raises(CalendarNameCollision): + with 
pytest.raises(errors.CalendarNameCollision): dispatcher.register_calendar("DMY", dummy_cal) def test_force_registration(self, dispatcher, dummy_cal_type): @@ -341,6 +337,33 @@ def get_sessions_minutes( return dtis[0].union_many(dtis[1:]) + def get_session_minutes( + self, session: pd.Timestamp + ) -> tuple[pd.DatetimeIndex, ...]: + """Get trading minutes of a single `session`. + + Returns + ------- + tuple[pd.DatetimeIndex, ...] + If `session` has a break, returns 2-tuple where: + [0] minutes of am session. + [1] minutes of pm session. + If `session` does not have a break, returns 1-tuple with + element holding minutes of session. + """ + first = self.first_minutes[session] + last = self.last_minutes[session] + last_am = self.last_am_minutes[session] + first_pm = self.first_pm_minutes[session] + + if pd.isna(last_am): + return (pd.date_range(first, last, freq="T"),) + else: + return ( + pd.date_range(first, last_am, freq="T"), + pd.date_range(first_pm, last, freq="T"), + ) + # --- Evaluated general calendar properties --- @functools.lru_cache(maxsize=4) @@ -2010,7 +2033,9 @@ def early_closes( def test_base_integrity(self, calendar_cls, non_valid_overrides): cls = calendar_cls for name in non_valid_overrides: - assert getattr(cls, name) == getattr(ExchangeCalendar, name) + on_cls, on_base = getattr(cls, name), getattr(ExchangeCalendar, name) + # covers properties, instance methods and class methods... + assert (on_cls == on_base or on_cls.__qualname__ == on_base.__qualname__) def test_calculated_against_csv(self, default_calendar_with_answers): calendar, ans = default_calendar_with_answers @@ -2063,7 +2088,7 @@ def test_invalid_input(self, calendar_cls, sides, default_answers, name): f" there would be no sessions between the requested `start` ('{start}')" f" and `end` ('{end}') dates."
) - with pytest.raises(NoSessionsError, match=re.escape(error_msg)): + with pytest.raises(errors.NoSessionsError, match=re.escape(error_msg)): calendar_cls(start=start, end=end) def test_bound_start(self, calendar_cls, start_bound, today): @@ -3107,6 +3132,95 @@ def test_minutes_count_for_sessions_in_range(self, all_calendars_with_answers): # Additional belt-and-braces test to reconcile with cal.all_minutes assert f(ans.first_session, ans.last_session) == len(cal.all_minutes) + def test_trading_index(self, calendars, answers): + """Test trading index with options as default values. + + Tests multitude of concrete cases covering product of all + session blocks and various periods. + + Assumes default value (False) for each of `force_close`, + `force_break_close` and `curtail_overlaps`. See test class + `test_calendar_helpers.TestTradingIndex` for more comprehensive + fuzz testing of select calendars (and parsing testing). + """ + cal, ans = calendars["left"], answers["left"] + + def unite(dtis: list[pd.DatetimeIndex]) -> pd.DatetimeIndex: + return dtis[0].append(dtis[1:]) # append to not sort or remove duplicates + + for _, sessions in ans.session_block_generator(): + for mins in [5, 17, 60, 123, 333, 1033]: + period = pd.Timedelta(mins, "T") + dtis = [] + for session in sessions: + indexes = ans.get_session_minutes(session) + for i, index in enumerate(indexes): + # Create closed 'both' trading index for each session/subsession + if i == 0 and len(indexes) == 2: + ends = ans.break_starts + else: + ends = ans.closes + # index for a 'left' calendar, add end so evaluated as if 'both' + index = index.append(pd.DatetimeIndex([ends[session]])) + + index = index[::mins] # only want every period + if not index[-1] == ends[session]: + # if period doesn't coincide with end, add right side of + # last interval which lies beyond end.
+ last_indice = index[-1] + period + index = index.append(pd.DatetimeIndex([last_indice])) + dtis.append(index) + + both_index = unite(dtis) + left_index = unite([dti[:-1] for dti in dtis]) + right_index = unite([dti[1:] for dti in dtis]) + neither_index = unite([dti[1:-1] for dti in dtis]) + + overlaps = (right_index[:-1] > left_index[1:]).any() + if overlaps: + both_overlaps = overlaps + else: + both_overlaps = False + for dti, next_dti in zip(dtis, dtis[1:]): + if dti[-1] == next_dti[0]: + both_overlaps = True + break + + def get_index(closed: str, intervals: bool): + start, end = sessions[0], sessions[-1] + return cal.trading_index( + start, end, period, intervals, closed, parse=False + ) + + def tst_indices_index( + expected: pd.DatetimeIndex, closed: str, overlaps: bool + ): + if not overlaps: + rtrn = get_index(closed, False) + pd.testing.assert_index_equal(expected, rtrn) + else: + with pytest.raises(errors.IndicesOverlapError): + get_index(closed, False) + + tst_indices_index(both_index, "both", both_overlaps) + tst_indices_index(left_index, "left", False) + tst_indices_index(right_index, "right", overlaps) + tst_indices_index(neither_index, "neither", False) + + def tst_intervals_index(closed: str, overlaps: bool): + if not overlaps: + rtrn = get_index(closed, True) + expected = pd.IntervalIndex.from_arrays( + left_index, right_index, closed + ) + pd.testing.assert_index_equal(expected, rtrn) + else: + with pytest.raises(errors.IntervalsOverlapError): + get_index(closed, True) + + tst_intervals_index("left", overlaps) + tst_intervals_index("right", overlaps) + class EuronextCalendarTestBase(ExchangeCalendarTestBase): """Common calendar-specific fixtures for Euronext exchanges.""" From 4d12163d793f3789dcd622d4abc00ab1e816c867 Mon Sep 17 00:00:00 2001 From: Marcus Read Date: Fri, 15 Oct 2021 23:52:44 +0100 Subject: [PATCH 2/2] Update setup.py Adds hypothesis to extras_require. 
--- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 8ab1de37..f3d2dc18 100644 --- a/setup.py +++ b/setup.py @@ -81,6 +81,7 @@ "pytest-benchmark", "pytest-xdist", "pip-tools", + "hypothesis", ], }, )