-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathutils.py
160 lines (125 loc) · 3.29 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import pandas as pd
import numpy as np
### Initial Operations
def returns(df):
    """
    Simple close-to-close returns.

    Each row is the ``close`` value divided by the previous row's ``close``,
    minus one. The first row has no predecessor and is therefore NaN.
    :param df: object exposing a ``close`` attribute/column
    :return: close / previous close - 1
    """
    previous_close = df.close.shift(1)
    return df.close / previous_close - 1
def vwap(df):
    """
    Per-row volume-weighted price: (volume * close) / volume.

    NOTE(review): because numerator and denominator use the same row's
    volume, this reduces (up to floating-point rounding) to ``close``
    wherever volume is non-zero, and is NaN where volume is zero. A true
    VWAP would need aggregation over several trades/bars -- confirm intent.
    :param df: object exposing ``volume`` and ``close`` attributes/columns
    :return: row-wise (volume * close) / volume
    """
    traded_value = df.volume * df.close
    return traded_value / df.volume
def adv(df, d):
    """
    adv{d}: rolling mean of ``df.volume`` over the past d rows.

    NOTE(review): this averages the ``volume`` column as-is; no price is
    applied here, so it is a *dollar* volume only if ``volume`` is already
    expressed in currency -- confirm against the data source.
    :param df: object exposing a ``volume`` attribute/column
    :param d: number of days to look back (rolling window)
    :return: rolling mean of volume (NaN until d observations are available)
    """
    volume_window = df.volume.rolling(d)
    return volume_window.mean()
###
def rank(df):
    """
    Percentile rank of the values in ``df``.

    Delegates to pandas ``rank(pct=True)``, so results lie in (0, 1] and the
    ranking runs along pandas' default axis.
    NOTE(review): described as cross-sectional -- confirm the caller's data
    layout matches the default ranking axis.
    :param df: pandas Series or DataFrame
    :return: object of the same shape holding percentile ranks
    """
    percentile_ranks = df.rank(pct=True)
    return percentile_ranks
def stddev(df, d):
    """
    Rolling standard deviation over the last d rows.

    Uses pandas' rolling ``std`` with its default settings; rows with fewer
    than d observations available yield NaN.
    :param df: pandas Series or DataFrame
    :param d: number of days to look back (rolling window)
    :return: rolling standard deviation, same shape as ``df``
    """
    window = df.rolling(window=d)
    return window.std()
def scale(df, a=1):
    """
    Rescale x so that sum(abs(x)) = a (the default is a = 1).

    The values keep their sign: every entry is multiplied by the same
    positive factor ``a / sum(abs(x))``. Infinite entries are excluded from
    the normalising sum (treated as missing) so that a single +/-inf does not
    turn the whole result into zeros/NaN.
    :param df: pandas Series or DataFrame (a DataFrame is scaled per column)
    :param a: target value for the sum of absolute values
    :return: rescaled object of the same shape as ``df``
    """
    # Only finite values contribute to the normaliser.
    finite_values = df.replace([np.inf, -np.inf], np.nan)
    abs_total = finite_values.abs().sum(skipna=True)
    # Multiply by `a` (previously ignored) and keep the sign of each entry.
    return df * a / abs_total
def product(df, d):
    """
    Time-series product over the past d days.

    :param df: pandas Series or DataFrame of values
    :param d: number of days to look back (rolling window)
    :return: rolling product, NaN until d observations are available
    """
    # np.product was deprecated and then removed in NumPy 2.0; np.prod is the
    # supported spelling. raw=True hands each window over as a plain ndarray,
    # avoiding a Series construction per window.
    return df.rolling(d).apply(np.prod, raw=True)
def decay_linear(df, d):
    """
    Weighted moving average over the past d days with linearly decaying
    weights d, d - 1, ..., 1 (rescaled to sum up to 1).

    The most recent observation in each window receives weight d and the
    oldest receives weight 1. The previous implementation used an
    exponentially weighted mean, which does not match this definition.
    :param df: pandas Series or DataFrame of values
    :param d: number of days to look back (rolling window)
    :return: linearly weighted rolling mean, NaN until d observations exist
    """
    # Windows arrive oldest-first, so ascending weights 1..d put the largest
    # weight on the latest value.
    weights = np.arange(1, d + 1, dtype=float)
    weights /= weights.sum()
    return df.rolling(d).apply(lambda window: np.dot(window, weights), raw=True)
def delta(df, d):
    """
    Today's value of x minus the value of x d days ago.

    :param df: pandas Series or DataFrame
    :param d: lag in rows
    :return: df - df shifted by d rows (NaN where no lagged value exists)
    """
    lagged = df.shift(d)
    return df - lagged
def corr(x, y, d):
    """
    Time-serial (rolling) correlation of x and y over the past d days.

    :param x: pandas Series or DataFrame
    :param y: pandas object to correlate against
    :param d: number of days to look back (rolling window)
    :return: rolling correlation of x with y
    """
    x_window = x.rolling(d)
    return x_window.corr(y)
def cov(x, y, d):
    """
    Time-serial (rolling) covariance of x and y over the past d days.

    :param x: pandas Series or DataFrame
    :param y: pandas object to take the covariance with
    :param d: number of days to look back (rolling window)
    :return: rolling covariance of x with y
    """
    x_window = x.rolling(d)
    return x_window.cov(y)
def delay(df, d):
    """
    Value of x d days ago.

    :param df: pandas Series or DataFrame
    :param d: lag in rows
    :return: ``df`` shifted by d rows (leading rows become NaN)
    """
    return df.shift(periods=d)
### Time-Series Operations
def ts_max(df, d=10):
    """
    Rolling maximum over the last d days.

    :param df: pandas Series or DataFrame of values
    :param d: number of days to look back (rolling window), default 10
    :return: rolling maximum, NaN until d observations are available
    """
    window = df.rolling(window=d)
    return window.max()
def ts_min(df, d=10):
    """
    Rolling minimum over the last d days.

    :param df: pandas Series or DataFrame of values
    :param d: number of days to look back (rolling window), default 10
    :return: rolling minimum, NaN until d observations are available
    """
    window = df.rolling(window=d)
    return window.min()
def ts_argmax(df, d):
    """
    Gets the day ts_max(x, d) occurred on.

    The result is the 1-based position of the maximum inside each d-row
    window (1 = oldest row of the window, d = most recent). When the maximum
    is tied, the earliest position is reported.
    :param df: pandas Series or DataFrame
    :param d: number of days to look back (rolling window)
    :return: 1-based position of the window maximum, NaN until d rows exist
    """
    # raw=True passes a plain ndarray, so np.argmax always returns a position
    # within the window rather than depending on pandas' Series.argmax
    # semantics; it also avoids building a Series per window.
    return df.rolling(d).apply(np.argmax, raw=True).add(1)
def ts_argmin(df, d):
    """
    Gets the day ts_min(x, d) occurred on.

    The result is the 1-based position of the minimum inside each d-row
    window (1 = oldest row of the window, d = most recent). When the minimum
    is tied, the earliest position is reported.
    :param df: pandas Series or DataFrame
    :param d: number of days to look back (rolling window)
    :return: 1-based position of the window minimum, NaN until d rows exist
    """
    # raw=True passes a plain ndarray, so np.argmin always returns a position
    # within the window rather than depending on pandas' Series.argmin
    # semantics; it also avoids building a Series per window.
    return df.rolling(d).apply(np.argmin, raw=True).add(1)
def ts_rank(df, d):
    """
    Time-series rank in the past d days.

    For every d-row window, the values are percentile-ranked and the rank of
    the most recent (last) value in the window is returned.
    :param df: pandas Series or DataFrame
    :param d: number of days to look back (rolling window)
    :return: percentile rank of the latest value within its window
    """
    def _latest_percentile_rank(window):
        # Rank the whole window, then keep only the newest observation's rank.
        window_ranks = pd.Series(window).rank(pct=True)
        return window_ranks.iloc[-1]

    return df.rolling(d).apply(_latest_percentile_rank)
def ts_sum(df, d):
    """
    Time-series sum over the past d days.

    :param df: pandas Series or DataFrame
    :param d: number of days to look back (rolling window)
    :return: rolling sum, NaN until d observations are available
    """
    window = df.rolling(window=d)
    return window.sum()