"""
Environment(s):
-- Portfolio allocation with different price dynamics
-- Saved dynamics for Github
@date: Sept 2024
@author: Anthony Coache
"""
# numpy
import numpy as np
# pytorch
import torch as T


class Environment:
    def __init__(self, params):
        """constructor

        Parameters
        ----------
        params : dict
            Dictionary of environment parameters (device, r0, w0, T, dt)
        """
        self.params = params  # all parameters
        self.device = params["device"]
        self.r0 = params["r0"]  # initial interest rate
        self.w0 = params["w0"]  # initial wealth
        self.T = params["T"]  # time horizon of the problem
        self.dt = params["dt"]  # time elapsed between periods
        self.sqrt_dt = np.sqrt(self.dt)
        # discretized time grid from 0 to T with step dt
        self.t = T.linspace(0, self.T, int(self.T / self.dt + 1))
        # pre-simulated price paths and the matching ticker names
        self.S_t = T.load(
            'sim_' + str(len(self.t) - 1) + 'pers.pt', map_location=self.device)
        self.names_tickers = T.load(
            'names_tickers.pt', map_location=self.device)
        self.S0 = self.S_t[0, 0, :].to(self.device)  # initial prices
    def __repr__(self):
        return f"<Environment T:{self.T}, dt:{self.dt}>"

    def __str__(self):
        descrp = "*** Portfolio allocation environment -- Saved dynamics ***\n"
        for key, val in self.params.items():
            descrp += '* {}: {}\n'.format(key, val)
        return descrp
    def reset(self, Nsims=1):
        """initialization of the environment with initial states

        Parameters
        ----------
        Nsims : int
            Number of realizations for the initial state

        Returns
        -------
        t_0, w_0 : torch.Tensor
            Tensors for the initial time and wealth
        """
        t_0 = T.zeros(Nsims, device=self.device)
        w_0 = self.w0 * T.ones(Nsims, device=self.device)
        return t_0, w_0
    def step(self, t_t, S_t, S_tp1, w_t, pi_t):
        """simulation engine

        Parameters
        ----------
        t_t, S_t, S_tp1, w_t, pi_t : torch.Tensor
            Tensors for the time, current prices, next prices, wealth,
            and policy (portfolio weights)

        Returns
        -------
        t_tp1, w_tp1 : torch.Tensor
            Tensors for the next time and wealth
        cost : torch.Tensor
            Tensor for the cost during this period (negated, clamped reward)
        """
        t_tp1 = t_t + self.dt  # update time step
        # wealth update: w_{t+1} = w_t * sum_i pi_{t,i} * S_{t+1,i} / S_{t,i}
        w_tp1 = w_t * T.sum(pi_t * S_tp1 / S_t, dim=-1)
        reward = w_tp1 - w_t  # reward is the change in wealth
        # return the cost, i.e. the negative of the reward clamped to [-5, 5]
        return t_tp1, w_tp1, -T.clamp(reward, min=-5.0, max=5.0)
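

# A minimal usage sketch, not part of the original file: it assumes the
# saved files 'sim_<N>pers.pt' and 'names_tickers.pt' exist in the working
# directory, that the price tensor is shaped (periods + 1, simulations,
# assets), and the parameter values below (T=1.0, dt=0.1, ...) are
# illustrative placeholders only.
if __name__ == "__main__":
    params = {"device": "cpu", "r0": 0.02, "w0": 1.0, "T": 1.0, "dt": 0.1}
    env = Environment(params)
    print(env)

    Nsims = 4
    t_t, w_t = env.reset(Nsims=Nsims)
    for idx in range(len(env.t) - 1):
        S_t = env.S_t[idx, :Nsims, :]        # current prices
        S_tp1 = env.S_t[idx + 1, :Nsims, :]  # next prices
        # equal-weight portfolio across all assets as a placeholder policy
        pi_t = T.ones_like(S_t) / S_t.shape[-1]
        t_t, w_t, cost = env.step(t_t, S_t, S_tp1, w_t, pi_t)
    print("terminal wealth:", w_t)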