-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathassignment2.py
46 lines (31 loc) · 1.05 KB
/
assignment2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import numpy as np
import tools
import grader
def evaluate_policy(env, V, pi, gamma, theta):
    """Sweep over the states, updating ``V`` in place, until it settles.

    Every pass visits each state in ``env.S`` and hands it to
    ``bellman_update``, which overwrites ``V[s]``. Passes repeat for as long
    as the largest absolute change seen during a pass is greater than
    ``theta``. The same (mutated) ``V`` object is returned.
    """
    def sweep_once():
        # One full pass over the states; reports the largest change to V.
        biggest_change = 0
        for state in env.S:
            before = V[state]
            bellman_update(env, V, pi, state, gamma)
            change = abs(before - V[state])
            if change > biggest_change:
                biggest_change = change
        return biggest_change

    # Start at infinity so the first pass runs for any finite threshold.
    largest_change = float('inf')
    while largest_change > theta:
        largest_change = sweep_once()
    return V
def bellman_update(env, V, pi, s, gamma):
    """Mutate ``V`` according to the Bellman update equation.

    Replaces ``V[s]`` with the expected one-step return from state ``s``
    under policy ``pi``::

        V[s] <- sum_a pi[s][a] * sum_s' p(s', r | s, a) * (r + gamma * V[s'])

    Nothing is returned; only the entry ``V[s]`` is overwritten.

    NOTE(review): the environment interface is not visible in this file.
    This implementation assumes that ``env.A`` iterates over action indices
    usable as the second index of ``pi``, and that ``env.transitions(s, a)``
    returns one ``(reward, probability)`` row per next state, ordered so the
    row position is the next state's index into ``V``. Confirm against
    ``tools.ParkingWorld``.
    """
    expected_return = 0.0
    for a in env.A:
        action_prob = pi[s][a]
        for next_state, (reward, prob) in enumerate(env.transitions(s, a)):
            expected_return += action_prob * prob * (reward + gamma * V[next_state])
    # Assign only after the full sum so the old V[s] is used wherever s is
    # itself a possible next state.
    V[s] = expected_return
# Test environment: a parking world with 10 spaces and 4 price levels.
num_spaces = 10
num_prices = 4
env = tools.ParkingWorld(num_spaces, num_prices)

# Discount factor and convergence threshold for policy evaluation.
gamma = 0.9
theta = 0.1

# Test policy: in every state, put all probability on the action at index 1.
city_policy = np.zeros((num_spaces + 1, num_prices))
city_policy[:, 1] = 1

# Evaluate the policy starting from an all-zero value function
# (one entry per state, num_spaces + 1 in total).
V = evaluate_policy(env, np.zeros(num_spaces + 1), city_policy, gamma, theta)

# Reference values the evaluated value function is compared against.
answer = [80.04, 81.65, 83.37, 85.12, 86.87, 88.55, 90.14, 91.58, 92.81, 93.78, 87.77]

# The computed values must agree with the reference to the 1e-2 tolerance
# passed to grader.near.
assert grader.near(V, answer, 1e-2)