-
Notifications
You must be signed in to change notification settings - Fork 60
/
Copy pathutils.py
98 lines (68 loc) · 3.99 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
def layer(input_layer, num_next_neurons, is_output=False):
num_prev_neurons = int(input_layer.shape[1])
shape = [num_prev_neurons, num_next_neurons]
if is_output:
weight_init = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3)
bias_init = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3)
else:
# 1/sqrt(f)
fan_in_init = 1 / num_prev_neurons ** 0.5
weight_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init)
bias_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init)
weights = tf.get_variable("weights", shape, initializer=weight_init)
biases = tf.get_variable("biases", [num_next_neurons], initializer=bias_init)
dot = tf.matmul(input_layer, weights) + biases
if is_output:
return dot
relu = tf.nn.relu(dot)
return relu
def layer_goal_nn(input_layer, num_next_neurons, is_output=False):
num_prev_neurons = int(input_layer.shape[1])
shape = [num_prev_neurons, num_next_neurons]
fan_in_init = 1 / num_prev_neurons ** 0.5
weight_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init)
bias_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init)
weights = tf.get_variable("weights", shape, initializer=weight_init)
biases = tf.get_variable("biases", [num_next_neurons], initializer=bias_init)
dot = tf.matmul(input_layer, weights) + biases
if is_output:
return dot
relu = tf.nn.relu(dot)
return relu
# Below function prints out options and environment specified by user
def print_summary(FLAGS,env):
print("\n- - - - - - - - - - -")
print("Task Summary: ","\n")
print("Environment: ", env.name)
print("Number of Layers: ", FLAGS.layers)
print("Time Limit per Layer: ", FLAGS.time_scale)
print("Max Episode Time Steps: ", env.max_actions)
print("Retrain: ", FLAGS.retrain)
print("Test: ", FLAGS.test)
print("Visualize: ", FLAGS.show)
print("- - - - - - - - - - -", "\n\n")
# Below function ensures environment configurations were properly entered
def check_validity(model_name, goal_space_train, goal_space_test, end_goal_thresholds, initial_state_space, subgoal_bounds, subgoal_thresholds, max_actions, timesteps_per_action):
# Ensure model file is an ".xml" file
assert model_name[-4:] == ".xml", "Mujoco model must be an \".xml\" file"
# Ensure upper bounds of range is >= lower bound of range
if goal_space_train is not None:
for i in range(len(goal_space_train)):
assert goal_space_train[i][1] >= goal_space_train[i][0], "In the training goal space, upper bound must be >= lower bound"
if goal_space_test is not None:
for i in range(len(goal_space_test)):
assert goal_space_test[i][1] >= goal_space_test[i][0], "In the training goal space, upper bound must be >= lower bound"
for i in range(len(initial_state_space)):
assert initial_state_space[i][1] >= initial_state_space[i][0], "In initial state space, upper bound must be >= lower bound"
for i in range(len(subgoal_bounds)):
assert subgoal_bounds[i][1] >= subgoal_bounds[i][0], "In subgoal space, upper bound must be >= lower bound"
# Make sure end goal spaces and thresholds have same first dimension
if goal_space_train is not None and goal_space_test is not None:
assert len(goal_space_train) == len(goal_space_test) == len(end_goal_thresholds), "End goal space and thresholds must have same first dimension"
# Makde sure suboal spaces and thresholds have same dimensions
assert len(subgoal_bounds) == len(subgoal_thresholds), "Subgoal space and thresholds must have same first dimension"
# Ensure max action and timesteps_per_action are postive integers
assert max_actions > 0, "Max actions should be a positive integer"
assert timesteps_per_action > 0, "Timesteps per action should be a positive integer"