import tensorflow as tf
from tensorflow.contrib.solvers.python.ops.util import l2norm
import numpy as np
import cv2 as cv
from baselines.a2c.utils import conv, lstm, lnlstm, conv_to_fc, fc, ortho_init

def grad_clip(loss, max_grad_norm, scope_list):
    '''
    :param loss: scalar loss tensor
    :param max_grad_norm: maximum global norm for gradient clipping (None disables clipping)
    :param scope_list: a list of variable scopes whose trainable variables receive gradients
    :return: a list of (gradient, variable) pairs and the global norm after clipping
    '''
    params_list = []
    for scope in scope_list:
        scope_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope)
        print(len(scope_vars))
        params_list += scope_vars
    grads = tf.gradients(loss, params_list)
    # If a gradient is None (the loss does not depend on that variable), it could
    # be replaced here with zeros of the matching shape before clipping.
    global_norm = 0.
    if max_grad_norm is not None:
        grads, grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)
        global_norm = tf.sqrt(sum([l2norm(t) ** 2 for t in grads]))
    grads = list(zip(grads, params_list))
    return grads, global_norm

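# Usage sketch (illustrative, not part of this module): `total_loss` and the
# scope names below are placeholders. The returned (grad, var) pairs can be
# passed straight to an optimizer.
#   grads, global_norm = grad_clip(total_loss, max_grad_norm=0.5,
#                                  scope_list=['model/policy', 'model/value'])
#   train_op = tf.train.RMSPropOptimizer(7e-4).apply_gradients(grads)
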
def grad_clip_joint(loss_joint, max_grad_norm, scope_list_joint):
    '''
    :param loss_joint: [loss1, loss2, loss3, ...]
    :param max_grad_norm: maximum global norm, applied jointly over all losses
    :param scope_list_joint: [scope_list1 (with respect to loss1), scope_list2, scope_list3, ...]
    :return: per-loss lists of (gradient, variable) pairs and per-loss global norms
    '''
    grads_joint = []
    params_list_joint = []
    seg_points = [0]
    for i, loss in enumerate(loss_joint):
        params_list = []
        for scope in scope_list_joint[i]:
            scope_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope)
            print(len(scope_vars))
            params_list += scope_vars
        grads_joint += tf.gradients(loss, params_list)
        params_list_joint += params_list
        # Segment boundaries must be cumulative offsets into the joint list,
        # not the length of the current sub-list alone.
        seg_points.append(len(params_list_joint))
    if max_grad_norm is not None:
        grads_joint, grad_norm = tf.clip_by_global_norm(grads_joint, max_grad_norm)
    grads_joint_return = []
    global_norm_return = []
    for i in range(len(seg_points) - 1):
        grads = grads_joint[seg_points[i]:seg_points[i + 1]]
        params_list = params_list_joint[seg_points[i]:seg_points[i + 1]]
        global_norm = tf.sqrt(sum([l2norm(t) ** 2 for t in grads]))
        grads = list(zip(grads, params_list))
        grads_joint_return.append(grads)
        global_norm_return.append(global_norm)
    return grads_joint_return, global_norm_return

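# Usage sketch (illustrative): two losses clipped under one shared global norm,
# each returning its own (grad, var) pairs. `pi_loss`, `vf_loss`, and the scope
# names are placeholders.
#   (pi_grads, vf_grads), (pi_norm, vf_norm) = grad_clip_joint(
#       [pi_loss, vf_loss], max_grad_norm=0.5,
#       scope_list_joint=[['model/policy'], ['model/value']])
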
def l2norm1(a):
    '''
    :param a: shape = [bs, dim]
    :return: l2 norm of a along axis=1, shape = [bs]
    '''
    return tf.sqrt(tf.reduce_sum(tf.square(a), 1))

def compute_cosine(a, b):
    '''
    :param a: shape = [bs, dim]
    :param b: shape = [bs, dim]
    :return: cosine similarity along axis=1
    '''
    numer = tf.reduce_sum(a * b, axis=1)
    # The denominator is treated as a constant so gradients flow only through the dot product.
    deno = tf.stop_gradient(l2norm1(a) * l2norm1(b))
    return numer / (deno + 1e-3)

def np_l2norm1(a):
    return np.sqrt(np.sum(np.square(a), axis=1))

def np_compute_cosine(a, b):
    numer = np.sum(a * b, axis=1)
    deno = np_l2norm1(a) * np_l2norm1(b)
    return numer / (deno + 1e-3)

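# Quick check of the NumPy cosine helper: identical rows give a value just
# below 1, because of the 1e-3 smoothing term in the denominator.
#   >>> a = np.array([[1., 0.], [0., 1.]])
#   >>> np_compute_cosine(a, a)   # ~ [0.999, 0.999]
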
def explained_variance(ypred, y):
    """
    Computes fraction of variance that ypred explains about y.
    Returns 1 - Var[y-ypred] / Var[y]
    interpretation:
        ev=0  =>  might as well have predicted zero
        ev=1  =>  perfect prediction
        ev<0  =>  worse than just predicting zero
    """
    assert y.ndim == 1 and ypred.ndim == 1
    vary = np.var(y)
    return np.nan if vary == 0 else 1 - np.var(y - ypred) / vary

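# Example: a prediction off by a constant captures all of the variance, since
# Var[y - ypred] = Var[-0.1] = 0.
#   >>> y = np.array([1., 2., 3., 4.])
#   >>> explained_variance(y + 0.1, y)   # 1.0
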
def batch_norm(x, scope, nstep, var_eps=1e-3):
    '''
    :param x: input tensor
    :param scope: variable scope for the learned scale/offset
    :param nstep: time step count of the worker; normalization is skipped when nstep == 1
    :param var_eps: small constant added to the variance for numerical stability
    :return: the normalized tensor
    '''
    if nstep == 1:
        return x
    shape = list(x.get_shape())
    mean, var = tf.nn.moments(x, axes=list(range(len(shape) - 1)))
    depth = shape[-1].value
    with tf.variable_scope(scope):
        # Initialize to the standard identity transform (scale=1, offset=0).
        scale = tf.get_variable(shape=[depth], name='scale', dtype=tf.float32,
                                initializer=tf.ones_initializer())
        offset = tf.get_variable(shape=[depth], name='offset', dtype=tf.float32,
                                 initializer=tf.zeros_initializer())
    x_ = scale * ((x - mean) / tf.sqrt(var + var_eps)) + offset
    return x_

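# Usage sketch (illustrative): normalize a batch of features with a learned
# scale/offset; with nstep == 1 the input passes through unchanged. `h` and
# `nsteps` are placeholders.
#   h_bn = batch_norm(h, scope='bn1', nstep=nsteps)
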
def show_img(img, scope):
    # Display the first channel of the observation, upscaled 3x, in an OpenCV window.
    cv.namedWindow(scope, cv.WINDOW_AUTOSIZE)
    cv.imshow(scope, cv.resize(img[:, :, 0], (3 * 84, 3 * 84)))
    cv.waitKey(1)

def conv_unequ_size(x, scope, *, nf, rf, stride, pad='VALID', init_scale=1.0, data_format='NHWC'):
    '''
    Convolution that allows a non-square kernel and per-axis strides.
    :param x: input tensor
    :param scope: variable scope for the kernel and bias
    :param nf: number of output filters
    :param rf: a 2-element list, [kernel_h, kernel_w]
    :param stride: a 2-element list, [h, w]
    :param pad: padding mode, 'VALID' or 'SAME'
    :param init_scale: scale passed to the orthogonal initializer
    :param data_format: 'NHWC' or 'NCHW'
    :return: the convolution output with bias added
    '''
    if data_format == 'NHWC':
        channel_ax = 3
        strides = [1, stride[0], stride[1], 1]
        bshape = [1, 1, 1, nf]
    elif data_format == 'NCHW':
        channel_ax = 1
        strides = [1, 1, stride[0], stride[1]]
        bshape = [1, nf, 1, 1]
    else:
        raise NotImplementedError
    nin = x.get_shape()[channel_ax].value
    wshape = [rf[0], rf[1], nin, nf]
    with tf.variable_scope(scope):
        w = tf.get_variable("w", wshape, initializer=ortho_init(init_scale))
        b = tf.get_variable("b", [1, nf, 1, 1], initializer=tf.constant_initializer(0.0))
        if data_format == 'NHWC':
            b = tf.reshape(b, bshape)
        return b + tf.nn.conv2d(x, w, strides=strides, padding=pad, data_format=data_format)

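# Usage sketch (illustrative): a 1x7 kernel with stride (1, 2) over NHWC input,
# the kind of non-square convolution this helper exists for. `x` is a placeholder.
#   h = conv_unequ_size(x, 'c1', nf=32, rf=[1, 7], stride=[1, 2])
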
def deconv(x, scope, *, nf, rf, stride, output_shape, pad='VALID', init_scale=1.0, data_format='NHWC'):
    if data_format == 'NHWC':
        channel_ax = 3
        strides = [1, stride, stride, 1]
        bshape = [1, 1, 1, nf]
    elif data_format == 'NCHW':
        channel_ax = 1
        strides = [1, 1, stride, stride]
        bshape = [1, nf, 1, 1]
    else:
        raise NotImplementedError
    nin = x.get_shape()[channel_ax].value
    # conv2d_transpose expects the filter as [height, width, out_channels, in_channels].
    wshape = [rf, rf, nf, nin]
    with tf.variable_scope(scope):
        w = tf.get_variable("w", wshape, initializer=ortho_init(init_scale))
        b = tf.get_variable("b", [1, nf, 1, 1], initializer=tf.constant_initializer(0.0))
        if data_format == 'NHWC':
            b = tf.reshape(b, bshape)
        return b + tf.nn.conv2d_transpose(x, w, output_shape, strides=strides, padding=pad, data_format=data_format)

def deconv_unequ_size(x, scope, *, nf, rf, stride, output_shape, pad='VALID', init_scale=1.0, data_format='NHWC'):
    '''
    Transposed convolution that allows a non-square kernel and per-axis strides.
    :param x: input tensor
    :param scope: variable scope for the kernel and bias
    :param nf: number of output filters
    :param rf: a 2-element list, [kernel_h, kernel_w]
    :param stride: a 2-element list, [h, w]
    :param output_shape: full output shape for conv2d_transpose
    :param pad: padding mode, 'VALID' or 'SAME'
    :param init_scale: scale passed to the orthogonal initializer
    :param data_format: 'NHWC' or 'NCHW'
    :return: the transposed-convolution output with bias added
    '''
    if data_format == 'NHWC':
        channel_ax = 3
        strides = [1, stride[0], stride[1], 1]
        bshape = [1, 1, 1, nf]
    elif data_format == 'NCHW':
        channel_ax = 1
        strides = [1, 1, stride[0], stride[1]]
        bshape = [1, nf, 1, 1]
    else:
        raise NotImplementedError
    nin = x.get_shape()[channel_ax].value
    wshape = [rf[0], rf[1], nf, nin]
    with tf.variable_scope(scope):
        w = tf.get_variable("w", wshape, initializer=ortho_init(init_scale))
        b = tf.get_variable("b", [1, nf, 1, 1], initializer=tf.constant_initializer(0.0))
        if data_format == 'NHWC':
            b = tf.reshape(b, bshape)
        return b + tf.nn.conv2d_transpose(x, w, output_shape, strides=strides, padding=pad, data_format=data_format)

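# Usage sketch (illustrative): with VALID padding, output size = (in - 1) * stride + kernel,
# so a [bs, 9, 9, 64] feature map upsamples to [bs, 20, 20, 32] with a square
# 4x4 kernel via `deconv`; `deconv_unequ_size` takes per-axis kernels/strides.
# `h` and `bs` are placeholders.
#   y = deconv(h, 'd1', nf=32, rf=4, stride=2, output_shape=[bs, 20, 20, 32])
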
def compute_ir(h, goal):
    # Intrinsic reward: negative normalized distance between the feature h and
    # the goal, in [-1, 0]; a +10 bonus is added when h is close enough to the
    # goal (ir >= -0.03).
    ir = 1. * (-np_l2norm1(h - goal) / (np_l2norm1(goal) + np_l2norm1(h)))
    mask = (ir < -0.03).astype(ir.dtype)
    ir = mask * ir + (1 - mask) * (ir + 10.)
    return ir

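# Example: when h matches the goal exactly, the distance term is 0, the mask is
# 0 (since 0 >= -0.03), and the reward becomes 0 + 10 = 10; far from the goal
# the reward approaches -1.
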
def obs_preprocess(obs):
    '''
    :param obs: input shape = [bs, h, w, c=3*x], x stacked RGB frames
    :return: output shape = [bs, 84, 84, c=x], grayscale and resized
    '''
    frame_num = obs.shape[-1] // 3
    # Split the stacked channels into individual RGB frames (3 channels each).
    obs_list = [obs[:, :, :, 3 * i:3 * i + 3] for i in range(frame_num)]
    # Convert each frame to grayscale by averaging its color channels.
    obs_gray_list = [single_obs.mean(-1, keepdims=True).astype(np.float32) for single_obs in obs_list]
    obs_gray = np.concatenate(tuple(obs_gray_list), axis=-1) / 255.
    obs_ = []
    for i in range(obs.shape[0]):
        obs_.append(cv.resize(obs_gray[i], (84, 84)))
    return np.asarray(obs_, dtype=np.float32)

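# Usage sketch (illustrative): four stacked 210x160 RGB Atari frames collapse
# to four grayscale 84x84 channels.
#   batch = np.zeros((8, 210, 160, 12), dtype=np.uint8)
#   obs_preprocess(batch).shape   # (8, 84, 84, 4)
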
def noise_and_argmax(logits):
    # Sample an action from the categorical distribution defined by the logits.
    # tf.multinomial expects unnormalized log-probabilities, so the logits are
    # passed directly rather than their softmax. The name refers to an earlier
    # noise-then-argmax implementation of the same sampling.
    a = tf.multinomial(logits, num_samples=1)
    return tf.squeeze(a, axis=-1)

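# Usage sketch (illustrative): sample one action per batch row from policy
# logits. `pi_logits` is a placeholder.
#   actions = noise_and_argmax(pi_logits)   # shape [bs], dtype int64
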
def dense_layers(h, cell, name, activ=tf.nn.relu, num_layers=1):
    h_ = h
    for i in range(num_layers):
        h_ = activ(fc(h_, name + str(i + 1), nh=cell))
    return h_

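# Usage sketch (illustrative): a two-layer ReLU MLP whose fc layers are named
# 'fc1' and 'fc2', with 256 units each. `x` is a placeholder.
#   h = dense_layers(x, cell=256, name='fc', num_layers=2)
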
def generate_noise(size):
    '''
    :param size: an integer tuple giving the output shape
    :return: Gaussian noise with mean 0 and standard deviation 0.1
    '''
    return np.random.normal(0., 0.1, size=size)

def tf_l2norm(X, axis=-1, keep_dims=False):
    return tf.sqrt(tf.reduce_sum(X * X, axis=axis, keep_dims=keep_dims))

def tf_normalize(X, axis=0):
    # Standardize X with batch statistics treated as constants; note the leading
    # minus sign, which flips the sign of the normalized output.
    mean, var = tf.nn.moments(X, axes=axis, keep_dims=True)
    return -(X - tf.stop_gradient(mean)) / (tf.sqrt(tf.stop_gradient(var)) + 1e-3)

def mse(pred, target):
    # Elementwise squared error, halved so the gradient is simply (pred - target).
    return tf.square(pred - target) / 2.