"""
Gradient update rules for the deep_q_rl package.
Some code here is modified from the Lasagne package:
https://github.com/Lasagne/Lasagne/blob/master/LICENSE
"""
from collections import OrderedDict

import numpy as np
import theano
import theano.tensor as T
from lasagne.updates import get_or_compute_grads
# The MIT License (MIT)
# Copyright (c) 2014 Sander Dieleman
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
def deepmind_rmsprop(loss_or_grads, params, learning_rate,
                     rho, epsilon):
    """Centered RMSProp updates, as used by DeepMind [1]_.

    Scale learning rates by dividing with the square root of an estimate
    of the gradient variance: a moving average of the squared gradient
    minus the square of a moving average of the gradient.

    Parameters
    ----------
    loss_or_grads : symbolic expression or list of expressions
        A scalar loss expression, or a list of gradient expressions
    params : list of shared variables
        The variables to generate update expressions for
    learning_rate : float or symbolic scalar
        The learning rate controlling the size of update steps
    rho : float or symbolic scalar
        Gradient moving average decay factor
    epsilon : float or symbolic scalar
        Small value added for numerical stability

    Returns
    -------
    OrderedDict
        A dictionary mapping each parameter to its update expression

    Notes
    -----
    `rho` should be between 0 and 1. A value of `rho` close to 1 will decay
    the moving averages slowly and a value close to 0 will decay the moving
    averages fast.

    Using the step size :math:`\\eta`, the decay factor :math:`\\rho` and
    the gradient :math:`g`, the updates are calculated as:

    .. math::

       m_t &= \\rho m_{t-1} + (1 - \\rho) g\\\\
       r_t &= \\rho r_{t-1} + (1 - \\rho) g^2\\\\
       \\theta_t &= \\theta_{t-1} -
           \\frac{\\eta g}{\\sqrt{r_t - m_t^2 + \\epsilon}}

    Unlike plain RMSProp [2]_, the squared moving average of the gradient,
    :math:`m_t^2`, is subtracted inside the square root, which centers the
    second-moment estimate on the gradient variance.

    References
    ----------
    .. [1] Graves, A. (2013):
           Generating Sequences With Recurrent Neural Networks.
           arXiv preprint arXiv:1308.0850.
    .. [2] Tieleman, T. and Hinton, G. (2012):
           Neural Networks for Machine Learning, Lecture 6.5 - rmsprop.
           Coursera. http://www.youtube.com/watch?v=O3sxAc4hxZU (formula @5:20)
    """
    grads = get_or_compute_grads(loss_or_grads, params)
    updates = OrderedDict()

    for param, grad in zip(params, grads):
        value = param.get_value(borrow=True)

        # Moving average of the gradient (first moment).
        acc_grad = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                                 broadcastable=param.broadcastable)
        acc_grad_new = rho * acc_grad + (1 - rho) * grad

        # Moving average of the squared gradient (second moment).
        acc_rms = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                                broadcastable=param.broadcastable)
        acc_rms_new = rho * acc_rms + (1 - rho) * grad ** 2

        updates[acc_grad] = acc_grad_new
        updates[acc_rms] = acc_rms_new

        # Scale the step by the centered RMS of the gradient:
        # sqrt(E[g^2] - E[g]^2 + epsilon).
        updates[param] = (param - learning_rate *
                          (grad /
                           T.sqrt(acc_rms_new - acc_grad_new ** 2 + epsilon)))

    return updates
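

if __name__ == '__main__':
    # Usage sketch (illustration only, not part of the original module):
    # wire deepmind_rmsprop into a Theano training function for a tiny,
    # hypothetical regression network. The layer sizes and hyperparameter
    # values below are assumptions chosen for demonstration.
    import lasagne

    input_var = T.matrix('inputs')
    target_var = T.vector('targets')

    # A one-layer linear network mapping 4 features to 1 output.
    l_in = lasagne.layers.InputLayer((None, 4), input_var=input_var)
    l_out = lasagne.layers.DenseLayer(l_in, num_units=1, nonlinearity=None)

    prediction = lasagne.layers.get_output(l_out).flatten()
    loss = T.mean((prediction - target_var) ** 2)
    params = lasagne.layers.get_all_params(l_out, trainable=True)

    updates = deepmind_rmsprop(loss, params, learning_rate=0.00025,
                               rho=0.95, epsilon=0.01)
    train_fn = theano.function([input_var, target_var], loss,
                               updates=updates)

    # A few steps on random data; the reported loss should trend downward.
    X = np.random.randn(32, 4).astype(theano.config.floatX)
    y = np.random.randn(32).astype(theano.config.floatX)
    for _ in range(5):
        print(train_fn(X, y))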