hack_findmap.py
'''
Wholly copied and modified from:
    pymc/tuning/starting.py
    https://github.com/pymc-devs/pymc3/commit/7427adb98c3fc6a7617415e9309c2bf0dc80d8bb

The purpose is to provide a find_MAP() that does NOT compute gradients when
they are not required. This is a very dirty hack; I intend to make the edit
on a new branch of pymc3 and submit a pull request.

@editor: jon.sedar@applied.ai
@date: 25 Nov 2015
'''
from scipy import optimize
import numpy as np
from numpy import isfinite, nan_to_num, logical_not

from pymc3.core import *
from pymc3.vartypes import discrete_types, typefilter

from inspect import getargspec

__all__ = ['find_MAP', 'scipyminimize']

def find_MAP(start=None, vars=None, fmin=None, return_raw=False,
             disp=False, model=None, *args, **kwargs):
    """
    CONTAINS A LOCAL HACK TO AVOID CALCULATING THE GRADIENT WHEN WE DON'T
    NEED IT: DOING SO CAUSES MEMORY OVERFLOWS WHEN USING LARGE DATASETS
    (ROUGHLY 100000 x 15).

    Sets state to the local maximum a posteriori point given a model.
    Current default of fmin_Hessian does not deal well with optimizing close
    to sharp edges, especially if they are the minimum.

    Parameters
    ----------
    start : `dict` of parameter values (Defaults to `model.test_point`)
    vars : list
        List of variables to set to MAP point (Defaults to all continuous).
    fmin : function
        Optimization algorithm (Defaults to `scipy.optimize.fmin_bfgs` unless
        discrete variables are specified in `vars`, then
        `scipy.optimize.fmin_powell`, which will perform better).
    return_raw : Bool
        Whether to return the extra value returned by fmin (Defaults to
        `False`)
    disp : Bool
        Display helpful warnings, and verbose output of `fmin` (Defaults to
        `False`)
    model : Model (optional if in `with` context)
    *args, **kwargs
        Extra args passed to fmin

    See the commented usage sketch at the bottom of this file.
    """
    model = modelcontext(model)
    if start is None:
        start = model.test_point

    if vars is None:
        vars = model.cont_vars

    vars = inputvars(vars)
    disc_vars = list(typefilter(vars, discrete_types))

    if disc_vars and disp:
        print("Warning: vars contains discrete variables. MAP " +
              "estimates may not be accurate for the default " +
              "parameters. Defaulting to non-gradient minimization " +
              "fmin_powell.")

    if fmin is None:
        if disc_vars:
            fmin = optimize.fmin_powell
        else:
            fmin = optimize.fmin_bfgs

    allinmodel(vars, model)

    start = Point(start, model=model)
    bij = DictToArrayBijection(ArrayOrdering(vars), start)
    logp = bij.mapf(model.fastlogp)
    ## HACK: do not build the gradient function -- compiling and evaluating
    ## dlogp is what blows up memory on large datasets.
    ## dlogp = bij.mapf(model.fastdlogp(vars))

    def logp_o(point):
        return nan_to_high(-logp(point))

    ## grad_logp_o is unused while the gradient branch below is disabled
    ## (dlogp is never defined above), so it is commented out as well.
    ## def grad_logp_o(point):
    ##     return nan_to_num(-dlogp(point))

    # Check to see if minimization function actually uses the gradient
    ## if 'fprime' in getargspec(fmin).args:
    ##     r = fmin(logp_o, bij.map(
    ##         start), fprime=grad_logp_o, disp=disp, *args, **kwargs)
    ## else:
    r = fmin(logp_o, bij.map(start), disp=disp, *args, **kwargs)
    if isinstance(r, tuple):
        mx0 = r[0]
    else:
        mx0 = r

    mx = bij.rmap(mx0)

    if (not allfinite(mx0) or
        not allfinite(model.logp(mx))  ## or
        ## not allfinite(model.dlogp()(mx))
        ):
        messages = []
        for var in vars:
            vals = {
                "value": mx[var.name],
                "logp": var.logp(mx)  ## ,
                ## "dlogp": var.dlogp()(mx)
            }

            def message(name, values):
                if np.size(values) < 10:
                    return name + " bad: " + str(values)
                else:
                    idx = np.nonzero(logical_not(isfinite(values)))
                    return (name + " bad at idx: " + str(idx) +
                            " with values: " + str(values[idx]))

            messages += [
                message(var.name + "." + k, v)
                for k, v in vals.items()
                if not allfinite(v)]

        specific_errors = '\n'.join(messages)
        raise ValueError("Optimization error: max, logp " +  ## "or dlogp " +
                         "at max have non-finite values. Some values may " +
                         "be outside of distribution support. max: " +
                         repr(mx) + " logp: " + repr(model.logp(mx)) +
                         ## " dlogp: " + repr(model.dlogp()(mx)) +
                         " Check that " +
                         "1) you don't have hierarchical parameters, " +
                         "these will lead to points with infinite " +
                         "density. 2) your distribution logp's are " +
                         "properly specified. Specific issues: \n" +
                         specific_errors)
    mx = {v.name: mx[v.name].astype(v.dtype) for v in model.vars}

    if return_raw:
        return mx, r
    else:
        return mx

def allfinite(x):
    return np.all(isfinite(x))


def nan_to_high(x):
    return np.where(isfinite(x), x, 1.0e100)


def scipyminimize(f, x0, fprime, *args, **kwargs):
    # NOTE: the upstream version calls scipy.optimize.minimize, but only
    # `optimize` is imported at the top of this file, so call it via that
    # name to avoid a NameError.
    r = optimize.minimize(f, x0, jac=fprime, *args, **kwargs)
    return r.x, r

def allinmodel(vars, model):
    notin = [v for v in vars if v not in model.vars]
    if notin:
        raise ValueError("Some variables not in the model: " + str(notin))