-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathWeedsPy_MCMC.py
620 lines (479 loc) · 23.9 KB
/
WeedsPy_MCMC.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
# =============================================================================
# WeedsPy_MCMC.py #
# Author: Dr. Gwenllian M. Williams #
# =============================================================================
# Uses emcee to explore parameters of synthetic spectra created by WeedsPy_MCMC.class #
# =============================================================================
import os
import numpy as np
import emcee
from astropy.io import fits
import matplotlib.pyplot as plt
import corner
#matplotlib.use('TkAgg')
"""
Below is the WeedsPy_MCMC class and the read_params_file function, and an
example of a script that uses these functions.
Both the class, and the executable script, must be present in the same .py file:
Any Sic commands made to Gildas/CLASS in Python scripts that are placed in a
separate module script would not be recognised by Gildas/CLASS due to the
nesting. Sic commands must be placed in the primary Python script you execute,
Therefore you cannot place the below WeedsPy_MCMC class and the read_params_file
function in a separate module.
You must place them instead in the preamble of your executable script.
Place your executable scripts after: if __name__ == '__main__':
"""
def read_params_file(paramfile):
"""
Function to read in the parameter file.
This function is defined outside of the WeedsPy_MCMC class object,
because it reads the input parameters that will be passed later
to the WeedsPy_MCMC class
"""
params={}
fileObject = open(paramfile)
# Read parameter file into a dictionary, skipping empty lines and comments
for line in fileObject:
if(line=="\n"):
continue
if(line[0]=="#"):
continue
words=line.split()
params[words[0].strip()] = words[2].strip()
# Some catalog entries for molecules have whitespace:
# In this case, need to merge words[2] and words[3], and include quotation marks so that CLASS can read it.
if words[0]=='molecule':
if ',' in words[2]:
print('There is a comma in the molecule string')
molname = '\"'+words[2]+' '+words[3]+'\"'
params[words[0].strip()] = molname
return params
class WeedsPy_MCMC:
def __init__(self,
catalog_name,
molecule_name,
source_size,
rms_noise,
rms_noise_unit,
n_iter,
n_burn,
n_walkers,
n_dim,
column_base,
priors,
initial,
mean_freq,
bmaj,
bmin,
freq_unit,
telescope_diameter,
vel_sys,
vel_width,
):
"""
This class takes in the spectrum to the modelled by Weeds, and uses
emcee to determine the parameters of the synthetic spectra.
This is not a best fit spectrum, rather a highest likelihood spectrum.
Parameters:
"""
self.catalog_name = catalog_name
self.molecule_name = molecule_name
self.source_size = source_size
self.rms_noise = rms_noise
self.rms_noise_unit = rms_noise_unit
self.n_iter = n_iter
self.n_burn = n_burn
self.n_walkers = n_walkers
self.n_dim = n_dim
self.column_base = column_base
self.priors = priors
self.initial = initial
self.mean_freq = mean_freq
self.bmaj = bmaj
self.bmin = bmin
self.freq_unit = freq_unit
self.telescope_diameter = telescope_diameter
self.vel_sys = vel_sys
self.vel_width = vel_width
def set_gildas_variables(self):
"""
Function to set the variables in Gildas/CLASS
"""
# Define CLASS variables:
Sic.comm('define character*128 peakname')
Sic.comm('define character*128 molecule')
Sic.comm('define real telescope_diameter')
Sic.comm('define character*128 t_cont')
Sic.comm('define character*128 pixi')
Sic.comm('define character*128 pixj')
Sic.comm('define character*128 min_spec')
Sic.comm('define character*128 max_spec')
# Pass some of these input parameters to CLASS, and set some plot ranges:
Sic.comm('let molecule '+str(self.molecule_name))
Sic.comm('use in '+str(self.catalog_name))
Sic.comm('set unit f')
Sic.comm('let telescope_diameter '+str(self.telescope_diameter))
return
def make_gildas_mdl_file(self,theta):#,ii=None,jj=None):
"""
This is a function to make the Gildas/CLASS readable text file (.mdl extension) that contains the synthetic spectrum model parameters walked to by the emcee.
"""
# If you want to model column density, temperature, centroid velocity and velocity width:
if self.n_dim == 4:
# Unpack the theta array of the walked to parameter values:
a1_N, a2_T, a3_v, a4_dv = theta
# Model properties
coldens = a1_N*self.column_base
temp = a2_T
v_off = a3_v
v_width = a4_dv
# =========================================================================
# Create the model file that CLASS will read:
# =========================================================================
# Create arrays to write to the .mdl files, which CLASS will read
hd1 = np.array(["! species", "Ntot", "Tex", "source_size", "v_off", "width"])
hd2 = np.array(["!", "(cm-2)","(K)", "('')", "(km/s)", "(km/s)"])
hd3 = np.array([self.molecule_name,coldens,temp,self.source_size,v_off,v_width])
mdl=[hd1,hd2,hd3]
# If you want to model all 5 parameters:
elif self.n_dim == 5:
a1_N, a2_T, a3_v, a4_dv, a5_ss = theta
# Model properties
coldens = a1_N*self.column_base
temp = a2_T
v_off = a3_v
v_width = a4_dv
source_size = a5_ss
# Create arrays to write to the .mdl files, which CLASS will read
hd1 = np.array(["! species", "Ntot", "Tex", "source_size", "v_off", "width"])
hd2 = np.array(["!", "(cm-2)","(K)", "('')", "(km/s)", "(km/s)"])
hd3 = np.array([self.molecule_name,coldens,temp,source_size,v_off,v_width])
mdl=[hd1,hd2,hd3]
# If you want to model only column density and velocity width:
elif self.n_dim == 2:
a1_N, a2_T = theta
# Model properties
coldens = a1_N*self.column_base
temp = a2_T
# Create arrays to write to the .mdl files, which CLASS will read
hd1 = np.array(["! species", "Ntot", "Tex", "source_size", "v_off", "width"])
hd2 = np.array(["!", "(cm-2)","(K)", "('')", "(km/s)", "(km/s)"])
hd3 = np.array([self.molecule_name,coldens,temp,self.source_size,self.vel_sys,self.vel_width])
mdl=[hd1,hd2,hd3]
# If n_dim is not 4, 5 or 2, raise a ValueError
else:
raise ValueError("n_dim must be either 4, 5 or 2. Code does not yet support modelling of 3 or only 1 parameters.")
return mdl
def main_emcee(self,xdata,ydata):
"""
This is the main function that will carry out the emcee process
"""
def model(theta,ii=None,jj=None):#,save_best=None):#,best_model_flag=None):
"""
Function to evaluate the model of the data
"""
# Make the .mdl file that Gildas/CLASS will need to make the synthetic spectrum and save it
mdl = self.make_gildas_mdl_file(theta)
np.savetxt("mdlfiles/temp_mdlfile.mdl",mdl,delimiter='\t',fmt='%s')
# =========================================================================
# Call the Gildas/CLASS script to perform the modelling
# =========================================================================
Sic.comm('@WeedsPy_MCMC.class')
# =========================================================================
# Read in the model data, and return it:
# =========================================================================
mod = fits.open('synthfiles/temp_synthfile.fits')[0]
modeldata = np.asarray(mod.data)[0,0,0,0:]
modeldata[np.where(np.isnan(modeldata)==True)] = 0. # Remove nans from the model
#Delete the intermediate files whilst trying to converge on a solution
os.remove('mdlfiles/temp_mdlfile.mdl')
os.remove('synthfiles/temp_synthfile.fits')
os.remove('synthfiles/temp_synthfile.30m')
return modeldata
def lnlike(theta, x, y, yerr):
"""
Function calculating the "likelihood" of the model
"""
return -0.5 * np.sum(((y - model(theta))/yerr) ** 2)
def lnprior(theta):
"""
Function to set the priors, and check that all the values walked to are within the priors. Flat priors.
"""
# If you are modelling 4 parameters: column density, temperature, centroid velocity and velocity width
if self.n_dim == 4:
a1_N, a2_T, a3_v, a4_dv = theta #, a5_s = theta
if self.priors[0] < a1_N < self.priors[1] and self.priors[2] < a2_T < self.priors[3] and self.priors[4] < a3_v < self.priors[5] and self.priors[6] < a4_dv < self.priors[7]:
return 0.0
# If you're modelling all 5 parameters:
elif self.n_dim == 5:
a1_N, a2_T, a3_v, a4_dv, a5_ss = theta #, a5_s = theta
if self.priors[0] < a1_N < self.priors[1] and self.priors[2] < a2_T < self.priors[3] and self.priors[4] < a3_v < self.priors[5] and self.priors[6] < a4_dv < self.priors[7] and self.priors[8] < a5_ss < self.priors[9]:
return 0.0
# If you're modelling only 2 parameters: column density and temperature
elif self.n_dim == 2:
a1_N, a2_T = theta #, a5_s = theta
if self.priors[0] < a1_N < self.priors[1] and self.priors[2] < a2_T < self.priors[3]:
return 0.0
else:
raise ValueError("n_dim must be either 4, 5 or 2. Code does not yet support modelling of 3 or only 1 parameters.")
return -np.inf
def lnprob(theta, x, y, yerr):
"""
Function to combine lnlike() and lnprior(), see if theta is within the priors, and calculate the likelihood of the model.
"""
lp = lnprior(theta)
if not np.isfinite(lp):
return -np.inf
return lp + lnlike(theta, x, y, yerr)
# Set the starting locations of the walkers as a gaussian around the initial guesses
p0 = [np.array(self.initial) + 1e-3 * np.random.randn(self.n_dim) for i in range(self.n_walkers)]
# Setup the vector of theta (i.e. data)
data = (xdata, ydata, self.rms_noise)
sampler = emcee.EnsembleSampler(self.n_walkers, self.n_dim, lnprob, args=data)
# Start burn-in run
print("Running burn-in...")
pos0, prob0, state0 = sampler.run_mcmc(p0, self.n_burn)
sampler.reset()
# Start the production run from the last of the burn in runs, and with a clear sampler
print("Running production...")
pos, prob, state = sampler.run_mcmc(pos0, self.n_iter)
return sampler
def run_emcee(self,xdata,ydata,ii,jj):
"""
Function that runs the main_emcee process, and processes the outcome of the emcee
"""
def save_highest_likelihood_spectrum(theta,ii,jj):
"""
This is a function to save the highest likelihood spectrum made by Gildas/CLASS, because we delete intermediate spectrum files while the emcee is walking to save space.
"""
mdl = self.make_gildas_mdl_file(theta)
np.savetxt('mdlfiles/temp_mdlfile.mdl',mdl,delimiter='\t',fmt='%s')
# Make the spectrum:
Sic.comm('@WeedsPy_MCMC.class')
# Rename the temporary spectrum files:
os.rename('mdlfiles/temp_mdlfile.mdl','mdlfiles/best_mdlfile_i{0}_j{1}.mdl'.format(ii,jj))
os.rename('synthfiles/temp_synthfile.fits','synthfiles/best_synthfile_i{0}_j{1}.fits'.format(ii,jj))
os.rename('synthfiles/temp_synthfile.30m','synthfiles/best_synthfile_i{0}_j{1}.30m'.format(ii,jj))
return
# Run the emcee
sampler = self.main_emcee(xdata,ydata)
# Get the run chains
samples = sampler.get_chain()
# Get the flat samples
samples_flat = sampler.get_chain(flat=True) # thin=10, discard=self.n_burn
np.savetxt('samples/samples_flatchain_i{0}_j{1}.csv'.format(ii,jj),samples_flat,delimiter=',')
# Find the autocorrelation time of the run:
try:
tau = sampler.get_autocorr_time()
except:
print(
f'**Warning** Walker chain is shorter than 50 times the integrated autocorrelation time for {self.n_dim} parameter(s). Proceed with caution and consider running a longer chain if you can.'
)
tau = np.inf
print(
f'The autocorrelation time is {tau}. Consider running the chains for at least 10 times as long as this.'
)
print('----------------------------------------------------------')
# Get the results
theta_max = sampler.flatchain[np.argmax(sampler.flatlnprobability)]
theta_save = theta_max.copy()
theta_save[0] = theta_save[0]*self.column_base
np.savetxt('theta_max/theta_max_flatlnprob_i{0}_j{1}.csv'.format(ii,jj),theta_save,delimiter=',')
# Get the highest likelihood parameters and their errors
results = []
results_err_up = []
results_err_low = []
# Loop over each dimension
for i in range(self.n_dim):
mcmc_params = np.percentile(samples_flat[:,i],[16,50,84])
errors = np.diff(mcmc_params)
# Store the results to arrays
results.append(mcmc_params[1])
results_err_low.append(errors[0])
results_err_up.append(errors[1])
# Save the best spectrum:
save_highest_likelihood_spectrum(theta_max,ii,jj)
# Save plots of the results:
self.plot_corner(samples_flat,ii,jj)
self.plot_chains(samples,ii,jj)
self.plot_spectrum(ii,jj,ydata)
return samples, samples_flat, results, results_err_up, results_err_low
def plot_corner(self,samples_flat,ii,jj):
"""
Function to make a corner plot
"""
# Change the plot labels based on the number of fitted parameters:
if self.n_dim == 4:
labels = [r"N(mol) ($\times$ %s cm$^{-2}$)" % (self.column_base),r"T$_{\mathrm{ex}}$ (K)",r"V$_{\mathrm{cen}}$ (km s$^{-1}$)","$\Delta$V (km s$^{-1}$)"]
elif self.n_dim == 5:
labels = [r"N(mol) ($\times$ %s cm$^{-2}$)" % (self.column_base),r"T$_{\mathrm{ex}}$ (K)",r"V$_{\mathrm{cen}}$ (km s$^{-1}$)","$\Delta$V (km s$^{-1}$)", r"$\theta_{\mathrm{source}}$ (arcsec)"]
elif self.n_dim == 2:
labels = [r"N(mol) ($\times$ %s cm$^{-2}$)" % (self.column_base),r"T$_{\mathrm{ex}}$ (K)"]
else:
raise ValueError("n_dim must be either 4, 5 or 2. Code does not yet support modelling of 3 or only 1 parameters.")
fig = corner.corner(samples_flat,
labels=labels,
show_titles=True,
plot_datapoints=True,
quantiles=[0.16,0.5,0.84],
smooth = 1.0,
levels=(1-np.exp(-0.5),1-np.exp(-1.0),1-np.exp(-2.0)),
label_kwargs={'fontsize':11},
title_kwargs={'fontsize':11},
)
fig.savefig('plots/corner_i{0}_j{1}.pdf'.format(ii,jj),bbox_inches='tight')
return
def plot_chains(self,samples,ii,jj):
"""
Function to make a plot of the walked chains
"""
fig, axs = plt.subplots(self.n_dim, figsize=(10,7), sharex=True)
axsf = axs.flatten()
# Change the plot labels based on the number of fitted parameters:
if self.n_dim == 4:
labels = [r"N(mol) ($\times$ %s cm$^{-2}$)" % (self.column_base),r"T$_{\mathrm{ex}}$ (K)",r"V$_{\mathrm{cen}}$ (km s$^{-1}$)","$\Delta$V (km s$^{-1}$)"]
elif self.n_dim == 5:
labels = [r"N(mol) ($\times$ %s cm$^{-2}$)" % (self.column_base),r"T$_{\mathrm{ex}}$ (K)",r"V$_{\mathrm{cen}}$ (km s$^{-1}$)","$\Delta$V (km s$^{-1}$)", r"$\theta_{\mathrm{source}}$ (arcsec)"]
elif self.n_dim == 2:
labels = [r"N(mol) ($\times$ %s cm$^{-2}$)" % (self.column_base),r"T$_{\mathrm{ex}}$ (K)"]
else:
raise ValueError("n_dim must be either 4, 5 or 2. Code does not yet support modelling of 3 or only 1 parameters.")
# Iterate over each dimension
for i in range(self.n_dim):
# Plot the chain traces
axsf[i].plot(samples[:,:,i],"k",alpha=0.4)
# General params
axsf[i].set_xlim(0,len(samples))
axsf[i].set_ylabel(labels[i])
axsf[i].tick_params(top=True,right=True)
axsf[i].yaxis.set_tick_params(which='minor', right=True)
axsf[i].xaxis.set_tick_params(which='minor', top=True)
# Only put x label on the final subplot
axsf[i].set_xlabel("Iterations")
# Save
fig.savefig('plots/chains_i{0}_j{1}.pdf'.format(ii,jj),bbox_inches='tight')
return fig
def plot_spectrum(self,ii,jj,y_data):
# Get min and max of the spectrum:
min_spec, max_spec = np.min(y_data), np.max(y_data)
Sic.comm('let min_spec '+str(min_spec))
Sic.comm('let max_spec '+str(max_spec))
# Make plot box:
Sic.comm('cl')
Sic.comm('pen /w 3 /col 0 /dash 1') # Black pen
Sic.comm('greg\draw t -2 2 "Intensity (K)" 4 90 /box 4')
Sic.comm("set mod y 'min_spec' 'max_spec'")
Sic.comm('box')
# Original spectrum
Sic.comm('retrieve OBS')
Sic.comm('spectrum')
Sic.comm('pen /w 3 /col 1 /dash 2') # Red pen
# Synthetic spectrum
Sic.comm('retrieve TB_MODEL')
Sic.comm('spectrum')
# Set the pixi and pixj variables:
Sic.comm('let pixi '+str(ii))
Sic.comm('let pixj '+str(jj))
# Save
Sic.comm("hard plots/spec_i'pixi'_j'pixj'.eps /dev eps color /over")
# Clear the plot
Sic.comm("cl")
return
def convert_noise_to_K(self):
"""
Convert rms noise from mJy/beam, Jy/beam or uJy/beam to Kelvin
"""
# Check the units of the frequency, and convert to GHz
if self.freq_unit == 'MHz':
mean_freq_GHz = self.mean_freq/1e3
elif self.freq_unit == 'Hz':
mean_freq_GHz = self.mean_freq/1e9
elif self.freq_unit == 'kHz':
mean_freq_GHz = self.mean_freq/1e6
else:
# If not in Hz, kHz or MHz, assume already in GHz
mean_freq_GHz = self.mean_freq
# Check the unit of the rms noise, and do conversion
if self.rms_noise_unit == 'Jy/beam':
rms_kelvin = (1.222e3*self.rms_noise*1e3)/((mean_freq_GHz**2)*self.bmaj*self.bmin)
elif self.rms_noise_unit == 'mJy/beam':
rms_kelvin = (1.222e3*self.rms_noise)/((mean_freq_GHz**2)*self.bmaj*self.bmin)
elif self.rms_noise_unit == 'uJy/beam':
rms_kelvin = (1.222e3*self.rms_noise/1e3)/((mean_freq_GHz**2)*self.bmaj*self.bmin)
else:
# If not Jy/beam, mJy/beam or uJy/beam, assume already in Kelvin
rms_kelvin = rms_noise
self.rms_noise = rms_kelvin
self.rms_noise_unit = 'K'
return rms_kelvin
if __name__ == '__main__':
# Read the parameter file:
params = read_params_file('paramfile.txt')
# Get parameters from the parameter file and set their types:
catalog_name = params['catalog_name']
molecule_name = params['molecule']
rms_noise = float(params['rms_noise'])
rms_noise_unit = str(params['rms_noise_unit'])
n_burn = int(params['nburn'])
n_iter = int(params['nprod'])
n_walkers = int(params['nwalkers'])
mean_freq = float(params['mean_freq'])
bmaj = float(params['bmaj'])
bmin = float(params['bmin'])
freq_unit = str(params['freq_unit'])
telescope_diameter = int(params['telescope_diameter'])
# The power base of the column density. Easier to run emcee without the power and multiply it by this for the modelling
column_base = 1e18
# List of the priors, each pairs corresponds to upper and lower bounds for
# column density, temperature, systemic velocity and velocity width
priors = [0.05,3.0,80,230,56.0,64.0,2.0,8.0]
# Initial locations for walkers:
# column density, temperature, systemic velocity, velocity width
initial = np.array([1.0,120,60.0,5.8])
# Number of parameters to fit:
n_dim = len(initial)
# Set the value of some of the model parameters:
# If they are free parameters, set them to None. If they are fixed parameters, give them a value.
source_size = float(params['source_size'])
vel_sys = None
vel_width = None
# Initialise the class
W = WeedsPy_MCMC(catalog_name = catalog_name,
molecule_name = molecule_name,
source_size = source_size,
rms_noise = rms_noise,
rms_noise_unit = rms_noise_unit,
n_iter = n_iter,
n_burn = n_burn,
n_walkers = n_walkers,
n_dim = n_dim,
column_base = column_base,
priors = priors,
initial = initial,
mean_freq = mean_freq,
bmaj = bmaj,
bmin = bmin,
freq_unit = freq_unit,
telescope_diameter = telescope_diameter,
vel_sys = vel_sys,
vel_width = vel_width)
# Set the variables in GILDAS
W.set_gildas_variables()
# Read in the data
pix_x = np.array([288]) # x pixel coord
pix_y = np.array([296]) # y pixel coord
tcont = np.array([22.062]) # continuum level (in Kelvin) at that pixel
npix = len(pix_x)
# Loop over each of the pixels
for ind in range(0,npix):
peakname = 'spectra/spec_i{0}_j{1}.30m'.format(pix_x[ind],pix_y[ind])
obs = fits.open(peakname[:-4]+'.fits')[0]
ydata = np.asarray(obs.data).flatten()
xdata = np.arange(0,len(ydata))
# Set the continuum brightness, and the .30m file name:
Sic.comm('let peakname '+peakname)
Sic.comm('let t_cont '+str(tcont[ind]))
# Convert the noise to Kelvin:
W.convert_noise_to_K()
# Run the emcee:
samples, samples_flat, results, results_err_up, results_err_low = W.run_emcee(xdata,ydata,pix_x[ind],pix_y[ind])