# Configuration file for all scripts
# All file paths are relative to the parent script directory, unless otherwise stated
---
global-variables:
  network-variables:
    # File name for decoder model to load
    - &decoder_name 'Decoder V10'
    # File name for encoder model to load
    - &encoder_name 'Encoder V10'
    # Path to the configuration files for the encoder and decoder
    - &network_configs_directory '../network_configs/'
    # Path to the Python interpreter
    - &python-path '../venv/bin/python'
  data-variables:
    # Path to real spectra as a pickled dictionary containing the spectra (spectra);
    # it can also contain parameters (params) for supervised learning and
    # spectra names (names) if fitted parameters are provided for specific files in
    # spectra_directory and tests in spectrum-fit is True
    # (an example of the expected layout is sketched below this list)
    - &spectra_data_path '../../Spectrum-Machine-Learning/data/spectra.pickle'
    # Path to synthetic spectra with noise with the same format as spectra_data_path
    # - &synthetic_data_path '../data/synth_spectra.pickle'
    - &synthetic_data_path '../../Spectrum-Machine-Learning/data/synth_spectra_clean.pickle'
    # Path to synthetic spectra without noise with the same format as spectra_data_path
    # - &clean_synthetic_data_path '../data/synth_spectra_clean.pickle'
    - &clean_synthetic_data_path '../../Spectrum-Machine-Learning/data/synth_spectra_clean.pickle'
    # Path to Andrea's spectra data
    - &andrea_spectra_path '../data/andrea_spectra.pkl'
    # Path to the directory containing spectra fits files
    - &spectra_data_directory '../../spectra/'
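    # A minimal sketch (illustrative only; the exact array shapes are not defined in this file)
    # of the pickled dictionary layout the paths above are assumed to point to:
    #   {
    #       'spectra': ...,  # array of spectra, one row per spectrum
    #       'params': ...,   # optional array of fitted parameters for supervised learning
    #       'names': ...,    # optional list of fits file names matching spectra_data_directory
    #   }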
  model-variables:
    # Number of free parameters
    # - &parameter_number 7
    - &parameter_number 5
    # Xspec model to use
    # - &model_name 'tbabs(simplcutx(ezdiskbb)*gabs*gabs)'
    - &model_name 'tbabs(simplcutx(ezdiskbb))'
    # Name of custom model that has to be loaded, can be empty
    - &custom_model_name 'simplcutx'
    # Directory to custom model, can be empty
    - &model_directory '../../../Documents/Xspec_Models/simplcutx'
    # Which model free parameter indices to take the log of, starting from 0
    - &log_parameters
      - 0
      - 2
      - 3
      - 4
      # - 5
      # - 6
      # - 4
    # Index starting from 1 and value for each fixed parameter
    - &fixed_parameters
      4: 0
      5: 100
      # 8: 6.7
      # 9: 2.0e-2
      # 11: 6.97
      # 12: 2.0e-2
  output-variables:
    # Directory to save network training states
    - &network_states_directory '../model_states/'
# spectrum_fit.py config file
spectrum-fit:
  # Variables for training the network
  training:
    # Whether PyXspec tests should be used or not
    tests: False
    # File number to save decoder progress
    decoder-save: 0
    # File number to save encoder progress
    encoder-save: 0
    # File number to load decoder state
    decoder-load: 1
    # File number to load encoder state
    encoder-load: 1
    # Number of epochs to train for
    epochs: 200
    # Number of inputs to process between network weight updates
    batch-size: 60
    # Number of threads to use for multiprocessing, 0 will use all available,
    # only used if tests is True
    cpus: 12
    # Learning rate for training
    learning-rate: 1.0e-4
    # Validation dataset fraction
    validation-fraction: 0.15
    # File name for decoder model to load
    decoder-name: *decoder_name
    # File name for encoder model to load
    encoder-name: *encoder_name
    # Network description
    network-description: 'Encoder only, mass only'
    # Path to the configuration files for the encoder and decoder
    network-configs-directory: *network_configs_directory
    # Path to the Python interpreter
    python-path: *python-path
  # File locations for different datasets
  data:
    # Path to the dictionary containing the spectra (spectra) for decoder training;
    # it can also contain parameters (params) for supervised learning and/or PyXspec validation,
    # and file names (names) to select specific files in the directory
    # when parameters are provided and PyXspec validation is used
    decoder-data-path: *synthetic_data_path
    # Path to the spectra for encoder training, same format as for the decoder
    encoder-data-path: *spectra_data_path
    # Path to the directory of spectra fits files, only required if parameters exist
    # and you want to use PyXspec validation
    spectra-directory: *spectra_data_directory
  # Model for the network to learn
  model:
    # Number of iterations for traditional PyXspec fitting
    iterations: 10
    # Number of iterations between fit statistic calculations for tracking progress;
    # set this to iterations if you don't want to track the evolution
    # (see the worked example below this entry)
    step: 10
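    # Worked example (illustrative, based on the comments above): with iterations: 100 and
    # step: 10, the fit statistic would be recorded 10 times over the fit; with
    # iterations: 10 and step: 10 (as set here), it is only calculated once, at the end.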
    # Number of free parameters
    parameters-number: *parameter_number
    # Xspec model to use
    model-name: *model_name
    # Name of custom model that has to be loaded, can be empty
    custom-model-name: *custom_model_name
    # Directory to custom model, can be empty
    model-directory: *model_directory
    # Which model free parameter indices to take the log of, starting from 0
    log-parameters: *log_parameters
    # Index starting from 1 and value for each fixed parameter
    fixed-parameters: *fixed_parameters
    # Default free parameter values, can be empty if default performance measurement isn't required
    default-parameters:
      - 1
      - 2.5
      - 2.0e-2
      - 1
      - 1
      # - 1
      # - 1
    # Model free parameter names for displaying on plots
    parameter-names:
      - '$N_{H}$ $(10^{22}\ cm^{-2})$'
      - '$\Gamma$'
      - '$f_{sc}$'
      - '$kT_{\rm disk}$ (keV)'
      - '$N$'
      # - '$A_{\rm FeXXV}$'
      # - '$A_{\rm FeXXVI}$'
      # - '$N_{H}$ $(10^{22}\ cm^{-2})$'
      # - '$r\log{xi}$'
      # - '$\dot{M}$'
      # - 'Inclination'
      # - '$M_{\rm BH}$'
      # - '$a_{\rm BH}$'
      # - 'norm'
  # Directory locations for different outputs
  output:
    # Path to the parameter predictions generated by the encoder
    parameter-predictions-path: '../data/parameter_predictions.csv'
    # Directory to save network training states
    network-states-directory: *network_states_directory
    # Directory to save plots
    plots-directory: '../plots/'
    # Directory for passing data to/from multiprocessing workers
    worker-directory: '../data/worker/'
# data_preprocessing.py config file
data-preprocessing:
  # Variables for preprocessing
  preprocessing:
    # Number of threads to use for multiprocessing, 0 will use all available
    cpus: 1
  # File locations for different datasets
  data:
    # Path to the directory of spectra fits files,
    # should be the same as for spectrum_fit.py
    spectra-directory: *spectra_data_directory
    # Path to the directory of background fits files; the full background path is this
    # directory plus the background file location recorded in each spectrum's fits file
    background-directory: *spectra_data_directory
    # Path to the spectra file containing spectra names,
    # can be either a pickled dictionary with the key names, or a numpy file;
    # leave empty if this doesn't exist, and file names in spectra-directory will be used instead
    # (both accepted formats are sketched below this block)
    names-path: *spectra_data_path
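    # A minimal sketch (illustrative, not part of the configuration) of the two accepted
    # names-path formats:
    #   pickled dictionary: {'names': ['spectrum_1.fits', 'spectrum_2.fits', ...]}
    #   numpy file:         an array of file name strings saved with numpy.save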
  # File location for outputs
  output:
    # Path to save the preprocessed spectra
    processed-path: '../data/preprocessed_spectra.npy'
# synthesize_spectra.py config file
synthesize-spectra:
  # Variables for synthesizing spectra
  synthesize:
    # If previous synthetic data should be removed
    clear-spectra: True
    # If the distribution should be biased towards a uniform distribution in the parameter space
    flat-distribution-bias: True
    # Number of synthetic spectra to try to generate; the final number will be lower
    # because bad spectra are removed during cleaning
    synthetic-number: 20000
    # Number of spectra to generate before saving,
    # useful if you want to stop it early, but smaller values are slower
    spectra-per-batch: 1000
    # Number of fake spectra to generate per background, lower is better but slower
    spectra-per-background: 100
    # Number of threads to use for multiprocessing, 0 will use all available
    cpus: 12
    # Path to the Python interpreter
    python-path: *python-path
  # File locations for outputs
  # All paths are relative to the parent folder due to string overflows in fits files
  data:
    # Path to the directory of spectra fits files,
    # should be the same as for spectrum_fit.py
    spectra-directory: *spectra_data_directory
    # Path to the spectra file containing spectra names,
    # can be either a pickled dictionary with the key names, or a numpy file;
    # leave empty if this doesn't exist, and file names in spectra-directory will be used instead;
    # should be the same as for spectrum_fit.py
    names-path: *spectra_data_path
  # Model to base the synthetic spectra on
  model:
    # Number of free parameters, should be the same as for spectrum_fit.py
    parameter-number: *parameter_number
    # Xspec model to use
    model-name: *model_name
    # Name of custom model that has to be loaded, can be empty;
    # should be the same as for spectrum_fit.py
    custom-model-name: *custom_model_name
    # Directory to custom model, can be empty;
    # should be the same as for spectrum_fit.py
    model-directory: *model_directory
    # Which model free parameter indices should be sampled from logarithmic space, starting from 0;
    # should be the same as for spectrum_fit.py
    # (see the sampling example after the parameter limits below)
    log-parameters: *log_parameters
    # Parameter lower limit and upper limit in order of free parameter index
    parameter-limits:
      - # First free parameter
        low: 5.0e-3 # Lower limit
        high: 75 # Upper limit
      -
        low: 1.3
        high: 4
      -
        low: 1.0e-3
        high: 1
      -
        low: 2.5e-2
        high: 4
      -
        low: 1.0e-2
        high: 1.0e+10
      # -
      #   low: 1.0e-4
      #   high: 1
      # -
      #   low: 1.0e-4
      #   high: 1
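      # Illustrative note (an assumption about the sampling, not defined in this file): for an
      # index listed in log-parameters, e.g. the third free parameter with low: 1.0e-3 and
      # high: 1, values are presumably drawn uniformly in log space, i.e. 10**u with u uniform
      # between log10(1.0e-3) = -3 and log10(1) = 0, rather than uniformly over [1.0e-3, 1].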
    # Index starting from 1 and value for each fixed parameter,
    # should be the same as for spectrum_fit.py, given as a dictionary
    fixed-parameters: *fixed_parameters
  # Directory locations for different outputs
  output:
    # Path to save the corrected and normalized spectra file
    synthetic-path: *clean_synthetic_data_path
    # Path to directory to save worker progress
    worker-directory: '../data/synth_worker/'
# network_optimizer.py config file
network-optimizer:
  # Optimization parameters
  optimization:
    # If continuing from a previous optimization process
    load: False
    # If optimization progress should be saved
    save: True
    # Number of epochs to train for
    epochs: 50
    # Number of optimization trials to run
    trials: 30
    # Minimum number of trials before pruning can happen
    pruning-minimum-trials: 5
    # File name for the network to optimize
    name: *decoder_name
    # Path to the configuration files for the encoder and decoder,
    # should be the same as for spectrum_fit.py
    network-configs-directory: *network_configs_directory
    # Which model free parameter indices to take the log of,
    # should be the same as for spectrum_fit.py
    log-parameters: *log_parameters
  # File locations for different datasets
  data:
    # Path to the spectra for training
    spectra-path: *synthetic_data_path
  # Directory locations for outputs
  output:
    # Directory to save network optimization states
    network-states-directory: *network_states_directory