From 0079f4a00e4fd0b7a491d622cf801a0d30d54081 Mon Sep 17 00:00:00 2001 From: Thomas Vuillaume Date: Fri, 24 Jan 2025 10:02:15 +0100 Subject: [PATCH 01/17] first try to turn dl1dl2 into a tool --- lstchain/scripts/lstchain_dl1_to_dl2.py | 284 +++++++++++++++++------- 1 file changed, 209 insertions(+), 75 deletions(-) diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py index 4ae1eb4b13..22a6e3c634 100644 --- a/lstchain/scripts/lstchain_dl1_to_dl2.py +++ b/lstchain/scripts/lstchain_dl1_to_dl2.py @@ -6,7 +6,8 @@ Run lstchain_dl1_to_dl2 --help to see the options. """ -import argparse +import h5py +import json from pathlib import Path import joblib import logging @@ -17,6 +18,7 @@ from ctapipe.instrument import SubarrayDescription from ctapipe_io_lst import OPTICS from tables import open_file +from ctapipe.core import Tool, ToolConfigurationError, traits, Provenance from lstchain.io import ( get_dataset_keys, @@ -41,41 +43,209 @@ from lstchain.reco.utils import filter_events, impute_pointing, add_delta_t_key logger = logging.getLogger(__name__) -parser = argparse.ArgumentParser(description=__doc__) - -# Required arguments -parser.add_argument('--input-files', '-f', - type=Path, - nargs='+', - dest='input_files', - help='Path (or list of paths) to a DL1 HDF5 file', - required=True) - -parser.add_argument('--path-models', '-p', - action='store', - type=Path, - dest='path_models', - help='Path where to find the trained RF', - default='./trained_models') - -# Optional arguments -parser.add_argument('--output-dir', '-o', - action='store', - type=Path, - dest='output_dir', - help='Path where to store the reco dl2 events', - default='./dl2_data') - -parser.add_argument('--config', '-c', - action='store', - type=Path, - dest='config_file', - help='Path to a configuration file. If none is given, a standard configuration is applied', - default=None, - required=False) - - -def apply_to_file(filename, models_dict, output_dir, config): + + +# def write_provenance(hdf5_file_path, stage_name): +# """ +# Write JSON provenance information to an HDF5 file. +# It uses the current activity's provenance information and should typically be called within a ctapipe Tool. + +# Parameters: +# ----------- +# hdf5_file_path : str or Path +# Path to the HDF5 file +# stage_name : str +# Name of the stage generating the provenance + +# Returns: +# -------- +# None +# """ +# try: +# # Open the HDF5 file in read-write mode +# with h5py.File(hdf5_file_path, 'a') as h5file: +# # Ensure the /provenance group exists +# if 'provenance' not in h5file: +# h5file.create_group('provenance') + +# # Convert the provenance dictionary to a JSON string +# provenance_json = json.dumps(Provenance().current_activity.provenance, indent=2, default=str) + +# # Create the dataset in the tool's group +# h5file['provenance'].create_dataset( +# stage_name, +# data=provenance_json.encode('utf-8'), +# dtype=h5py.special_dtype(vlen=str) +# ) + +# print(f"Provenance for {stage_name} written successfully to {hdf5_file_path}") + +# except Exception as e: +# print(f"Error writing provenance: {e}") +# raise + + +def write_provenance(hdf5_file_path, dataset_name): + """ + Write JSON provenance information to an HDF5 file. + It uses the current activity's provenance information and should typically be called within a ctapipe Tool. + + Parameters: + ----------- + hdf5_file_path : str or Path + Path to the HDF5 file + stage_name : str + Name of the stage generating the provenance + + Returns: + -------- + None + """ + try: + with h5py.File(hdf5_file_path, 'a') as h5file: + if dataset_name not in h5file: + h5file.create_group(dataset_name) + + # Convert to JSON string + provenance_json = json.dumps(Provenance().current_activity.provenance, indent=2, default=str) + + # Store as an attribute instead of a dataset + h5file[dataset_name].attrs['provenance'] = provenance_json + + except Exception as e: + print(f"Error writing provenance: {e}") + raise + + +def read_provenance(hdf5_file_path, dataset_name): + """ + Read JSON provenance from HDF5 file's dataset attributes. + + Parameters: + ----------- + hdf5_file_path : str + Path to the HDF5 file + dataset_name : str + Name of the dataset containing provenance + + Returns: + -------- + dict + Provenance information as JSON-decoded dictionary + """ + with h5py.File(hdf5_file_path, 'r') as h5file: + if dataset_name not in h5file or 'provenance' not in h5file[dataset_name].attrs: + print(f"No provenance found for {dataset_name}") + return {} + + return json.loads(h5file[dataset_name].attrs['provenance']) + +def dl2_filename(dl1_filename): + """ + Create the name of the DL2 file from the DL1 file name. + + Parameters: + ----------- + dl1_filename : str + Name of the DL1 file + + Returns: + -------- + str + Name of the DL2 file + """ + return dl1_filename.replace('dl1', 'dl2', 1) + +class DL1ToDL2Tool(Tool): + name = "DL1 to DL2 Tool" + description = __doc__ + + input_files = traits.List( + traits.Path, + help="Path (or list of paths) to a DL1 HDF5 file", + ).tag(config=True) + + path_models = traits.Path( + help="Path where to find the trained RF", + default='./trained_models', + ).tag(config=True) + + output_dir = traits.Path( + help="Path where to store the reco dl2 events", + default='./dl2_data', + ).tag(config=True) + + config_file = traits.Path( + help="Path to a configuration file. If none is given, a standard configuration is applied", + default=None, + ).tag(config=True) + + aliases = { + ("f", "input-files"): "DL1ToDL2Tool.input_files", + ("p", "path-models"): "DL1ToDL2Tool.path_models", + ("o", "output-dir"): "DL1ToDL2Tool.output_dir", + ("c", "config"): "DL1ToDL2Tool.config_file", + } + + def setup(self): + + # Check if input files are provided + if not self.input_files: + raise ToolConfigurationError("No input files provided. Use --input-files to specify.") + + # Additional setup logic can go here + self.log.info(f"Input files: {self.input_files}") + self.log.info(f"Path to models: {self.path_models}") + self.log.info(f"Output directory: {self.output_dir}") + + def start(self): + + custom_config = {} + if self.config_file is not None: + try: + custom_config = read_configuration_file(self.config_file.absolute()) + except Exception as e: + self.log.error(f"Custom configuration could not be loaded: {e}") + return + + config = replace_config(standard_config, custom_config) + + models_keys = ['reg_energy', 'cls_gh'] + + if config['disp_method'] == 'disp_vector': + models_keys.append('reg_disp_vector') + elif config['disp_method'] == 'disp_norm_sign': + models_keys.extend(['reg_disp_norm', 'cls_disp_sign']) + + models_dict = {} + for models_key in models_keys: + models_path = Path(self.path_models, f'{models_key}.sav') + + if len(self.input_files) == 1: + models_dict[models_key] = models_path + else: + models_dict[models_key] = joblib.load(models_path) + + self.output_dir.mkdir(exist_ok=True) + for input_dl1file in self.input_files: + output_dl2file = self.output_dir.joinpath(dl2_filename(input_dl1file.name)) + if output_dl2file.exists(): + raise IOError(str(output_dl2file) + ' exists, exiting.') + else: + apply_to_file(input_dl1file, models_dict, output_dl2file, config) + write_provenance(output_dl2file, 'dl2') + + +def apply_to_file(filename, models_dict, output_file, config): + """ + Applies models to the data in the specified file and writes the output to a new file in the output directory. + + Parameters: + - filename (Path or str): The path to the input file. + - models_dict (dict): A dictionary containing the models to be applied. + - output_file (Path or str): The path to the output file. + - config (dict): The configuration dictionary containing parameters for the processing. + """ data = pd.read_hdf(filename, key=dl1_params_lstcam_key) @@ -196,12 +366,6 @@ def apply_to_file(filename, models_dict, output_dir, config): logger.warning("No dl2 output file written.") return - output_dir.mkdir(exist_ok=True) - output_file = output_dir.joinpath(filename.name.replace('dl1', 'dl2', 1)) - - if output_file.exists(): - raise IOError(str(output_file) + ' exists, exiting.') - dl1_keys = get_dataset_keys(filename) if dl1_images_lstcam_key in dl1_keys: @@ -258,42 +422,12 @@ def apply_to_file(filename, models_dict, output_dir, config): write_dataframe(dl2_onlylhfit, output_file, dl2_likelihood_params_lstcam_key, config=config, meta=metadata) write_dataframe(pd.concat(dl2_srcdep_dict, axis=1), output_file, dl2_params_src_dep_lstcam_key, config=config, meta=metadata) + def main(): - args = parser.parse_args() - - custom_config = {} - if args.config_file is not None: - try: - custom_config = read_configuration_file(args.config_file.absolute()) - except("Custom configuration could not be loaded !!!"): - pass - - config = replace_config(standard_config, custom_config) - - models_keys = ['reg_energy', 'cls_gh'] - - if config['disp_method'] == 'disp_vector': - models_keys.append('reg_disp_vector') - elif config['disp_method'] == 'disp_norm_sign': - models_keys.extend(['reg_disp_norm', 'cls_disp_sign']) - - models_dict = {} - for models_key in models_keys: - models_path = Path(args.path_models, f'{models_key}.sav') - - # For a single input file, each model is loaded just before it is used - if len(args.input_files)==1: - models_dict[models_key] = models_path - # For multiple input files, all the models are loaded only once here - else: - models_dict[models_key] = joblib.load(models_path) - - for filename in args.input_files: - apply_to_file(filename, models_dict, args.output_dir, config) - - + tool = DL1ToDL2Tool() + tool.run() if __name__ == '__main__': main() From aa55c955d0cf225d8301b096566e3406dab2ab0c Mon Sep 17 00:00:00 2001 From: Thomas Vuillaume Date: Thu, 30 Jan 2025 15:47:57 +0100 Subject: [PATCH 02/17] transform script dl1 to dl2 in tool and add provenance into file --- lstchain/io/provenance.py | 79 +++++++++++++ lstchain/io/tests/test_provenance.py | 31 +++++ lstchain/scripts/lstchain_dl1_to_dl2.py | 106 +----------------- .../scripts/tests/test_lstchain_scripts.py | 7 ++ 4 files changed, 123 insertions(+), 100 deletions(-) create mode 100644 lstchain/io/provenance.py create mode 100644 lstchain/io/tests/test_provenance.py diff --git a/lstchain/io/provenance.py b/lstchain/io/provenance.py new file mode 100644 index 0000000000..fc6068ae8f --- /dev/null +++ b/lstchain/io/provenance.py @@ -0,0 +1,79 @@ +import json +import h5py +from ctapipe.core import Provenance +import logging + +logger = logging.getLogger() + +def write_provenance(hdf5_file_path, stage_name): + """ + Write JSON provenance information to an HDF5 file. + It uses the current activity's provenance information and should typically be called within a ctapipe Tool. + + Parameters: + ----------- + hdf5_file_path : str or Path + Path to the HDF5 file + stage_name : str + Name of the stage generating the provenance + + Returns: + -------- + None + """ + try: + with h5py.File(hdf5_file_path, 'a') as h5file: + if 'provenance' not in h5file: + h5file.create_group('provenance') + + # Get the provenance dictionary from the current activity + provenance_data = Provenance().current_activity.provenance + # Dump the dictionary to a JSON string and write it to the HDF5 file + h5file['provenance'].create_dataset(stage_name, data=json.dumps(provenance_data, default=str)) + + except Exception as e: + raise Exception(f"Error writing provenance: {e}") + + +def read_provenance(hdf5_file_path, dataset_name): + """ + Read JSON provenance from HDF5 file's dataset attributes. + + Parameters: + ----------- + hdf5_file_path : str + Path to the HDF5 file + dataset_name : s + Name of the dataset containing provenance + + Returns: + -------- + dict + Provenance information as JSON-decoded dictionary + """ + logger.log(logging.INFO, f"reading provenance from {hdf5_file_path}") + with h5py.File(hdf5_file_path, 'r') as h5file: + if 'provenance' not in h5file: + raise ValueError("No provenance found in HDF5 file") + elif dataset_name not in h5file['provenance']: + raise ValueError(f"No provenance found for {dataset_name}") + else: + return json.loads(h5file['provenance'][dataset_name][()]) + + +def read_dl2_provenance(hdf5_file_path): + """ + Read JSON provenance from HDF5 file's dataset attributes. + This function is a wrapper around read_provenance() that reads the provenance for the 'dl2' dataset. + + Parameters: + ----------- + hdf5_file_path : str + Path to the HDF5 file + + Returns: + -------- + dict + Provenance information as JSON-decoded dictionary + """ + return read_provenance(hdf5_file_path, 'dl1_to_dl2') \ No newline at end of file diff --git a/lstchain/io/tests/test_provenance.py b/lstchain/io/tests/test_provenance.py new file mode 100644 index 0000000000..a60116dfea --- /dev/null +++ b/lstchain/io/tests/test_provenance.py @@ -0,0 +1,31 @@ +import pytest +import json +import h5py +import pytest + +from lstchain.io.provenance import write_provenance, read_provenance +from ctapipe.core import Provenance + +@pytest.fixture +def hdf5_file(tmp_path): + test_file = tmp_path / 'test_provenance.h5' + yield test_file + +def test_write_provenance(hdf5_file): + stage_name = 'test_stage' + p = Provenance() + p.add_input_file('input file', role='test input file') + p.add_output_file('output file', role='test output file') + write_provenance(hdf5_file, stage_name) + with h5py.File(hdf5_file, 'r') as h5file: + assert 'provenance' in h5file + assert stage_name in h5file['provenance'] + + +def test_read_provenance(hdf5_file): + stage_name = 'test_stage' + write_provenance(hdf5_file, stage_name) + result = read_provenance(hdf5_file, stage_name) + assert 'activity_name' in result + assert 'activity_uuid' in result + diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py index 22a6e3c634..600a661673 100644 --- a/lstchain/scripts/lstchain_dl1_to_dl2.py +++ b/lstchain/scripts/lstchain_dl1_to_dl2.py @@ -6,8 +6,6 @@ Run lstchain_dl1_to_dl2 --help to see the options. """ -import h5py -import json from pathlib import Path import joblib import logging @@ -39,107 +37,13 @@ dl2_likelihood_params_lstcam_key, write_dataframe, ) +from lstchain.io.provenance import write_provenance from lstchain.reco import dl1_to_dl2 from lstchain.reco.utils import filter_events, impute_pointing, add_delta_t_key logger = logging.getLogger(__name__) -# def write_provenance(hdf5_file_path, stage_name): -# """ -# Write JSON provenance information to an HDF5 file. -# It uses the current activity's provenance information and should typically be called within a ctapipe Tool. - -# Parameters: -# ----------- -# hdf5_file_path : str or Path -# Path to the HDF5 file -# stage_name : str -# Name of the stage generating the provenance - -# Returns: -# -------- -# None -# """ -# try: -# # Open the HDF5 file in read-write mode -# with h5py.File(hdf5_file_path, 'a') as h5file: -# # Ensure the /provenance group exists -# if 'provenance' not in h5file: -# h5file.create_group('provenance') - -# # Convert the provenance dictionary to a JSON string -# provenance_json = json.dumps(Provenance().current_activity.provenance, indent=2, default=str) - -# # Create the dataset in the tool's group -# h5file['provenance'].create_dataset( -# stage_name, -# data=provenance_json.encode('utf-8'), -# dtype=h5py.special_dtype(vlen=str) -# ) - -# print(f"Provenance for {stage_name} written successfully to {hdf5_file_path}") - -# except Exception as e: -# print(f"Error writing provenance: {e}") -# raise - - -def write_provenance(hdf5_file_path, dataset_name): - """ - Write JSON provenance information to an HDF5 file. - It uses the current activity's provenance information and should typically be called within a ctapipe Tool. - - Parameters: - ----------- - hdf5_file_path : str or Path - Path to the HDF5 file - stage_name : str - Name of the stage generating the provenance - - Returns: - -------- - None - """ - try: - with h5py.File(hdf5_file_path, 'a') as h5file: - if dataset_name not in h5file: - h5file.create_group(dataset_name) - - # Convert to JSON string - provenance_json = json.dumps(Provenance().current_activity.provenance, indent=2, default=str) - - # Store as an attribute instead of a dataset - h5file[dataset_name].attrs['provenance'] = provenance_json - - except Exception as e: - print(f"Error writing provenance: {e}") - raise - - -def read_provenance(hdf5_file_path, dataset_name): - """ - Read JSON provenance from HDF5 file's dataset attributes. - - Parameters: - ----------- - hdf5_file_path : str - Path to the HDF5 file - dataset_name : str - Name of the dataset containing provenance - - Returns: - -------- - dict - Provenance information as JSON-decoded dictionary - """ - with h5py.File(hdf5_file_path, 'r') as h5file: - if dataset_name not in h5file or 'provenance' not in h5file[dataset_name].attrs: - print(f"No provenance found for {dataset_name}") - return {} - - return json.loads(h5file[dataset_name].attrs['provenance']) - def dl2_filename(dl1_filename): """ Create the name of the DL2 file from the DL1 file name. @@ -233,7 +137,11 @@ def start(self): raise IOError(str(output_dl2file) + ' exists, exiting.') else: apply_to_file(input_dl1file, models_dict, output_dl2file, config) - write_provenance(output_dl2file, 'dl2') + p = Provenance() + p.add_input_file(input_dl1file, role='dl1 input file') + p.add_output_file(output_dl2file) + p.add_input_file(self.path_models, role='trained model directory') + write_provenance(output_dl2file, 'dl1_to_dl2') def apply_to_file(filename, models_dict, output_file, config): @@ -411,7 +319,6 @@ def apply_to_file(filename, models_dict, output_file, config): dl2.drop(lhfit_keys, axis=1, inplace=True) write_dl2_dataframe(dl2, output_file, config=config, meta=metadata) write_dataframe(dl2_onlylhfit, output_file, dl2_likelihood_params_lstcam_key, config=config, meta=metadata) - else: if 'lh_fit_config' not in config.keys(): write_dl2_dataframe(dl2_srcindep, output_file, config=config, meta=metadata) @@ -424,7 +331,6 @@ def apply_to_file(filename, models_dict, output_file, config): meta=metadata) - def main(): tool = DL1ToDL2Tool() tool.run() diff --git a/lstchain/scripts/tests/test_lstchain_scripts.py b/lstchain/scripts/tests/test_lstchain_scripts.py index 52b7a570be..f3db91106d 100644 --- a/lstchain/scripts/tests/test_lstchain_scripts.py +++ b/lstchain/scripts/tests/test_lstchain_scripts.py @@ -17,6 +17,7 @@ from ctapipe.io import read_table from ctapipe.io import EventSource from ctapipe.containers import EventType +from lstchain.io.provenance import read_dl2_provenance from lstchain.io.config import get_srcdep_config, get_standard_config @@ -385,6 +386,12 @@ def test_lstchain_dl1_to_dl2(simulated_dl2_file): assert "reco_disp_dy" in dl2_df.columns assert "reco_src_x" in dl2_df.columns assert "reco_src_y" in dl2_df.columns + + prov = read_dl2_provenance(simulated_dl2_file) + assert "activity_name" in prov + assert "config" in prov + assert "path_models" in prov['config']['DL1ToDL2Tool'] + assert prov['config']['DL1ToDL2Tool']['path_models'] is not None def test_lstchain_dl1_to_dl2_srcdep(simulated_srcdep_dl2_file): From f24951d854064224fb1c7cb5a594fb148fe65022 Mon Sep 17 00:00:00 2001 From: Thomas Vuillaume Date: Thu, 30 Jan 2025 16:06:07 +0100 Subject: [PATCH 03/17] fix imports tests --- lstchain/io/tests/test_provenance.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/lstchain/io/tests/test_provenance.py b/lstchain/io/tests/test_provenance.py index a60116dfea..625e4a6692 100644 --- a/lstchain/io/tests/test_provenance.py +++ b/lstchain/io/tests/test_provenance.py @@ -1,8 +1,5 @@ import pytest -import json import h5py -import pytest - from lstchain.io.provenance import write_provenance, read_provenance from ctapipe.core import Provenance From 5e6514c9d6eb1249edf4b8bb7ad26a87ec91c38e Mon Sep 17 00:00:00 2001 From: Thomas Vuillaume Date: Thu, 30 Jan 2025 16:47:21 +0100 Subject: [PATCH 04/17] revert output filename mess --- lstchain/scripts/lstchain_dl1_to_dl2.py | 46 ++++++++++++++----------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py index 67f1c98279..2bbbdc2dd3 100644 --- a/lstchain/scripts/lstchain_dl1_to_dl2.py +++ b/lstchain/scripts/lstchain_dl1_to_dl2.py @@ -131,18 +131,13 @@ def start(self): else: models_dict[models_key] = joblib.load(models_path) - self.output_dir.mkdir(exist_ok=True) for input_dl1file in self.input_files: - output_dl2file = self.output_dir.joinpath(dl2_filename(input_dl1file.name)) - if output_dl2file.exists(): - raise IOError(str(output_dl2file) + ' exists, exiting.') - else: - apply_to_file(input_dl1file, models_dict, output_dl2file, config, self.path_models) - p = Provenance() - p.add_input_file(input_dl1file, role='dl1 input file') - p.add_output_file(output_dl2file) - p.add_input_file(self.path_models, role='trained model directory') - write_provenance(output_dl2file, 'dl1_to_dl2') + output_filepath = apply_to_file(input_dl1file, models_dict, output_dir, config, self.path_models) + p = Provenance() + p.add_input_file(input_dl1file, role='dl1 input file') + p.add_output_file(output_filepath, role='dl2 output file') + p.add_input_file(self.path_models, role='trained model directory') + write_provenance(output_filepath, 'dl1_to_dl2') def apply_to_file(filename, models_dict, output_dir, config, models_path): @@ -152,10 +147,15 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path): Parameters: - filename (Path or str): The path to the input file. - models_dict (dict): A dictionary containing the models to be applied. - - output_file (Path or str): The path to the output file. + - output_dir (Path or str): The path for the output directory. - config (dict): The configuration dictionary containing parameters for the processing. - models_path (Path or str): The path to the directory containing the trained models. """ + output_dir = Path(output_dir) + output_dir.mkdir(exist_ok=True, parents=True) + dl2_output_file = output_dir.joinpath(dl2_filename(filename.name)) + if dl2_output_file.exists(): + raise IOError(str(dl2_output_file) + ' exists, exiting.') data = pd.read_hdf(filename, key=dl1_params_lstcam_key) @@ -334,10 +334,10 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path): dl1_keys.remove(dl1_likelihood_params_lstcam_key) metadata = global_metadata() - write_metadata(metadata, output_file) + write_metadata(metadata, dl2_output_file) with open_file(filename, 'r') as h5in: - with open_file(output_file, 'a') as h5out: + with open_file(dl2_output_file, 'a') as h5out: # Write the selected DL1 info for k in dl1_keys: @@ -358,28 +358,32 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path): # need container to use lstchain.io.add_global_metadata and lstchain.io.add_config_metadata if not config['source_dependent']: if 'lh_fit_config' not in config.keys(): - write_dl2_dataframe(dl2, output_file, config=config, meta=metadata) + write_dl2_dataframe(dl2, dl2_output_file, config=config, meta=metadata) else: dl2_onlylhfit = dl2[lhfit_keys] dl2.drop(lhfit_keys, axis=1, inplace=True) - write_dl2_dataframe(dl2, output_file, config=config, meta=metadata) - write_dataframe(dl2_onlylhfit, output_file, dl2_likelihood_params_lstcam_key, config=config, meta=metadata) + write_dl2_dataframe(dl2, dl2_output_file, config=config, meta=metadata) + write_dataframe(dl2_onlylhfit, dl2_output_file, dl2_likelihood_params_lstcam_key, config=config, meta=metadata) else: if 'lh_fit_config' not in config.keys(): - write_dl2_dataframe(dl2_srcindep, output_file, config=config, meta=metadata) + write_dl2_dataframe(dl2_srcindep, dl2_output_file, config=config, meta=metadata) else: dl2_onlylhfit = dl2_srcindep[lhfit_keys] dl2_srcindep.drop(lhfit_keys, axis=1, inplace=True) - write_dl2_dataframe(dl2_srcindep, output_file, config=config, meta=metadata) - write_dataframe(dl2_onlylhfit, output_file, dl2_likelihood_params_lstcam_key, config=config, meta=metadata) - write_dataframe(pd.concat(dl2_srcdep_dict, axis=1), output_file, dl2_params_src_dep_lstcam_key, config=config, + write_dl2_dataframe(dl2_srcindep, dl2_output_file, config=config, meta=metadata) + write_dataframe(dl2_onlylhfit, dl2_output_file, dl2_likelihood_params_lstcam_key, config=config, meta=metadata) + write_dataframe(pd.concat(dl2_srcdep_dict, axis=1), dl2_output_file, dl2_params_src_dep_lstcam_key, config=config, meta=metadata) + return dl2_output_file + def main(): tool = DL1ToDL2Tool() tool.run() + if __name__ == '__main__': main() + From 913739676da933551db8d5f7ab4a6ee366001407 Mon Sep 17 00:00:00 2001 From: Thomas Vuillaume Date: Thu, 30 Jan 2025 16:54:11 +0100 Subject: [PATCH 05/17] fix outputdir --- lstchain/scripts/lstchain_dl1_to_dl2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py index 2bbbdc2dd3..f86f78f2bb 100644 --- a/lstchain/scripts/lstchain_dl1_to_dl2.py +++ b/lstchain/scripts/lstchain_dl1_to_dl2.py @@ -132,7 +132,7 @@ def start(self): models_dict[models_key] = joblib.load(models_path) for input_dl1file in self.input_files: - output_filepath = apply_to_file(input_dl1file, models_dict, output_dir, config, self.path_models) + output_filepath = apply_to_file(input_dl1file, models_dict, self.output_dir, config, self.path_models) p = Provenance() p.add_input_file(input_dl1file, role='dl1 input file') p.add_output_file(output_filepath, role='dl2 output file') From 2d118d29bbca5db7d98d66d655faecddb950795f Mon Sep 17 00:00:00 2001 From: Thomas Vuillaume Date: Thu, 30 Jan 2025 17:02:55 +0100 Subject: [PATCH 06/17] modify docs accordingly --- docs/lstchain_api/scripts/index.rst | 13 ------------- docs/lstchain_api/tools/index.rst | 2 ++ 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/docs/lstchain_api/scripts/index.rst b/docs/lstchain_api/scripts/index.rst index d2006908c5..9a317edc29 100644 --- a/docs/lstchain_api/scripts/index.rst +++ b/docs/lstchain_api/scripts/index.rst @@ -99,19 +99,6 @@ Usage :func: parser :prog: lstchain_dl1_muon_analysis -.. _lstchain_dl1_to_dl2: - -lstchain_dl1_to_dl2 -+++++++++++++++++++ - -.. automodule:: lstchain.scripts.lstchain_dl1_to_dl2 - -Usage ------ -.. argparse:: - :module: lstchain.scripts.lstchain_dl1_to_dl2 - :func: parser - :prog: lstchain_dl1_to_dl2 .. _lstchain_dl1ab: diff --git a/docs/lstchain_api/tools/index.rst b/docs/lstchain_api/tools/index.rst index 92101749e5..c475ad7abf 100644 --- a/docs/lstchain_api/tools/index.rst +++ b/docs/lstchain_api/tools/index.rst @@ -24,3 +24,5 @@ Reference/API :no-inheritance-diagram: .. automodapi:: lstchain.tools.lstchain_fit_intensity_scan :no-inheritance-diagram: +.. automodapi:: lstchain.scripts.lstchain_dl1_to_dl2 + :no-inheritance-diagram: From d7f6d9d6bbd37114bc6c9ff162e5e0dab26184b0 Mon Sep 17 00:00:00 2001 From: Thomas Vuillaume Date: Fri, 31 Jan 2025 11:01:49 +0100 Subject: [PATCH 07/17] rm dl1_to_dl2 in doc list --- docs/lstchain_api/scripts/index.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/lstchain_api/scripts/index.rst b/docs/lstchain_api/scripts/index.rst index 9a317edc29..42db059a93 100644 --- a/docs/lstchain_api/scripts/index.rst +++ b/docs/lstchain_api/scripts/index.rst @@ -14,7 +14,6 @@ The scripts to be executed from the command line are described below: * `lstchain_data_create_time_calibration_file`_ * `lstchain_data_r0_to_dl1`_ * `lstchain_dl1_muon_analysis`_ -* `lstchain_dl1_to_dl2`_ * `lstchain_dl1ab`_ * `lstchain_dump_config`_ * `lstchain_find_pedestals`_ From 72d831f814787d5226318bd1f87e995d13d6d556 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Date: Mon, 3 Feb 2025 15:00:04 +0100 Subject: [PATCH 08/17] default => default_value in traits --- lstchain/scripts/lstchain_dl1_to_dl2.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py index f86f78f2bb..323a895ee5 100644 --- a/lstchain/scripts/lstchain_dl1_to_dl2.py +++ b/lstchain/scripts/lstchain_dl1_to_dl2.py @@ -72,17 +72,17 @@ class DL1ToDL2Tool(Tool): path_models = traits.Path( help="Path where to find the trained RF", - default='./trained_models', + default_value='./trained_models', ).tag(config=True) output_dir = traits.Path( help="Path where to store the reco dl2 events", - default='./dl2_data', + default_value='./dl2_data', ).tag(config=True) config_file = traits.Path( help="Path to a configuration file. If none is given, a standard configuration is applied", - default=None, + default_value=None, ).tag(config=True) aliases = { From 15f49b3b9fc5461fa18b943f5ecfd31893f3c8c0 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Date: Mon, 3 Feb 2025 15:04:10 +0100 Subject: [PATCH 09/17] Allow None as default for config file --- lstchain/scripts/lstchain_dl1_to_dl2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py index 323a895ee5..11f5c59e09 100644 --- a/lstchain/scripts/lstchain_dl1_to_dl2.py +++ b/lstchain/scripts/lstchain_dl1_to_dl2.py @@ -81,6 +81,7 @@ class DL1ToDL2Tool(Tool): ).tag(config=True) config_file = traits.Path( + allow_none=True, help="Path to a configuration file. If none is given, a standard configuration is applied", default_value=None, ).tag(config=True) From 73c6e012d77ab724207f8c2dc55bca1b329f1dbb Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Date: Mon, 3 Feb 2025 20:12:47 +0100 Subject: [PATCH 10/17] Update test_lstchain_scripts.py Apparently traits.List requires one switch "-f" per input file. I do not know if this is intended. --- lstchain/scripts/tests/test_lstchain_scripts.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lstchain/scripts/tests/test_lstchain_scripts.py b/lstchain/scripts/tests/test_lstchain_scripts.py index f3db91106d..073e731645 100644 --- a/lstchain/scripts/tests/test_lstchain_scripts.py +++ b/lstchain/scripts/tests/test_lstchain_scripts.py @@ -366,6 +366,7 @@ def test_lstchain_merged_dl1_to_dl2( "lstchain_dl1_to_dl2", "-f", simulated_dl1_file_, + "-f", merged_simulated_dl1_file, "-p", rf_models["path"], From df36e4102ec4183199ab188d5a8d8de584bca797 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Date: Tue, 4 Feb 2025 09:40:07 +0100 Subject: [PATCH 11/17] Moved dl1_to_dl2 tool to the tools folder --- lstchain/{scripts => tools}/lstchain_dl1_to_dl2.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename lstchain/{scripts => tools}/lstchain_dl1_to_dl2.py (100%) diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/tools/lstchain_dl1_to_dl2.py similarity index 100% rename from lstchain/scripts/lstchain_dl1_to_dl2.py rename to lstchain/tools/lstchain_dl1_to_dl2.py From e99e7cc82832d8bc7fae8bcd8c99c115a9dfd319 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Date: Tue, 4 Feb 2025 09:55:50 +0100 Subject: [PATCH 12/17] scripts => tools (lstchain_dl1_to_dl2 docs) --- docs/introduction.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/introduction.rst b/docs/introduction.rst index 3f8dda8d10..8e8e426d74 100644 --- a/docs/introduction.rst +++ b/docs/introduction.rst @@ -78,7 +78,7 @@ Here is an example configuration file for this step. DL1 to DL2 ---------- -Use ``lstchain.scripts.lstchain_dl1_to_dl2`` for real data and MC. +Use ``lstchain.tools.lstchain_dl1_to_dl2`` for real data and MC. For more information, try ``--help`` or see the :doc:`lstchain_api/index`. From bbe8796b9e127f51b1fc18554d67d1b26cf6f9b1 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Date: Tue, 4 Feb 2025 10:45:53 +0100 Subject: [PATCH 13/17] Make --overwrite switch work --- lstchain/tools/lstchain_dl1_to_dl2.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/lstchain/tools/lstchain_dl1_to_dl2.py b/lstchain/tools/lstchain_dl1_to_dl2.py index 11f5c59e09..63ea12ded6 100644 --- a/lstchain/tools/lstchain_dl1_to_dl2.py +++ b/lstchain/tools/lstchain_dl1_to_dl2.py @@ -133,7 +133,8 @@ def start(self): models_dict[models_key] = joblib.load(models_path) for input_dl1file in self.input_files: - output_filepath = apply_to_file(input_dl1file, models_dict, self.output_dir, config, self.path_models) + output_filepath = apply_to_file(input_dl1file, models_dict, self.output_dir, config, + self.path_models, self.overwrite) p = Provenance() p.add_input_file(input_dl1file, role='dl1 input file') p.add_output_file(output_filepath, role='dl2 output file') @@ -141,7 +142,7 @@ def start(self): write_provenance(output_filepath, 'dl1_to_dl2') -def apply_to_file(filename, models_dict, output_dir, config, models_path): +def apply_to_file(filename, models_dict, output_dir, config, models_path, overwrite=False): """ Applies models to the data in the specified file and writes the output to a new file in the output directory. @@ -155,6 +156,11 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path): output_dir = Path(output_dir) output_dir.mkdir(exist_ok=True, parents=True) dl2_output_file = output_dir.joinpath(dl2_filename(filename.name)) + + # Remove previous file if overwrite option is used: + if overwrite: + dl2_output_file.unlink(missing_ok=True) + if dl2_output_file.exists(): raise IOError(str(dl2_output_file) + ' exists, exiting.') From 5d8c05544d61400acc2815d56934c23c8c279485 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Date: Tue, 4 Feb 2025 15:00:33 +0100 Subject: [PATCH 14/17] tool name --- lstchain/tools/lstchain_dl1_to_dl2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lstchain/tools/lstchain_dl1_to_dl2.py b/lstchain/tools/lstchain_dl1_to_dl2.py index 63ea12ded6..1c403f3a5e 100644 --- a/lstchain/tools/lstchain_dl1_to_dl2.py +++ b/lstchain/tools/lstchain_dl1_to_dl2.py @@ -62,7 +62,7 @@ def dl2_filename(dl1_filename): return dl1_filename.replace('dl1', 'dl2', 1) class DL1ToDL2Tool(Tool): - name = "DL1 to DL2 Tool" + name = "lstchain_dl1_to_dl2" description = __doc__ input_files = traits.List( From 5e0640fef02663a283ffedd0adb0cd524784643b Mon Sep 17 00:00:00 2001 From: Daniel Morcuende Date: Tue, 4 Feb 2025 15:08:40 +0100 Subject: [PATCH 15/17] add DL1toDL2 to module's __all__ --- lstchain/tools/lstchain_dl1_to_dl2.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lstchain/tools/lstchain_dl1_to_dl2.py b/lstchain/tools/lstchain_dl1_to_dl2.py index 1c403f3a5e..8d45ea7fdb 100644 --- a/lstchain/tools/lstchain_dl1_to_dl2.py +++ b/lstchain/tools/lstchain_dl1_to_dl2.py @@ -45,6 +45,9 @@ logger = logging.getLogger(__name__) +__all__ = ["DL1ToDL2Tool"] + + def dl2_filename(dl1_filename): """ Create the name of the DL2 file from the DL1 file name. From 1acdbe0292c43d8ffdbf5361934de3750be5be6e Mon Sep 17 00:00:00 2001 From: Daniel Morcuende Date: Tue, 4 Feb 2025 15:09:10 +0100 Subject: [PATCH 16/17] change script by tool in automodapi --- docs/lstchain_api/tools/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/lstchain_api/tools/index.rst b/docs/lstchain_api/tools/index.rst index c475ad7abf..2a0aa21cb4 100644 --- a/docs/lstchain_api/tools/index.rst +++ b/docs/lstchain_api/tools/index.rst @@ -24,5 +24,5 @@ Reference/API :no-inheritance-diagram: .. automodapi:: lstchain.tools.lstchain_fit_intensity_scan :no-inheritance-diagram: -.. automodapi:: lstchain.scripts.lstchain_dl1_to_dl2 +.. automodapi:: lstchain.tools.lstchain_dl1_to_dl2 :no-inheritance-diagram: From 57af1bbf6c9a078499e71c578e7ae6b200673c85 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Date: Tue, 4 Feb 2025 15:55:46 +0100 Subject: [PATCH 17/17] Fill provenance earlier --- lstchain/tools/lstchain_dl1_to_dl2.py | 38 +++++++++++++++------------ 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/lstchain/tools/lstchain_dl1_to_dl2.py b/lstchain/tools/lstchain_dl1_to_dl2.py index 8d45ea7fdb..2f514eb531 100644 --- a/lstchain/tools/lstchain_dl1_to_dl2.py +++ b/lstchain/tools/lstchain_dl1_to_dl2.py @@ -134,38 +134,42 @@ def start(self): models_dict[models_key] = models_path else: models_dict[models_key] = joblib.load(models_path) - + + output_dir = Path(self.output_dir) + output_dir.mkdir(exist_ok=True, parents=True) + for input_dl1file in self.input_files: - output_filepath = apply_to_file(input_dl1file, models_dict, self.output_dir, config, - self.path_models, self.overwrite) + dl2_output_file = output_dir.joinpath(dl2_filename(input_dl1file.name)) + p = Provenance() p.add_input_file(input_dl1file, role='dl1 input file') - p.add_output_file(output_filepath, role='dl2 output file') + p.add_output_file(dl2_output_file, role='dl2 output file') p.add_input_file(self.path_models, role='trained model directory') - write_provenance(output_filepath, 'dl1_to_dl2') + + # Remove previous file if overwrite option is used: + if self.overwrite: + dl2_output_file.unlink(missing_ok=True) + + if dl2_output_file.exists(): + raise IOError(str(dl2_output_file) + ' exists, exiting.') + + write_provenance(dl2_output_file, 'dl1_to_dl2') + + apply_to_file(input_dl1file, models_dict, dl2_output_file, config, + self.path_models) -def apply_to_file(filename, models_dict, output_dir, config, models_path, overwrite=False): +def apply_to_file(filename, models_dict, dl2_output_file, config, models_path): """ Applies models to the data in the specified file and writes the output to a new file in the output directory. Parameters: - filename (Path or str): The path to the input file. - models_dict (dict): A dictionary containing the models to be applied. - - output_dir (Path or str): The path for the output directory. + - dl2_output_file (Path or str): The path for the output DL2 file. - config (dict): The configuration dictionary containing parameters for the processing. - models_path (Path or str): The path to the directory containing the trained models. """ - output_dir = Path(output_dir) - output_dir.mkdir(exist_ok=True, parents=True) - dl2_output_file = output_dir.joinpath(dl2_filename(filename.name)) - - # Remove previous file if overwrite option is used: - if overwrite: - dl2_output_file.unlink(missing_ok=True) - - if dl2_output_file.exists(): - raise IOError(str(dl2_output_file) + ' exists, exiting.') data = pd.read_hdf(filename, key=dl1_params_lstcam_key)