From 0079f4a00e4fd0b7a491d622cf801a0d30d54081 Mon Sep 17 00:00:00 2001
From: Thomas Vuillaume <thomas.vuillaume@lapp.in2p3.fr>
Date: Fri, 24 Jan 2025 10:02:15 +0100
Subject: [PATCH 01/17] first try to turn dl1dl2 into a tool

---
 lstchain/scripts/lstchain_dl1_to_dl2.py | 284 +++++++++++++++++-------
 1 file changed, 209 insertions(+), 75 deletions(-)

diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py
index 4ae1eb4b13..22a6e3c634 100644
--- a/lstchain/scripts/lstchain_dl1_to_dl2.py
+++ b/lstchain/scripts/lstchain_dl1_to_dl2.py
@@ -6,7 +6,8 @@
 Run lstchain_dl1_to_dl2 --help to see the options.
 """
 
-import argparse
+import h5py
+import json
 from pathlib import Path
 import joblib
 import logging
@@ -17,6 +18,7 @@
 from ctapipe.instrument import SubarrayDescription
 from ctapipe_io_lst import OPTICS
 from tables import open_file
+from ctapipe.core import Tool, ToolConfigurationError, traits, Provenance
 
 from lstchain.io import (
     get_dataset_keys,
@@ -41,41 +43,209 @@
 from lstchain.reco.utils import filter_events, impute_pointing, add_delta_t_key
 
 logger = logging.getLogger(__name__)
-parser = argparse.ArgumentParser(description=__doc__)
-
-# Required arguments
-parser.add_argument('--input-files', '-f',
-                    type=Path,
-                    nargs='+',
-                    dest='input_files',
-                    help='Path (or list of paths) to a DL1 HDF5 file',
-                    required=True)
-
-parser.add_argument('--path-models', '-p',
-                    action='store',
-                    type=Path,
-                    dest='path_models',
-                    help='Path where to find the trained RF',
-                    default='./trained_models')
-
-# Optional arguments
-parser.add_argument('--output-dir', '-o',
-                    action='store',
-                    type=Path,
-                    dest='output_dir',
-                    help='Path where to store the reco dl2 events',
-                    default='./dl2_data')
-
-parser.add_argument('--config', '-c',
-                    action='store',
-                    type=Path,
-                    dest='config_file',
-                    help='Path to a configuration file. If none is given, a standard configuration is applied',
-                    default=None,
-                    required=False)
-
-
-def apply_to_file(filename, models_dict, output_dir, config):
+
+
+# def write_provenance(hdf5_file_path, stage_name):
+#     """
+#     Write JSON provenance information to an HDF5 file.
+#     It uses the current activity's provenance information and should typically be called within a ctapipe Tool.
+
+#     Parameters:
+#     -----------
+#     hdf5_file_path : str or Path
+#         Path to the HDF5 file
+#     stage_name : str
+#         Name of the stage generating the provenance
+
+#     Returns:
+#     --------
+#     None
+#     """
+#     try:
+#         # Open the HDF5 file in read-write mode
+#         with h5py.File(hdf5_file_path, 'a') as h5file:
+#             # Ensure the /provenance group exists
+#             if 'provenance' not in h5file:
+#                 h5file.create_group('provenance')
+            
+#             # Convert the provenance dictionary to a JSON string
+#             provenance_json = json.dumps(Provenance().current_activity.provenance, indent=2, default=str)
+            
+#             # Create the dataset in the tool's group
+#             h5file['provenance'].create_dataset(
+#                 stage_name,
+#                 data=provenance_json.encode('utf-8'),
+#                 dtype=h5py.special_dtype(vlen=str)
+#             )
+        
+#         print(f"Provenance for {stage_name} written successfully to {hdf5_file_path}")
+    
+#     except Exception as e:
+#         print(f"Error writing provenance: {e}")
+#         raise
+
+
+def write_provenance(hdf5_file_path, dataset_name):
+    """
+    Write JSON provenance information to an HDF5 file.
+    It uses the current activity's provenance information and should typically be called within a ctapipe Tool.
+
+    Parameters:
+    -----------
+    hdf5_file_path : str or Path
+        Path to the HDF5 file
+    stage_name : str
+        Name of the stage generating the provenance
+
+    Returns:
+    --------
+    None
+    """
+    try:
+        with h5py.File(hdf5_file_path, 'a') as h5file:
+            if dataset_name not in h5file:
+                h5file.create_group(dataset_name)
+            
+            # Convert to JSON string
+            provenance_json = json.dumps(Provenance().current_activity.provenance, indent=2, default=str)
+            
+            # Store as an attribute instead of a dataset
+            h5file[dataset_name].attrs['provenance'] = provenance_json
+        
+    except Exception as e:
+        print(f"Error writing provenance: {e}")
+        raise
+
+
+def read_provenance(hdf5_file_path, dataset_name):
+    """
+    Read JSON provenance from HDF5 file's dataset attributes.
+
+    Parameters:
+    -----------
+    hdf5_file_path : str
+        Path to the HDF5 file
+    dataset_name : str
+        Name of the dataset containing provenance
+
+    Returns:
+    --------
+    dict
+        Provenance information as JSON-decoded dictionary
+    """
+    with h5py.File(hdf5_file_path, 'r') as h5file:
+        if dataset_name not in h5file or 'provenance' not in h5file[dataset_name].attrs:
+            print(f"No provenance found for {dataset_name}")
+            return {}
+        
+        return json.loads(h5file[dataset_name].attrs['provenance'])
+
+def dl2_filename(dl1_filename):
+    """
+    Create the name of the DL2 file from the DL1 file name.
+
+    Parameters:
+    -----------
+    dl1_filename : str
+        Name of the DL1 file
+
+    Returns:
+    --------
+    str
+        Name of the DL2 file
+    """
+    return dl1_filename.replace('dl1', 'dl2', 1)
+
+class DL1ToDL2Tool(Tool):
+    name = "DL1 to DL2 Tool"
+    description = __doc__
+
+    input_files = traits.List(
+        traits.Path,
+        help="Path (or list of paths) to a DL1 HDF5 file",
+    ).tag(config=True)
+
+    path_models = traits.Path(
+        help="Path where to find the trained RF",
+        default='./trained_models',
+    ).tag(config=True)
+
+    output_dir = traits.Path(
+        help="Path where to store the reco dl2 events",
+        default='./dl2_data',
+    ).tag(config=True)
+
+    config_file = traits.Path(
+        help="Path to a configuration file. If none is given, a standard configuration is applied",
+        default=None,
+    ).tag(config=True)
+
+    aliases = {
+        ("f", "input-files"): "DL1ToDL2Tool.input_files",
+        ("p", "path-models"): "DL1ToDL2Tool.path_models",
+        ("o", "output-dir"): "DL1ToDL2Tool.output_dir",
+        ("c", "config"): "DL1ToDL2Tool.config_file",
+    }
+
+    def setup(self):
+         
+        # Check if input files are provided
+        if not self.input_files:
+            raise ToolConfigurationError("No input files provided. Use --input-files to specify.")
+
+        # Additional setup logic can go here
+        self.log.info(f"Input files: {self.input_files}")
+        self.log.info(f"Path to models: {self.path_models}")
+        self.log.info(f"Output directory: {self.output_dir}")
+
+    def start(self):
+            
+        custom_config = {}
+        if self.config_file is not None:
+            try:
+                custom_config = read_configuration_file(self.config_file.absolute())
+            except Exception as e:
+                self.log.error(f"Custom configuration could not be loaded: {e}")
+                return
+
+        config = replace_config(standard_config, custom_config)
+
+        models_keys = ['reg_energy', 'cls_gh']
+
+        if config['disp_method'] == 'disp_vector':
+            models_keys.append('reg_disp_vector')
+        elif config['disp_method'] == 'disp_norm_sign':
+            models_keys.extend(['reg_disp_norm', 'cls_disp_sign'])
+
+        models_dict = {}
+        for models_key in models_keys:
+            models_path = Path(self.path_models, f'{models_key}.sav')
+
+            if len(self.input_files) == 1:
+                models_dict[models_key] = models_path
+            else:
+                models_dict[models_key] = joblib.load(models_path)
+
+        self.output_dir.mkdir(exist_ok=True)
+        for input_dl1file in self.input_files:
+            output_dl2file = self.output_dir.joinpath(dl2_filename(input_dl1file.name))
+            if output_dl2file.exists():
+                raise IOError(str(output_dl2file) + ' exists, exiting.')
+            else:
+                apply_to_file(input_dl1file, models_dict, output_dl2file, config)
+                write_provenance(output_dl2file, 'dl2')
+
+
+def apply_to_file(filename, models_dict, output_file, config):
+    """
+    Applies models to the data in the specified file and writes the output to a new file in the output directory.
+
+    Parameters:
+    - filename (Path or str): The path to the input file.
+    - models_dict (dict): A dictionary containing the models to be applied.
+    - output_file (Path or str): The path to the output file.
+    - config (dict): The configuration dictionary containing parameters for the processing.
+    """
 
     data = pd.read_hdf(filename, key=dl1_params_lstcam_key)
 
@@ -196,12 +366,6 @@ def apply_to_file(filename, models_dict, output_dir, config):
         logger.warning("No dl2 output file written.")
         return
 
-    output_dir.mkdir(exist_ok=True)
-    output_file = output_dir.joinpath(filename.name.replace('dl1', 'dl2', 1))
-
-    if output_file.exists():
-        raise IOError(str(output_file) + ' exists, exiting.')
-
     dl1_keys = get_dataset_keys(filename)
 
     if dl1_images_lstcam_key in dl1_keys:
@@ -258,42 +422,12 @@ def apply_to_file(filename, models_dict, output_dir, config):
             write_dataframe(dl2_onlylhfit, output_file, dl2_likelihood_params_lstcam_key, config=config, meta=metadata)
         write_dataframe(pd.concat(dl2_srcdep_dict, axis=1), output_file, dl2_params_src_dep_lstcam_key, config=config,
                         meta=metadata)
+        
 
 
 def main():
-    args = parser.parse_args()
-
-    custom_config = {}
-    if args.config_file is not None:
-        try:
-            custom_config = read_configuration_file(args.config_file.absolute())
-        except("Custom configuration could not be loaded !!!"):
-            pass
-
-    config = replace_config(standard_config, custom_config)
-
-    models_keys = ['reg_energy', 'cls_gh']
-
-    if config['disp_method'] == 'disp_vector':
-        models_keys.append('reg_disp_vector')
-    elif config['disp_method'] == 'disp_norm_sign':
-        models_keys.extend(['reg_disp_norm', 'cls_disp_sign'])
-
-    models_dict = {}
-    for models_key in models_keys:
-        models_path = Path(args.path_models, f'{models_key}.sav')
-
-        # For a single input file, each model is loaded just before it is used
-        if len(args.input_files)==1:
-            models_dict[models_key] = models_path
-        # For multiple input files, all the models are loaded only once here 
-        else:
-            models_dict[models_key] = joblib.load(models_path)
-
-    for filename in args.input_files:
-        apply_to_file(filename, models_dict, args.output_dir, config)
-
-
+    tool = DL1ToDL2Tool()
+    tool.run()
 
 if __name__ == '__main__':
     main()

From aa55c955d0cf225d8301b096566e3406dab2ab0c Mon Sep 17 00:00:00 2001
From: Thomas Vuillaume <thomas.vuillaume@lapp.in2p3.fr>
Date: Thu, 30 Jan 2025 15:47:57 +0100
Subject: [PATCH 02/17] transform script dl1 to dl2 in tool and add provenance
 into file

---
 lstchain/io/provenance.py                     |  79 +++++++++++++
 lstchain/io/tests/test_provenance.py          |  31 +++++
 lstchain/scripts/lstchain_dl1_to_dl2.py       | 106 +-----------------
 .../scripts/tests/test_lstchain_scripts.py    |   7 ++
 4 files changed, 123 insertions(+), 100 deletions(-)
 create mode 100644 lstchain/io/provenance.py
 create mode 100644 lstchain/io/tests/test_provenance.py

diff --git a/lstchain/io/provenance.py b/lstchain/io/provenance.py
new file mode 100644
index 0000000000..fc6068ae8f
--- /dev/null
+++ b/lstchain/io/provenance.py
@@ -0,0 +1,79 @@
+import json
+import h5py
+from ctapipe.core import Provenance
+import logging
+
+logger = logging.getLogger()
+
+def write_provenance(hdf5_file_path, stage_name):
+    """
+    Write JSON provenance information to an HDF5 file.
+    It uses the current activity's provenance information and should typically be called within a ctapipe Tool.
+
+    Parameters:
+    -----------
+    hdf5_file_path : str or Path
+        Path to the HDF5 file
+    stage_name : str
+        Name of the stage generating the provenance
+
+    Returns:
+    --------
+    None
+    """
+    try:
+        with h5py.File(hdf5_file_path, 'a') as h5file:
+            if 'provenance' not in h5file:
+                h5file.create_group('provenance')
+            
+            # Get the provenance dictionary from the current activity
+            provenance_data = Provenance().current_activity.provenance
+            # Dump the dictionary to a JSON string and write it to the HDF5 file
+            h5file['provenance'].create_dataset(stage_name, data=json.dumps(provenance_data, default=str))
+    
+    except Exception as e:
+        raise Exception(f"Error writing provenance: {e}")
+
+
+def read_provenance(hdf5_file_path, dataset_name):
+    """
+    Read JSON provenance stored as a dataset in the 'provenance' group of an HDF5 file.
+
+    Parameters:
+    -----------
+    hdf5_file_path : str
+        Path to the HDF5 file
+    dataset_name : str
+        Name of the dataset containing provenance
+
+    Returns:
+    --------
+    dict
+        Provenance information as JSON-decoded dictionary
+    """
+    logger.log(logging.INFO, f"reading provenance from {hdf5_file_path}")
+    with h5py.File(hdf5_file_path, 'r') as h5file:
+        if 'provenance' not in h5file:
+            raise ValueError("No provenance found in HDF5 file")
+        elif dataset_name not in h5file['provenance']:
+            raise ValueError(f"No provenance found for {dataset_name}")
+        else:
+            return json.loads(h5file['provenance'][dataset_name][()])
+
+
+def read_dl2_provenance(hdf5_file_path):
+    """
+    Read the JSON provenance of the DL1 to DL2 stage from an HDF5 file.
+    This function is a wrapper around read_provenance() that reads the provenance for the 'dl1_to_dl2' dataset.
+
+    Parameters:
+    -----------
+    hdf5_file_path : str
+        Path to the HDF5 file
+
+    Returns:
+    --------
+    dict
+        Provenance information as JSON-decoded dictionary
+    """
+    return read_provenance(hdf5_file_path, 'dl1_to_dl2')
\ No newline at end of file
diff --git a/lstchain/io/tests/test_provenance.py b/lstchain/io/tests/test_provenance.py
new file mode 100644
index 0000000000..a60116dfea
--- /dev/null
+++ b/lstchain/io/tests/test_provenance.py
@@ -0,0 +1,31 @@
+import pytest
+import json
+import h5py
+import pytest
+
+from lstchain.io.provenance import write_provenance, read_provenance
+from ctapipe.core import Provenance
+
+@pytest.fixture
+def hdf5_file(tmp_path):
+    test_file = tmp_path / 'test_provenance.h5'
+    yield test_file
+
+def test_write_provenance(hdf5_file):
+    stage_name = 'test_stage'
+    p = Provenance()
+    p.add_input_file('input file', role='test input file')
+    p.add_output_file('output file', role='test output file')
+    write_provenance(hdf5_file, stage_name)
+    with h5py.File(hdf5_file, 'r') as h5file:
+        assert 'provenance' in h5file
+        assert stage_name in h5file['provenance']
+
+
+def test_read_provenance(hdf5_file):
+    stage_name = 'test_stage'
+    write_provenance(hdf5_file, stage_name)
+    result = read_provenance(hdf5_file, stage_name)    
+    assert 'activity_name' in result
+    assert 'activity_uuid' in result
+
diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py
index 22a6e3c634..600a661673 100644
--- a/lstchain/scripts/lstchain_dl1_to_dl2.py
+++ b/lstchain/scripts/lstchain_dl1_to_dl2.py
@@ -6,8 +6,6 @@
 Run lstchain_dl1_to_dl2 --help to see the options.
 """
 
-import h5py
-import json
 from pathlib import Path
 import joblib
 import logging
@@ -39,107 +37,13 @@
     dl2_likelihood_params_lstcam_key,
     write_dataframe,
 )
+from lstchain.io.provenance import write_provenance
 from lstchain.reco import dl1_to_dl2
 from lstchain.reco.utils import filter_events, impute_pointing, add_delta_t_key
 
 logger = logging.getLogger(__name__)
 
 
-# def write_provenance(hdf5_file_path, stage_name):
-#     """
-#     Write JSON provenance information to an HDF5 file.
-#     It uses the current activity's provenance information and should typically be called within a ctapipe Tool.
-
-#     Parameters:
-#     -----------
-#     hdf5_file_path : str or Path
-#         Path to the HDF5 file
-#     stage_name : str
-#         Name of the stage generating the provenance
-
-#     Returns:
-#     --------
-#     None
-#     """
-#     try:
-#         # Open the HDF5 file in read-write mode
-#         with h5py.File(hdf5_file_path, 'a') as h5file:
-#             # Ensure the /provenance group exists
-#             if 'provenance' not in h5file:
-#                 h5file.create_group('provenance')
-            
-#             # Convert the provenance dictionary to a JSON string
-#             provenance_json = json.dumps(Provenance().current_activity.provenance, indent=2, default=str)
-            
-#             # Create the dataset in the tool's group
-#             h5file['provenance'].create_dataset(
-#                 stage_name,
-#                 data=provenance_json.encode('utf-8'),
-#                 dtype=h5py.special_dtype(vlen=str)
-#             )
-        
-#         print(f"Provenance for {stage_name} written successfully to {hdf5_file_path}")
-    
-#     except Exception as e:
-#         print(f"Error writing provenance: {e}")
-#         raise
-
-
-def write_provenance(hdf5_file_path, dataset_name):
-    """
-    Write JSON provenance information to an HDF5 file.
-    It uses the current activity's provenance information and should typically be called within a ctapipe Tool.
-
-    Parameters:
-    -----------
-    hdf5_file_path : str or Path
-        Path to the HDF5 file
-    stage_name : str
-        Name of the stage generating the provenance
-
-    Returns:
-    --------
-    None
-    """
-    try:
-        with h5py.File(hdf5_file_path, 'a') as h5file:
-            if dataset_name not in h5file:
-                h5file.create_group(dataset_name)
-            
-            # Convert to JSON string
-            provenance_json = json.dumps(Provenance().current_activity.provenance, indent=2, default=str)
-            
-            # Store as an attribute instead of a dataset
-            h5file[dataset_name].attrs['provenance'] = provenance_json
-        
-    except Exception as e:
-        print(f"Error writing provenance: {e}")
-        raise
-
-
-def read_provenance(hdf5_file_path, dataset_name):
-    """
-    Read JSON provenance from HDF5 file's dataset attributes.
-
-    Parameters:
-    -----------
-    hdf5_file_path : str
-        Path to the HDF5 file
-    dataset_name : str
-        Name of the dataset containing provenance
-
-    Returns:
-    --------
-    dict
-        Provenance information as JSON-decoded dictionary
-    """
-    with h5py.File(hdf5_file_path, 'r') as h5file:
-        if dataset_name not in h5file or 'provenance' not in h5file[dataset_name].attrs:
-            print(f"No provenance found for {dataset_name}")
-            return {}
-        
-        return json.loads(h5file[dataset_name].attrs['provenance'])
-
 def dl2_filename(dl1_filename):
     """
     Create the name of the DL2 file from the DL1 file name.
@@ -233,7 +137,11 @@ def start(self):
                 raise IOError(str(output_dl2file) + ' exists, exiting.')
             else:
                 apply_to_file(input_dl1file, models_dict, output_dl2file, config)
-                write_provenance(output_dl2file, 'dl2')
+                p = Provenance()
+                p.add_input_file(input_dl1file, role='dl1 input file')
+                p.add_output_file(output_dl2file)
+                p.add_input_file(self.path_models, role='trained model directory')
+                write_provenance(output_dl2file, 'dl1_to_dl2')
 
 
 def apply_to_file(filename, models_dict, output_file, config):
@@ -411,7 +319,6 @@ def apply_to_file(filename, models_dict, output_file, config):
             dl2.drop(lhfit_keys, axis=1, inplace=True)
             write_dl2_dataframe(dl2, output_file, config=config, meta=metadata)
             write_dataframe(dl2_onlylhfit, output_file, dl2_likelihood_params_lstcam_key, config=config, meta=metadata)
-
     else:
         if 'lh_fit_config' not in config.keys():
             write_dl2_dataframe(dl2_srcindep, output_file, config=config, meta=metadata)
@@ -424,7 +331,6 @@ def apply_to_file(filename, models_dict, output_file, config):
                         meta=metadata)
         
 
-
 def main():
     tool = DL1ToDL2Tool()
     tool.run()
diff --git a/lstchain/scripts/tests/test_lstchain_scripts.py b/lstchain/scripts/tests/test_lstchain_scripts.py
index 52b7a570be..f3db91106d 100644
--- a/lstchain/scripts/tests/test_lstchain_scripts.py
+++ b/lstchain/scripts/tests/test_lstchain_scripts.py
@@ -17,6 +17,7 @@
 from ctapipe.io import read_table
 from ctapipe.io import EventSource
 from ctapipe.containers import EventType
+from lstchain.io.provenance import read_dl2_provenance
 
 
 from lstchain.io.config import get_srcdep_config, get_standard_config
@@ -385,6 +386,12 @@ def test_lstchain_dl1_to_dl2(simulated_dl2_file):
     assert "reco_disp_dy" in dl2_df.columns
     assert "reco_src_x" in dl2_df.columns
     assert "reco_src_y" in dl2_df.columns
+    
+    prov = read_dl2_provenance(simulated_dl2_file)
+    assert "activity_name" in prov
+    assert "config" in prov
+    assert "path_models" in prov['config']['DL1ToDL2Tool']
+    assert prov['config']['DL1ToDL2Tool']['path_models'] is not None
 
 
 def test_lstchain_dl1_to_dl2_srcdep(simulated_srcdep_dl2_file):

From f24951d854064224fb1c7cb5a594fb148fe65022 Mon Sep 17 00:00:00 2001
From: Thomas Vuillaume <thomas.vuillaume@lapp.in2p3.fr>
Date: Thu, 30 Jan 2025 16:06:07 +0100
Subject: [PATCH 03/17] fix imports tests

---
 lstchain/io/tests/test_provenance.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/lstchain/io/tests/test_provenance.py b/lstchain/io/tests/test_provenance.py
index a60116dfea..625e4a6692 100644
--- a/lstchain/io/tests/test_provenance.py
+++ b/lstchain/io/tests/test_provenance.py
@@ -1,8 +1,5 @@
 import pytest
-import json
 import h5py
-import pytest
-
 from lstchain.io.provenance import write_provenance, read_provenance
 from ctapipe.core import Provenance
 

From 5e6514c9d6eb1249edf4b8bb7ad26a87ec91c38e Mon Sep 17 00:00:00 2001
From: Thomas Vuillaume <thomas.vuillaume@lapp.in2p3.fr>
Date: Thu, 30 Jan 2025 16:47:21 +0100
Subject: [PATCH 04/17] revert output filename mess

---
 lstchain/scripts/lstchain_dl1_to_dl2.py | 46 ++++++++++++++-----------
 1 file changed, 25 insertions(+), 21 deletions(-)

diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py
index 67f1c98279..2bbbdc2dd3 100644
--- a/lstchain/scripts/lstchain_dl1_to_dl2.py
+++ b/lstchain/scripts/lstchain_dl1_to_dl2.py
@@ -131,18 +131,13 @@ def start(self):
             else:
                 models_dict[models_key] = joblib.load(models_path)
 
-        self.output_dir.mkdir(exist_ok=True)
         for input_dl1file in self.input_files:
-            output_dl2file = self.output_dir.joinpath(dl2_filename(input_dl1file.name))
-            if output_dl2file.exists():
-                raise IOError(str(output_dl2file) + ' exists, exiting.')
-            else:
-                apply_to_file(input_dl1file, models_dict, output_dl2file, config, self.path_models)
-                p = Provenance()
-                p.add_input_file(input_dl1file, role='dl1 input file')
-                p.add_output_file(output_dl2file)
-                p.add_input_file(self.path_models, role='trained model directory')
-                write_provenance(output_dl2file, 'dl1_to_dl2')
+            output_filepath = apply_to_file(input_dl1file, models_dict, output_dir, config, self.path_models)
+            p = Provenance()
+            p.add_input_file(input_dl1file, role='dl1 input file')
+            p.add_output_file(output_filepath, role='dl2 output file')
+            p.add_input_file(self.path_models, role='trained model directory')
+            write_provenance(output_filepath, 'dl1_to_dl2')
                 
 
 def apply_to_file(filename, models_dict, output_dir, config, models_path):
@@ -152,10 +147,15 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
     Parameters:
     - filename (Path or str): The path to the input file.
     - models_dict (dict): A dictionary containing the models to be applied.
-    - output_file (Path or str): The path to the output file.
+    - output_dir (Path or str): The path for the output directory.
     - config (dict): The configuration dictionary containing parameters for the processing.
     - models_path (Path or str): The path to the directory containing the trained models.
     """
+    output_dir = Path(output_dir)
+    output_dir.mkdir(exist_ok=True, parents=True)
+    dl2_output_file = output_dir.joinpath(dl2_filename(filename.name))
+    if dl2_output_file.exists():
+        raise IOError(str(dl2_output_file) + ' exists, exiting.')
 
     data = pd.read_hdf(filename, key=dl1_params_lstcam_key)
 
@@ -334,10 +334,10 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
         dl1_keys.remove(dl1_likelihood_params_lstcam_key)
 
     metadata = global_metadata()
-    write_metadata(metadata, output_file)
+    write_metadata(metadata, dl2_output_file)
 
     with open_file(filename, 'r') as h5in:
-        with open_file(output_file, 'a') as h5out:
+        with open_file(dl2_output_file, 'a') as h5out:
 
             # Write the selected DL1 info
             for k in dl1_keys:
@@ -358,28 +358,32 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
     # need container to use lstchain.io.add_global_metadata and lstchain.io.add_config_metadata
     if not config['source_dependent']:
         if 'lh_fit_config' not in config.keys():
-            write_dl2_dataframe(dl2, output_file, config=config, meta=metadata)
+            write_dl2_dataframe(dl2, dl2_output_file, config=config, meta=metadata)
         else:
             dl2_onlylhfit = dl2[lhfit_keys]
             dl2.drop(lhfit_keys, axis=1, inplace=True)
-            write_dl2_dataframe(dl2, output_file, config=config, meta=metadata)
-            write_dataframe(dl2_onlylhfit, output_file, dl2_likelihood_params_lstcam_key, config=config, meta=metadata)
+            write_dl2_dataframe(dl2, dl2_output_file, config=config, meta=metadata)
+            write_dataframe(dl2_onlylhfit, dl2_output_file, dl2_likelihood_params_lstcam_key, config=config, meta=metadata)
     else:
         if 'lh_fit_config' not in config.keys():
-            write_dl2_dataframe(dl2_srcindep, output_file, config=config, meta=metadata)
+            write_dl2_dataframe(dl2_srcindep, dl2_output_file, config=config, meta=metadata)
         else:
             dl2_onlylhfit = dl2_srcindep[lhfit_keys]
             dl2_srcindep.drop(lhfit_keys, axis=1, inplace=True)
-            write_dl2_dataframe(dl2_srcindep, output_file, config=config, meta=metadata)
-            write_dataframe(dl2_onlylhfit, output_file, dl2_likelihood_params_lstcam_key, config=config, meta=metadata)
-        write_dataframe(pd.concat(dl2_srcdep_dict, axis=1), output_file, dl2_params_src_dep_lstcam_key, config=config,
+            write_dl2_dataframe(dl2_srcindep, dl2_output_file, config=config, meta=metadata)
+            write_dataframe(dl2_onlylhfit, dl2_output_file, dl2_likelihood_params_lstcam_key, config=config, meta=metadata)
+        write_dataframe(pd.concat(dl2_srcdep_dict, axis=1), dl2_output_file, dl2_params_src_dep_lstcam_key, config=config,
                         meta=metadata)
         
+    return dl2_output_file
+        
 
 def main():
 
     tool = DL1ToDL2Tool()
     tool.run()
 
+
 if __name__ == '__main__':
     main()
+

From 913739676da933551db8d5f7ab4a6ee366001407 Mon Sep 17 00:00:00 2001
From: Thomas Vuillaume <thomas.vuillaume@lapp.in2p3.fr>
Date: Thu, 30 Jan 2025 16:54:11 +0100
Subject: [PATCH 05/17] fix outputdir

---
 lstchain/scripts/lstchain_dl1_to_dl2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py
index 2bbbdc2dd3..f86f78f2bb 100644
--- a/lstchain/scripts/lstchain_dl1_to_dl2.py
+++ b/lstchain/scripts/lstchain_dl1_to_dl2.py
@@ -132,7 +132,7 @@ def start(self):
                 models_dict[models_key] = joblib.load(models_path)
 
         for input_dl1file in self.input_files:
-            output_filepath = apply_to_file(input_dl1file, models_dict, output_dir, config, self.path_models)
+            output_filepath = apply_to_file(input_dl1file, models_dict, self.output_dir, config, self.path_models)
             p = Provenance()
             p.add_input_file(input_dl1file, role='dl1 input file')
             p.add_output_file(output_filepath, role='dl2 output file')

From 2d118d29bbca5db7d98d66d655faecddb950795f Mon Sep 17 00:00:00 2001
From: Thomas Vuillaume <thomas.vuillaume@lapp.in2p3.fr>
Date: Thu, 30 Jan 2025 17:02:55 +0100
Subject: [PATCH 06/17] modify docs accordingly

---
 docs/lstchain_api/scripts/index.rst | 13 -------------
 docs/lstchain_api/tools/index.rst   |  2 ++
 2 files changed, 2 insertions(+), 13 deletions(-)

diff --git a/docs/lstchain_api/scripts/index.rst b/docs/lstchain_api/scripts/index.rst
index d2006908c5..9a317edc29 100644
--- a/docs/lstchain_api/scripts/index.rst
+++ b/docs/lstchain_api/scripts/index.rst
@@ -99,19 +99,6 @@ Usage
    :func: parser
    :prog: lstchain_dl1_muon_analysis
 
-.. _lstchain_dl1_to_dl2:
-
-lstchain_dl1_to_dl2
-+++++++++++++++++++
-
-.. automodule:: lstchain.scripts.lstchain_dl1_to_dl2
-
-Usage
------
-.. argparse::
-   :module: lstchain.scripts.lstchain_dl1_to_dl2
-   :func: parser
-   :prog: lstchain_dl1_to_dl2
 
 .. _lstchain_dl1ab:
 
diff --git a/docs/lstchain_api/tools/index.rst b/docs/lstchain_api/tools/index.rst
index 92101749e5..c475ad7abf 100644
--- a/docs/lstchain_api/tools/index.rst
+++ b/docs/lstchain_api/tools/index.rst
@@ -24,3 +24,5 @@ Reference/API
    :no-inheritance-diagram:
 .. automodapi:: lstchain.tools.lstchain_fit_intensity_scan
    :no-inheritance-diagram:
+.. automodapi:: lstchain.scripts.lstchain_dl1_to_dl2
+   :no-inheritance-diagram:

From d7f6d9d6bbd37114bc6c9ff162e5e0dab26184b0 Mon Sep 17 00:00:00 2001
From: Thomas Vuillaume <thomas.vuillaume@lapp.in2p3.fr>
Date: Fri, 31 Jan 2025 11:01:49 +0100
Subject: [PATCH 07/17] rm dl1_to_dl2 in doc list

---
 docs/lstchain_api/scripts/index.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docs/lstchain_api/scripts/index.rst b/docs/lstchain_api/scripts/index.rst
index 9a317edc29..42db059a93 100644
--- a/docs/lstchain_api/scripts/index.rst
+++ b/docs/lstchain_api/scripts/index.rst
@@ -14,7 +14,6 @@ The scripts to be executed from the command line are described below:
 * `lstchain_data_create_time_calibration_file`_
 * `lstchain_data_r0_to_dl1`_
 * `lstchain_dl1_muon_analysis`_
-* `lstchain_dl1_to_dl2`_
 * `lstchain_dl1ab`_
 * `lstchain_dump_config`_
 * `lstchain_find_pedestals`_

From 72d831f814787d5226318bd1f87e995d13d6d556 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo <moralejo@ifae.es>
Date: Mon, 3 Feb 2025 15:00:04 +0100
Subject: [PATCH 08/17] default => default_value in traits

---
 lstchain/scripts/lstchain_dl1_to_dl2.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py
index f86f78f2bb..323a895ee5 100644
--- a/lstchain/scripts/lstchain_dl1_to_dl2.py
+++ b/lstchain/scripts/lstchain_dl1_to_dl2.py
@@ -72,17 +72,17 @@ class DL1ToDL2Tool(Tool):
 
     path_models = traits.Path(
         help="Path where to find the trained RF",
-        default='./trained_models',
+        default_value='./trained_models',
     ).tag(config=True)
 
     output_dir = traits.Path(
         help="Path where to store the reco dl2 events",
-        default='./dl2_data',
+        default_value='./dl2_data',
     ).tag(config=True)
 
     config_file = traits.Path(
         help="Path to a configuration file. If none is given, a standard configuration is applied",
-        default=None,
+        default_value=None,
     ).tag(config=True)
 
     aliases = {

From 15f49b3b9fc5461fa18b943f5ecfd31893f3c8c0 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo <moralejo@ifae.es>
Date: Mon, 3 Feb 2025 15:04:10 +0100
Subject: [PATCH 09/17] Allow None as default for config file

---
 lstchain/scripts/lstchain_dl1_to_dl2.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py
index 323a895ee5..11f5c59e09 100644
--- a/lstchain/scripts/lstchain_dl1_to_dl2.py
+++ b/lstchain/scripts/lstchain_dl1_to_dl2.py
@@ -81,6 +81,7 @@ class DL1ToDL2Tool(Tool):
     ).tag(config=True)
 
     config_file = traits.Path(
+        allow_none=True,
         help="Path to a configuration file. If none is given, a standard configuration is applied",
         default_value=None,
     ).tag(config=True)

From 73c6e012d77ab724207f8c2dc55bca1b329f1dbb Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo <moralejo@ifae.es>
Date: Mon, 3 Feb 2025 20:12:47 +0100
Subject: [PATCH 10/17] Update test_lstchain_scripts.py

Apparently traits.List requires one switch "-f" per input file. I do not know if this is intended.
---
 lstchain/scripts/tests/test_lstchain_scripts.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lstchain/scripts/tests/test_lstchain_scripts.py b/lstchain/scripts/tests/test_lstchain_scripts.py
index f3db91106d..073e731645 100644
--- a/lstchain/scripts/tests/test_lstchain_scripts.py
+++ b/lstchain/scripts/tests/test_lstchain_scripts.py
@@ -366,6 +366,7 @@ def test_lstchain_merged_dl1_to_dl2(
         "lstchain_dl1_to_dl2",
         "-f",
         simulated_dl1_file_,
+        "-f",
         merged_simulated_dl1_file,
         "-p",
         rf_models["path"],

From df36e4102ec4183199ab188d5a8d8de584bca797 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo <moralejo@ifae.es>
Date: Tue, 4 Feb 2025 09:40:07 +0100
Subject: [PATCH 11/17] Moved dl1_to_dl2 tool to the tools folder

---
 lstchain/{scripts => tools}/lstchain_dl1_to_dl2.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename lstchain/{scripts => tools}/lstchain_dl1_to_dl2.py (100%)

diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/tools/lstchain_dl1_to_dl2.py
similarity index 100%
rename from lstchain/scripts/lstchain_dl1_to_dl2.py
rename to lstchain/tools/lstchain_dl1_to_dl2.py

From e99e7cc82832d8bc7fae8bcd8c99c115a9dfd319 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo <moralejo@ifae.es>
Date: Tue, 4 Feb 2025 09:55:50 +0100
Subject: [PATCH 12/17] scripts => tools  (lstchain_dl1_to_dl2 docs)

---
 docs/introduction.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/introduction.rst b/docs/introduction.rst
index 3f8dda8d10..8e8e426d74 100644
--- a/docs/introduction.rst
+++ b/docs/introduction.rst
@@ -78,7 +78,7 @@ Here is an example configuration file for this step.
 DL1 to DL2
 ----------
 
-Use ``lstchain.scripts.lstchain_dl1_to_dl2`` for real data and MC.
+Use ``lstchain.tools.lstchain_dl1_to_dl2`` for real data and MC.
 
 For more information, try ``--help`` or see the :doc:`lstchain_api/index`.
 

From bbe8796b9e127f51b1fc18554d67d1b26cf6f9b1 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo <moralejo@ifae.es>
Date: Tue, 4 Feb 2025 10:45:53 +0100
Subject: [PATCH 13/17] Make --overwrite switch work

---
 lstchain/tools/lstchain_dl1_to_dl2.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/lstchain/tools/lstchain_dl1_to_dl2.py b/lstchain/tools/lstchain_dl1_to_dl2.py
index 11f5c59e09..63ea12ded6 100644
--- a/lstchain/tools/lstchain_dl1_to_dl2.py
+++ b/lstchain/tools/lstchain_dl1_to_dl2.py
@@ -133,7 +133,8 @@ def start(self):
                 models_dict[models_key] = joblib.load(models_path)
 
         for input_dl1file in self.input_files:
-            output_filepath = apply_to_file(input_dl1file, models_dict, self.output_dir, config, self.path_models)
+            output_filepath = apply_to_file(input_dl1file, models_dict, self.output_dir, config, 
+                                            self.path_models, self.overwrite)
             p = Provenance()
             p.add_input_file(input_dl1file, role='dl1 input file')
             p.add_output_file(output_filepath, role='dl2 output file')
@@ -141,7 +142,7 @@ def start(self):
             write_provenance(output_filepath, 'dl1_to_dl2')
                 
 
-def apply_to_file(filename, models_dict, output_dir, config, models_path):
+def apply_to_file(filename, models_dict, output_dir, config, models_path, overwrite=False):
     """
     Applies models to the data in the specified file and writes the output to a new file in the output directory.
 
@@ -155,6 +156,11 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
     output_dir = Path(output_dir)
     output_dir.mkdir(exist_ok=True, parents=True)
     dl2_output_file = output_dir.joinpath(dl2_filename(filename.name))
+
+    # Remove previous file if overwrite option is used:
+    if overwrite:
+        dl2_output_file.unlink(missing_ok=True)
+
     if dl2_output_file.exists():
         raise IOError(str(dl2_output_file) + ' exists, exiting.')
 

From 5d8c05544d61400acc2815d56934c23c8c279485 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo <moralejo@ifae.es>
Date: Tue, 4 Feb 2025 15:00:33 +0100
Subject: [PATCH 14/17] tool name

---
 lstchain/tools/lstchain_dl1_to_dl2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lstchain/tools/lstchain_dl1_to_dl2.py b/lstchain/tools/lstchain_dl1_to_dl2.py
index 63ea12ded6..1c403f3a5e 100644
--- a/lstchain/tools/lstchain_dl1_to_dl2.py
+++ b/lstchain/tools/lstchain_dl1_to_dl2.py
@@ -62,7 +62,7 @@ def dl2_filename(dl1_filename):
     return dl1_filename.replace('dl1', 'dl2', 1)
 
 class DL1ToDL2Tool(Tool):
-    name = "DL1 to DL2 Tool"
+    name = "lstchain_dl1_to_dl2"
     description = __doc__
 
     input_files = traits.List(

From 5e0640fef02663a283ffedd0adb0cd524784643b Mon Sep 17 00:00:00 2001
From: Daniel Morcuende <dmorcuende@iaa.es>
Date: Tue, 4 Feb 2025 15:08:40 +0100
Subject: [PATCH 15/17] add DL1toDL2 to module's __all__

---
 lstchain/tools/lstchain_dl1_to_dl2.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lstchain/tools/lstchain_dl1_to_dl2.py b/lstchain/tools/lstchain_dl1_to_dl2.py
index 1c403f3a5e..8d45ea7fdb 100644
--- a/lstchain/tools/lstchain_dl1_to_dl2.py
+++ b/lstchain/tools/lstchain_dl1_to_dl2.py
@@ -45,6 +45,9 @@
 logger = logging.getLogger(__name__)
 
 
+__all__ = ["DL1ToDL2Tool"]
+
+
 def dl2_filename(dl1_filename):
     """
     Create the name of the DL2 file from the DL1 file name.

From 1acdbe0292c43d8ffdbf5361934de3750be5be6e Mon Sep 17 00:00:00 2001
From: Daniel Morcuende <dmorcuende@iaa.es>
Date: Tue, 4 Feb 2025 15:09:10 +0100
Subject: [PATCH 16/17] change script by tool in automodapi

---
 docs/lstchain_api/tools/index.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/lstchain_api/tools/index.rst b/docs/lstchain_api/tools/index.rst
index c475ad7abf..2a0aa21cb4 100644
--- a/docs/lstchain_api/tools/index.rst
+++ b/docs/lstchain_api/tools/index.rst
@@ -24,5 +24,5 @@ Reference/API
    :no-inheritance-diagram:
 .. automodapi:: lstchain.tools.lstchain_fit_intensity_scan
    :no-inheritance-diagram:
-.. automodapi:: lstchain.scripts.lstchain_dl1_to_dl2
+.. automodapi:: lstchain.tools.lstchain_dl1_to_dl2
    :no-inheritance-diagram:

From 57af1bbf6c9a078499e71c578e7ae6b200673c85 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo <moralejo@ifae.es>
Date: Tue, 4 Feb 2025 15:55:46 +0100
Subject: [PATCH 17/17] Fill provenance earlier

---
 lstchain/tools/lstchain_dl1_to_dl2.py | 38 +++++++++++++++------------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/lstchain/tools/lstchain_dl1_to_dl2.py b/lstchain/tools/lstchain_dl1_to_dl2.py
index 8d45ea7fdb..2f514eb531 100644
--- a/lstchain/tools/lstchain_dl1_to_dl2.py
+++ b/lstchain/tools/lstchain_dl1_to_dl2.py
@@ -134,38 +134,42 @@ def start(self):
                 models_dict[models_key] = models_path
             else:
                 models_dict[models_key] = joblib.load(models_path)
-
+                
+        output_dir = Path(self.output_dir)
+        output_dir.mkdir(exist_ok=True, parents=True)
+        
         for input_dl1file in self.input_files:
-            output_filepath = apply_to_file(input_dl1file, models_dict, self.output_dir, config, 
-                                            self.path_models, self.overwrite)
+            dl2_output_file = output_dir.joinpath(dl2_filename(input_dl1file.name))
+
             p = Provenance()
             p.add_input_file(input_dl1file, role='dl1 input file')
-            p.add_output_file(output_filepath, role='dl2 output file')
+            p.add_output_file(dl2_output_file, role='dl2 output file')
             p.add_input_file(self.path_models, role='trained model directory')
-            write_provenance(output_filepath, 'dl1_to_dl2')
+
+            # Remove previous file if overwrite option is used:
+            if self.overwrite:
+                dl2_output_file.unlink(missing_ok=True)
+
+            if dl2_output_file.exists():
+                raise IOError(str(dl2_output_file) + ' exists, exiting.')
+
+            write_provenance(dl2_output_file, 'dl1_to_dl2')
+            
+            apply_to_file(input_dl1file, models_dict, dl2_output_file, config, 
+                          self.path_models)
                 
 
-def apply_to_file(filename, models_dict, output_dir, config, models_path, overwrite=False):
+def apply_to_file(filename, models_dict, dl2_output_file, config, models_path):
     """
     Applies models to the data in the specified file and writes the output to a new file in the output directory.
 
     Parameters:
     - filename (Path or str): The path to the input file.
     - models_dict (dict): A dictionary containing the models to be applied.
-    - output_dir (Path or str): The path for the output directory.
+    - dl2_output_file (Path or str): The path for the output DL2 file.
     - config (dict): The configuration dictionary containing parameters for the processing.
     - models_path (Path or str): The path to the directory containing the trained models.
     """
-    output_dir = Path(output_dir)
-    output_dir.mkdir(exist_ok=True, parents=True)
-    dl2_output_file = output_dir.joinpath(dl2_filename(filename.name))
-
-    # Remove previous file if overwrite option is used:
-    if overwrite:
-        dl2_output_file.unlink(missing_ok=True)
-
-    if dl2_output_file.exists():
-        raise IOError(str(dl2_output_file) + ' exists, exiting.')
 
     data = pd.read_hdf(filename, key=dl1_params_lstcam_key)