From 48de170280aeebc93a92e374e047e665c01d3c0d Mon Sep 17 00:00:00 2001
From: Iason Krommydas
Date: Fri, 13 Dec 2024 04:12:01 -0600
Subject: [PATCH] allow the same name to contain multiple datasets in fetch_datasets.py

---
 scripts/fetch_datasets.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/scripts/fetch_datasets.py b/scripts/fetch_datasets.py
index d9d95d9..138bd08 100644
--- a/scripts/fetch_datasets.py
+++ b/scripts/fetch_datasets.py
@@ -131,11 +131,12 @@ def get_dataset_dict_grid(fset: Iterable[Iterable[str]], xrd: str, dbs_instance:
             logger.error(f"Unexpected error while fetching files for dataset '{dataset}': {e}")
             raise e
 
-        # Append xrootd prefix to each file path
         flist = [xrd + f for f in flist if f.strip()]
 
-        # Store in the desired JSON format
-        fdict[name] = {"files": {file_path: "Events" for file_path in flist}}
+        if name not in fdict:
+            fdict[name] = {"files": {file_path: "Events" for file_path in flist}}
+        else:
+            fdict[name]["files"].update({file_path: "Events" for file_path in flist})
 
         logger.info(f"Found {len(flist)} files for dataset '{name}'.")
     return fdict
@@ -167,7 +168,10 @@ def get_dataset_dict_local(fset: Iterable[Iterable[str]], recursive: bool, exten
                 for file in directory.glob(pattern)
                 if file.is_file() and (not extensions or file.suffix.lower() in [ext.lower() for ext in extensions])
             ]
-            fdict[name] = {"files": {file_path: "Events" for file_path in files}}
+            if name not in fdict:
+                fdict[name] = {"files": {file_path: "Events" for file_path in files}}
+            else:
+                fdict[name]["files"].update({file_path: "Events" for file_path in files})
 
             logger.info(f"Found {len(files)} files for local dataset '{name}'.")
         except Exception as e:
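
Note (not part of the patch): both hunks apply the same merge step, so the new behavior can be sketched on its own as below. This is a minimal illustration only; the helper name merge_files_into_fdict, the dataset name, and the file paths are made up and do not come from fetch_datasets.py.

    # Sketch (assumed names): merge a dataset's files into fdict,
    # accumulating under an existing name instead of overwriting it.
    def merge_files_into_fdict(fdict: dict, name: str, files: list) -> None:
        if name not in fdict:
            fdict[name] = {"files": {file_path: "Events" for file_path in files}}
        else:
            fdict[name]["files"].update({file_path: "Events" for file_path in files})

    fdict = {}
    # Two inputs mapped to the same output name now end up in one entry,
    # whereas before the second call would have replaced the first.
    merge_files_into_fdict(fdict, "ttbar", ["root://xrd.example//store/a.root"])
    merge_files_into_fdict(fdict, "ttbar", ["root://xrd.example//store/b.root"])
    assert fdict == {
        "ttbar": {
            "files": {
                "root://xrd.example//store/a.root": "Events",
                "root://xrd.example//store/b.root": "Events",
            }
        }
    }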