Skip to content

Commit

Permalink
allow the same name to contain multiple datasets in fetch_datasets.py
Browse files (browse the repository at this point in the history)
  • Loading branch information
ikrommyd committed Dec 13, 2024
1 parent 148d1dc commit 48de170
Showing 1 changed file with 8 additions and 4 deletions.
12 changes: 8 additions & 4 deletions scripts/fetch_datasets.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -131,11 +131,12 @@ def get_dataset_dict_grid(fset: Iterable[Iterable[str]], xrd: str, dbs_instance:
logger.error(f"Unexpected error while fetching files for dataset '{dataset}': {e}")
raise e

# Append xrootd prefix to each file path
flist = [xrd + f for f in flist if f.strip()]

# Store in the desired JSON format
fdict[name] = {"files": {file_path: "Events" for file_path in flist}}
if name not in fdict:
fdict[name] = {"files": {file_path: "Events" for file_path in flist}}
else:
fdict[name]["files"].update({file_path: "Events" for file_path in flist})
logger.info(f"Found {len(flist)} files for dataset '{name}'.")

return fdict
Expand Down Expand Up @@ -167,7 +168,10 @@ def get_dataset_dict_local(fset: Iterable[Iterable[str]], recursive: bool, exten
for file in directory.glob(pattern)
if file.is_file() and (not extensions or file.suffix.lower() in [ext.lower() for ext in extensions])
]
fdict[name] = {"files": {file_path: "Events" for file_path in files}}
if name not in fdict:
fdict[name] = {"files": {file_path: "Events" for file_path in files}}
else:
fdict[name]["files"].update({file_path: "Events" for file_path in files})
logger.info(f"Found {len(files)} files for local dataset '{name}'.")

except Exception as e:
Expand Down

0 comments on commit 48de170

Please sign in to comment.