From 0a47c428e96178bdc0f177fae592c631efee3dd5 Mon Sep 17 00:00:00 2001 From: Lindsey Gray <lindsey.gray@gmail.com> Date: Tue, 2 Jan 2024 17:02:56 -0600 Subject: [PATCH] add known_base_form option so that opening root files can be avoided for mature analyses --- src/uproot/_dask.py | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/src/uproot/_dask.py b/src/uproot/_dask.py index fd66335c4..1b3d16bae 100644 --- a/src/uproot/_dask.py +++ b/src/uproot/_dask.py @@ -42,6 +42,7 @@ def dask( open_files=True, form_mapping=None, allow_read_errors_with_report=False, + known_base_form=None, **options, ): """ @@ -99,6 +100,8 @@ def dask( empty array for these nodes in the task graph. The return of this function then becomes a two element tuple, where the first return is the dask-awkward collection of interest and the second return is a report dask-awkward collection. + known_base_form (awkward.forms.Form | None): If not None, use this form instead of opening + one file to determine the dataset's form. Only available with open_files=False. options: See below. Returns dask equivalents of the backends supported by uproot. 
If ``library='np'``, @@ -200,6 +203,9 @@ def dask( else: steps_per_file = 1 + if known_base_form is not None and open_files: + raise TypeError("known_base_form must be None if open_files is True") + if library.name == "pd": raise NotImplementedError() @@ -279,6 +285,7 @@ def dask( form_mapping, steps_per_file, allow_read_errors_with_report, + known_base_form, ) else: raise NotImplementedError() @@ -1481,26 +1488,30 @@ def _get_dak_array_delay_open( form_mapping, steps_per_file, allow_read_errors_with_report, + known_base_form, ): dask_awkward = uproot.extras.dask_awkward() awkward = uproot.extras.awkward() ffile_path, fobject_path = files[0][0:2] - obj = uproot._util.regularize_object_path( - ffile_path, fobject_path, custom_classes, allow_missing, real_options - ) - common_keys = obj.keys( - recursive=recursive, - filter_name=filter_name, - filter_typename=filter_typename, - filter_branch=filter_branch, - full_paths=full_paths, - ) - - base_form = _get_ttree_form( - awkward, obj, common_keys, interp_options.get("ak_add_doc") - ) + if known_base_form is not None: + common_keys = list(known_base_form.fields) + base_form = known_base_form + else: + obj = uproot._util.regularize_object_path( + ffile_path, fobject_path, custom_classes, allow_missing, real_options + ) + common_keys = obj.keys( + recursive=recursive, + filter_name=filter_name, + filter_typename=filter_typename, + filter_branch=filter_branch, + full_paths=full_paths, + ) + base_form = _get_ttree_form( + awkward, obj, common_keys, interp_options.get("ak_add_doc") + ) divisions = [0] partition_args = []