Skip to content

Commit

Permalink
feat: Use a single handler argument on uproot.reading.open (#971)
Browse files Browse the repository at this point in the history
* single argument for handlers

* style: pre-commit fixes

* fix missing object source

* add deprecation warnings for handler options

* style: pre-commit fixes

* use handler instead of *_handler

* use file in skhep testdata

* remove hyphen

* use tag instead of branch (#973 (comment))

* remove network pytest mark (#973 (comment))

* use proper http links instead of local paths

* use fsspec to split url

* add comment

* fix bad strip

* working github test

* add test for path split object

* Update src/uproot/_util.py

Co-authored-by: Jim Pivarski <jpivarski@users.noreply.github.com>

* use urllib instead of fsspec for url parsing

* move tests to dedicated file

* do not shadow `object`

* add more test cases

* fsspec source as default

* fix test

* add xrootd handler default

* correctly strip obj

* revert merge

* Revert "revert merge"

This reverts commit 60300d6.

* Revert "Revert "revert merge""

This reverts commit 2884e02.

* update docstrings

* update docstrings

* direct the user to the handler option in docs

* explain order of handler options

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Jim Pivarski <jpivarski@users.noreply.github.com>
  • Loading branch information
3 people authored Oct 12, 2023
1 parent f999bd9 commit 80718ab
Show file tree
Hide file tree
Showing 6 changed files with 160 additions and 51 deletions.
13 changes: 7 additions & 6 deletions src/uproot/_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,17 +133,18 @@ def dask(
Options (type; default):
* file_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.file.MemmapSource`)
* xrootd_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.xrootd.XRootDSource`)
* s3_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.s3.S3Source`)
* http_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.http.HTTPSource`)
* object_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.object.ObjectSource`)
* handler (:doc:`uproot.source.chunk.Source` class; None)
* file_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* xrootd_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* s3_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* http_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* object_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* timeout (float for HTTP, int for XRootD; 30)
* max_num_elements (None or int; None)
* num_workers (int; 1)
* use_threads (bool; False on the emscripten platform (i.e. in a web browser), else True)
* num_fallback_workers (int; 10)
* begin_chunk_size (memory_size; 512)
* begin_chunk_size (memory_size; 403, the smallest a ROOT file can be)
* minimal_ttree_metadata (bool; True)
Other file entry points:
Expand Down
104 changes: 104 additions & 0 deletions src/uproot/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,22 +315,65 @@ def file_object_path_split(path):
def file_path_to_source_class(file_path, options):
"""
Use a file path to get the :doc:`uproot.source.chunk.Source` class that would read it.
Returns a tuple of (class, file_path) where the class is a subclass of :doc:`uproot.source.chunk.Source`.
The "handler" option is the preferred way to specify a custom source class.
The "*_handler" options are for backwards compatibility and will override the "handler" option if set.
"""
import uproot.source.chunk

file_path = regularize_path(file_path)

out = options["handler"]
if out is not None:
if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)):
raise TypeError(
"'handler' is not a class object inheriting from Source: " + repr(out)
)
# check if "object_handler" is set
if (
options["object_handler"] is not None
or options["file_handler"] is not None
or options["xrootd_handler"] is not None
or options["s3_handler"] is not None
or options["http_handler"] is not None
):
# These options will override the "handler" option for backwards compatibility
warnings.warn(
"""In version 5.2.0, the '*_handler' argument ('http_handler`, 's3_handler', etc.) will be removed from 'uproot.open'. Use 'handler' instead.""",
stacklevel=1,
)
else:
return out, file_path

if (
not isstr(file_path)
and hasattr(file_path, "read")
and hasattr(file_path, "seek")
):
out = options["object_handler"]
if out is None:
out = uproot.source.object.ObjectSource
else:
warnings.warn(
f"""In version 5.2.0, the 'object_handler' argument will be removed from 'uproot.open'. Use
uproot.open(..., handler={out!r})
instead.
To raise these warnings as errors (and get stack traces to find out where they're called), run
import warnings
warnings.filterwarnings("error", module="uproot.*")
after the first `import uproot` or use `@pytest.mark.filterwarnings("error:::uproot.*")` in pytest.""",
DeprecationWarning,
stacklevel=1,
)
if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)):
raise TypeError(
"'object_handler' is not a class object inheriting from Source: "
+ repr(out)
)

return out, file_path

windows_absolute_path = None
Expand Down Expand Up @@ -363,6 +406,22 @@ def file_path_to_source_class(file_path, options):
file_path = windows_absolute_path

out = options["file_handler"]
if out is None:
out = uproot.source.file.MemmapSource
else:
warnings.warn(
f"""In version 5.2.0, the 'file_handler' argument will be removed from 'uproot.open'. Use
uproot.open(..., handler={out!r}
instead.
To raise these warnings as errors (and get stack traces to find out where they're called), run
import warnings
warnings.filterwarnings("error", module="uproot.*")
after the first `import uproot` or use `@pytest.mark.filterwarnings("error:::uproot.*")` in pytest.""",
DeprecationWarning,
stacklevel=1,
)

if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)):
raise TypeError(
"'file_handler' is not a class object inheriting from Source: "
Expand All @@ -372,6 +431,21 @@ def file_path_to_source_class(file_path, options):

elif parsed_url.scheme.upper() == "ROOT":
out = options["xrootd_handler"]
if out is None:
out = uproot.source.root.XRootDSource
else:
warnings.warn(
f"""In version 5.2.0, the 'xrootd_handler' argument will be removed from 'uproot.open'. Use
uproot.open(..., handler={out!r}
instead.
To raise these warnings as errors (and get stack traces to find out where they're called), run
import warnings
warnings.filterwarnings("error", module="uproot.*")
after the first `import uproot` or use `@pytest.mark.filterwarnings("error:::uproot.*")` in pytest.""",
DeprecationWarning,
stacklevel=1,
)
if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)):
raise TypeError(
"'xrootd_handler' is not a class object inheriting from Source: "
Expand All @@ -381,6 +455,21 @@ def file_path_to_source_class(file_path, options):

elif parsed_url.scheme.upper() in {"S3"}:
out = options["s3_handler"]
if out is None:
out = uproot.source.s3.S3Source
else:
warnings.warn(
f"""In version 5.2.0, the 's3_handler' argument will be removed from 'uproot.open'. Use
uproot.open(..., handler={out!r}
instead.
To raise these warnings as errors (and get stack traces to find out where they're called), run
import warnings
warnings.filterwarnings("error", module="uproot.*")
after the first `import uproot` or use `@pytest.mark.filterwarnings("error:::uproot.*")` in pytest.""",
DeprecationWarning,
stacklevel=1,
)
if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)):
raise TypeError(
"'s3' is not a class object inheriting from Source: " + repr(out)
Expand All @@ -389,6 +478,21 @@ def file_path_to_source_class(file_path, options):

elif parsed_url.scheme.upper() in {"HTTP", "HTTPS"}:
out = options["http_handler"]
if out is None:
out = uproot.source.http.HTTPSource
else:
warnings.warn(
f"""In version 5.2.0, the 'http_handler' argument will be removed from 'uproot.open'. Use
uproot.open(..., handler={out!r}
instead.
To raise these warnings as errors (and get stack traces to find out where they're called), run
import warnings
warnings.filterwarnings("error", module="uproot.*")
after the first `import uproot` or use `@pytest.mark.filterwarnings("error:::uproot.*")` in pytest.""",
DeprecationWarning,
stacklevel=1,
)
if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)):
raise TypeError(
"'http_handler' is not a class object inheriting from Source: "
Expand Down
26 changes: 14 additions & 12 deletions src/uproot/behaviors/TBranch.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,17 +152,18 @@ def iterate(
Options (type; default):
* file_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.file.MemmapSource`)
* xrootd_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.xrootd.XRootDSource`)
* s3_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.s3.S3Source`)
* http_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.http.HTTPSource`)
* object_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.object.ObjectSource`)
* handler (:doc:`uproot.source.chunk.Source` class; None)
* file_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* xrootd_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* s3_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* http_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* object_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* timeout (float for HTTP, int for XRootD; 30)
* max_num_elements (None or int; None)
* num_workers (int; 1)
* use_threads (bool; False on the emscripten platform (i.e. in a web browser), else True)
* num_fallback_workers (int; 10)
* begin_chunk_size (memory_size; 512)
* begin_chunk_size (memory_size; 403, the smallest a ROOT file can be)
* minimal_ttree_metadata (bool; True)
See also :ref:`uproot.behaviors.TBranch.HasBranches.iterate` to iterate
Expand Down Expand Up @@ -325,17 +326,18 @@ def concatenate(
Options (type; default):
* file_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.file.MemmapSource`)
* xrootd_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.xrootd.XRootDSource`)
* s3_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.s3.S3Source`)
* http_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.http.HTTPSource`)
* object_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.object.ObjectSource`)
* handler (:doc:`uproot.source.chunk.Source` class; None)
* file_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* xrootd_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* s3_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* http_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* object_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* timeout (float for HTTP, int for XRootD; 30)
* max_num_elements (None or int; None)
* num_workers (int; 1)
* use_threads (bool; False on the emscripten platform (i.e. in a web browser), else True)
* num_fallback_workers (int; 10)
* begin_chunk_size (memory_size; 512)
* begin_chunk_size (memory_size; 403, the smallest a ROOT file can be)
* minimal_ttree_metadata (bool; True)
Other file entry points:
Expand Down
34 changes: 18 additions & 16 deletions src/uproot/reading.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
and :doc:`uproot.reading.ReadOnlyKey` (``TKey``).
"""


import struct
import sys
import uuid
Expand Down Expand Up @@ -77,11 +76,12 @@ def open(
Options (type; default):
* file_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.file.MemmapSource`)
* xrootd_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.xrootd.XRootDSource`)
* s3_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.s3.S3Source`)
* http_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.http.HTTPSource`)
* object_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.object.ObjectSource`)
* handler (:doc:`uproot.source.chunk.Source` class; None)
* file_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* xrootd_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* s3_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* http_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* object_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* timeout (float for HTTP, int for XRootD; 30)
* max_num_elements (None or int; None)
* num_workers (int; 1)
Expand Down Expand Up @@ -178,10 +178,12 @@ def __getitem__(self, where):

open.defaults = _OpenDefaults(
{
"file_handler": uproot.source.file.MemmapSource,
"s3_handler": uproot.source.s3.S3Source,
"http_handler": uproot.source.http.HTTPSource,
"object_handler": uproot.source.object.ObjectSource,
"handler": None, # To be updated to fsspec source
"file_handler": None, # Deprecated
"s3_handler": None, # Deprecated
"http_handler": None, # Deprecated
"object_handler": None, # Deprecated
"xrootd_handler": None, # Deprecated
"timeout": 30,
"max_num_elements": None,
"num_workers": 1,
Expand All @@ -192,7 +194,6 @@ def __getitem__(self, where):
}
)


must_be_attached = [
"TROOT",
"TDirectory",
Expand Down Expand Up @@ -534,11 +535,12 @@ class ReadOnlyFile(CommonFileMethods):
Options (type; default):
* file_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.file.MemmapSource`)
* xrootd_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.xrootd.XRootDSource`)
* s3_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.xrootd.S3Source`)
* http_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.http.HTTPSource`)
* object_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.object.ObjectSource`)
* handler (:doc:`uproot.source.chunk.Source` class; None)
* file_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* xrootd_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* s3_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* http_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* object_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* timeout (float for HTTP, int for XRootD; 30)
* max_num_elements (None or int; None)
* num_workers (int; 1)
Expand Down
24 changes: 12 additions & 12 deletions tests/test_0017-multi-basket-multi-branch-fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,13 +165,13 @@ def test_ranges_or_baskets_to_arrays():


@pytest.mark.parametrize(
"file_handler",
"handler",
[uproot.source.file.MultithreadedFileSource, uproot.source.file.MemmapSource],
)
def test_branch_array_1(file_handler):
def test_branch_array_1(handler):
with uproot.open(
skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root"),
file_handler=file_handler,
handler=handler,
)["sample/i4"] as branch:
assert branch.array(
uproot.interpretation.numerical.AsDtype(">i4"), library="np"
Expand Down Expand Up @@ -210,13 +210,13 @@ def test_branch_array_1(file_handler):


@pytest.mark.parametrize(
"file_handler",
"handler",
[uproot.source.file.MultithreadedFileSource, uproot.source.file.MemmapSource],
)
def test_branch_array_2(file_handler):
def test_branch_array_2(handler):
with uproot.open(
skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root"),
file_handler=file_handler,
handler=handler,
)["sample/i4"] as branch:
assert branch.array(
uproot.interpretation.numerical.AsDtype(">i4"),
Expand Down Expand Up @@ -250,14 +250,14 @@ def test_branch_array_2(file_handler):


@pytest.mark.parametrize(
"file_handler",
"handler",
[uproot.source.file.MultithreadedFileSource, uproot.source.file.MemmapSource],
)
def test_branch_array_3(file_handler):
def test_branch_array_3(handler):
executor = uproot.ThreadPoolExecutor()
with uproot.open(
skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root"),
file_handler=file_handler,
handler=handler,
interpretation_executor=executor,
decompression_executor=executor,
)["sample/i4"] as branch:
Expand Down Expand Up @@ -293,13 +293,13 @@ def test_branch_array_3(file_handler):


@pytest.mark.parametrize(
"file_handler",
"handler",
[uproot.source.file.MultithreadedFileSource, uproot.source.file.MemmapSource],
)
def test_branch_array_4(file_handler):
def test_branch_array_4(handler):
with uproot.open(
skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root"),
file_handler=file_handler,
handler=handler,
)["sample/i4"] as branch:
with pytest.raises(ValueError):
branch.array(uproot.interpretation.numerical.AsDtype(">i8"), library="np")
Expand Down
Loading

0 comments on commit 80718ab

Please sign in to comment.