From 80718abb50a2c911eca5e179959b4f569b441beb Mon Sep 17 00:00:00 2001 From: Luis Antonio Obis Aparicio <35803280+lobis@users.noreply.github.com> Date: Thu, 12 Oct 2023 14:52:20 -0500 Subject: [PATCH] feat: Use a single `handler` argument on `uproot.reading.open` (#971) * single argument for handlers * style: pre-commit fixes * fix missing object source * add deprecation warnings for handler options * style: pre-commit fixes * use handler instead of *_handler * use file in skhep testdata * remove hyphen * use tag instead of branch (https://github.com/scikit-hep/uproot5/pull/973#discussion_r1346510055) * remove network pytest mark (https://github.com/scikit-hep/uproot5/pull/973#discussion_r1346504192) * use proper http links instead of local paths * use fsspec to split url * add comment * fix bad strip * working github test * add test for path split object * Update src/uproot/_util.py Co-authored-by: Jim Pivarski * use urllib instead of fsspec for url parsing * move tests to dedicated file * do not shadow `object` * add more test cases * fsspec source as default * fix test * add xrootd handler default * correctly strip obj * revert merge * Revert "revert merge" This reverts commit 60300d63b943083f0a2579917c7c76a24edb7bf7. * Revert "Revert "revert merge"" This reverts commit 2884e02d82b7eb8d57b2da1c28b5c7ac6a9d6bb8. * update docstrings * update docstrings * direct the user to the handler option in docs * explain order of handler options --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Jim Pivarski --- src/uproot/_dask.py | 13 ++- src/uproot/_util.py | 104 ++++++++++++++++++ src/uproot/behaviors/TBranch.py | 26 +++-- src/uproot/reading.py | 34 +++--- ...st_0017-multi-basket-multi-branch-fetch.py | 24 ++-- tests/test_0692_fsspec.py | 10 +- 6 files changed, 160 insertions(+), 51 deletions(-) diff --git a/src/uproot/_dask.py b/src/uproot/_dask.py index 2a2a11f62..3d77d5f78 100644 --- a/src/uproot/_dask.py +++ b/src/uproot/_dask.py @@ -133,17 +133,18 @@ def dask( Options (type; default): - * file_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.file.MemmapSource`) - * xrootd_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.xrootd.XRootDSource`) - * s3_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.s3.S3Source`) - * http_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.http.HTTPSource`) - * object_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.object.ObjectSource`) + * handler (:doc:`uproot.source.chunk.Source` class; None) + * file_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) + * xrootd_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) + * s3_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) + * http_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) + * object_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) * timeout (float for HTTP, int for XRootD; 30) * max_num_elements (None or int; None) * num_workers (int; 1) * use_threads (bool; False on the emscripten platform (i.e. in a web browser), else True) * num_fallback_workers (int; 10) - * begin_chunk_size (memory_size; 512) + * begin_chunk_size (memory_size; 403, the smallest a ROOT file can be) * minimal_ttree_metadata (bool; True) Other file entry points: diff --git a/src/uproot/_util.py b/src/uproot/_util.py index b45b47ac0..bf946ea3d 100644 --- a/src/uproot/_util.py +++ b/src/uproot/_util.py @@ -315,22 +315,65 @@ def file_object_path_split(path): def file_path_to_source_class(file_path, options): """ Use a file path to get the :doc:`uproot.source.chunk.Source` class that would read it. + + Returns a tuple of (class, file_path) where the class is a subclass of :doc:`uproot.source.chunk.Source`. + + The "handler" option is the preferred way to specify a custom source class. + The "*_handler" options are for backwards compatibility and will override the "handler" option if set. """ import uproot.source.chunk file_path = regularize_path(file_path) + out = options["handler"] + if out is not None: + if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)): + raise TypeError( + "'handler' is not a class object inheriting from Source: " + repr(out) + ) + # check if "object_handler" is set + if ( + options["object_handler"] is not None + or options["file_handler"] is not None + or options["xrootd_handler"] is not None + or options["s3_handler"] is not None + or options["http_handler"] is not None + ): + # These options will override the "handler" option for backwards compatibility + warnings.warn( + """In version 5.2.0, the '*_handler' argument ('http_handler`, 's3_handler', etc.) will be removed from 'uproot.open'. Use 'handler' instead.""", + stacklevel=1, + ) + else: + return out, file_path + if ( not isstr(file_path) and hasattr(file_path, "read") and hasattr(file_path, "seek") ): out = options["object_handler"] + if out is None: + out = uproot.source.object.ObjectSource + else: + warnings.warn( + f"""In version 5.2.0, the 'object_handler' argument will be removed from 'uproot.open'. Use +uproot.open(..., handler={out!r}) +instead. + +To raise these warnings as errors (and get stack traces to find out where they're called), run +import warnings +warnings.filterwarnings("error", module="uproot.*") +after the first `import uproot` or use `@pytest.mark.filterwarnings("error:::uproot.*")` in pytest.""", + DeprecationWarning, + stacklevel=1, + ) if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)): raise TypeError( "'object_handler' is not a class object inheriting from Source: " + repr(out) ) + return out, file_path windows_absolute_path = None @@ -363,6 +406,22 @@ def file_path_to_source_class(file_path, options): file_path = windows_absolute_path out = options["file_handler"] + if out is None: + out = uproot.source.file.MemmapSource + else: + warnings.warn( + f"""In version 5.2.0, the 'file_handler' argument will be removed from 'uproot.open'. Use + uproot.open(..., handler={out!r} + instead. + + To raise these warnings as errors (and get stack traces to find out where they're called), run + import warnings + warnings.filterwarnings("error", module="uproot.*") + after the first `import uproot` or use `@pytest.mark.filterwarnings("error:::uproot.*")` in pytest.""", + DeprecationWarning, + stacklevel=1, + ) + if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)): raise TypeError( "'file_handler' is not a class object inheriting from Source: " @@ -372,6 +431,21 @@ def file_path_to_source_class(file_path, options): elif parsed_url.scheme.upper() == "ROOT": out = options["xrootd_handler"] + if out is None: + out = uproot.source.root.XRootDSource + else: + warnings.warn( + f"""In version 5.2.0, the 'xrootd_handler' argument will be removed from 'uproot.open'. Use + uproot.open(..., handler={out!r} + instead. + + To raise these warnings as errors (and get stack traces to find out where they're called), run + import warnings + warnings.filterwarnings("error", module="uproot.*") + after the first `import uproot` or use `@pytest.mark.filterwarnings("error:::uproot.*")` in pytest.""", + DeprecationWarning, + stacklevel=1, + ) if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)): raise TypeError( "'xrootd_handler' is not a class object inheriting from Source: " @@ -381,6 +455,21 @@ def file_path_to_source_class(file_path, options): elif parsed_url.scheme.upper() in {"S3"}: out = options["s3_handler"] + if out is None: + out = uproot.source.s3.S3Source + else: + warnings.warn( + f"""In version 5.2.0, the 's3_handler' argument will be removed from 'uproot.open'. Use +uproot.open(..., handler={out!r} +instead. + +To raise these warnings as errors (and get stack traces to find out where they're called), run +import warnings +warnings.filterwarnings("error", module="uproot.*") +after the first `import uproot` or use `@pytest.mark.filterwarnings("error:::uproot.*")` in pytest.""", + DeprecationWarning, + stacklevel=1, + ) if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)): raise TypeError( "'s3' is not a class object inheriting from Source: " + repr(out) @@ -389,6 +478,21 @@ def file_path_to_source_class(file_path, options): elif parsed_url.scheme.upper() in {"HTTP", "HTTPS"}: out = options["http_handler"] + if out is None: + out = uproot.source.http.HTTPSource + else: + warnings.warn( + f"""In version 5.2.0, the 'http_handler' argument will be removed from 'uproot.open'. Use +uproot.open(..., handler={out!r} +instead. + +To raise these warnings as errors (and get stack traces to find out where they're called), run +import warnings +warnings.filterwarnings("error", module="uproot.*") +after the first `import uproot` or use `@pytest.mark.filterwarnings("error:::uproot.*")` in pytest.""", + DeprecationWarning, + stacklevel=1, + ) if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)): raise TypeError( "'http_handler' is not a class object inheriting from Source: " diff --git a/src/uproot/behaviors/TBranch.py b/src/uproot/behaviors/TBranch.py index c277d3b30..bb66be634 100644 --- a/src/uproot/behaviors/TBranch.py +++ b/src/uproot/behaviors/TBranch.py @@ -152,17 +152,18 @@ def iterate( Options (type; default): - * file_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.file.MemmapSource`) - * xrootd_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.xrootd.XRootDSource`) - * s3_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.s3.S3Source`) - * http_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.http.HTTPSource`) - * object_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.object.ObjectSource`) + * handler (:doc:`uproot.source.chunk.Source` class; None) + * file_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) + * xrootd_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) + * s3_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) + * http_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) + * object_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) * timeout (float for HTTP, int for XRootD; 30) * max_num_elements (None or int; None) * num_workers (int; 1) * use_threads (bool; False on the emscripten platform (i.e. in a web browser), else True) * num_fallback_workers (int; 10) - * begin_chunk_size (memory_size; 512) + * begin_chunk_size (memory_size; 403, the smallest a ROOT file can be) * minimal_ttree_metadata (bool; True) See also :ref:`uproot.behaviors.TBranch.HasBranches.iterate` to iterate @@ -325,17 +326,18 @@ def concatenate( Options (type; default): - * file_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.file.MemmapSource`) - * xrootd_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.xrootd.XRootDSource`) - * s3_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.s3.S3Source`) - * http_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.http.HTTPSource`) - * object_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.object.ObjectSource`) + * handler (:doc:`uproot.source.chunk.Source` class; None) + * file_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) + * xrootd_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) + * s3_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) + * http_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) + * object_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) * timeout (float for HTTP, int for XRootD; 30) * max_num_elements (None or int; None) * num_workers (int; 1) * use_threads (bool; False on the emscripten platform (i.e. in a web browser), else True) * num_fallback_workers (int; 10) - * begin_chunk_size (memory_size; 512) + * begin_chunk_size (memory_size; 403, the smallest a ROOT file can be) * minimal_ttree_metadata (bool; True) Other file entry points: diff --git a/src/uproot/reading.py b/src/uproot/reading.py index 83d9487cb..2620245ed 100644 --- a/src/uproot/reading.py +++ b/src/uproot/reading.py @@ -8,7 +8,6 @@ and :doc:`uproot.reading.ReadOnlyKey` (``TKey``). """ - import struct import sys import uuid @@ -77,11 +76,12 @@ def open( Options (type; default): - * file_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.file.MemmapSource`) - * xrootd_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.xrootd.XRootDSource`) - * s3_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.s3.S3Source`) - * http_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.http.HTTPSource`) - * object_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.object.ObjectSource`) + * handler (:doc:`uproot.source.chunk.Source` class; None) + * file_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) + * xrootd_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) + * s3_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) + * http_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) + * object_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) * timeout (float for HTTP, int for XRootD; 30) * max_num_elements (None or int; None) * num_workers (int; 1) @@ -178,10 +178,12 @@ def __getitem__(self, where): open.defaults = _OpenDefaults( { - "file_handler": uproot.source.file.MemmapSource, - "s3_handler": uproot.source.s3.S3Source, - "http_handler": uproot.source.http.HTTPSource, - "object_handler": uproot.source.object.ObjectSource, + "handler": None, # To be updated to fsspec source + "file_handler": None, # Deprecated + "s3_handler": None, # Deprecated + "http_handler": None, # Deprecated + "object_handler": None, # Deprecated + "xrootd_handler": None, # Deprecated "timeout": 30, "max_num_elements": None, "num_workers": 1, @@ -192,7 +194,6 @@ def __getitem__(self, where): } ) - must_be_attached = [ "TROOT", "TDirectory", @@ -534,11 +535,12 @@ class ReadOnlyFile(CommonFileMethods): Options (type; default): - * file_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.file.MemmapSource`) - * xrootd_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.xrootd.XRootDSource`) - * s3_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.xrootd.S3Source`) - * http_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.http.HTTPSource`) - * object_handler (:doc:`uproot.source.chunk.Source` class; :doc:`uproot.source.object.ObjectSource`) + * handler (:doc:`uproot.source.chunk.Source` class; None) + * file_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) + * xrootd_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) + * s3_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) + * http_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) + * object_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) * timeout (float for HTTP, int for XRootD; 30) * max_num_elements (None or int; None) * num_workers (int; 1) diff --git a/tests/test_0017-multi-basket-multi-branch-fetch.py b/tests/test_0017-multi-basket-multi-branch-fetch.py index c8df24263..b42f6db4b 100644 --- a/tests/test_0017-multi-basket-multi-branch-fetch.py +++ b/tests/test_0017-multi-basket-multi-branch-fetch.py @@ -165,13 +165,13 @@ def test_ranges_or_baskets_to_arrays(): @pytest.mark.parametrize( - "file_handler", + "handler", [uproot.source.file.MultithreadedFileSource, uproot.source.file.MemmapSource], ) -def test_branch_array_1(file_handler): +def test_branch_array_1(handler): with uproot.open( skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root"), - file_handler=file_handler, + handler=handler, )["sample/i4"] as branch: assert branch.array( uproot.interpretation.numerical.AsDtype(">i4"), library="np" @@ -210,13 +210,13 @@ def test_branch_array_1(file_handler): @pytest.mark.parametrize( - "file_handler", + "handler", [uproot.source.file.MultithreadedFileSource, uproot.source.file.MemmapSource], ) -def test_branch_array_2(file_handler): +def test_branch_array_2(handler): with uproot.open( skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root"), - file_handler=file_handler, + handler=handler, )["sample/i4"] as branch: assert branch.array( uproot.interpretation.numerical.AsDtype(">i4"), @@ -250,14 +250,14 @@ def test_branch_array_2(file_handler): @pytest.mark.parametrize( - "file_handler", + "handler", [uproot.source.file.MultithreadedFileSource, uproot.source.file.MemmapSource], ) -def test_branch_array_3(file_handler): +def test_branch_array_3(handler): executor = uproot.ThreadPoolExecutor() with uproot.open( skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root"), - file_handler=file_handler, + handler=handler, interpretation_executor=executor, decompression_executor=executor, )["sample/i4"] as branch: @@ -293,13 +293,13 @@ def test_branch_array_3(file_handler): @pytest.mark.parametrize( - "file_handler", + "handler", [uproot.source.file.MultithreadedFileSource, uproot.source.file.MemmapSource], ) -def test_branch_array_4(file_handler): +def test_branch_array_4(handler): with uproot.open( skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root"), - file_handler=file_handler, + handler=handler, )["sample/i4"] as branch: with pytest.raises(ValueError): branch.array(uproot.interpretation.numerical.AsDtype(">i8"), library="np") diff --git a/tests/test_0692_fsspec.py b/tests/test_0692_fsspec.py index eb6dd08c8..c426673e7 100644 --- a/tests/test_0692_fsspec.py +++ b/tests/test_0692_fsspec.py @@ -14,7 +14,7 @@ def test_open_fsspec_http(): with uproot.open( "https://github.com/scikit-hep/scikit-hep-testdata/raw/v0.4.33/src/skhep_testdata/data/uproot-issue121.root", - http_handler=uproot.source.fsspec.FSSpecSource, + handler=uproot.source.fsspec.FSSpecSource, ) as f: data = f["Events/MET_pt"].array(library="np") assert len(data) == 40 @@ -27,7 +27,7 @@ def test_open_fsspec_github(): ) with uproot.open( "github://scikit-hep:scikit-hep-testdata@v0.4.33/src/skhep_testdata/data/uproot-issue121.root", - http_handler=uproot.source.fsspec.FSSpecSource, + handler=uproot.source.fsspec.FSSpecSource, ) as f: data = f["Events/MET_pt"].array(library="np") assert len(data) == 40 @@ -38,7 +38,7 @@ def test_open_fsspec_local(tmp_path): with uproot.open( local_path, - file_handler=uproot.source.fsspec.FSSpecSource, + handler=uproot.source.fsspec.FSSpecSource, ) as f: data = f["Events/MET_pt"].array(library="np") assert len(data) == 40 @@ -51,7 +51,7 @@ def test_open_fsspec_s3(): with uproot.open( "s3://pivarski-princeton/pythia_ppZee_run17emb.picoDst.root:PicoDst", anon=True, - s3_handler=uproot.source.fsspec.FSSpecSource, + handler=uproot.source.fsspec.FSSpecSource, ) as f: data = f["Event/Event.mEventId"].array(library="np") assert len(data) == 8004 @@ -63,7 +63,7 @@ def test_open_fsspec_xrootd(): pytest.importorskip("XRootD") with uproot.open( "root://eospublic.cern.ch//eos/root-eos/cms_opendata_2012_nanoaod/Run2012B_DoubleMuParked.root", - xrootd_handler=uproot.source.fsspec.FSSpecSource, + handler=uproot.source.fsspec.FSSpecSource, ) as f: data = f["Events/run"].array(library="np", entry_stop=20) assert len(data) == 20