
Commit

nested directories
jpivarski committed Sep 18, 2017
1 parent cd5a7b5 commit 565e927
Showing 5 changed files with 65 additions and 20 deletions.
4 changes: 2 additions & 2 deletions tests/README.md
@@ -14,8 +14,8 @@
| `std::vector`, `std::string` (???) | **(unsure)** |
| branch with "speed bumps" | ✓ |
| all compression algorithms (none, zlib, lzma, lz4; ignoring "old") | ✓ |
| `TTree` versions from 16 (2009) to 19 (present) | ✓ |
| nested directories | **no!** |
| files from 2009 (`TTree` version 16) to present (`TTree` version 19) | ✓ |
| nested directories, cycle numbers, '/' and ';' notation | ✓ |
| arrays interface | ✓ |
| iterator interface | ✓ |
| selection by list of branch names | **no!** |
Binary file added tests/nesteddirs.root
Binary file not shown.
25 changes: 25 additions & 0 deletions tests/test_tree.py
@@ -218,3 +218,28 @@ def test_tree_iterator4(self):
        for arrays in uproot.iterator(1, ["tests/foriter2.root", "tests/foriter2.root"], "foriter2"):
            self.assertEqual(arrays[b"data"].tolist(), words2[i:i + 1])
            i += 1

    def test_directories(self):
        file = uproot.open("tests/nesteddirs.root")
        self.assertEqual(file.contents, {b"one;1": b"TDirectory", b"three;1": b"TDirectory"})
        self.assertEqual(file.allcontents, {b"one/two;1": b"TDirectory", b"one/two/tree;1": b"TTree", b"three/tree;1": b"TTree", b"one;1": b"TDirectory", b"one/tree;1": b"TTree", b"three;1": b"TDirectory"})

        self.assertEqual(file["one"]["tree"].branchnames, [b"one", b"two", b"three"])
        self.assertEqual(file["one"].get("tree", 1).branchnames, [b"one", b"two", b"three"])
        self.assertEqual(file["one/tree;1"].branchnames, [b"one", b"two", b"three"])
        self.assertEqual(file["one/two/tree;1"].branchnames, [b"Int32", b"Int64", b"UInt32", b"UInt64", b"Float32", b"Float64", b"Str", b"ArrayInt32", b"ArrayInt64", b"ArrayUInt32", b"ArrayUInt64", b"ArrayFloat32", b"ArrayFloat64", b"N", b"SliceInt32", b"SliceInt64", b"SliceUInt32", b"SliceUInt64", b"SliceFloat32", b"SliceFloat64"])
        self.assertEqual(file["three/tree;1"].branchnames, [b"evt"])

        self.assertEqual(dict((name, array.tolist()) for name, array in file["one/tree"].arrays(["one", "two", "three"]).items()), {b"one": [1, 2, 3, 4], b"two": [1.100000023841858, 2.200000047683716, 3.299999952316284, 4.400000095367432], b"three": [b"uno", b"dos", b"tres", b"quatro"]})
        self.assertEqual(file["one/two/tree"].array("Int32").shape, (100,))
        self.assertEqual(file["three/tree"].array("I32").shape, (100,))

        file = uproot.open("tests/nesteddirs.root")

        self.assertEqual(file["one/tree"].branchnames, [b"one", b"two", b"three"])
        self.assertEqual(file["one/two/tree"].branchnames, [b"Int32", b"Int64", b"UInt32", b"UInt64", b"Float32", b"Float64", b"Str", b"ArrayInt32", b"ArrayInt64", b"ArrayUInt32", b"ArrayUInt64", b"ArrayFloat32", b"ArrayFloat64", b"N", b"SliceInt32", b"SliceInt64", b"SliceUInt32", b"SliceUInt64", b"SliceFloat32", b"SliceFloat64"])
        self.assertEqual(file["three/tree"].branchnames, [b"evt"])

        self.assertEqual(dict((name, array.tolist()) for name, array in file["one/tree;1"].arrays(["one", "two", "three"]).items()), {b"one": [1, 2, 3, 4], b"two": [1.100000023841858, 2.200000047683716, 3.299999952316284, 4.400000095367432], b"three": [b"uno", b"dos", b"tres", b"quatro"]})
        self.assertEqual(file["one/two/tree;1"].array("Int32").shape, (100,))
        self.assertEqual(file["three/tree;1"].array("I32").shape, (100,))
50 changes: 35 additions & 15 deletions uproot/rootio.py
@@ -80,8 +80,9 @@ class TFile(object):
"""Represents a ROOT file; use to extract objects.
* `file.get(name, cycle=None)` to extract an object (aware of '/' and ';' notations).
* `file.ls()` for a `{name: classname}` dict of objects in the top directory.
* `file.compression` for a Compression(algo, level) namedtuple.
* `file.contents` is a `{name: classname}` dict of objects in the top directory.
* `file.allcontents` is a `{name: classname}` dict of all objects in the file.
* `file.compression` is a Compression(algo, level) namedtuple describing the compression.
* `file.dir` is the top directory.
`file[name]` is a synonym for `file.get(name)`.
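
A minimal sketch of this interface in use, mirroring the assertions in tests/test_tree.py above (not part of the committed code):

import uproot

file = uproot.open("tests/nesteddirs.root")
print(file.contents)         # {b"one;1": b"TDirectory", b"three;1": b"TDirectory"}
print(file.allcontents)      # also includes nested keys such as b"one/two/tree;1"
print(file.compression)      # Compression(algo, level) namedtuple
tree = file["one/two/tree"]  # same object as file.get("one/two/tree")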
@@ -134,10 +135,13 @@ def __len__(self):
def __iter__(self):
return iter(self.dir.keys)

def ls(self):
"""Get a `{name: classname}` dict of objects in the top directory.
"""
return self.dir.ls()
@property
def contents(self):
return self.dir.contents

@property
def allcontents(self):
return self.dir.allcontents

def get(self, name, cycle=None):
"""Get an object from the file, interpreting '/' as subdirectories and ';' to delimit cycle number.
@@ -152,7 +156,8 @@ class TDirectory(object):
"""Represents a ROOT directory; use to extract objects.
* `dir.get(name, cycle=None)` to extract an object (aware of '/' and ';' notations).
* `dir.ls()` for a `{name: classname}` dict of objects in this directory.
* `dir.contents` is a `{name: classname}` dict of objects in this directory.
* `dir.allcontents` is a `{name: classname}` dict of all objects under this directory.
* `dir.keys` is the keys.
`dir[name]` is a synonym for `dir.get(name)`.
@@ -188,10 +193,13 @@ def __len__(self):
def __iter__(self):
return iter(self.keys)

def ls(self):
"""Get a `{name: classname}` dict of objects in this directory.
"""
return self.keys.ls()
@property
def contents(self):
return self.keys.contents

@property
def allcontents(self):
return self.keys.allcontents

def get(self, name, cycle=None):
"""Get an object from the directory, interpreting '/' as subdirectories and ';' to delimit cycle number.
@@ -209,7 +217,8 @@ class TKeys(object):
"""Represents a collection of keys.
* `keys.get(name, cycle=None)` to extract an object (aware of ';' notation).
* `keys.ls()` for a `{name: classname}` dict.
* `keys.contents` is a `{name: classname}` dict of objects directly in this set of TKeys.
* `keys.allcontents` is a `{name: classname}` dict of all objects under this set of TKeys.
`keys[name]` is a synonym for `keys.get(name)`.
@@ -240,10 +249,21 @@ def __len__(self):
def __iter__(self):
return iter(self.keys)

def ls(self):
"""Get a `{name: classname}` dict.
@property
def contents(self):
return dict(("{0};{1}".format(x.name.decode("ascii"), x.cycle).encode("ascii"), x.classname) for x in self.keys)

@property
def allcontents(self):
"""Get a `{name: classname}` dict of objects directly in this set of TKeys.
"""
return dict((x.name, x.classname) for x in self.keys)
out = {}
for name, classname in self.contents.items():
out[name] = classname
if classname == b"TDirectory":
for name2, classname2 in self.get(name).allcontents.items():
out["{0}/{1}".format(name[:name.rindex(b";")].decode("ascii"), name2.decode("ascii")).encode("ascii")] = classname2
return out
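
A worked illustration of the name composition in allcontents: the parent key's cycle suffix is stripped before the child's name is appended, which is why the tests above expect keys like b"one/two;1":

parent, child = b"one;1", b"two;1"
joined = "{0}/{1}".format(parent[:parent.rindex(b";")].decode("ascii"),
                          child.decode("ascii")).encode("ascii")
assert joined == b"one/two;1"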

def get(self, name, cycle=None):
"""Get an object from the keys, interpreting ';' to delimit cycle number.
6 changes: 3 additions & 3 deletions uproot/tree.py
@@ -247,13 +247,13 @@ def _normalizeselection(branchdtypes, allbranches):
if name in allbranches:
branch = allbranches[name]
if hasattr(branch, "dtype"):
yield branch, dtype
yield branch, branch.dtype
else:
raise ValueError("cannot produce an array from branch {0}".format(repr(name)))
else:
raise ValueError("cannot find branch {0}".format(repr(name)))

def iterator(self, entries, branchdtypes=lambda branch: branch.dtype, executor=None, outputtype=dict, reportentries=False):
def iterator(self, entries, branchdtypes=lambda branch: getattr(branch, "dtype", None), executor=None, outputtype=dict, reportentries=False):
"""Iterates over a fixed number of entries at a time.
Instead of loading all entries from a tree with `tree.arrays()`, load a manageable number that will fit in memory at once and apply a continuous process to it. Example use:
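
The example the docstring refers to is in the collapsed portion of the diff; as an illustrative sketch only (chunk size chosen arbitrarily), chunked reading of the fixture tree added in this commit could look like:

import uproot

tree = uproot.open("tests/nesteddirs.root")["one/two/tree"]
for arrays in tree.iterator(50):         # load 50 entries at a time
    print(arrays[b"Int32"].sum())        # process one in-memory chunk, then move on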
@@ -344,7 +344,7 @@ def dobranch(branchdtypecache):
else:
yield out

def arrays(self, branchdtypes=lambda branch: branch.dtype, executor=None, outputtype=dict, block=True):
def arrays(self, branchdtypes=lambda branch: getattr(branch, "dtype", None), executor=None, outputtype=dict, block=True):
"""Extracts whole branches into Numpy arrays.
Individual branches from TTrees are typically small enough to fit into memory. If this is not your case, consider `tree.iterator(entries)` to load a given number of entries at a time.
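
A short sketch, using the values checked in tests/test_tree.py above:

import uproot

tree = uproot.open("tests/nesteddirs.root")["one/tree"]
arrays = tree.arrays(["one", "two", "three"])  # dict of {branch name (bytes): Numpy array}
print(arrays[b"one"].tolist())                 # [1, 2, 3, 4]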
