Skip to content
This repository has been archived by the owner on Jun 21, 2022. It is now read-only.

Commit

Permalink
Merge pull request #453 from kreczko/kreczko-issue-447
Browse files: browse the repository at this point in the history
Fixing issue #447: uproot.rootio.TTree.arrays omitting variables
  • Loading branch information
jpivarski authored Jun 18, 2020
2 parents 484b626 + a81504c commit 634667f
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 6 deletions.
Binary file added tests/samples/issue447.root
Binary file not shown.
Binary file added tests/samples/issue447_recursive.root
Binary file not shown.
27 changes: 27 additions & 0 deletions tests/test_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,3 +425,30 @@ def test_issue448(self):
tree = f['Events']
assert len(tree.arrays(entrystop=0)) == 4179
assert len(tree.arrays('recoMuons_muons__RECO.*', entrystop=10)) == 93

@pytest.mark.parametrize("treename, branchtest", [
    ('l1CaloTowerEmuTree/L1CaloTowerTree', b'L1CaloTowerTree/L1CaloCluster/phi'),
    ('l1CaloTowerTree/L1CaloTowerTree', b'L1CaloTowerTree/L1CaloTower/et'),
])
def test_issue447_tree_arrays_omitting_variables(self, treename, branchtest):
    """Regression test for issue #447 (``TTree.arrays`` omitting variables).

    Reads ``treename`` from the issue447 sample file, requests its arrays
    with ``recursive=b'/'`` and checks that

    * the number of returned keys equals the number of keys found under
      every top-level key of the tree, and
    * the parametrized ``branchtest`` name is one of the returned keys.
    """
    with uproot.open("tests/samples/issue447.root") as rootfile:
        tree = rootfile[treename]
        flattened = tree.arrays(recursive=b'/')
        flattened_names = flattened.keys()

        # Count the keys sitting underneath each top-level key of the tree.
        expected_count = 0
        for top_level in tree.keys():
            expected_count += len(tree[top_level].keys())

        assert expected_count == len(flattened_names)
        assert branchtest in flattened_names

def test_issue447_recursive_provenance(self):
    """Check the '/'-joined keys produced for a chain of nested branches.

    With ``recursive=b'/'`` the keys returned by ``arrays`` for the
    issue447_recursive sample are expected to be exactly the four paths
    listed below, encoded as UTF-8 bytes.
    """
    wanted = sorted(
        path.encode(encoding='UTF-8')
        for path in (
            'tree/b1',
            'tree/b1/b2',
            'tree/b1/b2/b3',
            'tree/b1/b2/b3/b4',
        )
    )
    with uproot.open('tests/samples/issue447_recursive.root') as rootfile:
        result = rootfile['tree'].arrays(recursive=b'/')
        # Key order is not part of the contract here, so compare sorted.
        assert sorted(result.keys()) == wanted
36 changes: 30 additions & 6 deletions uproot/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,13 +286,23 @@ def _attachstreamer(self, branch, streamer, streamerinfosmap, isTClonesArray):

self._attachstreamer(subbranch, submembers.get(name, None), streamerinfosmap, isTClonesArray)

def _addprovenance(self, branch, context, parents=None):
    """Store on ``branch`` (and the branches below it) the list of enclosing names.

    ``parents`` is the list of names leading to ``branch``; when omitted it
    starts as ``[context.treename]``.  ``branch._provenance`` is only set if
    it is currently empty.  Every item yielded by ``branch.itervalues()``
    gets ``parents + [branch.name]`` as its ``_provenance`` and is then
    processed the same way.
    """
    lineage = [context.treename] if parents is None else parents

    # A provenance that has already been filled in is left untouched.
    if not len(branch._provenance):
        branch._provenance = lineage

    for child in branch.itervalues():
        # A fresh list per child, so siblings never share one list object.
        child._provenance = lineage + [branch.name]
        self._addprovenance(child, context, child._provenance)

def _postprocess(self, source, cursor, context, parent):
self._context = context
self._context.treename = self.name
self._context.speedbump = True

for branch in self._fBranches:
self._attachstreamer(branch, context.streamerinfosmap.get(getattr(branch, "_fClassName", None), None), context.streamerinfosmap, False)
self._addprovenance(branch, context)

self._branchlookup = {}
self._fill_branchlookup(self._branchlookup)
Expand Down Expand Up @@ -511,7 +521,7 @@ def array(self, branch, interpretation=None, entrystart=None, entrystop=None, fl
raise ValueError("list of branch names or glob/regex matches more than one branch; use TTree.arrays (plural)")
return tbranch.array(interpretation=interpretation, entrystart=entrystart, entrystop=entrystop, flatten=flatten, awkwardlib=awkwardlib, cache=cache, basketcache=basketcache, keycache=keycache, executor=executor, blocking=blocking)

def arrays(self, branches=None, outputtype=dict, namedecode=None, entrystart=None, entrystop=None, flatten=False, flatname=None, awkwardlib=None, cache=None, basketcache=None, keycache=None, executor=None, blocking=True):
def arrays(self, branches=None, outputtype=dict, namedecode=None, entrystart=None, entrystop=None, flatten=False, flatname=None, awkwardlib=None, cache=None, basketcache=None, keycache=None, executor=None, blocking=True, recursive=True):
awkward = _normalize_awkwardlib(awkwardlib)
branches = list(self._normalize_branches(branches, awkward))
for branch, interpretation in branches:
Expand All @@ -526,7 +536,20 @@ def arrays(self, branches=None, outputtype=dict, namedecode=None, entrystart=Non
entrystart, entrystop = _normalize_entrystartstop(self.numentries, entrystart, entrystop)

# start the job of filling the arrays
futures = [(branch.name if namedecode is None else branch.name.decode(namedecode), interpretation, branch.array(interpretation=interpretation, entrystart=entrystart, entrystop=entrystop, flatten=(flatten and not ispandas), awkwardlib=awkward, cache=cache, basketcache=basketcache, keycache=keycache, executor=executor, blocking=False)) for branch, interpretation in branches]
futures = None
if recursive and recursive is not True:
def wrap_name(branch, namedecode):
    """Return the output key for ``branch``, prefixed with its provenance.

    The pieces ``branch._provenance + [branch.name]`` are joined with the
    separator ``recursive`` taken from the enclosing scope.  If
    ``namedecode`` is None the pieces are joined as they are; otherwise
    each piece is decoded with ``namedecode`` first.  A branch with an
    empty provenance yields just its (optionally decoded) name.

    The separator is coerced to the same kind as the pieces being joined.
    On Python 3, joining decoded names with a bytes separator such as
    ``b'/'``, or undecoded names with a text separator, raises TypeError.
    """
    if len(branch._provenance) == 0:
        return branch.name if namedecode is None else branch.name.decode(namedecode)

    pieces = branch._provenance + [branch.name]

    if namedecode is None:
        # Pieces stay undecoded, so a text separator is encoded to match;
        # bytes and bytearray separators are used unchanged.
        sep = recursive
        if not isinstance(sep, bytes) and hasattr(sep, "encode"):
            sep = sep.encode("utf-8")
        return sep.join(pieces)

    # Pieces are decoded to text, so a bytes-like separator is decoded too.
    sep = recursive
    if isinstance(sep, (bytes, bytearray)):
        sep = sep.decode(namedecode)
    return sep.join([p.decode(namedecode) for p in pieces])

futures = [(wrap_name(branch, namedecode), interpretation, branch.array(interpretation=interpretation, entrystart=entrystart, entrystop=entrystop, flatten=(flatten and not ispandas), awkwardlib=awkward, cache=cache, basketcache=basketcache, keycache=keycache, executor=executor, blocking=False)) for branch, interpretation in branches]
else:
futures = [(branch.name if namedecode is None else branch.name.decode(namedecode), interpretation, branch.array(interpretation=interpretation, entrystart=entrystart, entrystop=entrystop, flatten=(flatten and not ispandas), awkwardlib=awkward, cache=cache, basketcache=basketcache, keycache=keycache, executor=executor, blocking=False)) for branch, interpretation in branches]

# make functions that wait for the filling job to be done and return the right outputtype
if outputtype == namedtuple:
Expand Down Expand Up @@ -634,7 +657,7 @@ def _normalize_entrysteps(self, entrysteps, branches, entrystart, entrystop, key
starts = numpy.arange(entrystart, effectivestop, entrystepsize)
stops = numpy.append(starts[1:], effectivestop)
return zip(starts, stops)

else:
try:
iter(entrysteps)
Expand Down Expand Up @@ -662,7 +685,7 @@ def iterate(self, branches=None, entrysteps=None, outputtype=dict, namedecode=No

# for the case of outputtype == pandas.DataFrame, do some preparation to fill DataFrames efficiently
ispandas = getattr(outputtype, "__name__", None) == "DataFrame" and getattr(outputtype, "__module__", None) == "pandas.core.frame"

def evaluate(branch, interpretation, future, past, cachekey, pythonize):
if future is None:
return past
Expand Down Expand Up @@ -888,6 +911,7 @@ def _postprocess(self, source, cursor, context, parent):
self._context = context
self._streamer = None
self._interpretation = None
self._provenance = []

self._numgoodbaskets = 0
for i, x in enumerate(self._fBasketSeek):
Expand Down Expand Up @@ -1848,7 +1872,7 @@ def __getstate__(self):
"xrootdsource": self.xrootdsource,
"httpsource": self.httpsource,
"options": self.options}

def __setstate__(self, state):
self.paths = state["paths"]
self.treepath = state["treepath"]
Expand Down Expand Up @@ -1916,7 +1940,7 @@ def __setstate__(self, state):

def __call__(self, branch, entrystart, entrystop):
return self.tree[branch].array(interpretation=self.interpretation[branch], entrystart=entrystart, entrystop=entrystop, flatten=self.flatten, awkwardlib=self.awkwardlib, cache=None, basketcache=self.basketcache, keycache=self.keycache, executor=self.executor)

class _LazyBranch(object):
def __init__(self, path, treepath, branchname, branch, interpretation, flatten, awkwardlib, basketcache, keycache, executor):
self.path = path
Expand Down

0 comments on commit 634667f

Please sign in to comment.