Skip to content
This repository has been archived by the owner on Jun 21, 2022. It is now read-only.

Commit

Permalink
Fixes #465 by explicitly trimming STL vector size (some ROOT files have unused/padding/junk in each event after the vector's serialized data).
Browse files Browse the repository at this point in the history
  • Loading branch information
jpivarski committed Mar 17, 2020
1 parent 3f60b2e commit a76458e
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 29 deletions.
50 changes: 25 additions & 25 deletions uproot/interp/auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ def transform(node, tofloat=True):
if branch._isTClonesArray:
return asgenobj(SimpleArray(STLVector(asdtype(fromdtype, fromdtype))), branch._context, 6)
else:
return asjagged(ascontent, skipbytes=10)
return asjagged(ascontent, skipbytes=10, sizeat=6)

except _NotNumerical:
if branch._vecstreamer is not None:
Expand All @@ -348,37 +348,37 @@ def transform(node, tofloat=True):
return asgenobj(STLVector(streamerClass), branch._context, 6)
else:
if streamerClass._methods is None:
return asjagged(astable(asdtype(recarray)), skipbytes=10)
return asjagged(astable(asdtype(recarray)), skipbytes=10, sizeat=6)
else:
return asjagged(asobj(astable(asdtype(recarray)), streamerClass._methods), skipbytes=10)
return asjagged(asobj(astable(asdtype(recarray)), streamerClass._methods), skipbytes=10, sizeat=6)

if hasattr(branch._streamer, "_fTypeName"):
m = re.match(b"bitset<([1-9][0-9]*)>", branch._streamer._fTypeName)
if m is not None:
return asjagged(asstlbitset(int(m.group(1))), skipbytes=6)

if getattr(branch._streamer, "_fTypeName", None) == b"vector<bool>" or getattr(branch._streamer, "_fTypeName", None) == b"vector<Bool_t>":
return asjagged(asdtype(awkward.numpy.bool_), skipbytes=10)
return asjagged(asdtype(awkward.numpy.bool_), skipbytes=10, sizeat=6)
elif getattr(branch._streamer, "_fTypeName", None) == b"vector<char>" or getattr(branch._streamer, "_fTypeName", None) == b"vector<Char_t>":
return asjagged(asdtype("i1"), skipbytes=10)
return asjagged(asdtype("i1"), skipbytes=10, sizeat=6)
elif getattr(branch._streamer, "_fTypeName", None) == b"vector<unsigned char>" or getattr(branch._streamer, "_fTypeName", None) == b"vector<UChar_t>" or getattr(branch._streamer, "_fTypeName", None) == b"vector<Byte_t>":
return asjagged(asdtype("u1"), skipbytes=10)
return asjagged(asdtype("u1"), skipbytes=10, sizeat=6)
elif getattr(branch._streamer, "_fTypeName", None) == b"vector<short>" or getattr(branch._streamer, "_fTypeName", None) == b"vector<Short_t>":
return asjagged(asdtype("i2"), skipbytes=10)
return asjagged(asdtype("i2"), skipbytes=10, sizeat=6)
elif getattr(branch._streamer, "_fTypeName", None) == b"vector<unsigned short>" or getattr(branch._streamer, "_fTypeName", None) == b"vector<UShort_t>":
return asjagged(asdtype("u2"), skipbytes=10)
return asjagged(asdtype("u2"), skipbytes=10, sizeat=6)
elif getattr(branch._streamer, "_fTypeName", None) == b"vector<int>" or getattr(branch._streamer, "_fTypeName", None) == b"vector<Int_t>":
return asjagged(asdtype("i4"), skipbytes=10)
return asjagged(asdtype("i4"), skipbytes=10, sizeat=6)
elif getattr(branch._streamer, "_fTypeName", None) == b"vector<unsigned int>" or getattr(branch._streamer, "_fTypeName", None) == b"vector<UInt_t>":
return asjagged(asdtype("u4"), skipbytes=10)
return asjagged(asdtype("u4"), skipbytes=10, sizeat=6)
elif getattr(branch._streamer, "_fTypeName", None) == b"vector<long>" or getattr(branch._streamer, "_fTypeName", None) == b"vector<Long_t>":
return asjagged(asdtype("i8"), skipbytes=10)
return asjagged(asdtype("i8"), skipbytes=10, sizeat=6)
elif getattr(branch._streamer, "_fTypeName", None) == b"vector<unsigned long>" or getattr(branch._streamer, "_fTypeName", None) == b"vector<ULong64_t>":
return asjagged(asdtype("u8"), skipbytes=10)
return asjagged(asdtype("u8"), skipbytes=10, sizeat=6)
elif getattr(branch._streamer, "_fTypeName", None) == b"vector<float>" or getattr(branch._streamer, "_fTypeName", None) == b"vector<Float_t>":
return asjagged(asdtype("f4"), skipbytes=10)
return asjagged(asdtype("f4"), skipbytes=10, sizeat=6)
elif getattr(branch._streamer, "_fTypeName", None) == b"vector<double>" or getattr(branch._streamer, "_fTypeName", None) == b"vector<Double_t>":
return asjagged(asdtype("f8"), skipbytes=10)
return asjagged(asdtype("f8"), skipbytes=10, sizeat=6)
elif getattr(branch._streamer, "_fTypeName", None) == b"vector<string>":
return asgenobj(STLVector(STLString(awkward)), branch._context, 6)
else:
Expand Down Expand Up @@ -445,27 +445,27 @@ def transform(node, tofloat=True):
return asstring(skipbytes=1)

if branch._fClassName == b"vector<bool>" or branch._fClassName == b"vector<Bool_t>":
return asjagged(asdtype(awkward.numpy.bool_), skipbytes=10)
return asjagged(asdtype(awkward.numpy.bool_), skipbytes=10, sizeat=6)
elif branch._fClassName == b"vector<char>" or branch._fClassName == b"vector<Char_t>":
return asjagged(asdtype("i1"), skipbytes=10)
return asjagged(asdtype("i1"), skipbytes=10, sizeat=6)
elif branch._fClassName == b"vector<unsigned char>" or branch._fClassName == b"vector<UChar_t>" or branch._fClassName == b"vector<Byte_t>":
return asjagged(asdtype("u1"), skipbytes=10)
return asjagged(asdtype("u1"), skipbytes=10, sizeat=6)
elif branch._fClassName == b"vector<short>" or branch._fClassName == b"vector<Short_t>":
return asjagged(asdtype("i2"), skipbytes=10)
return asjagged(asdtype("i2"), skipbytes=10, sizeat=6)
elif branch._fClassName == b"vector<unsigned short>" or branch._fClassName == b"vector<UShort_t>":
return asjagged(asdtype("u2"), skipbytes=10)
return asjagged(asdtype("u2"), skipbytes=10, sizeat=6)
elif branch._fClassName == b"vector<int>" or branch._fClassName == b"vector<Int_t>":
return asjagged(asdtype("i4"), skipbytes=10)
return asjagged(asdtype("i4"), skipbytes=10, sizeat=6)
elif branch._fClassName == b"vector<unsigned int>" or branch._fClassName == b"vector<UInt_t>":
return asjagged(asdtype("u4"), skipbytes=10)
return asjagged(asdtype("u4"), skipbytes=10, sizeat=6)
elif branch._fClassName == b"vector<long>" or branch._fClassName == b"vector<Long_t>":
return asjagged(asdtype("i8"), skipbytes=10)
return asjagged(asdtype("i8"), skipbytes=10, sizeat=6)
elif branch._fClassName == b"vector<unsigned long>" or branch._fClassName == b"vector<ULong64_t>":
return asjagged(asdtype("u8"), skipbytes=10)
return asjagged(asdtype("u8"), skipbytes=10, sizeat=6)
elif branch._fClassName == b"vector<float>" or branch._fClassName == b"vector<Float_t>":
return asjagged(asdtype("f4"), skipbytes=10)
return asjagged(asdtype("f4"), skipbytes=10, sizeat=6)
elif branch._fClassName == b"vector<double>" or branch._fClassName == b"vector<Double_t>":
return asjagged(asdtype("f8"), skipbytes=10)
return asjagged(asdtype("f8"), skipbytes=10, sizeat=6)
elif branch._fClassName == b"vector<string>":
return asgenobj(STLVector(STLString(awkward)), branch._context, 6)

Expand Down
24 changes: 21 additions & 3 deletions uproot/interp/jagged.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,13 @@ class asjagged(uproot.interp.interp.Interpretation):
# makes __doc__ attribute mutable before Python 3.3
__metaclass__ = type.__new__(type, "type", (uproot.interp.interp.Interpretation.__metaclass__,), {})

# Constructor of asjagged, shown here as a rendered diff: the two signatures
# below are the same method without and with the sizeat parameter. The second
# signature is the one consistent with the body, which assigns sizeat.
# The constructor only stores its arguments on the instance.
def __init__(self, content, skipbytes=0):
def __init__(self, content, skipbytes=0, sizeat=None):
# content: nested interpretation object; its repr is embedded in __repr__ and
# destination() delegates to content.destination().
self.content = content
# skipbytes: added to each entry's byte offset in fromroot to find where that
# entry's content begins (default 0).
self.skipbytes = skipbytes
# sizeat: when not None, fromroot reads a big-endian 4-byte integer located
# sizeat bytes into each entry and uses it as that entry's item count when
# computing the stops of the resulting JaggedArray (default None).
self.sizeat = sizeat

def __repr__(self):
    """Return a constructor-style string for this interpretation.

    ``skipbytes`` is shown only when it is non-zero and ``sizeat`` only when
    it is not ``None``, i.e. only when they differ from the ``__init__``
    defaults (``skipbytes=0``, ``sizeat=None``).
    """
    parts = [repr(self.content)]
    if self.skipbytes != 0:
        parts.append("{0}".format(self.skipbytes))
    # The default for sizeat is None (not 0), so the test must be against
    # None; 0 is a real offset and must still be displayed. sizeat is written
    # as a keyword so it cannot be mistaken for skipbytes when skipbytes is
    # omitted from the output.
    if self.sizeat is not None:
        parts.append("sizeat={0}".format(self.sizeat))
    return "asjagged({0})".format(", ".join(parts))

def to(self, todtype=None, todims=None, skipbytes=None):
if skipbytes is None:
Expand Down Expand Up @@ -75,6 +76,16 @@ def fromroot(self, data, byteoffsets, local_entrystart, local_entrystop, keylen)
return self.awkward.JaggedArray(starts, stops, content)

else:
if self.sizeat is not None:
sizeat_bytestarts = byteoffsets[local_entrystart : local_entrystop] + self.sizeat
sizeat_good_bytestarts = sizeat_bytestarts[sizeat_bytestarts + 4 <= len(data)]
sizeat_mask = self.awkward.numpy.zeros(len(data), dtype=self.awkward.numpy.int8)
sizeat_mask[sizeat_good_bytestarts + 0] = 1
sizeat_mask[sizeat_good_bytestarts + 1] = 1
sizeat_mask[sizeat_good_bytestarts + 2] = 1
sizeat_mask[sizeat_good_bytestarts + 3] = 1
sizeat = data[sizeat_mask.view(self.awkward.numpy.bool_)].view(">i4")

bytestarts = byteoffsets[local_entrystart : local_entrystop ] + self.skipbytes
bytestops = byteoffsets[local_entrystart + 1 : local_entrystop + 1]

Expand Down Expand Up @@ -106,7 +117,14 @@ def fromroot(self, data, byteoffsets, local_entrystart, local_entrystop, keylen)
offsets[0] = 0
self.awkward.numpy.cumsum(counts, out=offsets[1:])

return self.awkward.JaggedArray(offsets[:-1], offsets[1:], content)
starts = offsets[:-1]
if self.sizeat is not None:
stops = starts + sizeat
if not (stops == offsets[1:]).all():
return self.awkward.JaggedArray(starts, stops, content).compact()

stops = offsets[1:]
return self.awkward.JaggedArray(starts, stops, content)

def destination(self, numitems, numentries):
content = self.content.destination(numitems, numentries)
Expand Down
2 changes: 1 addition & 1 deletion uproot/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import re

# Two assignments appear because this is a rendered diff (1 addition,
# 1 deletion); read top to bottom, the later value "3.11.4" is the one that
# takes effect.
__version__ = "3.11.3"
__version__ = "3.11.4"
# Public alias of __version__.
version = __version__
# Version components as a tuple of strings, split on "-" and ".".
version_info = tuple(re.split(r"[-\.]", __version__))

Expand Down

0 comments on commit a76458e

Please sign in to comment.