Skip to content
This repository has been archived by the owner on Jun 21, 2022. It is now read-only.

Commit

Permalink
solved the compression > 16 MB issue
Browse files Browse the repository at this point in the history
  • Loading branch information
jpivarski committed Dec 11, 2017
1 parent 34a5c0e commit 8b33061
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 48 deletions.
26 changes: 3 additions & 23 deletions uproot/interp/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,18 +74,9 @@ def toarray(self, array):
return asarray(self.fromdtype, array, self.fromdims)

def __repr__(self):
args = []

if self.fromdtype.byteorder == ">":
args.append(repr(str(self.fromdtype)))
else:
args.append(repr(self.fromdtype))

args = [repr(str(self.fromdtype))]
if self.todtype.newbyteorder(">") != self.fromdtype.newbyteorder(">"):
if self.todtype.byteorder == "=":
args.append(repr(str(self.todtype)))
else:
args.append(repr(self.todtype))
args.append(repr(str(self.todtype)))

if self.fromdims != ():
args.append(repr(self.fromdims))
Expand Down Expand Up @@ -174,18 +165,7 @@ def todims(self):
return self.toarray.shape[1:]

def __repr__(self):
# Build the "asarray(...)" text from self.fromdtype and self.todtype.
# NOTE(review): this span comes from a rendered diff whose +/- markers and
# indentation were lost. Going by the "3 additions & 23 deletions" summary for
# this file, the statements from "args = []" through the second if/else look
# like the removed version, and the single "args = [...]" assignment near the
# end looks like its replacement -- confirm against the repository.
args = []

# Apparently-removed form: a source dtype whose byteorder is ">" is shown via
# str(), any other byteorder via the dtype's own repr().
if self.fromdtype.byteorder == ">":
args.append(repr(str(self.fromdtype)))
else:
args.append(repr(self.fromdtype))

# Apparently-removed form: same str()/repr() choice for the target, keyed on
# byteorder "=", wrapped in an "<array dtype=... at 0x...>" placeholder.
if self.todtype.byteorder == "=":
args.append("<array dtype={0} at 0x{1:012x}>".format(repr(str(self.todtype)), id(self.todtype)))
else:
args.append("<array dtype={0} at 0x{1:012x}>".format(repr(self.todtype), id(self.todtype)))

# Apparently-added form: both entries always go through str(); the placeholder
# carries id(self.todtype) as a zero-padded, 12-digit hexadecimal number.
args = [repr(str(self.fromdtype)), "<array dtype={0} at 0x{1:012x}>".format(repr(str(self.todtype)), id(self.todtype))]
return "asarray(" + ", ".join(args) + ")"

@property
Expand Down
69 changes: 45 additions & 24 deletions uproot/source/compressed.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import struct

import numpy

import uproot.const
Expand Down Expand Up @@ -122,34 +124,53 @@ def parent(self):
def threadlocal(self):
return self

_header = struct.Struct("2sBBBBBBB")

def _prepare(self):
# Fill self._uncompressed with the decompressed bytes; does nothing when
# self._uncompressed is already set (not None).
# NOTE(review): this span comes from a rendered diff whose +/- markers and
# indentation were lost. The statements from "algo = ...tostring()" through the
# first "self._uncompressed = numpy.frombuffer(...)" look like the removed
# single-block implementation; everything from "start = cursor.index" onward
# looks like the added multi-block loop -- confirm against the repository.
if self._uncompressed is None:
cursor = self._cursor.copied()
# Apparently-removed form: peek at the two-byte algorithm tag, pick the
# algorithm, skip a fixed-size header (9 bytes, or 9 + 8 for "L4"), and
# decompress everything that remains as one block.
algo = self._compressed.data(cursor.index, cursor.index + 2).tostring()

if algo == b"ZL":
compression = self.compression.copy(uproot.const.kZLIB)
skip = 9 # https://github.com/root-project/root/blob/master/core/zip/src/RZip.cxx#L217

elif algo == b"XZ":
compression = self.compression.copy(uproot.const.kLZMA)
skip = 9 # https://github.com/root-project/root/blob/master/core/lzma/src/ZipLZMA.c#L81

elif algo == b"L4":
compression = self.compression.copy(uproot.const.kLZ4)
skip = 9 + 8 # https://github.com/root-project/root/blob/master/core/lz4/src/ZipLZ4.cxx#L38

elif algo == b"CS":
raise ValueError("unsupported compression algorithm: 'old' (according to ROOT comments, hasn't been used in 20+ years!)")

else:
raise ValueError("unrecognized compression algorithm: {0}".format(algo))

header = cursor.bytes(self._compressed, skip).tostring()
asstr = compression.decompress(self._compressed, cursor, self._compressedbytes - skip, self._uncompressedbytes)
if len(asstr) != self._uncompressedbytes:
raise ValueError("block with header {0} ({1}) decompressed to {2} bytes, but the object key says the decompressed size should be {3} bytes".format(repr(header), compression.algoname, len(asstr), self._uncompressedbytes))
self._uncompressed = numpy.frombuffer(asstr, dtype=numpy.uint8)
# Apparently-added form: keep reading blocks until the cursor has advanced
# by self._compressedbytes from where it started.
start = cursor.index
filled = 0
numblocks = 0
while cursor.index - start < self._compressedbytes:
# https://github.com/root-project/root/blob/master/core/zip/src/RZip.cxx#L217
# https://github.com/root-project/root/blob/master/core/lzma/src/ZipLZMA.c#L81
# https://github.com/root-project/root/blob/master/core/lz4/src/ZipLZ4.cxx#L38
# self._header is struct format "2sBBBBBBB": a two-byte tag followed by seven
# single bytes. Presumably cursor.fields unpacks it at the cursor position and
# advances the cursor -- confirm against the cursor implementation.
algo, method, c1, c2, c3, u1, u2, u3 = cursor.fields(self._compressed, self._header)
# Each size is assembled from three bytes, least-significant first, so one
# block header can describe at most 2**24 - 1 bytes (just under 16 MB).
compressedbytes = c1 + (c2 << 8) + (c3 << 16)
uncompressedbytes = u1 + (u2 << 8) + (u3 << 16)

if algo == b"ZL":
compression = self.compression.copy(uproot.const.kZLIB)
elif algo == b"XZ":
compression = self.compression.copy(uproot.const.kLZMA)
elif algo == b"L4":
compression = self.compression.copy(uproot.const.kLZ4)
# For "L4", 8 extra bytes follow the header (a checksum, per the FIXME); they
# are skipped and subtracted from the payload length.
cursor.skip(8) # FIXME: use this checksum!
compressedbytes -= 8
elif algo == b"CS":
raise ValueError("unsupported compression algorithm: 'old' (according to ROOT comments, hasn't been used in 20+ years!)")
else:
raise ValueError("unrecognized compression algorithm: {0}".format(algo))

asstr = compression.decompress(self._compressed, cursor, compressedbytes, uncompressedbytes)
numblocks += 1

# NOTE(review): "header" is assigned only in the apparently-removed code above;
# if that code is gone, formatting this message would raise NameError instead
# of the intended ValueError. The message also reports self._uncompressedbytes
# although the check compares against the per-block uncompressedbytes --
# confirm which value is meant.
if len(asstr) != uncompressedbytes:
raise ValueError("block with header {0} ({1}) decompressed to {2} bytes, but the object key says the decompressed size should be {3} bytes".format(repr(header), compression.algoname, len(asstr), self._uncompressedbytes))
# Refuse a block that would take the running total past the expected size.
if filled + uncompressedbytes > self._uncompressedbytes:
raise ValueError("uncompressed {0} bytes in {1} blocks so far, but expected only {2} bytes".format(filled + uncompressedbytes, numblocks, self._uncompressedbytes))

# First block: if it alone accounts for the whole expected size, wrap its bytes
# with numpy.frombuffer and return; otherwise allocate the full-size output
# buffer that this and later blocks are copied into.
if filled == 0:
if uncompressedbytes == self._uncompressedbytes: # usual case: only one block
self._uncompressed = numpy.frombuffer(asstr, dtype=numpy.uint8)
return
else:
self._uncompressed = numpy.empty(self._uncompressedbytes, dtype=numpy.uint8)

# Copy this block's bytes into its slot and advance the fill offset.
# NOTE(review): nothing in the visible lines checks that filled has reached
# self._uncompressedbytes once the loop ends -- confirm whether a short result
# is detected elsewhere.
self._uncompressed[filled : filled + uncompressedbytes] = numpy.frombuffer(asstr, dtype=numpy.uint8)
filled += uncompressedbytes

def size(self):
self._prepare()
Expand Down
2 changes: 1 addition & 1 deletion uproot/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

import re

# NOTE(review): rendered diff with +/- markers lost; per the "1 addition &
# 1 deletion" summary for this file, "2.5.7" is presumably the removed value
# and "2.5.8" the added one -- confirm against the repository.
__version__ = "2.5.7"
__version__ = "2.5.8"
version = __version__
# Version components as a tuple of strings, split on "-" or ".".
version_info = tuple(re.split(r"[-\.]", __version__))

Expand Down

0 comments on commit 8b33061

Please sign in to comment.