Skip to content

Commit

Permalink
fix: working TList serialization (#763)
Browse files Browse the repository at this point in the history
* fix: initialize empty `TObject` members on `to_TObjString`

* add test for serialization of `TObjString`

* remove unused dependency on test

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* add `tojson` method to `TObjString`

* add additional check to `TObjString` write test

* fix bad field in `TList` tojson conversion

* add inexpensive `assert` to `TList` serialization

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fix bad serialization of non-empty TList due to options (#763 (comment))

* add tests for TList serialization

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fixed bad `__repr__` for `TObject`

* add serialization of `fUniqueID` to `TObject`

* add `empty` method to `TObject`

* remove redundant `TObject` member initialization

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update src/uproot/writing/identify.py

Co-authored-by: Jim Pivarski <jpivarski@users.noreply.github.com>

* moved `TList` serialization list to `serialize` method

* add helper serialization method `bytestring` as suggested in #763 (comment) by @agoose77

* keep `TList` `_options` as python `bytes` and update serialization to use the new `bytestring` helper

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* reset `serialization.py` to `main` branch status

* Revert "keep `TList` `_options` as python `bytes` and update serialization to use the new `bytestring` helper"

This reverts commit 8e6ad2e.

* Revert "[pre-commit.ci] auto fixes from pre-commit.com hooks"

This reverts commit 897972f.

# Conflicts:
#	src/uproot/serialization.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Revert "reset `serialization.py` to `main` branch status"

This reverts commit 9a500c1.

* Revert "Revert "keep `TList` `_options` as python `bytes` and update serialization to use the new `bytestring` helper""

This reverts commit 884ec4d.

* Update src/uproot/models/TObject.py

Co-authored-by: Angus Hollands <goosey15@gmail.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Jim Pivarski <jpivarski@users.noreply.github.com>
Co-authored-by: Angus Hollands <goosey15@gmail.com>
  • Loading branch information
4 people authored Oct 28, 2022
1 parent 33777b1 commit cb8c776
Show file tree
Hide file tree
Showing 7 changed files with 124 additions and 16 deletions.
8 changes: 6 additions & 2 deletions src/uproot/models/TList.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def tojson(self):
"_typename": "TList",
"name": "TList",
"arr": [x.tojson() for x in self._data],
"opt": [],
"opt": ["" for _ in self._data],
}

writable = True
Expand All @@ -91,6 +91,10 @@ def _to_writable_postprocess(self, original):
self._options = original._options

def _serialize(self, out, header, name, tobject_flags):
assert (
self._members["fSize"] == len(self._data) == len(self._options)
), "Fatal error in TList serialization."

import uproot.writing._cascade

where = len(out)
Expand All @@ -102,7 +106,7 @@ def _serialize(self, out, header, name, tobject_flags):

for datum, option in zip(self._data, self._options):
uproot.serialization._serialize_object_any(out, datum, None)
out.append(option)
out.append(uproot.serialization.bytestring(option))

if header:
num_bytes = sum(len(x) for x in out[where:])
Expand Down
6 changes: 6 additions & 0 deletions src/uproot/models/TObjString.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,12 @@ def fTitle(self):

writable = True

def tojson(self):
    """
    Returns a JSON-like dict for this object: the dict produced by the first
    base's ``tojson`` (the TObject part), with ``"_typename"`` overwritten by
    this object's ``classname`` and ``"fString"`` set to ``str(self)``.
    """
    out = self._bases[0].tojson()  # TObject
    out["_typename"] = self.classname
    out["fString"] = str(self)
    return out

def _serialize(self, out, header, name, tobject_flags):
where = len(out)
for x in self._bases:
Expand Down
14 changes: 12 additions & 2 deletions src/uproot/models/TObject.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,10 @@ def read_members(self, chunk, cursor, context, file):
writable = True

def _serialize(self, out, header, name, tobject_flags):
out.append(b"\x00\x01" + _tobject_format2.pack(0, tobject_flags))
out.append(
b"\x00\x01"
+ _tobject_format2.pack(self.member("@fUniqueID"), tobject_flags)
)

@classmethod
def strided_interpretation(
Expand Down Expand Up @@ -114,7 +117,7 @@ def awkward_form(cls, file, context):

def __repr__(self):
return "<TObject {} {} at 0x{:012x}>".format(
self._members.get("fUniqueID"), self._members.get("fBits"), id(self)
self.member("@fUniqueID"), self.member("@fBits"), id(self)
)

def tojson(self):
Expand All @@ -124,5 +127,12 @@ def tojson(self):
"fBits": self.member("@fBits"),
}

@classmethod
def empty(cls):
    """
    Builds an instance through the parent class's ``empty`` and then sets its
    ``@fUniqueID`` and ``@fBits`` members to zero before returning it.
    """
    instance = super().empty()
    for member_name in ("@fUniqueID", "@fBits"):
        instance._members[member_name] = 0
    return instance


uproot.classes["TObject"] = Model_TObject
18 changes: 14 additions & 4 deletions src/uproot/serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,22 @@ def string(data):
is preceded by a 1-byte length; otherwise, it is preceded by ``b'\xff'`` and a
4-byte length.
"""
bytestring = data.encode(errors="surrogateescape")
length = len(bytestring)
return bytestring(data.encode(errors="surrogateescape"))


def bytestring(data):
    """
    Converts Python bytes into a length-prefixed bytestring, ready to be written to a file.

    Args:
        data (bytes): The raw bytes to be prefixed with their length.

    If ``data`` has fewer than 255 bytes, it is preceded by a 1-byte length;
    otherwise, it is preceded by a single ``0xff`` marker byte and a 4-byte
    big-endian length.
    """
    length = len(data)
    if length < 255:
        # Short form: one unsigned length byte, then the payload.
        return struct.pack(">B%ds" % length, length, data)
    else:
        # Long form: 255 marks that a 4-byte unsigned length follows.
        return struct.pack(">BI%ds" % length, 255, length, data)


def numbytes_version(num_bytes, version):
Expand Down
10 changes: 3 additions & 7 deletions src/uproot/writing/identify.py
Original file line number Diff line number Diff line change
Expand Up @@ -735,12 +735,14 @@ def to_TObjString(string):
This function is for developers to create TObjString objects that can be
written to ROOT files, to implement conversion routines.
"""
tobject = uproot.models.TObject.Model_TObject.empty()

tobjstring = uproot.models.TObjString.Model_TObjString(str(string))
tobjstring._deeply_writable = True
tobjstring._cursor = None
tobjstring._parent = None
tobjstring._members = {}
tobjstring._bases = (uproot.models.TObject.Model_TObject(),)
tobjstring._bases = (tobject,)
tobjstring._num_bytes = len(string) + (1 if len(string) < 255 else 5) + 16
tobjstring._instance_version = 1
return tobjstring
Expand All @@ -761,8 +763,6 @@ def to_TList(data, name=""):
)

tobject = uproot.models.TObject.Model_TObject.empty()
tobject._members["@fUniqueID"] = 0
tobject._members["@fBits"] = 0

tlist = uproot.models.TList.Model_TList.empty()
tlist._bases.append(tobject)
Expand Down Expand Up @@ -874,8 +874,6 @@ def to_TAxis(
written to ROOT files, to implement conversion routines.
"""
tobject = uproot.models.TObject.Model_TObject.empty()
tobject._members["@fUniqueID"] = 0
tobject._members["@fBits"] = 0

tnamed = uproot.models.TNamed.Model_TNamed.empty()
tnamed._deeply_writable = True
Expand Down Expand Up @@ -1018,8 +1016,6 @@ def to_TH1x(
TH1C, TH1D, TH1F, TH1I, or TH1S depends on the dtype of the ``data`` array.
"""
tobject = uproot.models.TObject.Model_TObject.empty()
tobject._members["@fUniqueID"] = 0
tobject._members["@fBits"] = 0

tnamed = uproot.models.TNamed.Model_TNamed.empty()
tnamed._deeply_writable = True
Expand Down
20 changes: 19 additions & 1 deletion tests/test_0349-write-TObjString.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import os

import numpy as np
import pytest

import uproot
Expand Down Expand Up @@ -78,3 +77,22 @@ def test_update(tmp_path):
assert f6["subdir/wowie"] == "wowie"
assert f6["subdir/zowie"] == "zowie"
assert list(f6.file.streamers) == ["TObjString"]


def test_serialization(tmp_path):
    """
    Checks the JSON form of a TObjString built by ``to_TObjString`` and that it
    round-trips through a file identically to the equivalent plain ``str``.
    """
    string = "hey"
    path = os.path.join(tmp_path, "whatever.root")

    tobjstring = uproot.writing.identify.to_TObjString(string)
    assert (
        tobjstring.tojson()["_typename"] == "TObjString"
    )  # https://github.com/scikit-hep/uproot5/issues/762
    assert tobjstring.tojson()["fString"] == str(tobjstring)

    with uproot.recreate(path) as outfile:
        outfile["first"] = tobjstring
        outfile["second"] = str(tobjstring)  # also checks conversion to "str"

    with uproot.open(path) as infile:
        assert infile["first"] == infile["second"]
        assert str(infile["first"]) == string
64 changes: 64 additions & 0 deletions tests/test_0351-write-TList.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/uproot4/blob/main/LICENSE

import os

import pytest

import uproot
import uproot.writing


def test_write_empty(tmp_path):
    """Writes a TList built from an empty list and checks it reads back with length 0."""
    path = os.path.join(tmp_path, "whatever.root")
    empty_tlist = uproot.writing.identify.to_TList([])

    with uproot.recreate(path) as outfile:
        outfile["test"] = empty_tlist

    with uproot.open(path) as infile:
        assert len(infile["test"]) == 0


def test_write_single_key(tmp_path):
    """Writes a TList holding one TObjString and checks it reads back with length 1."""
    path = os.path.join(tmp_path, "whatever.root")

    only_entry = uproot.writing.identify.to_TObjString("test string")
    single_tlist = uproot.writing.identify.to_TList([only_entry])

    with uproot.recreate(path) as outfile:
        outfile["test"] = single_tlist

    with uproot.open(path) as infile:
        assert len(infile["test"]) == 1


def test_write_nested(tmp_path):
    """
    Writes a TList containing a TObjString, a child TList of four TObjStrings,
    and another TObjString, then checks types, lengths and string contents
    after reading the file back.
    """
    path = os.path.join(tmp_path, "whatever.root")
    to_tobjstring = uproot.writing.identify.to_TObjString

    words = "this is a test".split()
    tlist_child = uproot.writing.identify.to_TList(
        [to_tobjstring(word) for word in words]
    )

    front = to_tobjstring("this string goes in the front")
    back = to_tobjstring("test string")
    tlist = uproot.writing.identify.to_TList([front, tlist_child, back])

    with uproot.recreate(path) as outfile:
        outfile["test"] = tlist

    with uproot.open(path) as infile:
        parent_list = infile["test"]
        assert len(parent_list) == 3
        assert isinstance(parent_list[0], uproot.models.TObjString.Model_TObjString)
        assert str(parent_list[0]) == "this string goes in the front"
        assert str(parent_list[2]) == "test string"

        child_list = parent_list[1]
        assert isinstance(child_list, uproot.models.TList.Model_TList)
        assert len(child_list) == 4
        assert " ".join(str(item) for item in child_list) == "this is a test"

0 comments on commit cb8c776

Please sign in to comment.