Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: working TList serialization #763

Merged
merged 34 commits into from
Oct 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
5c90b48
fix: initialize empty `TObject` members on `to_TObjString`
lobis Oct 24, 2022
a3d9c6a
add test for serialization of `TObjString`
lobis Oct 24, 2022
1419d06
remove unused dependency on test
lobis Oct 24, 2022
149a27b
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 24, 2022
8209f72
add `tojson` method to `TObjString`
lobis Oct 24, 2022
a30e8e1
add additional check to `TObjString` write test
lobis Oct 24, 2022
4c828c8
fix bad field in `TList` tojson conversion
lobis Oct 24, 2022
5c95c4b
add inexpensive `assert` to `TList` serialization
lobis Oct 24, 2022
163c8a2
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 24, 2022
4da653f
fix bad serialization of non-empty TList due to options (https://gith…
lobis Oct 25, 2022
db35954
add tests for TList serialization
lobis Oct 25, 2022
5fff9dd
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 25, 2022
bc95096
fixed bad `__repr__` for `TObject`
lobis Oct 25, 2022
6697fa0
add serialization of `fUniqueID` to `TObject`
lobis Oct 25, 2022
94e93a8
add `empty` method to `TObject`
lobis Oct 25, 2022
46424e1
remove redundant `TObject` member initialization
lobis Oct 25, 2022
50c94a7
Merge branch 'to_TObjString-fix' of github.com:lobis/uproot5 into to_…
lobis Oct 25, 2022
6b1f188
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 25, 2022
303514d
Update src/uproot/writing/identify.py
lobis Oct 25, 2022
d6e1543
moved `TList` serialization list to `serialize` method
lobis Oct 25, 2022
0621c67
add helper serialization method `bytestring` as suggested in https://…
lobis Oct 25, 2022
8e6ad2e
keep `TList` `_options` as python `bytes` and update serialization to…
lobis Oct 25, 2022
897972f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 25, 2022
9a500c1
reset `serialization.py` to `main` branch status
lobis Oct 26, 2022
884ec4d
Revert "keep `TList` `_options` as python `bytes` and update serializ…
lobis Oct 26, 2022
3c8b85b
Revert "[pre-commit.ci] auto fixes from pre-commit.com hooks"
lobis Oct 26, 2022
45720ff
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 26, 2022
6dde481
Revert "reset `serialization.py` to `main` branch status"
lobis Oct 26, 2022
3eac7ca
Revert "Revert "keep `TList` `_options` as python `bytes` and update …
lobis Oct 26, 2022
b8d5207
Merge branch 'main' into to_TObjString-fix
lobis Oct 26, 2022
ee0b5ba
Merge branch 'main' into to_TObjString-fix
lobis Oct 26, 2022
683c6ff
Merge branch 'main' into to_TObjString-fix
lobis Oct 27, 2022
cb6deb7
Update src/uproot/models/TObject.py
lobis Oct 27, 2022
2d69174
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 27, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions src/uproot/models/TList.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def tojson(self):
"_typename": "TList",
"name": "TList",
"arr": [x.tojson() for x in self._data],
"opt": [],
"opt": ["" for _ in self._data],
}

writable = True
Expand All @@ -91,6 +91,10 @@ def _to_writable_postprocess(self, original):
self._options = original._options

def _serialize(self, out, header, name, tobject_flags):
assert (
self._members["fSize"] == len(self._data) == len(self._options)
), "Fatal error in TList serialization."

import uproot.writing._cascade

where = len(out)
Expand All @@ -102,7 +106,7 @@ def _serialize(self, out, header, name, tobject_flags):

for datum, option in zip(self._data, self._options):
uproot.serialization._serialize_object_any(out, datum, None)
out.append(option)
out.append(uproot.serialization.bytestring(option))

if header:
num_bytes = sum(len(x) for x in out[where:])
Expand Down
6 changes: 6 additions & 0 deletions src/uproot/models/TObjString.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,12 @@ def fTitle(self):

writable = True

def tojson(self):
    """
    Build the JSON-style dictionary describing this object.

    The dictionary produced by the first base (the TObject part) is used as
    the starting point; its ``_typename`` entry is then set to
    ``self.classname`` and an ``fString`` entry holding ``str(self)`` is added.
    """
    tobject_base = self._bases[0]  # TObject
    result = tobject_base.tojson()
    result.update({"_typename": self.classname, "fString": str(self)})
    return result

def _serialize(self, out, header, name, tobject_flags):
where = len(out)
for x in self._bases:
Expand Down
14 changes: 12 additions & 2 deletions src/uproot/models/TObject.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,10 @@ def read_members(self, chunk, cursor, context, file):
writable = True

def _serialize(self, out, header, name, tobject_flags):
    """
    Append this object's serialized TObject header to ``out``.

    Exactly one bytes object is appended: ``b"\\x00\\x01"`` followed by
    ``_tobject_format2`` packing of this object's ``@fUniqueID`` member and
    the ``tobject_flags`` passed by the caller.

    ``header`` and ``name`` are accepted for signature compatibility with the
    other ``_serialize`` methods but are not used here.
    """
    # Write the stored unique ID rather than a hard-coded 0, and append only
    # once so the header is not duplicated in the output.
    out.append(
        b"\x00\x01"
        + _tobject_format2.pack(self.member("@fUniqueID"), tobject_flags)
    )

@classmethod
def strided_interpretation(
Expand Down Expand Up @@ -114,7 +117,7 @@ def awkward_form(cls, file, context):

def __repr__(self):
    """
    Return ``<TObject {fUniqueID} {fBits} at 0x{id}>`` for this object.

    The two values are read with ``self.member`` under the ``@``-prefixed
    keys (``@fUniqueID`` and ``@fBits``), the same keys this class's other
    methods use, so the representation shows the stored values.
    """
    return "<TObject {} {} at 0x{:012x}>".format(
        self.member("@fUniqueID"), self.member("@fBits"), id(self)
    )

def tojson(self):
Expand All @@ -124,5 +127,12 @@ def tojson(self):
"fBits": self.member("@fBits"),
}

@classmethod
def empty(cls):
    """
    Create an instance through the parent class's ``empty`` and initialize
    its TObject members.

    Returns:
        The new instance, with ``@fUniqueID`` and ``@fBits`` both set to 0
        in its ``_members``.
    """
    instance = super().empty()
    for key in ("@fUniqueID", "@fBits"):
        instance._members[key] = 0
    return instance


uproot.classes["TObject"] = Model_TObject
18 changes: 14 additions & 4 deletions src/uproot/serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,22 @@ def string(data):
is preceded by a 1-byte length; otherwise, it is preceded by ``b'\xff'`` and a
4-byte length.
"""
bytestring = data.encode(errors="surrogateescape")
length = len(bytestring)
return bytestring(data.encode(errors="surrogateescape"))


def bytestring(data):
    r"""
    Converts Python bytes into a length-prefixed bytestring, ready to be written
    to a file.

    If ``data`` has fewer than 255 bytes, it is preceded by a 1-byte length;
    otherwise, it is preceded by ``b'\xff'`` and a 4-byte big-endian length.

    Unlike :func:`string`, no text encoding is applied: ``data`` must already
    be bytes.
    """
    length = len(data)
    if length < 255:
        # Short form: a single unsigned byte holds the length.
        return struct.pack(">B%ds" % length, length, data)
    else:
        # Long form: the marker byte 255 is followed by a 4-byte unsigned length.
        return struct.pack(">BI%ds" % length, 255, length, data)


def numbytes_version(num_bytes, version):
Expand Down
10 changes: 3 additions & 7 deletions src/uproot/writing/identify.py
Original file line number Diff line number Diff line change
Expand Up @@ -735,12 +735,14 @@ def to_TObjString(string):
This function is for developers to create TObjString objects that can be
written to ROOT files, to implement conversion routines.
"""
tobject = uproot.models.TObject.Model_TObject.empty()

tobjstring = uproot.models.TObjString.Model_TObjString(str(string))
tobjstring._deeply_writable = True
tobjstring._cursor = None
tobjstring._parent = None
tobjstring._members = {}
tobjstring._bases = (uproot.models.TObject.Model_TObject(),)
tobjstring._bases = (tobject,)
tobjstring._num_bytes = len(string) + (1 if len(string) < 255 else 5) + 16
tobjstring._instance_version = 1
return tobjstring
Expand All @@ -761,8 +763,6 @@ def to_TList(data, name=""):
)

tobject = uproot.models.TObject.Model_TObject.empty()
tobject._members["@fUniqueID"] = 0
tobject._members["@fBits"] = 0

tlist = uproot.models.TList.Model_TList.empty()
tlist._bases.append(tobject)
Expand Down Expand Up @@ -874,8 +874,6 @@ def to_TAxis(
written to ROOT files, to implement conversion routines.
"""
tobject = uproot.models.TObject.Model_TObject.empty()
tobject._members["@fUniqueID"] = 0
tobject._members["@fBits"] = 0

tnamed = uproot.models.TNamed.Model_TNamed.empty()
tnamed._deeply_writable = True
Expand Down Expand Up @@ -1018,8 +1016,6 @@ def to_TH1x(
TH1C, TH1D, TH1F, TH1I, or TH1S depends on the dtype of the ``data`` array.
"""
tobject = uproot.models.TObject.Model_TObject.empty()
tobject._members["@fUniqueID"] = 0
tobject._members["@fBits"] = 0

tnamed = uproot.models.TNamed.Model_TNamed.empty()
tnamed._deeply_writable = True
Expand Down
20 changes: 19 additions & 1 deletion tests/test_0349-write-TObjString.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import os

import numpy as np
import pytest

import uproot
Expand Down Expand Up @@ -78,3 +77,22 @@ def test_update(tmp_path):
assert f6["subdir/wowie"] == "wowie"
assert f6["subdir/zowie"] == "zowie"
assert list(f6.file.streamers) == ["TObjString"]


def test_serialization(tmp_path):
    """
    Check the JSON form of a ``TObjString`` and that writing it to a file
    gives the same result as writing the equivalent plain ``str``.
    """
    text = "hey"
    path = os.path.join(tmp_path, "whatever.root")

    written = uproot.writing.identify.to_TObjString(text)

    # https://github.com/scikit-hep/uproot5/issues/762
    assert written.tojson()["_typename"] == "TObjString"
    assert written.tojson()["fString"] == str(written)

    with uproot.recreate(path) as outfile:
        outfile["first"] = written
        outfile["second"] = str(written)  # also checks conversion to "str"

    with uproot.open(path) as infile:
        assert infile["first"] == infile["second"]
        assert str(infile["first"]) == text
64 changes: 64 additions & 0 deletions tests/test_0351-write-TList.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/uproot4/blob/main/LICENSE

import os

import pytest

import uproot
import uproot.writing


def test_write_empty(tmp_path):
    """An empty ``TList`` written to a file reads back with length 0."""
    path = os.path.join(tmp_path, "whatever.root")
    empty_list = uproot.writing.identify.to_TList([])

    with uproot.recreate(path) as outfile:
        outfile["test"] = empty_list

    with uproot.open(path) as infile:
        assert len(infile["test"]) == 0


def test_write_single_key(tmp_path):
    """A ``TList`` holding one ``TObjString`` reads back with length 1."""
    path = os.path.join(tmp_path, "whatever.root")

    only_entry = uproot.writing.identify.to_TObjString("test string")
    single_list = uproot.writing.identify.to_TList([only_entry])

    with uproot.recreate(path) as outfile:
        outfile["test"] = single_list

    with uproot.open(path) as infile:
        assert len(infile["test"]) == 1


def test_write_nested(tmp_path):
    """
    A ``TList`` containing strings and another ``TList`` keeps its order,
    element types, and nested contents after being written and read back.
    """
    path = os.path.join(tmp_path, "whatever.root")

    inner_strings = [
        uproot.writing.identify.to_TObjString(word)
        for word in "this is a test".split()
    ]
    inner_list = uproot.writing.identify.to_TList(inner_strings)

    front = uproot.writing.identify.to_TObjString("this string goes in the front")
    back = uproot.writing.identify.to_TObjString("test string")
    outer_list = uproot.writing.identify.to_TList([front, inner_list, back])

    with uproot.recreate(path) as outfile:
        outfile["test"] = outer_list

    with uproot.open(path) as infile:
        readback = infile["test"]
        assert len(readback) == 3
        assert isinstance(readback[0], uproot.models.TObjString.Model_TObjString)
        assert str(readback[0]) == "this string goes in the front"
        assert str(readback[2]) == "test string"

        nested = readback[1]
        assert isinstance(nested, uproot.models.TList.Model_TList)
        assert len(nested) == 4
        assert " ".join(str(item) for item in nested) == "this is a test"