From 5c90b481ace60ca803bf3e4498dd3f1e1a1f5130 Mon Sep 17 00:00:00 2001 From: Luis Obis Date: Mon, 24 Oct 2022 14:11:13 +0200 Subject: [PATCH 01/30] fix: initialize empty `TObject` members on `to_TObjString` --- src/uproot/writing/identify.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/uproot/writing/identify.py b/src/uproot/writing/identify.py index 837edc022..322faf6f2 100644 --- a/src/uproot/writing/identify.py +++ b/src/uproot/writing/identify.py @@ -735,12 +735,16 @@ def to_TObjString(string): This function is for developers to create TObjString objects that can be written to ROOT files, to implement conversion routines. """ + tobject = uproot.models.TObject.Model_TObject.empty() + tobject._members["@fUniqueID"] = 0 + tobject._members["@fBits"] = 0 + tobjstring = uproot.models.TObjString.Model_TObjString(str(string)) tobjstring._deeply_writable = True tobjstring._cursor = None tobjstring._parent = None tobjstring._members = {} - tobjstring._bases = (uproot.models.TObject.Model_TObject(),) + tobjstring._bases = [tobject] tobjstring._num_bytes = len(string) + (1 if len(string) < 255 else 5) + 16 tobjstring._instance_version = 1 return tobjstring From a3d9c6a6b9b2a4f834ed40ee0f9dcf9dfccb0fbd Mon Sep 17 00:00:00 2001 From: Luis Obis Date: Mon, 24 Oct 2022 16:33:38 +0200 Subject: [PATCH 02/30] add test for serialization of `TObjString` --- tests/test_0349-write-TObjString.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/test_0349-write-TObjString.py b/tests/test_0349-write-TObjString.py index 0f04eaef5..0d32aecb5 100644 --- a/tests/test_0349-write-TObjString.py +++ b/tests/test_0349-write-TObjString.py @@ -78,3 +78,19 @@ def test_update(tmp_path): assert f6["subdir/wowie"] == "wowie" assert f6["subdir/zowie"] == "zowie" assert list(f6.file.streamers) == ["TObjString"] + + +def test_serialization(tmp_path): + filename = os.path.join(tmp_path, "whatever.root") + + string = "hey" + tobjstring = uproot.writing.identify.to_TObjString(string) + assert tobjstring.tojson()["_typename"] == "TObjString" # https://github.com/scikit-hep/uproot5/issues/762 + + with uproot.recreate(filename) as f1: + f1["first"] = tobjstring + f1["second"] = str(tobjstring) # also checks conversion to "str" + + with uproot.open(filename) as f2: + assert f2["first"] == f2["second"] + assert str(f2["first"]) == string From 1419d06bdd468de9e668bc340fbcd61f712b0125 Mon Sep 17 00:00:00 2001 From: Luis Obis Date: Mon, 24 Oct 2022 16:34:12 +0200 Subject: [PATCH 03/30] remove unused dependency on test --- tests/test_0349-write-TObjString.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_0349-write-TObjString.py b/tests/test_0349-write-TObjString.py index 0d32aecb5..097c91738 100644 --- a/tests/test_0349-write-TObjString.py +++ b/tests/test_0349-write-TObjString.py @@ -2,7 +2,6 @@ import os -import numpy as np import pytest import uproot From 149a27b1d536a77629a59405c9797d32e8265e97 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 24 Oct 2022 14:35:10 +0000 Subject: [PATCH 04/30] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_0349-write-TObjString.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_0349-write-TObjString.py b/tests/test_0349-write-TObjString.py index 097c91738..d036c194e 100644 --- a/tests/test_0349-write-TObjString.py +++ b/tests/test_0349-write-TObjString.py @@ -84,7 +84,9 @@ def test_serialization(tmp_path): string = "hey" tobjstring = uproot.writing.identify.to_TObjString(string) - assert tobjstring.tojson()["_typename"] == "TObjString" # https://github.com/scikit-hep/uproot5/issues/762 + assert ( + tobjstring.tojson()["_typename"] == "TObjString" + ) # https://github.com/scikit-hep/uproot5/issues/762 with uproot.recreate(filename) as f1: f1["first"] = tobjstring From 8209f7237f377dfd8a15932662a1c396d1abce42 Mon Sep 17 00:00:00 2001 From: Luis Obis Date: Mon, 24 Oct 2022 20:11:31 +0200 Subject: [PATCH 05/30] add `tojson` method to `TObjString` --- src/uproot/models/TObjString.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/uproot/models/TObjString.py b/src/uproot/models/TObjString.py index 544dc7faf..f51aaede7 100644 --- a/src/uproot/models/TObjString.py +++ b/src/uproot/models/TObjString.py @@ -64,6 +64,12 @@ def fTitle(self): writable = True + def tojson(self): + out = self._bases[0].tojson() # TObject + out["_typename"] = self.classname + out["fString"] = str(self) + return out + def _serialize(self, out, header, name, tobject_flags): where = len(out) for x in self._bases: From a30e8e1bcff10626a2db76c5814e16a54e74a17c Mon Sep 17 00:00:00 2001 From: Luis Obis Date: Mon, 24 Oct 2022 20:53:20 +0200 Subject: [PATCH 06/30] add additional check to `TObjString` write test --- tests/test_0349-write-TObjString.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_0349-write-TObjString.py b/tests/test_0349-write-TObjString.py index d036c194e..b1ee83651 100644 --- a/tests/test_0349-write-TObjString.py +++ b/tests/test_0349-write-TObjString.py @@ -87,6 +87,7 @@ def test_serialization(tmp_path): assert ( tobjstring.tojson()["_typename"] == "TObjString" ) # https://github.com/scikit-hep/uproot5/issues/762 + assert tobjstring.tojson()["fString"] == str(tobjstring) with uproot.recreate(filename) as f1: f1["first"] = tobjstring From 4c828c80ffc4fccb7b9da88dd67deed84aa71e5f Mon Sep 17 00:00:00 2001 From: Luis Obis Date: Mon, 24 Oct 2022 20:55:16 +0200 Subject: [PATCH 07/30] fix bad field in `TList` tojson conversion --- src/uproot/models/TList.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uproot/models/TList.py b/src/uproot/models/TList.py index 0990e3259..43dc1ae8c 100644 --- a/src/uproot/models/TList.py +++ b/src/uproot/models/TList.py @@ -81,7 +81,7 @@ def tojson(self): "_typename": "TList", "name": "TList", "arr": [x.tojson() for x in self._data], - "opt": [], + "opt": ["" for _ in self._data], } writable = True From 5c95c4b78e29fb98ffaac3b400777c7bcf6764df Mon Sep 17 00:00:00 2001 From: Luis Obis Date: Mon, 24 Oct 2022 21:04:12 +0200 Subject: [PATCH 08/30] add inexpensive `assert` to `TList` serialization --- src/uproot/models/TList.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/uproot/models/TList.py b/src/uproot/models/TList.py index 43dc1ae8c..7b638ac4d 100644 --- a/src/uproot/models/TList.py +++ b/src/uproot/models/TList.py @@ -91,6 +91,8 @@ def _to_writable_postprocess(self, original): self._options = original._options def _serialize(self, out, header, name, tobject_flags): + assert self._members["fSize"] == len(self._data) == len(self._options), "Fatal error in TList serialization." + import uproot.writing._cascade where = len(out) From 163c8a20432c1ad9585a1cc361248e80c3f8e156 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 24 Oct 2022 19:28:53 +0000 Subject: [PATCH 09/30] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/uproot/models/TList.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/uproot/models/TList.py b/src/uproot/models/TList.py index 7b638ac4d..8223c3157 100644 --- a/src/uproot/models/TList.py +++ b/src/uproot/models/TList.py @@ -91,7 +91,9 @@ def _to_writable_postprocess(self, original): self._options = original._options def _serialize(self, out, header, name, tobject_flags): - assert self._members["fSize"] == len(self._data) == len(self._options), "Fatal error in TList serialization." + assert ( + self._members["fSize"] == len(self._data) == len(self._options) + ), "Fatal error in TList serialization." import uproot.writing._cascade From 4da653fee0f28789fbb20abcad3a4541acd6701a Mon Sep 17 00:00:00 2001 From: Luis Obis Date: Tue, 25 Oct 2022 12:08:49 +0200 Subject: [PATCH 10/30] fix bad serialization of non-empty TList due to options (https://github.com/scikit-hep/uproot5/pull/763#issuecomment-1290149135) --- src/uproot/writing/identify.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/uproot/writing/identify.py b/src/uproot/writing/identify.py index 322faf6f2..8dd459e13 100644 --- a/src/uproot/writing/identify.py +++ b/src/uproot/writing/identify.py @@ -773,7 +773,8 @@ def to_TList(data, name=""): tlist._members["fName"] = name tlist._data = list(data) tlist._members["fSize"] = len(tlist._data) - tlist._options = [b""] * len(tlist._data) + # see https://github.com/scikit-hep/uproot5/pull/763#issuecomment-1290149135 + tlist._options = [uproot.serialization.string("")] * len(tlist._data) if all(x._deeply_writable for x in tlist._data): tlist._deeply_writable = True From db3595446c4c79d2fcd6bb5e17c180d9d7937bcb Mon Sep 17 00:00:00 2001 From: Luis Obis Date: Tue, 25 Oct 2022 12:30:03 +0200 Subject: [PATCH 11/30] add tests for TList serialization --- tests/test_0351-write-TList.py | 58 ++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 tests/test_0351-write-TList.py diff --git a/tests/test_0351-write-TList.py b/tests/test_0351-write-TList.py new file mode 100644 index 000000000..091b98837 --- /dev/null +++ b/tests/test_0351-write-TList.py @@ -0,0 +1,58 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/uproot4/blob/main/LICENSE + +import os + +import pytest + +import uproot +import uproot.writing + + +def test_write_empty(tmp_path): + filename = os.path.join(tmp_path, "whatever.root") + + tlist = uproot.writing.identify.to_TList([]) + + with uproot.recreate(filename) as f: + f["test"] = tlist + + with uproot.open(filename) as f2: + assert len(f2["test"]) == 0 + + +def test_write_single_key(tmp_path): + filename = os.path.join(tmp_path, "whatever.root") + + tlist = uproot.writing.identify.to_TList([uproot.writing.identify.to_TObjString("test string")]) + + with uproot.recreate(filename) as f: + f["test"] = tlist + + with uproot.open(filename) as f2: + assert len(f2["test"]) == 1 + + +def test_write_nested(tmp_path): + filename = os.path.join(tmp_path, "whatever.root") + + tlist_child = uproot.writing.identify.to_TList( + [uproot.writing.identify.to_TObjString(s) for s in "this is a test".split()]) + + entries = [uproot.writing.identify.to_TObjString("this string goes in the front"), tlist_child, + uproot.writing.identify.to_TObjString("test string")] + + tlist = uproot.writing.identify.to_TList(entries) + + with uproot.recreate(filename) as f: + f["test"] = tlist + + with uproot.open(filename) as f2: + parent_list = f2["test"] + assert len(parent_list) == 3 + assert isinstance(parent_list[0], uproot.models.TObjString.Model_TObjString) + assert str(parent_list[0]) == "this string goes in the front" + assert str(parent_list[2]) == "test string" + child_list = parent_list[1] + assert isinstance(child_list, uproot.models.TList.Model_TList) + assert len(child_list) == 4 + assert " ".join([str(s) for s in child_list]) == "this is a test" From 5fff9ddc05bd1ae1d6f71cb8b79822fbce70c23b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 25 Oct 2022 10:31:31 +0000 Subject: [PATCH 12/30] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_0351-write-TList.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tests/test_0351-write-TList.py b/tests/test_0351-write-TList.py index 091b98837..2355b5b84 100644 --- a/tests/test_0351-write-TList.py +++ b/tests/test_0351-write-TList.py @@ -23,7 +23,9 @@ def test_write_empty(tmp_path): def test_write_single_key(tmp_path): filename = os.path.join(tmp_path, "whatever.root") - tlist = uproot.writing.identify.to_TList([uproot.writing.identify.to_TObjString("test string")]) + tlist = uproot.writing.identify.to_TList( + [uproot.writing.identify.to_TObjString("test string")] + ) with uproot.recreate(filename) as f: f["test"] = tlist @@ -36,10 +38,14 @@ def test_write_nested(tmp_path): filename = os.path.join(tmp_path, "whatever.root") tlist_child = uproot.writing.identify.to_TList( - [uproot.writing.identify.to_TObjString(s) for s in "this is a test".split()]) - - entries = [uproot.writing.identify.to_TObjString("this string goes in the front"), tlist_child, - uproot.writing.identify.to_TObjString("test string")] + [uproot.writing.identify.to_TObjString(s) for s in "this is a test".split()] + ) + + entries = [ + uproot.writing.identify.to_TObjString("this string goes in the front"), + tlist_child, + uproot.writing.identify.to_TObjString("test string"), + ] tlist = uproot.writing.identify.to_TList(entries) From bc95096d78e145e170c05c7bdd56f779d7c08df7 Mon Sep 17 00:00:00 2001 From: Luis Obis Date: Tue, 25 Oct 2022 12:36:53 +0200 Subject: [PATCH 13/30] fixed bad `__repr__` for `TObject` --- src/uproot/models/TObject.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uproot/models/TObject.py b/src/uproot/models/TObject.py index 5e5617b41..36230d56d 100644 --- a/src/uproot/models/TObject.py +++ b/src/uproot/models/TObject.py @@ -114,7 +114,7 @@ def awkward_form(cls, file, context): def __repr__(self): return "".format( - self._members.get("fUniqueID"), self._members.get("fBits"), id(self) + self.member("@fUniqueID"), self.member("@fBits"), id(self) ) def tojson(self): From 6697fa093d79f2b2ab79b8e2effef7f2630949a5 Mon Sep 17 00:00:00 2001 From: Luis Obis Date: Tue, 25 Oct 2022 12:38:38 +0200 Subject: [PATCH 14/30] add serialization of `fUniqueID` to `TObject` --- src/uproot/models/TObject.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uproot/models/TObject.py b/src/uproot/models/TObject.py index 36230d56d..2a37ba8cf 100644 --- a/src/uproot/models/TObject.py +++ b/src/uproot/models/TObject.py @@ -69,7 +69,7 @@ def read_members(self, chunk, cursor, context, file): writable = True def _serialize(self, out, header, name, tobject_flags): - out.append(b"\x00\x01" + _tobject_format2.pack(0, tobject_flags)) + out.append(b"\x00\x01" + _tobject_format2.pack(self.member("@fUniqueID"), tobject_flags)) @classmethod def strided_interpretation( From 94e93a8c33c40eb5c8dcb5c66167ff0710aa78f9 Mon Sep 17 00:00:00 2001 From: Luis Obis Date: Tue, 25 Oct 2022 13:48:45 +0200 Subject: [PATCH 15/30] add `empty` method to `TObject` --- src/uproot/models/TObject.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/uproot/models/TObject.py b/src/uproot/models/TObject.py index 2a37ba8cf..57219d27e 100644 --- a/src/uproot/models/TObject.py +++ b/src/uproot/models/TObject.py @@ -124,5 +124,13 @@ def tojson(self): "fBits": self.member("@fBits"), } + @classmethod + def empty(cls): + self = uproot.model.Model.empty() + self.__class__ = cls + self._members["@fUniqueID"] = 0 + self._members["@fBits"] = 0 + return self + uproot.classes["TObject"] = Model_TObject From 46424e1057546ebfd1f1674ef2ba80ab54a6c412 Mon Sep 17 00:00:00 2001 From: Luis Obis Date: Tue, 25 Oct 2022 13:50:12 +0200 Subject: [PATCH 16/30] remove redundant `TObject` member initialization --- src/uproot/writing/identify.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/uproot/writing/identify.py b/src/uproot/writing/identify.py index 8dd459e13..8b7b88c9b 100644 --- a/src/uproot/writing/identify.py +++ b/src/uproot/writing/identify.py @@ -736,8 +736,6 @@ def to_TObjString(string): written to ROOT files, to implement conversion routines. """ tobject = uproot.models.TObject.Model_TObject.empty() - tobject._members["@fUniqueID"] = 0 - tobject._members["@fBits"] = 0 tobjstring = uproot.models.TObjString.Model_TObjString(str(string)) tobjstring._deeply_writable = True @@ -765,8 +763,6 @@ def to_TList(data, name=""): ) tobject = uproot.models.TObject.Model_TObject.empty() - tobject._members["@fUniqueID"] = 0 - tobject._members["@fBits"] = 0 tlist = uproot.models.TList.Model_TList.empty() tlist._bases.append(tobject) @@ -879,8 +875,6 @@ def to_TAxis( written to ROOT files, to implement conversion routines. """ tobject = uproot.models.TObject.Model_TObject.empty() - tobject._members["@fUniqueID"] = 0 - tobject._members["@fBits"] = 0 tnamed = uproot.models.TNamed.Model_TNamed.empty() tnamed._deeply_writable = True @@ -1023,8 +1017,6 @@ def to_TH1x( TH1C, TH1D, TH1F, TH1I, or TH1S depends on the dtype of the ``data`` array. """ tobject = uproot.models.TObject.Model_TObject.empty() - tobject._members["@fUniqueID"] = 0 - tobject._members["@fBits"] = 0 tnamed = uproot.models.TNamed.Model_TNamed.empty() tnamed._deeply_writable = True From 6b1f1883a4526aaee3690f3349026cc4dc965fa5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 25 Oct 2022 11:51:25 +0000 Subject: [PATCH 17/30] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/uproot/models/TObject.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/uproot/models/TObject.py b/src/uproot/models/TObject.py index 57219d27e..92c43ea69 100644 --- a/src/uproot/models/TObject.py +++ b/src/uproot/models/TObject.py @@ -69,7 +69,10 @@ def read_members(self, chunk, cursor, context, file): writable = True def _serialize(self, out, header, name, tobject_flags): - out.append(b"\x00\x01" + _tobject_format2.pack(self.member("@fUniqueID"), tobject_flags)) + out.append( + b"\x00\x01" + + _tobject_format2.pack(self.member("@fUniqueID"), tobject_flags) + ) @classmethod def strided_interpretation( From 303514d30f18ca4c420f545d6b1facd5097d69d3 Mon Sep 17 00:00:00 2001 From: Luis Antonio Obis Aparicio <35803280+lobis@users.noreply.github.com> Date: Tue, 25 Oct 2022 23:07:09 +0200 Subject: [PATCH 18/30] Update src/uproot/writing/identify.py Co-authored-by: Jim Pivarski --- src/uproot/writing/identify.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uproot/writing/identify.py b/src/uproot/writing/identify.py index 8b7b88c9b..8fc9d01a8 100644 --- a/src/uproot/writing/identify.py +++ b/src/uproot/writing/identify.py @@ -742,7 +742,7 @@ def to_TObjString(string): tobjstring._cursor = None tobjstring._parent = None tobjstring._members = {} - tobjstring._bases = [tobject] + tobjstring._bases = (tobject,) tobjstring._num_bytes = len(string) + (1 if len(string) < 255 else 5) + 16 tobjstring._instance_version = 1 return tobjstring From d6e154308dee74c8a21170578e32af0f20827551 Mon Sep 17 00:00:00 2001 From: Luis Obis Date: Tue, 25 Oct 2022 23:29:45 +0200 Subject: [PATCH 19/30] moved `TList` serialization list to `serialize` method --- src/uproot/models/TList.py | 2 +- src/uproot/writing/identify.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/uproot/models/TList.py b/src/uproot/models/TList.py index 8223c3157..324860838 100644 --- a/src/uproot/models/TList.py +++ b/src/uproot/models/TList.py @@ -106,7 +106,7 @@ def _serialize(self, out, header, name, tobject_flags): for datum, option in zip(self._data, self._options): uproot.serialization._serialize_object_any(out, datum, None) - out.append(option) + out.append(uproot.serialization.string(option)) if header: num_bytes = sum(len(x) for x in out[where:]) diff --git a/src/uproot/writing/identify.py b/src/uproot/writing/identify.py index 8fc9d01a8..e7ac769a9 100644 --- a/src/uproot/writing/identify.py +++ b/src/uproot/writing/identify.py @@ -769,8 +769,7 @@ def to_TList(data, name=""): tlist._members["fName"] = name tlist._data = list(data) tlist._members["fSize"] = len(tlist._data) - # see https://github.com/scikit-hep/uproot5/pull/763#issuecomment-1290149135 - tlist._options = [uproot.serialization.string("")] * len(tlist._data) + tlist._options = [""] * len(tlist._data) if all(x._deeply_writable for x in tlist._data): tlist._deeply_writable = True From 0621c67a64becfa8d7e847e2af24274eb27bcac2 Mon Sep 17 00:00:00 2001 From: Luis Obis Date: Wed, 26 Oct 2022 01:14:23 +0200 Subject: [PATCH 20/30] add helper serialization method `bytestring` as suggested in https://github.com/scikit-hep/uproot5/pull/763#issuecomment-1290149135 by @agoose77 --- src/uproot/serialization.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/uproot/serialization.py b/src/uproot/serialization.py index fcfcf3c81..d4edc932a 100644 --- a/src/uproot/serialization.py +++ b/src/uproot/serialization.py @@ -24,12 +24,24 @@ def string(data): is preceded by a 1-byte length; otherwise, it is preceded by ``b'\xff'`` and a 4-byte length. """ - bytestring = data.encode(errors="surrogateescape") - length = len(bytestring) + return bytestring( + data.encode(errors="surrogateescape") + ) + + +def bytestring(data): + """ + Converts Python bytes into a length-prefixed bytestring, ready to be written to a file. + + If the string's byte representation (UTF-8) has fewer than 255 bytes, it + is preceded by a 1-byte length; otherwise, it is preceded by ``b'\xff'`` and a + 4-byte length. + """ + length = len(data) if length < 255: - return struct.pack(">B%ds" % length, length, bytestring) + return struct.pack(">B%ds" % length, length, data) else: - return struct.pack(">BI%ds" % length, 255, length, bytestring) + return struct.pack(">BI%ds" % length, 255, length, data) def numbytes_version(num_bytes, version): From 8e6ad2e17404b9c387bfab8fe60e97b511562e7e Mon Sep 17 00:00:00 2001 From: Luis Obis Date: Wed, 26 Oct 2022 01:16:02 +0200 Subject: [PATCH 21/30] keep `TList` `_options` as python `bytes` and update serialization to use the new `bytestring` helper --- src/uproot/models/TList.py | 2 +- src/uproot/writing/identify.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/uproot/models/TList.py b/src/uproot/models/TList.py index 324860838..347b0ba45 100644 --- a/src/uproot/models/TList.py +++ b/src/uproot/models/TList.py @@ -106,7 +106,7 @@ def _serialize(self, out, header, name, tobject_flags): for datum, option in zip(self._data, self._options): uproot.serialization._serialize_object_any(out, datum, None) - out.append(uproot.serialization.string(option)) + out.append(uproot.serialization.bytestring(option)) if header: num_bytes = sum(len(x) for x in out[where:]) diff --git a/src/uproot/writing/identify.py b/src/uproot/writing/identify.py index e7ac769a9..2a2239685 100644 --- a/src/uproot/writing/identify.py +++ b/src/uproot/writing/identify.py @@ -769,7 +769,7 @@ def to_TList(data, name=""): tlist._members["fName"] = name tlist._data = list(data) tlist._members["fSize"] = len(tlist._data) - tlist._options = [""] * len(tlist._data) + tlist._options = [b""] * len(tlist._data) if all(x._deeply_writable for x in tlist._data): tlist._deeply_writable = True From 897972ff5d3730ed96c221528be9111a4eb2745e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 25 Oct 2022 23:16:57 +0000 Subject: [PATCH 22/30] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/uproot/serialization.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/uproot/serialization.py b/src/uproot/serialization.py index d4edc932a..71218dd84 100644 --- a/src/uproot/serialization.py +++ b/src/uproot/serialization.py @@ -24,9 +24,7 @@ def string(data): is preceded by a 1-byte length; otherwise, it is preceded by ``b'\xff'`` and a 4-byte length. """ - return bytestring( - data.encode(errors="surrogateescape") - ) + return bytestring(data.encode(errors="surrogateescape")) def bytestring(data): From 9a500c14b19094d41337c678b1ba16754e1921f3 Mon Sep 17 00:00:00 2001 From: Luis Obis Date: Wed, 26 Oct 2022 10:37:11 +0200 Subject: [PATCH 23/30] reset `serialization.py` to `main` branch status --- src/uproot/serialization.py | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/src/uproot/serialization.py b/src/uproot/serialization.py index 71218dd84..fcfcf3c81 100644 --- a/src/uproot/serialization.py +++ b/src/uproot/serialization.py @@ -24,22 +24,12 @@ def string(data): is preceded by a 1-byte length; otherwise, it is preceded by ``b'\xff'`` and a 4-byte length. """ - return bytestring(data.encode(errors="surrogateescape")) - - -def bytestring(data): - """ - Converts Python bytes into a length-prefixed bytestring, ready to be written to a file. - - If the string's byte representation (UTF-8) has fewer than 255 bytes, it - is preceded by a 1-byte length; otherwise, it is preceded by ``b'\xff'`` and a - 4-byte length. - """ - length = len(data) + bytestring = data.encode(errors="surrogateescape") + length = len(bytestring) if length < 255: - return struct.pack(">B%ds" % length, length, data) + return struct.pack(">B%ds" % length, length, bytestring) else: - return struct.pack(">BI%ds" % length, 255, length, data) + return struct.pack(">BI%ds" % length, 255, length, bytestring) def numbytes_version(num_bytes, version): From 884ec4de4ec22e25974d72e814b9f02b578d22c0 Mon Sep 17 00:00:00 2001 From: Luis Obis Date: Wed, 26 Oct 2022 10:37:53 +0200 Subject: [PATCH 24/30] Revert "keep `TList` `_options` as python `bytes` and update serialization to use the new `bytestring` helper" This reverts commit 8e6ad2e17404b9c387bfab8fe60e97b511562e7e. --- src/uproot/models/TList.py | 2 +- src/uproot/writing/identify.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/uproot/models/TList.py b/src/uproot/models/TList.py index 347b0ba45..324860838 100644 --- a/src/uproot/models/TList.py +++ b/src/uproot/models/TList.py @@ -106,7 +106,7 @@ def _serialize(self, out, header, name, tobject_flags): for datum, option in zip(self._data, self._options): uproot.serialization._serialize_object_any(out, datum, None) - out.append(uproot.serialization.bytestring(option)) + out.append(uproot.serialization.string(option)) if header: num_bytes = sum(len(x) for x in out[where:]) diff --git a/src/uproot/writing/identify.py b/src/uproot/writing/identify.py index 2a2239685..e7ac769a9 100644 --- a/src/uproot/writing/identify.py +++ b/src/uproot/writing/identify.py @@ -769,7 +769,7 @@ def to_TList(data, name=""): tlist._members["fName"] = name tlist._data = list(data) tlist._members["fSize"] = len(tlist._data) - tlist._options = [b""] * len(tlist._data) + tlist._options = [""] * len(tlist._data) if all(x._deeply_writable for x in tlist._data): tlist._deeply_writable = True From 3c8b85bb57e42089340054411ce7af7bb0f72e6e Mon Sep 17 00:00:00 2001 From: Luis Obis Date: Wed, 26 Oct 2022 10:39:02 +0200 Subject: [PATCH 25/30] Revert "[pre-commit.ci] auto fixes from pre-commit.com hooks" This reverts commit 897972ff5d3730ed96c221528be9111a4eb2745e. # Conflicts: # src/uproot/serialization.py --- src/uproot/serialization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uproot/serialization.py b/src/uproot/serialization.py index fcfcf3c81..7005f968d 100644 --- a/src/uproot/serialization.py +++ b/src/uproot/serialization.py @@ -80,4 +80,4 @@ def serialize_object_any(model, name=None): """ out = [] _serialize_object_any(out, model, name) - return b"".join(out) + return b"".join(out) \ No newline at end of file From 45720ffc1239b69b42c4720db65173657e182be8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 26 Oct 2022 08:41:10 +0000 Subject: [PATCH 26/30] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/uproot/serialization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uproot/serialization.py b/src/uproot/serialization.py index 7005f968d..fcfcf3c81 100644 --- a/src/uproot/serialization.py +++ b/src/uproot/serialization.py @@ -80,4 +80,4 @@ def serialize_object_any(model, name=None): """ out = [] _serialize_object_any(out, model, name) - return b"".join(out) \ No newline at end of file + return b"".join(out) From 6dde481ed074468c790d6759ee5b2670d3f857ea Mon Sep 17 00:00:00 2001 From: Luis Obis Date: Wed, 26 Oct 2022 10:43:42 +0200 Subject: [PATCH 27/30] Revert "reset `serialization.py` to `main` branch status" This reverts commit 9a500c14b19094d41337c678b1ba16754e1921f3. --- src/uproot/serialization.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/uproot/serialization.py b/src/uproot/serialization.py index fcfcf3c81..71218dd84 100644 --- a/src/uproot/serialization.py +++ b/src/uproot/serialization.py @@ -24,12 +24,22 @@ def string(data): is preceded by a 1-byte length; otherwise, it is preceded by ``b'\xff'`` and a 4-byte length. """ - bytestring = data.encode(errors="surrogateescape") - length = len(bytestring) + return bytestring(data.encode(errors="surrogateescape")) + + +def bytestring(data): + """ + Converts Python bytes into a length-prefixed bytestring, ready to be written to a file. + + If the string's byte representation (UTF-8) has fewer than 255 bytes, it + is preceded by a 1-byte length; otherwise, it is preceded by ``b'\xff'`` and a + 4-byte length. + """ + length = len(data) if length < 255: - return struct.pack(">B%ds" % length, length, bytestring) + return struct.pack(">B%ds" % length, length, data) else: - return struct.pack(">BI%ds" % length, 255, length, bytestring) + return struct.pack(">BI%ds" % length, 255, length, data) def numbytes_version(num_bytes, version): From 3eac7cafbfbfceb8afd072c714590691619ecb8a Mon Sep 17 00:00:00 2001 From: Luis Obis Date: Wed, 26 Oct 2022 10:43:48 +0200 Subject: [PATCH 28/30] Revert "Revert "keep `TList` `_options` as python `bytes` and update serialization to use the new `bytestring` helper"" This reverts commit 884ec4de4ec22e25974d72e814b9f02b578d22c0. --- src/uproot/models/TList.py | 2 +- src/uproot/writing/identify.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/uproot/models/TList.py b/src/uproot/models/TList.py index 324860838..347b0ba45 100644 --- a/src/uproot/models/TList.py +++ b/src/uproot/models/TList.py @@ -106,7 +106,7 @@ def _serialize(self, out, header, name, tobject_flags): for datum, option in zip(self._data, self._options): uproot.serialization._serialize_object_any(out, datum, None) - out.append(uproot.serialization.string(option)) + out.append(uproot.serialization.bytestring(option)) if header: num_bytes = sum(len(x) for x in out[where:]) diff --git a/src/uproot/writing/identify.py b/src/uproot/writing/identify.py index e7ac769a9..2a2239685 100644 --- a/src/uproot/writing/identify.py +++ b/src/uproot/writing/identify.py @@ -769,7 +769,7 @@ def to_TList(data, name=""): tlist._members["fName"] = name tlist._data = list(data) tlist._members["fSize"] = len(tlist._data) - tlist._options = [""] * len(tlist._data) + tlist._options = [b""] * len(tlist._data) if all(x._deeply_writable for x in tlist._data): tlist._deeply_writable = True From cb6deb7d21fa0ab1f2933a37dd48fef2c6b28dd8 Mon Sep 17 00:00:00 2001 From: Luis Antonio Obis Aparicio <35803280+lobis@users.noreply.github.com> Date: Thu, 27 Oct 2022 09:38:19 +0200 Subject: [PATCH 29/30] Update src/uproot/models/TObject.py Co-authored-by: Angus Hollands --- src/uproot/models/TObject.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/uproot/models/TObject.py b/src/uproot/models/TObject.py index 8af4a3eb0..2016df227 100644 --- a/src/uproot/models/TObject.py +++ b/src/uproot/models/TObject.py @@ -129,8 +129,7 @@ def tojson(self): @classmethod def empty(cls): - self = uproot.model.Model.empty() - self.__class__ = cls + self = super(Model_TObject, cls).empty() self._members["@fUniqueID"] = 0 self._members["@fBits"] = 0 return self From 2d6917490b86a7cb89884d5b08e9bcb7ac0424e1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 27 Oct 2022 07:38:53 +0000 Subject: [PATCH 30/30] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/uproot/models/TObject.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uproot/models/TObject.py b/src/uproot/models/TObject.py index 2016df227..1303e60b5 100644 --- a/src/uproot/models/TObject.py +++ b/src/uproot/models/TObject.py @@ -129,7 +129,7 @@ def tojson(self): @classmethod def empty(cls): - self = super(Model_TObject, cls).empty() + self = super().empty() self._members["@fUniqueID"] = 0 self._members["@fBits"] = 0 return self