Skip to content
This repository has been archived by the owner on Jun 21, 2022. It is now read-only.

Commit

Permalink
Merge pull request #369 from scikit-hep/flush-alt
Browse files Browse the repository at this point in the history
Removing the flushing interface
  • Loading branch information
reikdas authored Oct 6, 2019
2 parents 7d11d3a + 5cdba80 commit 03840b3
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 305 deletions.
46 changes: 1 addition & 45 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4479,52 +4479,8 @@ extend method:
|
| Remember to add entries to all the branches, and make sure the number of entries added to each branch is the same!
|
| You can set the flush parameter to True or False in the extend method.
.. code-block:: python3
f["t"].extend({"branch1": numpy.array([1, 2, 3, 4, 5]), "branch2": [6, 7, 8, 9, 10]}, flush=True)
By default, it is True. This means that these values are immediately
flushed to the file.

| You can choose not to flush the baskets immediately by setting flush =
False.
.. code-block:: python3
f["t"].extend({"branch1": numpy.array([1, 2, 3, 4, 5]), "branch2": [1, 2, 3, 4, 5]}, flush=False)
| The baskets are added to a buffer, which is flushed to the file
depending on the flush size set by the user.
The flush size can be set at the branch level and the tree level.

To set it at the branch level:

.. code-block:: python3
t = uproot.newbranch("int32", flushsize="10 KB")
and to set it at the tree level:

.. code-block:: python3
tree = uproot.newtree({"demoflush": t}, flushsize=1000)
You can also use the append function to add baskets to your file if you
need to just add a single value at the end of your current basket
buffer:

.. code-block:: python3
f["t"].append({"branch1": 1, "branch2": 2})
Make sure to add entries to every branch, similar to the extend method.

The append method does not provide a way to explicitly flush data to the
file; the data is added to the end of the buffer and is flushed based on
the branch and tree flush sizes.
Keep in mind that writing a lot of small baskets is much less performant (it is slower and increases the size of the file) than writing large arrays into the TTree as a single basket: uproot's implementation is optimized for large, array-oriented operations.

**Low level interface**

Expand Down
174 changes: 1 addition & 173 deletions tests/test_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -1720,7 +1720,7 @@ def test_tree_renames(tmp_path):
for i in range(19):
assert a[0] == treedata[i]

def test_ttree_extend_flush_true(tmp_path):
def test_ttree_extend(tmp_path):
filename = join(str(tmp_path), "example.root")

b = uproot.newbranch(">i4")
Expand All @@ -1741,163 +1741,6 @@ def test_ttree_extend_flush_true(tmp_path):
assert branch1[i] == branch1_test[i]
assert branch2[i] == branch2_test[i]

def test_ttree_extend_flush_false(tmp_path):
    """Extend a single ">i4" branch with flush=False and read it back via ROOT."""
    filename = join(str(tmp_path), "example.root")

    int_branch = uproot.newbranch(">i4")
    tree = uproot.newtree({"intBranch": int_branch}, flushsize=5)
    with uproot.recreate(filename) as f:
        f["t"] = tree
        new_entries = {"intBranch": numpy.array([1, 2])}
        f["t"].extend(new_entries, flush=False)

    # Re-open the written file with ROOT and compare entry by entry.
    root_file = ROOT.TFile.Open(filename)
    root_tree = root_file.Get("t")
    actual = root_tree.AsMatrix(["intBranch"])
    expected = numpy.array([1, 2], dtype=">i4")
    for idx, want in enumerate(expected):
        assert actual[idx] == want

def test_ttree_extend_flush_false_readback_and_proper_close(tmp_path):
    """Check numbaskets is 0 right after an unflushed extend, then read back via ROOT."""
    filename = join(str(tmp_path), "example.root")

    int_branch = uproot.newbranch(">i4")
    tree = uproot.newtree({"intBranch": int_branch}, flushsize=9)
    with uproot.recreate(filename) as f:
        f["t"] = tree
        new_entries = {"intBranch": numpy.array([1, 2])}
        f["t"].extend(new_entries, flush=False)
        # Checked while the file is still open, before the with-block exits.
        assert f["t"]["intBranch"].numbaskets == 0

    # Re-open the written file with ROOT and compare entry by entry.
    root_file = ROOT.TFile.Open(filename)
    root_tree = root_file.Get("t")
    actual = root_tree.AsMatrix(["intBranch"])
    expected = numpy.array([1, 2], dtype=">i4")
    for idx, want in enumerate(expected):
        assert actual[idx] == want

def test_ttree_extend_flush_false_multibranch_same_type(tmp_path):
    """Extend two ">i4" branches with flush=False and read both back via ROOT."""
    filename = join(str(tmp_path), "example.root")

    # The same newbranch object is deliberately registered under both names.
    shared_branch = uproot.newbranch(">i4")
    tree = uproot.newtree(
        {"intBranch": shared_branch, "intBranch2": shared_branch},
        flushsize=9,
    )
    with uproot.recreate(filename) as f:
        f["t"] = tree
        new_entries = {
            "intBranch": numpy.array([1, 2]),
            "intBranch2": numpy.array([11, 12]),
        }
        f["t"].extend(new_entries, flush=False)

    # Re-open the written file with ROOT and compare entry by entry.
    root_file = ROOT.TFile.Open(filename)
    root_tree = root_file.Get("t")
    actual_first = root_tree.AsMatrix(["intBranch"])
    actual_second = root_tree.AsMatrix(["intBranch2"])
    expected_first = numpy.array([1, 2], dtype=">i4")
    expected_second = numpy.array([11, 12], dtype=">i4")
    for idx, (want_first, want_second) in enumerate(zip(expected_first, expected_second)):
        assert actual_first[idx] == want_first
        assert actual_second[idx] == want_second

def test_ttree_extend_flush_false_multibranch_diff_type(tmp_path):
    """Extend a ">i4" and a ">i8" branch with flush=False and read both back via ROOT."""
    filename = join(str(tmp_path), "example.root")

    branch_i4 = uproot.newbranch(">i4")
    branch_i8 = uproot.newbranch(">i8")
    tree = uproot.newtree(
        {"intBranch": branch_i4, "intBranch2": branch_i8},
        flushsize=9,
    )
    with uproot.recreate(filename) as f:
        f["t"] = tree
        new_entries = {
            "intBranch": numpy.array([1, 2]),
            "intBranch2": numpy.array([11, 12]),
        }
        f["t"].extend(new_entries, flush=False)

    # Re-open the written file with ROOT and compare entry by entry.
    root_file = ROOT.TFile.Open(filename)
    root_tree = root_file.Get("t")
    actual_first = root_tree.AsMatrix(["intBranch"])
    actual_second = root_tree.AsMatrix(["intBranch2"])
    expected_first = numpy.array([1, 2], dtype=">i4")
    expected_second = numpy.array([11, 12], dtype=">i8")
    for idx, (want_first, want_second) in enumerate(zip(expected_first, expected_second)):
        assert actual_first[idx] == want_first
        assert actual_second[idx] == want_second

def test_ttree_extend_flush_false_multibasket(tmp_path):
    """Call extend(flush=False) three times on one branch and read all six values back."""
    filename = join(str(tmp_path), "example.root")

    int_branch = uproot.newbranch(">i4")
    tree = uproot.newtree({"intBranch": int_branch}, flushsize=5)
    with uproot.recreate(filename) as f:
        f["t"] = tree
        new_entries = {"intBranch": numpy.array([1, 2])}
        # The same two-entry payload is appended three times.
        for _ in range(3):
            f["t"].extend(new_entries, flush=False)

    # Re-open the written file with ROOT and compare entry by entry.
    root_file = ROOT.TFile.Open(filename)
    root_tree = root_file.Get("t")
    actual = root_tree.AsMatrix(["intBranch"])
    expected = numpy.array([1, 2]*3, dtype=">i4")
    for idx, want in enumerate(expected):
        assert actual[idx] == want

def test_ttree_extend_flush_false_multibasket_multibranch_same_type(tmp_path):
    """Call extend(flush=False) three times on two ">i4" branches and read both back."""
    filename = join(str(tmp_path), "example.root")

    # The same newbranch object is deliberately registered under both names.
    shared_branch = uproot.newbranch(">i4")
    tree = uproot.newtree(
        {"intBranch": shared_branch, "intBranch2": shared_branch},
        flushsize=5,
    )
    with uproot.recreate(filename) as f:
        f["t"] = tree
        new_entries = {
            "intBranch": numpy.array([1, 2]),
            "intBranch2": numpy.array([11, 12]),
        }
        # The same two-entry payload is appended three times.
        for _ in range(3):
            f["t"].extend(new_entries, flush=False)

    # Re-open the written file with ROOT and compare entry by entry.
    root_file = ROOT.TFile.Open(filename)
    root_tree = root_file.Get("t")
    actual_first = root_tree.AsMatrix(["intBranch"])
    actual_second = root_tree.AsMatrix(["intBranch2"])
    expected_first = numpy.array([1, 2] * 3, dtype=">i4")
    expected_second = numpy.array([11, 12] * 3, dtype=">i4")
    for idx, (want_first, want_second) in enumerate(zip(expected_first, expected_second)):
        assert actual_first[idx] == want_first
        assert actual_second[idx] == want_second

def test_ttree_extend_flush_false_multibasket_multibranch_diff_type(tmp_path):
    """Call extend(flush=False) three times on a ">i4" and a ">i8" branch and read both back."""
    filename = join(str(tmp_path), "example.root")

    branch_i4 = uproot.newbranch(">i4")
    branch_i8 = uproot.newbranch(">i8")
    tree = uproot.newtree(
        {"intBranch": branch_i4, "intBranch2": branch_i8},
        flushsize=5,
    )
    with uproot.recreate(filename) as f:
        f["t"] = tree
        new_entries = {
            "intBranch": numpy.array([1, 2]),
            "intBranch2": numpy.array([11, 12]),
        }
        # The same two-entry payload is appended three times.
        for _ in range(3):
            f["t"].extend(new_entries, flush=False)

    # Re-open the written file with ROOT and compare entry by entry.
    root_file = ROOT.TFile.Open(filename)
    root_tree = root_file.Get("t")
    actual_first = root_tree.AsMatrix(["intBranch"])
    actual_second = root_tree.AsMatrix(["intBranch2"])
    expected_first = numpy.array([1, 2] * 3, dtype=">i4")
    expected_second = numpy.array([11, 12] * 3, dtype=">i8")
    for idx, (want_first, want_second) in enumerate(zip(expected_first, expected_second)):
        assert actual_first[idx] == want_first
        assert actual_second[idx] == want_second

def test_ttree_extend_flush_false_diff_flush(tmp_path):
    """Check numbaskets when the branch flushsize (5) differs from the tree flushsize (12)."""
    filename = join(str(tmp_path), "example.root")

    int_branch = uproot.newbranch(">i4", flushsize=5)
    tree = uproot.newtree({"intBranch": int_branch}, flushsize=12)
    with uproot.recreate(filename) as f:
        f["t"] = tree

        # After one unflushed entry the test expects no baskets to be counted yet.
        first_entries = {"intBranch": numpy.array([1])}
        f["t"].extend(first_entries, flush=False)
        assert f["t"]["intBranch"].numbaskets == 0

        # After two more unflushed entries the test expects two baskets.
        more_entries = {"intBranch": numpy.array([2, 3])}
        f["t"].extend(more_entries, flush=False)
        assert f["t"]["intBranch"].numbaskets == 2

def test_issue340(tmp_path):
filename = join(str(tmp_path), "example.root")

Expand All @@ -1910,21 +1753,6 @@ def test_issue340(tmp_path):
for i in range(10):
assert t["normal"].basket(0)[i] == a[i]

def test_basket_append(tmp_path):
    """Append a single value to an "int32" branch and read it back via ROOT."""
    filename = join(str(tmp_path), "example.root")

    int_branch = newbranch("int32")
    tree = newtree({"intBranch": int_branch})
    with uproot.recreate(filename, compression=None) as f:
        f["t"] = tree
        f["t"].append({"intBranch": 1})

    # Re-open the written file with ROOT and check the first entry.
    root_file = ROOT.TFile.Open(filename)
    root_tree = root_file.Get("t")
    values = root_tree.AsMatrix().astype(">i4")
    assert values[0] == 1

def test_rdf(tmp_path):
filename = join(str(tmp_path), "example.root")

Expand Down
8 changes: 3 additions & 5 deletions uproot/write/TFile.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,12 @@ def _normalizewhere(where):

return where, cycle

def newtree(self, name, branches={}, flushsize=30000, title="", **options):
def newtree(self, name, branches={}, title="", **options):
if "compression" in options:
self.__setitem__(name, uproot.write.objects.TTree.newtree(branches, flushsize, title, compression=options["compression"]))
self.__setitem__(name, uproot.write.objects.TTree.newtree(branches, title, compression=options["compression"]))
del options["compression"]
else:
self.__setitem__(name, uproot.write.objects.TTree.newtree(branches, flushsize, title))
self.__setitem__(name, uproot.write.objects.TTree.newtree(branches, title))
if len(options) > 0:
raise TypeError("{0} not supported".format(options))

Expand Down Expand Up @@ -197,8 +197,6 @@ def __enter__(self):
return self

def __exit__(self, type, value, traceback):
for tree in self._treedict.keys():
self._treedict[tree].flush()
self.close()

class TFileRecreate(TFileUpdate):
Expand Down
Loading

0 comments on commit 03840b3

Please sign in to comment.