From 2c9c0790063276c2ac4238c2d54f4eed86bd9805 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Thu, 10 Dec 2020 18:28:50 -0600
Subject: [PATCH 1/8] Added ak.to_buffers with a new interface; old
 ak.to_arrayset uses it.

---
 src/awkward/operations/convert.py | 376 ++++++++++++++++++++++--------
 1 file changed, 275 insertions(+), 101 deletions(-)

diff --git a/src/awkward/operations/convert.py b/src/awkward/operations/convert.py
index 4d0205c557..c7bc120acd 100644
--- a/src/awkward/operations/convert.py
+++ b/src/awkward/operations/convert.py
@@ -2694,12 +2694,12 @@ def from_parquet(
             return out
 
 
-def _arrayset_key(
+def _buffers_key(
     form_key, attribute, partition, prefix, sep, partition_first,
 ):
     if form_key is None:
         raise ValueError(
-            "cannot read from arrayset using Forms without form_keys"
+            "cannot ak.from_buffers using Forms without form_keys"
             + ak._util.exception_suffix(__file__)
         )
     if attribute is None:
@@ -2756,6 +2756,9 @@ def to_arrayset(
             at the end of the keys. This can be relevant if the `container`
             is sorted or lookup performance depends on alphabetical order.
 
+    **Deprecated:** this will be removed in `awkward>=1.1.0` after January 1,
+    2021. Use #ak.to_buffers instead, which returns (form, length, container).
+
     Decomposes an Awkward Array into a Form and a collection of arrays, so
     that data can be losslessly written to file formats and storage devices
     that only understand named arrays (or binary blobs).
@@ -2855,6 +2858,196 @@ def to_arrayset(
 
     See also #ak.from_arrayset.
     """
+
+    layout = to_layout(array, allow_record=False, allow_other=False)
+
+    if isinstance(layout, ak.partition.PartitionedArray):
+        show_partition = True
+        if partition is not None:
+            raise ValueError(
+                "array is partitioned; an explicit 'partition' should not be "
+                "assigned" + ak._util.exception_suffix(__file__)
+            )
+    else:
+        if partition is None:
+            show_partition = False
+        else:
+            show_partition = True
+
+    if partition is None:
+        partition = 0
+
+    def key_format(**v):
+        v["sep"] = sep
+        if prefix is None:
+            v["prefix"] = ""
+        else:
+            v["prefix"] = prefix + sep
+
+        if not show_partition:
+            if v["attribute"] == "data":
+                return "{prefix}node{node}".format(**v)
+            else:
+                return "{prefix}node{node}{sep}{attribute}".format(**v)
+
+        elif partition_first:
+            if v["attribute"] == "data":
+                return "{prefix}part{partition}{sep}node{node}".format(**v)
+            else:
+                return "{prefix}part{partition}{sep}node{node}{sep}{attribute}".format(**v)
+
+        else:
+            if v["attribute"] == "data":
+                return "{prefix}node{node}{sep}part{partition}".format(**v)
+            else:
+                return "{prefix}node{node}{sep}{attribute}{sep}part{partition}".format(**v)
+
+    def form_key_format(**v):
+        return "node{node}".format(**v)
+
+    form, length, container = to_buffers(
+        layout,
+        container=container,
+        partition=partition,
+        key_format=key_format,
+        form_key_format=form_key_format,
+    )
+
+    if isinstance(length, (numbers.Integral, np.integer)):
+        num_partitions = None
+    else:
+        num_partitions = len(length)
+
+    return form, container, num_partitions
+
+
+def to_buffers(
+    array,
+    container=None,
+    partition=0,
+    key_format="part{partition}-node{node}-{attribute}",
+    form_key_format="node{node}",
+):
+    u"""
+    Args:
+        array: Data to decompose into named buffers.
+        container (None or MutableMapping): The str \u2192 NumPy arrays (or
+            Python buffers) that represent the decomposed Awkward Array. This
+            `container` is only assumed to have a `__setitem__` method that
+            accepts strings as keys.
+        partition (non-negative int): If `array` is not partitioned, this is
+            the partition number that will be used as part of the container
+            key. If `array` is partitioned, this will be added to the partition
+            numbers.
+        key_format (str or callable): Python format string containing
+            `"{partition}"`, `"{node}"`, and/or `"{attribute}"` or a function
+            that takes these as keyword arguments and returns a string to use
+            as keys for buffers in the `container`.
+        form_key_format (str, callable, or None): Python format string containing
+            `"{node}"` or a function that takes this as a keyword argument and
+            returns a string to use as a `form_key` for each Form node. If None,
+            the Form nodes have no keys. (They are not required for reconstruction.)
+
+    Decomposes an Awkward Array into a Form and a collection of Python buffers,
+    so that data can be losslessly written to file formats and storage devices
+    that only map names to binary blobs (such as a filesystem directory).
+
+    This function returns a 3-tuple:
+
+        (form, length, container)
+
+    where the `form` is a #ak.forms.Form (which can be converted to JSON
+    with `tojson`), the `length` is either an integer (`len(array)`) or a list
+    of the lengths of each partition in `array`, and the `container` is either
+    the MutableMapping you passed in or a new dict containing the buffers (as
+    NumPy arrays).
+
+    These are also the first three arguments of #ak.from_buffers, so a full
+    round-trip is
+
+        >>> reconstituted = ak.from_buffers(*ak.to_buffers(original))
+
+    The `container` argument lets you specify your own MutableMapping, which
+    might be an interface to some storage format or device (e.g. h5py). It's
+    okay if the `container` drops NumPy's `dtype` and `shape` information,
+    leaving raw bytes, since `dtype` and `shape` can be reconstituted from
+    the #ak.forms.NumpyForm.
+
+    The `partition` argument lets you fill the `container` gradually or in parallel.
+    If the `array` is not partitioned, the `partition` argument sets its
+    partition number (for the container keys, through `key_format`).
+    If the `array` is partitioned, the `partition` argument is added to each
+    partition number.
+
+    Here is a simple example:
+
+        >>> original = ak.Array([[1, 2, 3], [], [4, 5]])
+        >>> form, length, container = ak.to_buffers(original)
+        >>> form
+        {
+            "class": "ListOffsetArray64",
+            "offsets": "i64",
+            "content": {
+                "class": "NumpyArray",
+                "itemsize": 8,
+                "format": "l",
+                "primitive": "int64",
+                "form_key": "node1"
+            },
+            "form_key": "node0"
+        }
+        >>> length
+        3
+        >>> container
+        {'part0-node0-offsets': array([0, 3, 3, 5], dtype=int64),
+         'part0-node1-data': array([1, 2, 3, 4, 5])}
+
+    which may be read back with
+
+        >>> ak.from_buffers(form, length, container)
+        <Array [[1, 2, 3], [], [4, 5]] type='3 * var * int64'>
+
+    Here is an example that builds up a partitioned array:
+
+        >>> container = {}
+        >>> lengths = [None] * 4
+        >>> form, lengths[0], _ = ak.to_buffers(ak.Array([[1, 2, 3], [], [4, 5]]), container, 0)
+        >>> form, lengths[1], _ = ak.to_buffers(ak.Array([[6, 7, 8, 9]]), container, 1)
+        >>> form, lengths[2], _ = ak.to_buffers(ak.Array([[], [], []]), container, 2)
+        >>> form, lengths[3], _ = ak.to_buffers(ak.Array([[10]]), container, 3)
+        >>> form
+        {
+            "class": "ListOffsetArray64",
+            "offsets": "i64",
+            "content": {
+                "class": "NumpyArray",
+                "itemsize": 8,
+                "format": "l",
+                "primitive": "int64",
+                "form_key": "node1"
+            },
+            "form_key": "node0"
+        }
+        >>> container
+        {'part0-node0-offsets': array([0, 3, 3, 5], dtype=int64),
+         'part0-node1-data': array([1, 2, 3, 4, 5]),
+         'part1-node0-offsets': array([0, 4], dtype=int64),
+         'part1-node1-data': array([6, 7, 8, 9]),
+         'part2-node0-offsets': array([0, 0, 0, 0], dtype=int64),
+         'part2-node1-data': array([], dtype=float64),
+         'part3-node0-offsets': array([0, 1], dtype=int64),
+         'part3-node1-data': array([10])}
+
+    The object returned by #ak.from_buffers is now a partitioned array:
+
+        >>> reconstituted = ak.from_buffers(form, lengths, container)
+        >>> reconstituted
+        <Array [[1, 2, 3], [], [4, ... [], [], [10]] type='8 * var * int64'>
+        >>> ak.partitions(reconstituted)
+        [3, 1, 3, 1]
+
+    See also #ak.from_buffers.
+    """
     if container is None:
         container = {}
 
@@ -2876,33 +3069,13 @@ def index_form(index):
                 + ak._util.exception_suffix(__file__)
             )
 
-    if prefix is None:
-        prefix = ""
-    else:
-        prefix = prefix + sep
-
-    if isinstance(node_format, str) or (
-        ak._util.py27 and isinstance(node_format, ak._util.unicode)
-    ):
-        tmp1 = node_format
-
-        def node_format(x):
-            return tmp1.format(x)
+    if isinstance(key_format, str):
+        key_format = lambda **v: key_format.format(**v)
 
-    if isinstance(partition_format, str) or (
-        ak._util.py27 and isinstance(partition_format, ak._util.unicode)
-    ):
-        tmp2 = partition_format
-
-        def partition_format(x):
-            return tmp2.format(x)
-
-    def key(key_index, attribute, partition):
-        if partition is not None:
-            partition = partition_format(partition)
-        return _arrayset_key(
-            node_format(key_index), attribute, partition, prefix, sep, partition_first,
-        )
+    if form_key_format is None:
+        form_key_format = lambda **v: None
+    elif isinstance(form_key_format, str):
+        form_key_format = lambda **v: form_key_format.format(**v)
 
     num_form_keys = [0]
 
@@ -2917,15 +3090,17 @@ def fill(layout, part):
 
         if has_identities:
             raise NotImplementedError(
-                "ak.to_arrayset for an array with Identities"
+                "ak.to_buffers for an array with Identities"
                 + ak._util.exception_suffix(__file__)
             )
 
         if isinstance(layout, ak.layout.EmptyArray):
             array = numpy.asarray(layout)
-            container[key(key_index, None, part)] = little_endian(array)
+            key = key_format(node=str(key_index), attribute="data", partition=str(part))
+            container[key] = little_endian(array)
+
             return ak.forms.EmptyForm(
-                has_identities, parameters, node_format(key_index)
+                has_identities, parameters, form_key_format(node=str(key_index))
             )
 
         elif isinstance(
@@ -2936,48 +3111,48 @@ def fill(layout, part):
                 ak.layout.IndexedArray64,
             ),
         ):
-            container[key(key_index, "index", part)] = little_endian(
-                numpy.asarray(layout.index)
-            )
+            key = key_format(node=str(key_index), attribute="index", partition=str(part))
+            container[key] = little_endian(numpy.asarray(layout.index))
+
             return ak.forms.IndexedForm(
                 index_form(layout.index),
                 fill(layout.content, part),
                 has_identities,
                 parameters,
-                node_format(key_index),
+                form_key_format(node=str(key_index)),
             )
 
         elif isinstance(
             layout, (ak.layout.IndexedOptionArray32, ak.layout.IndexedOptionArray64)
         ):
-            container[key(key_index, "index", part)] = little_endian(
-                numpy.asarray(layout.index)
-            )
+            key = key_format(node=str(key_index), attribute="index", partition=str(part))
+            container[key] = little_endian(numpy.asarray(layout.index))
+
             return ak.forms.IndexedOptionForm(
                 index_form(layout.index),
                 fill(layout.content, part),
                 has_identities,
                 parameters,
-                node_format(key_index),
+                form_key_format(node=str(key_index)),
             )
 
         elif isinstance(layout, ak.layout.ByteMaskedArray):
-            container[key(key_index, "mask", part)] = little_endian(
-                numpy.asarray(layout.mask)
-            )
+            key = key_format(node=str(key_index), attribute="mask", partition=str(part))
+            container[key] = little_endian(numpy.asarray(layout.mask))
+
             return ak.forms.ByteMaskedForm(
                 index_form(layout.mask),
                 fill(layout.content, part),
                 layout.valid_when,
                 has_identities,
                 parameters,
-                node_format(key_index),
+                form_key_format(node=str(key_index)),
             )
 
         elif isinstance(layout, ak.layout.BitMaskedArray):
-            container[key(key_index, "mask", part)] = little_endian(
-                numpy.asarray(layout.mask)
-            )
+            key = key_format(node=str(key_index), attribute="mask", partition=str(part))
+            container[key] = little_endian(numpy.asarray(layout.mask))
+
             return ak.forms.BitMaskedForm(
                 index_form(layout.mask),
                 fill(layout.content, part),
@@ -2985,7 +3160,7 @@ def fill(layout, part):
                 layout.lsb_order,
                 has_identities,
                 parameters,
-                node_format(key_index),
+                form_key_format(node=str(key_index)),
             )
 
         elif isinstance(layout, ak.layout.UnmaskedArray):
@@ -2993,26 +3168,26 @@ def fill(layout, part):
                 fill(layout.content, part),
                 has_identities,
                 parameters,
-                node_format(key_index),
+                form_key_format(node=str(key_index)),
             )
 
         elif isinstance(
             layout,
             (ak.layout.ListArray32, ak.layout.ListArrayU32, ak.layout.ListArray64),
         ):
-            container[key(key_index, "starts", part)] = little_endian(
-                numpy.asarray(layout.starts)
-            )
-            container[key(key_index, "stops", part)] = little_endian(
-                numpy.asarray(layout.stops)
-            )
+            key = key_format(node=str(key_index), attribute="starts", partition=str(part))
+            container[key] = little_endian(numpy.asarray(layout.starts))
+
+            key = key_format(node=str(key_index), attribute="stops", partition=str(part))
+            container[key] = little_endian(numpy.asarray(layout.stops))
+
             return ak.forms.ListForm(
                 index_form(layout.starts),
                 index_form(layout.stops),
                 fill(layout.content, part),
                 has_identities,
                 parameters,
-                node_format(key_index),
+                form_key_format(node=str(key_index)),
             )
 
         elif isinstance(
@@ -3023,20 +3198,22 @@ def fill(layout, part):
                 ak.layout.ListOffsetArray64,
             ),
         ):
-            container[key(key_index, "offsets", part)] = little_endian(
-                numpy.asarray(layout.offsets)
-            )
+            key = key_format(node=str(key_index), attribute="offsets", partition=str(part))
+            container[key] = little_endian(numpy.asarray(layout.offsets))
+
             return ak.forms.ListOffsetForm(
                 index_form(layout.offsets),
                 fill(layout.content, part),
                 has_identities,
                 parameters,
-                node_format(key_index),
+                form_key_format(node=str(key_index)),
             )
 
         elif isinstance(layout, ak.layout.NumpyArray):
             array = numpy.asarray(layout)
-            container[key(key_index, None, part)] = little_endian(array)
+            key = key_format(node=str(key_index), attribute="data", partition=str(part))
+            container[key] = little_endian(array)
+
             form = ak.forms.Form.from_numpy(array.dtype)
             return ak.forms.NumpyForm(
                 layout.shape[1:],
@@ -3044,7 +3221,7 @@ def fill(layout, part):
                 form.format,
                 has_identities,
                 parameters,
-                node_format(key_index),
+                form_key_format(node=str(key_index)),
             )
 
         elif isinstance(layout, ak.layout.RecordArray):
@@ -3057,8 +3234,9 @@ def fill(layout, part):
                 for k in layout.keys():
                     forms.append(fill(layout[k], part))
                     keys.append(k)
+
             return ak.forms.RecordForm(
-                forms, keys, has_identities, parameters, node_format(key_index),
+                forms, keys, has_identities, parameters, form_key_format(node=str(key_index)),
             )
 
         elif isinstance(layout, ak.layout.RegularArray):
@@ -3067,7 +3245,7 @@ def fill(layout, part):
                 layout.size,
                 has_identities,
                 parameters,
-                node_format(key_index),
+                form_key_format(node=str(key_index)),
             )
 
         elif isinstance(
@@ -3081,19 +3259,20 @@ def fill(layout, part):
             forms = []
             for x in layout.contents:
                 forms.append(fill(x, part))
-            container[key(key_index, "tags", part)] = little_endian(
-                numpy.asarray(layout.tags)
-            )
-            container[key(key_index, "index", part)] = little_endian(
-                numpy.asarray(layout.index)
-            )
+
+            key = key_format(node=str(key_index), attribute="tags", partition=str(part))
+            container[key] = little_endian(numpy.asarray(layout.tags))
+
+            key = key_format(node=str(key_index), attribute="index", partition=str(part))
+            container[key] = little_endian(numpy.asarray(layout.index))
+
             return ak.forms.UnionForm(
                 index_form(layout.tags),
                 index_form(layout.index),
                 forms,
                 has_identities,
                 parameters,
-                node_format(key_index),
+                form_key_format(node=str(key_index)),
             )
 
         elif isinstance(layout, ak.layout.VirtualArray):
@@ -3109,16 +3288,12 @@ def fill(layout, part):
     layout = to_layout(array, allow_record=False, allow_other=False)
 
     if isinstance(layout, ak.partition.PartitionedArray):
-        if partition is not None:
-            raise ValueError(
-                "array is partitioned; an explicit 'partition' should not be "
-                "assigned" + ak._util.exception_suffix(__file__)
-            )
         form = None
+        length = []
         for part, content in enumerate(layout.partitions):
             num_form_keys[0] = 0
 
-            f = fill(content, part)
+            f = fill(content, partition + part)
 
             if form is None:
                 form = f
@@ -3131,18 +3306,17 @@ def fill(layout, part):
 differs from the first Form:
 
     {2}""".format(
-                        part, f.tojson(True, False), form.tojson(True, False)
+                        partition + part, f.tojson(True, False), form.tojson(True, False)
                     )
                     + ak._util.exception_suffix(__file__)
                 )
-
-        num_partitions = len(layout.partitions)
+            length.append(len(content))
 
     else:
         form = fill(layout, partition)
-        num_partitions = None
+        length = len(layout)
 
-    return form, container, num_partitions
+    return form, length, container
 
 
 _index_form_to_dtype = _index_form_to_index = _form_to_layout_class = None
@@ -3197,7 +3371,7 @@ def _form_to_layout(
 
     if form.has_identities:
         raise NotImplementedError(
-            "ak.from_arrayset for an array with Identities"
+            "ak.from_buffers for an array with Identities"
             + ak._util.exception_suffix(__file__)
         )
     else:
@@ -3208,7 +3382,7 @@ def _form_to_layout(
     if isinstance(form, ak.forms.BitMaskedForm):
         raw_mask = (
             container[
-                _arrayset_key(
+                _buffers_key(
                     form.form_key, "mask", partition, prefix, sep, partition_first,
                 )
             ]
@@ -3244,7 +3418,7 @@ def _form_to_layout(
     elif isinstance(form, ak.forms.ByteMaskedForm):
         raw_mask = (
             container[
-                _arrayset_key(
+                _buffers_key(
                     form.form_key, "mask", partition, prefix, sep, partition_first,
                 )
             ]
@@ -3277,7 +3451,7 @@ def _form_to_layout(
     elif isinstance(form, ak.forms.IndexedForm):
         raw_index = (
             container[
-                _arrayset_key(
+                _buffers_key(
                     form.form_key, "index", partition, prefix, sep, partition_first,
                 )
             ]
@@ -3307,7 +3481,7 @@ def _form_to_layout(
     elif isinstance(form, ak.forms.IndexedOptionForm):
         raw_index = (
             container[
-                _arrayset_key(
+                _buffers_key(
                     form.form_key, "index", partition, prefix, sep, partition_first,
                 )
             ]
@@ -3337,7 +3511,7 @@ def _form_to_layout(
     elif isinstance(form, ak.forms.ListForm):
         raw_starts = (
             container[
-                _arrayset_key(
+                _buffers_key(
                     form.form_key, "starts", partition, prefix, sep, partition_first,
                 )
             ]
@@ -3349,7 +3523,7 @@ def _form_to_layout(
         )
         raw_stops = (
             container[
-                _arrayset_key(
+                _buffers_key(
                     form.form_key, "stops", partition, prefix, sep, partition_first,
                 )
             ]
@@ -3379,7 +3553,7 @@ def _form_to_layout(
     elif isinstance(form, ak.forms.ListOffsetForm):
         raw_offsets = (
             container[
-                _arrayset_key(
+                _buffers_key(
                     form.form_key, "offsets", partition, prefix, sep, partition_first,
                 )
             ]
@@ -3409,7 +3583,7 @@ def _form_to_layout(
     elif isinstance(form, ak.forms.NumpyForm):
         raw_array = (
             container[
-                _arrayset_key(
+                _buffers_key(
                     form.form_key, None, partition, prefix, sep, partition_first,
                 )
             ]
@@ -3476,7 +3650,7 @@ def _form_to_layout(
     elif isinstance(form, ak.forms.UnionForm):
         raw_tags = (
             container[
-                _arrayset_key(
+                _buffers_key(
                     form.form_key, "tags", partition, prefix, sep, partition_first,
                 )
             ]
@@ -3488,7 +3662,7 @@ def _form_to_layout(
         )
         raw_index = (
             container[
-                _arrayset_key(
+                _buffers_key(
                     form.form_key, "index", partition, prefix, sep, partition_first,
                 )
             ]
@@ -3550,7 +3724,7 @@ def _form_to_layout(
         generator = ak.layout.ArrayGenerator(
             _form_to_layout, args, form=form.form, length=length,
         )
-        node_cache_key = _arrayset_key(
+        node_cache_key = _buffers_key(
             form.form.form_key, "virtual", partition, prefix, sep, partition_first,
         )
         return ak.layout.VirtualArray(
@@ -3565,15 +3739,15 @@ def _form_to_layout(
         )
 
 
-_from_arrayset_key_number = 0
-_from_arrayset_key_lock = threading.Lock()
+_from_buffers_key_number = 0
+_from_buffers_key_lock = threading.Lock()
 
 
-def _from_arrayset_key():
-    global _from_arrayset_key_number
-    with _from_arrayset_key_lock:
-        out = _from_arrayset_key_number
-        _from_arrayset_key_number += 1
+def _from_buffers_key():
+    global _from_buffers_key_number
+    with _from_buffers_key_lock:
+        out = _from_buffers_key_number
+        _from_buffers_key_number += 1
     return out
 
 
@@ -3733,7 +3907,7 @@ def partition_format(x):
             lazy_cache = ak.layout.ArrayCache(hold_cache)
 
         if lazy_cache_key is None:
-            lazy_cache_key = "ak.from_arrayset:{0}".format(_from_arrayset_key())
+            lazy_cache_key = "ak.from_arrayset:{0}".format(_from_buffers_key())
 
     if num_partitions is None:
         args = (form, container, None, prefix, sep, partition_first)

From 7b0dd95ab3f153fecec1ee9c6fc89ad864f263b2 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Thu, 10 Dec 2020 18:33:03 -0600
Subject: [PATCH 2/8] Black and Flake8.

---
 src/awkward/_util.py                          |  4 +-
 src/awkward/operations/convert.py             | 72 ++++++++++++++-----
 tests/test_0107-assign-fields-to-records.py   | 12 +++-
 tests/test_0224-arrow-to-awkward.py           | 16 +++--
 .../test_0590-allow-regulararray-size-zero.py |  8 ++-
 5 files changed, 83 insertions(+), 29 deletions(-)

diff --git a/src/awkward/_util.py b/src/awkward/_util.py
index 9f883e0bb7..bf1ec4d56d 100644
--- a/src/awkward/_util.py
+++ b/src/awkward/_util.py
@@ -761,7 +761,9 @@ def apply(inputs, depth, user):
                 outcontent = apply(nextinputs, depth + 1, user)
                 assert isinstance(outcontent, tuple)
 
-                return tuple(ak.layout.RegularArray(x, maxsize, maxlen) for x in outcontent)
+                return tuple(
+                    ak.layout.RegularArray(x, maxsize, maxlen) for x in outcontent
+                )
 
             elif not all_same_offsets(nplike, inputs):
                 fcns = [
diff --git a/src/awkward/operations/convert.py b/src/awkward/operations/convert.py
index c7bc120acd..4e0f91d864 100644
--- a/src/awkward/operations/convert.py
+++ b/src/awkward/operations/convert.py
@@ -1122,7 +1122,9 @@ def recurse(array, level):
                         starts, stops, recurse(array.content, level + 1)
                     )
                 for i in range(len(array.starts.shape) - 1, 0, -1):
-                    out = ak.layout.RegularArray(out, array.starts.shape[i], array.starts.shape[i - 1])
+                    out = ak.layout.RegularArray(
+                        out, array.starts.shape[i], array.starts.shape[i - 1]
+                    )
                 return out
 
         elif isinstance(array, awkward0.Table):
@@ -1176,7 +1178,9 @@ def recurse(array, level):
                 )
 
             for i in range(len(array.tags.shape) - 1, 0, -1):
-                out = ak.layout.RegularArray(out, array.tags.shape[i], array.tags.shape[i - 1])
+                out = ak.layout.RegularArray(
+                    out, array.tags.shape[i], array.tags.shape[i - 1]
+                )
             return out
 
         elif isinstance(array, awkward0.MaskedArray):
@@ -1188,7 +1192,9 @@ def recurse(array, level):
                 valid_when=(not array.maskedwhen),
             )
             for i in range(len(array.mask.shape) - 1, 0, -1):
-                out = ak.layout.RegularArray(out, array.mask.shape[i], array.mask.shape[i - 1])
+                out = ak.layout.RegularArray(
+                    out, array.mask.shape[i], array.mask.shape[i - 1]
+                )
             return out
 
         elif isinstance(array, awkward0.BitMaskedArray):
@@ -1221,7 +1227,9 @@ def recurse(array, level):
                     index, recurse(array.content, level + 1)
                 )
             for i in range(len(array.index.shape) - 1, 0, -1):
-                out = ak.layout.RegularArray(out, array.index.shape[i], array.index.shape[i - 1])
+                out = ak.layout.RegularArray(
+                    out, array.index.shape[i], array.index.shape[i - 1]
+                )
             return out
 
         elif isinstance(array, awkward0.IndexedArray):
@@ -1239,7 +1247,9 @@ def recurse(array, level):
                 index = ak.layout.Index32(array.index.reshape(-1))
                 out = ak.layout.IndexedArray32(index, recurse(array.content, level + 1))
             for i in range(len(array.index.shape) - 1, 0, -1):
-                out = ak.layout.RegularArray(out, array.index.shape[i], array.index.shape[i - 1])
+                out = ak.layout.RegularArray(
+                    out, array.index.shape[i], array.index.shape[i - 1]
+                )
             return out
 
         elif isinstance(array, awkward0.SparseArray):
@@ -2894,13 +2904,17 @@ def key_format(**v):
             if v["attribute"] == "data":
                 return "{prefix}part{partition}{sep}node{node}".format(**v)
             else:
-                return "{prefix}part{partition}{sep}node{node}{sep}{attribute}".format(**v)
+                return "{prefix}part{partition}{sep}node{node}{sep}{attribute}".format(
+                    **v
+                )
 
         else:
             if v["attribute"] == "data":
                 return "{prefix}node{node}{sep}part{partition}".format(**v)
             else:
-                return "{prefix}node{node}{sep}{attribute}{sep}part{partition}".format(**v)
+                return "{prefix}node{node}{sep}{attribute}{sep}part{partition}".format(
+                    **v
+                )
 
     def form_key_format(**v):
         return "node{node}".format(**v)
@@ -3070,12 +3084,12 @@ def index_form(index):
             )
 
     if isinstance(key_format, str):
-        key_format = lambda **v: key_format.format(**v)
+        key_format = key_format.format  # bound now; a lambda would call itself
 
     if form_key_format is None:
-        form_key_format = lambda **v: None
+        form_key_format = lambda **v: None  # noqa: E731
     elif isinstance(form_key_format, str):
-        form_key_format = lambda **v: form_key_format.format(**v)
+        form_key_format = form_key_format.format  # bound now, as for key_format
 
     num_form_keys = [0]
 
@@ -3111,7 +3125,9 @@ def fill(layout, part):
                 ak.layout.IndexedArray64,
             ),
         ):
-            key = key_format(node=str(key_index), attribute="index", partition=str(part))
+            key = key_format(
+                node=str(key_index), attribute="index", partition=str(part)
+            )
             container[key] = little_endian(numpy.asarray(layout.index))
 
             return ak.forms.IndexedForm(
@@ -3125,7 +3141,9 @@ def fill(layout, part):
         elif isinstance(
             layout, (ak.layout.IndexedOptionArray32, ak.layout.IndexedOptionArray64)
         ):
-            key = key_format(node=str(key_index), attribute="index", partition=str(part))
+            key = key_format(
+                node=str(key_index), attribute="index", partition=str(part)
+            )
             container[key] = little_endian(numpy.asarray(layout.index))
 
             return ak.forms.IndexedOptionForm(
@@ -3175,10 +3193,14 @@ def fill(layout, part):
             layout,
             (ak.layout.ListArray32, ak.layout.ListArrayU32, ak.layout.ListArray64),
         ):
-            key = key_format(node=str(key_index), attribute="starts", partition=str(part))
+            key = key_format(
+                node=str(key_index), attribute="starts", partition=str(part)
+            )
             container[key] = little_endian(numpy.asarray(layout.starts))
 
-            key = key_format(node=str(key_index), attribute="stops", partition=str(part))
+            key = key_format(
+                node=str(key_index), attribute="stops", partition=str(part)
+            )
             container[key] = little_endian(numpy.asarray(layout.stops))
 
             return ak.forms.ListForm(
@@ -3198,7 +3220,9 @@ def fill(layout, part):
                 ak.layout.ListOffsetArray64,
             ),
         ):
-            key = key_format(node=str(key_index), attribute="offsets", partition=str(part))
+            key = key_format(
+                node=str(key_index), attribute="offsets", partition=str(part)
+            )
             container[key] = little_endian(numpy.asarray(layout.offsets))
 
             return ak.forms.ListOffsetForm(
@@ -3236,7 +3260,11 @@ def fill(layout, part):
                     keys.append(k)
 
             return ak.forms.RecordForm(
-                forms, keys, has_identities, parameters, form_key_format(node=str(key_index)),
+                forms,
+                keys,
+                has_identities,
+                parameters,
+                form_key_format(node=str(key_index)),
             )
 
         elif isinstance(layout, ak.layout.RegularArray):
@@ -3263,7 +3291,9 @@ def fill(layout, part):
             key = key_format(node=str(key_index), attribute="tags", partition=str(part))
             container[key] = little_endian(numpy.asarray(layout.tags))
 
-            key = key_format(node=str(key_index), attribute="index", partition=str(part))
+            key = key_format(
+                node=str(key_index), attribute="index", partition=str(part)
+            )
             container[key] = little_endian(numpy.asarray(layout.index))
 
             return ak.forms.UnionForm(
@@ -3306,7 +3336,9 @@ def fill(layout, part):
 differs from the first Form:
 
     {2}""".format(
-                        partition + part, f.tojson(True, False), form.tojson(True, False)
+                        partition + part,
+                        f.tojson(True, False),
+                        form.tojson(True, False),
                     )
                     + ak._util.exception_suffix(__file__)
                 )
@@ -3645,7 +3677,9 @@ def _form_to_layout(
             length * form.size,
         )
 
-        return ak.layout.RegularArray(content, form.size, length, identities, parameters)
+        return ak.layout.RegularArray(
+            content, form.size, length, identities, parameters
+        )
 
     elif isinstance(form, ak.forms.UnionForm):
         raw_tags = (
diff --git a/tests/test_0107-assign-fields-to-records.py b/tests/test_0107-assign-fields-to-records.py
index cc565a1f08..9b9af413c3 100644
--- a/tests/test_0107-assign-fields-to-records.py
+++ b/tests/test_0107-assign-fields-to-records.py
@@ -102,10 +102,14 @@ def test_regulararray():
         np.array([0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9])
     )
     recordarray = ak.layout.RecordArray({"x": content})
-    regulararray = ak.Array(ak.layout.RegularArray(recordarray, 3, zeros_length=0), check_valid=True)
+    regulararray = ak.Array(
+        ak.layout.RegularArray(recordarray, 3, zeros_length=0), check_valid=True
+    )
 
     content2 = ak.layout.NumpyArray(np.array([100, 200, 300]))
-    regulararray2 = ak.Array(ak.layout.RegularArray(content2, 1, zeros_length=0), check_valid=True)
+    regulararray2 = ak.Array(
+        ak.layout.RegularArray(content2, 1, zeros_length=0), check_valid=True
+    )
     assert ak.to_list(ak.with_field(regulararray, regulararray2, "y")) == [
         [{"x": 0.0, "y": 100}, {"x": 1.1, "y": 100}, {"x": 2.2, "y": 100}],
         [{"x": 3.3, "y": 200}, {"x": 4.4, "y": 200}, {"x": 5.5, "y": 200}],
@@ -115,7 +119,9 @@ def test_regulararray():
     content2 = ak.layout.NumpyArray(
         np.array([100, 200, 300, 400, 500, 600, 700, 800, 900])
     )
-    regulararray2 = ak.Array(ak.layout.RegularArray(content2, 3, zeros_length=0), check_valid=True)
+    regulararray2 = ak.Array(
+        ak.layout.RegularArray(content2, 3, zeros_length=0), check_valid=True
+    )
     assert ak.to_list(ak.with_field(regulararray, regulararray2, "y")) == [
         [{"x": 0.0, "y": 100}, {"x": 1.1, "y": 200}, {"x": 2.2, "y": 300}],
         [{"x": 3.3, "y": 400}, {"x": 4.4, "y": 500}, {"x": 5.5, "y": 600}],
diff --git a/tests/test_0224-arrow-to-awkward.py b/tests/test_0224-arrow-to-awkward.py
index 693cdac495..fb36789d5a 100644
--- a/tests/test_0224-arrow-to-awkward.py
+++ b/tests/test_0224-arrow-to-awkward.py
@@ -1586,7 +1586,9 @@ def test_arrow_coverage100():
     a = ak.layout.IndexedOptionArray32(
         ak.layout.Index32(np.array([-1, 1, -1, 0, 0, -1], "i4")),
         ak.layout.RegularArray(
-            ak.layout.NumpyArray(np.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6])), 3, zeros_length=0
+            ak.layout.NumpyArray(np.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6])),
+            3,
+            zeros_length=0,
         ),
     )
     assert ak.to_arrow(a).to_pylist() == [
@@ -1601,7 +1603,9 @@ def test_arrow_coverage100():
     a = ak.layout.IndexedOptionArray32(
         ak.layout.Index32(np.array([-1, 1, -1, 0, 0, -1, 1, -1], "i4")),
         ak.layout.RegularArray(
-            ak.layout.NumpyArray(np.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6])), 3, zeros_length=0
+            ak.layout.NumpyArray(np.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6])),
+            3,
+            zeros_length=0,
         ),
     )
     assert ak.to_arrow(a).to_pylist() == [
@@ -1618,7 +1622,9 @@ def test_arrow_coverage100():
     a = ak.layout.IndexedOptionArray64(
         ak.layout.Index64(np.array([-1, 1, -1, 0, 0, -1, 1, -1], "i8")),
         ak.layout.RegularArray(
-            ak.layout.NumpyArray(np.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6])), 3, zeros_length=0
+            ak.layout.NumpyArray(np.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6])),
+            3,
+            zeros_length=0,
         ),
     )
     assert ak.to_arrow(a).to_pylist() == [
@@ -1637,7 +1643,9 @@ def test_arrow_coverage100():
         ak.layout.IndexedOptionArray32(
             ak.layout.Index32(np.array([-1, 1, -1, 0, 0, -1], "i4")),
             ak.layout.RegularArray(
-                ak.layout.NumpyArray(np.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6])), 3, zeros_length=0
+                ak.layout.NumpyArray(np.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6])),
+                3,
+                zeros_length=0,
             ),
         ),
         valid_when=True,
diff --git a/tests/test_0590-allow-regulararray-size-zero.py b/tests/test_0590-allow-regulararray-size-zero.py
index 6d55283141..bdb9dd6846 100644
--- a/tests/test_0590-allow-regulararray-size-zero.py
+++ b/tests/test_0590-allow-regulararray-size-zero.py
@@ -7,7 +7,9 @@
 import awkward as ak  # noqa: F401
 
 
-empty = ak.Array(ak.layout.RegularArray(ak.Array([[1, 2, 3], [], [4, 5]]).layout, 0, zeros_length=0))
+empty = ak.Array(
+    ak.layout.RegularArray(ak.Array([[1, 2, 3], [], [4, 5]]).layout, 0, zeros_length=0)
+)
 
 
 def test_ListOffsetArray_rpad_and_clip():
@@ -24,7 +26,9 @@ def test_toListOffsetArray64():
 
 def test_setidentities():
     empty2 = ak.Array(
-        ak.layout.RegularArray(ak.Array([[1, 2, 3], [], [4, 5]]).layout, 0, zeros_length=0)
+        ak.layout.RegularArray(
+            ak.Array([[1, 2, 3], [], [4, 5]]).layout, 0, zeros_length=0
+        )
     )
     empty2.layout.setidentities()
     assert np.asarray(empty2.layout.identities).tolist() == []

From ea616858e692c6f87a30068a4c793be7f14ec521 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Thu, 10 Dec 2020 21:18:03 -0600
Subject: [PATCH 3/8] Works again, without modifying to/from_arrayset.

---
 src/awkward/operations/convert.py | 1062 +++++++++++++++--------------
 tests/test_0348-form-keys.py      |    1 -
 tests/test_0384-lazy-arrayset.py  |   10 +-
 3 files changed, 550 insertions(+), 523 deletions(-)

diff --git a/src/awkward/operations/convert.py b/src/awkward/operations/convert.py
index 4e0f91d864..821d694fea 100644
--- a/src/awkward/operations/convert.py
+++ b/src/awkward/operations/convert.py
@@ -2704,243 +2704,12 @@ def from_parquet(
             return out
 
 
-def _buffers_key(
-    form_key, attribute, partition, prefix, sep, partition_first,
-):
-    if form_key is None:
-        raise ValueError(
-            "cannot ak.from_buffers using Forms without form_keys"
-            + ak._util.exception_suffix(__file__)
-        )
-    if attribute is None:
-        attribute = ""
-    else:
-        attribute = sep + attribute
-    if partition is None:
-        return "{0}{1}{2}".format(prefix, form_key, attribute,)
-    elif partition_first:
-        return "{0}{1}{2}{3}{4}".format(prefix, partition, sep, form_key, attribute,)
-    else:
-        return "{0}{1}{2}{3}{4}".format(prefix, form_key, attribute, sep, partition,)
-
-
-def to_arrayset(
-    array,
-    container=None,
-    partition=None,
-    prefix=None,
-    node_format="node{0}",
-    partition_format="part{0}",
-    sep="-",
-    partition_first=False,
-):
-    u"""
-    Args:
-        array: Data to decompose into an arrayset.
-        container (None or MutableMapping): The str \u2192 NumPy arrays (or
-            Python buffers) that represent the decomposed Awkward Array. This
-            `container` is only assumed to have a `__setitem__` method that
-            accepts strings as keys.
-        partition (None or non-negative int): If None and `array` is not
-            partitioned, keys written to the container have no reference to
-            partitioning; if an integer and `array` is not partitioned, keys
-            use this as their partition number; if `array` is partitioned, the
-            `partition` argument must be None and keys are written with the
-            array's own internal partition numbers.
-        prefix (None or str): If None, keys only contain node and partition
-            information; if a string, keys are all prepended by `prefix + sep`.
-        node_format (str or callable): Python format string or function
-            (returning str) of the node part of keys written to the container
-            and the `form_key` values in the output Form. Its only argument
-            (`{0}` in the format string) is the node number, unique within the
-            `array`.
-        partition_format (str or callable): Python format string or function
-            (returning str) of the partition part of keys written to the
-            container (if any). Its only argument (`{0}` in the format string)
-            is the partition number.
-        sep (str): Separates the prefix, node part, array attribute (e.g.
-            `"starts"`, `"stops"`, `"mask"`), and partition part of the
-            keys written to the container.
-        partition_first (bool): If True, the partition part appears immediately
-            after the prefix (if any); if False, the partition part appears
-            at the end of the keys. This can be relevant if the `container`
-            is sorted or lookup performance depends on alphabetical order.
-
-    **Deprecated:** this will be removed in `awkward>=1.1.0` after January 1,
-    2021. Use #ak.to_buffers instead: the return values have changed.
-
-    Decomposes an Awkward Array into a Form and a collection of arrays, so
-    that data can be losslessly written to file formats and storage devices
-    that only understand named arrays (or binary blobs).
-
-    This function returns a 3-tuple:
-
-        (form, container, num_partitions)
-
-    where the `form` is a #ak.forms.Form (which can be converted to JSON
-    with `tojson`), the `container` is either the MutableMapping you passed in
-    or a new dict containing the NumPy arrays, and `num_partitions` is None
-    if `array` was not partitioned or the number of partitions if it was.
-
-    These are also the first three arguments of #ak.from_arrayset, so a full
-    round-trip is
-
-        >>> reconstituted = ak.from_arrayset(*ak.to_arrayset(original))
-
-    The `container` argument lets you specify your own MutableMapping, which
-    might be an interface to some storage format or device (e.g. h5py). It's
-    okay if the `container` drops NumPy's `dtype` and `shape` information,
-    leaving raw bytes, since `dtype` and `shape` can be reconstituted from
-    the #ak.forms.NumpyForm.
-
-    The `partition` argument lets you fill the `container` one partition at a
-    time using unpartitioned arrays.
-
-    The rest of the arguments determine the format of the keys written to the
-    `container` (which might be restrictive if it represents a storage device).
-
-    Here is a simple example:
-
-        >>> original = ak.Array([[1, 2, 3], [], [4, 5]])
-        >>> form, container, num_partitions = ak.to_arrayset(original)
-        >>> form
-        {
-            "class": "ListOffsetArray64",
-            "offsets": "i64",
-            "content": {
-                "class": "NumpyArray",
-                "itemsize": 8,
-                "format": "l",
-                "primitive": "int64",
-                "form_key": "node1"
-            },
-            "form_key": "node0"
-        }
-        >>> container
-        {'node0-offsets': array([0, 3, 3, 5], dtype=int64),
-         'node1': array([1, 2, 3, 4, 5])}
-        >>> print(num_partitions)
-        None
-
-    which may be read back with
-
-        >>> ak.from_arrayset(form, container)
-        <Array [[1, 2, 3], [], [4, 5]] type='3 * var * int64'>
-
-    (the third argument of #ak.from_arrayset defaults to None).
-
-    Here is an example of building up a partitioned array:
-
-        >>> container = {}
-        >>> form, _, _ = ak.to_arrayset(ak.Array([[1, 2, 3], [], [4, 5]]), container, 0)
-        >>> form, _, _ = ak.to_arrayset(ak.Array([[6, 7, 8, 9]]), container, 1)
-        >>> form, _, _ = ak.to_arrayset(ak.Array([[], [], []]), container, 2)
-        >>> form, _, _ = ak.to_arrayset(ak.Array([[10]]), container, 3)
-        >>> form
-        {
-            "class": "ListOffsetArray64",
-            "offsets": "i64",
-            "content": {
-                "class": "NumpyArray",
-                "itemsize": 8,
-                "format": "l",
-                "primitive": "int64",
-                "form_key": "node1"
-            },
-            "form_key": "node0"
-        }
-        >>> container
-        {'node0-offsets-part0': array([0, 3, 3, 5], dtype=int64),
-         'node1-part0': array([1, 2, 3, 4, 5]),
-         'node0-offsets-part1': array([0, 4], dtype=int64),
-         'node1-part1': array([6, 7, 8, 9]),
-         'node0-offsets-part2': array([0, 0, 0, 0], dtype=int64),
-         'node1-part2': array([], dtype=float64),
-         'node0-offsets-part3': array([0, 1], dtype=int64),
-         'node1-part3': array([10])}
-
-    The object returned by #ak.from_arrayset is now a partitioned array:
-
-        >>> ak.from_arrayset(form, container, 4)
-        <Array [[1, 2, 3], [], [4, ... [], [], [10]] type='8 * var * int64'>
-        >>> ak.partitions(ak.from_arrayset(form, container, 4))
-        [3, 1, 3, 1]
-
-    See also #ak.from_arrayset.
-    """
-
-    layout = to_layout(array, allow_record=False, allow_other=False)
-
-    if isinstance(layout, ak.partition.PartitionedArray):
-        show_partition = True
-        if partition is not None:
-            raise ValueError(
-                "array is partitioned; an explicit 'partition' should not be "
-                "assigned" + ak._util.exception_suffix(__file__)
-            )
-    else:
-        if partition is None:
-            show_partition = False
-        else:
-            show_partition = True
-
-    if partition is None:
-        partition = 0
-
-    def key_format(**v):
-        v["sep"] = sep
-        if prefix is None:
-            v["prefix"] = ""
-        else:
-            v["prefix"] = prefix + sep
-
-        if not show_partition:
-            if v["attribute"] == "data":
-                return "{prefix}node{node}".format(**v)
-            else:
-                return "{prefix}node{node}{sep}{attribute}".format(**v)
-
-        elif partition_first:
-            if v["attribute"] == "data":
-                return "{prefix}part{partition}{sep}node{node}".format(**v)
-            else:
-                return "{prefix}part{partition}{sep}node{node}{sep}{attribute}".format(
-                    **v
-                )
-
-        else:
-            if v["attribute"] == "data":
-                return "{prefix}node{node}{sep}part{partition}".format(**v)
-            else:
-                return "{prefix}node{node}{sep}{attribute}{sep}part{partition}".format(
-                    **v
-                )
-
-    def form_key_format(**v):
-        return "node{node}".format(**v)
-
-    form, length, container = to_buffers(
-        layout,
-        container=container,
-        partition=partition,
-        key_format=key_format,
-        form_key_format=form_key_format,
-    )
-
-    if isinstance(length, (numbers.Integral, np.integer)):
-        num_partitions = None
-    else:
-        num_partitions = len(length)
-
-    return form, container, num_partitions
-
-
 def to_buffers(
     array,
     container=None,
-    partition=0,
-    key_format="part{partition}-node{node}-{attribute}",
-    form_key_format="node{node}",
+    partition_start=0,
+    form_key="node{id}",
+    key_format="part{partition}-{form_key}-{attribute}",
 ):
     u"""
     Args:
@@ -2949,18 +2718,21 @@ def to_buffers(
             Python buffers) that represent the decomposed Awkward Array. This
             `container` is only assumed to have a `__setitem__` method that
             accepts strings as keys.
-        partition (non-negative int): If `array` is not partitioned, this is
+        partition_start (non-negative int): If `array` is not partitioned, this is
             the partition number that will be used as part of the container
-            key. If `array` is partitioned, this will be added to the partition
-            numbers.
+            key. If `array` is partitioned, this is the first partition number.
+        form_key (str or callable): Python format string containing
+            `"{id}"` or a function that takes a non-negative integer as a string
+            and the current `layout` as keyword arguments and returns a string,
+            for use as a `form_key` on each Form node and in `key_format` (below).
         key_format (str or callable): Python format string containing
-            `"{partition}"`, `"{node}"`, and/or `"{attribute}"` or a function
+            `"{partition}"`, `"{form_key}"`, and/or `"{attribute}"` or a function
             that takes these as keyword arguments and returns a string to use
-            as keys for buffers in the `container`.
-        form_key_format (str, callable, or None): Python format string containing
-            `"{node}"` or a function that takes this as a keyword argument and
-            returns a string to use as a `form_key` for each Form node. If None,
-            the Form nodes have no keys. (They are not required for reconstruction.)
+            as keys for buffers in the `container`. The `partition` is a
+            partition number (non-negative integer, passed as a string), the
+            `form_key` is the result of applying `form_key` (above), and the
+            `attribute` is a hard-coded string representing the buffer's function
+            (e.g. `"data"`, `"offsets"`, `"index"`).
 
     Decomposes an Awkward Array into a Form and a collection of Python buffers,
     so that data can be losslessly written to file formats and storage devices
@@ -2987,11 +2759,12 @@ def to_buffers(
     leaving raw bytes, since `dtype` and `shape` can be reconstituted from
     the #ak.forms.NumpyForm.
 
-    The `partition` argument lets you fill the `container` gradually or in parallel.
-    If the `array` is not partitioned, the `partition` argument sets its
-    partition number (for the container keys, through `key_format`).
-    If the `array` is partitioned, the `partition` argument is added to each
-    partition number.
+    The `partition_start` argument lets you fill the `container` gradually or
+    in parallel. If the `array` is not partitioned, the `partition_start`
+    argument sets its partition number (for the container keys, through
+    `key_format`). If the `array` is partitioned, the first partition is numbered
+    `partition_start` and as many are filled as are in `array`. See #ak.partitions
+    to get the number of partitions in `array`.
 
     Here is a simple example:
 
@@ -3083,14 +2856,12 @@ def index_form(index):
                 + ak._util.exception_suffix(__file__)
             )
 
+    if isinstance(form_key, str):
+        form_key = lambda **v: form_key.format(**v)  # noqa: E731
+
     if isinstance(key_format, str):
         key_format = lambda **v: key_format.format(**v)  # noqa: E731
 
-    if form_key_format is None:
-        form_key_format = lambda **v: None  # noqa: E731
-    elif isinstance(form_key_format, str):
-        form_key_format = lambda **v: form_key_format.format(**v)  # noqa: E731
-
     num_form_keys = [0]
 
     def little_endian(array):
@@ -3109,13 +2880,10 @@ def fill(layout, part):
             )
 
         if isinstance(layout, ak.layout.EmptyArray):
-            array = numpy.asarray(layout)
-            key = key_format(node=str(key_index), attribute="data", partition=str(part))
-            container[key] = little_endian(array)
-
-            return ak.forms.EmptyForm(
-                has_identities, parameters, form_key_format(node=str(key_index))
-            )
+            fk = form_key(id=str(key_index))
+            key = key_format(form_key=fk, attribute="data", partition=str(part))
+            container[key] = little_endian(numpy.asarray(layout))
+            return ak.forms.EmptyForm(has_identities, parameters, fk)
 
         elif isinstance(
             layout,
@@ -3125,52 +2893,48 @@ def fill(layout, part):
                 ak.layout.IndexedArray64,
             ),
         ):
-            key = key_format(
-                node=str(key_index), attribute="index", partition=str(part)
-            )
+            fk = form_key(id=str(key_index), layout=layout)
+            key = key_format(form_key=fk, attribute="index", partition=str(part))
             container[key] = little_endian(numpy.asarray(layout.index))
-
             return ak.forms.IndexedForm(
                 index_form(layout.index),
                 fill(layout.content, part),
                 has_identities,
                 parameters,
-                form_key_format(node=str(key_index)),
+                fk,
             )
 
         elif isinstance(
             layout, (ak.layout.IndexedOptionArray32, ak.layout.IndexedOptionArray64)
         ):
-            key = key_format(
-                node=str(key_index), attribute="index", partition=str(part)
-            )
+            fk = form_key(id=str(key_index), layout=layout)
+            key = key_format(form_key=fk, attribute="index", partition=str(part))
             container[key] = little_endian(numpy.asarray(layout.index))
-
             return ak.forms.IndexedOptionForm(
                 index_form(layout.index),
                 fill(layout.content, part),
                 has_identities,
                 parameters,
-                form_key_format(node=str(key_index)),
+                fk,
             )
 
         elif isinstance(layout, ak.layout.ByteMaskedArray):
-            key = key_format(node=str(key_index), attribute="mask", partition=str(part))
+            fk = form_key(id=str(key_index), layout=layout)
+            key = key_format(form_key=fk, attribute="mask", partition=str(part))
             container[key] = little_endian(numpy.asarray(layout.mask))
-
             return ak.forms.ByteMaskedForm(
                 index_form(layout.mask),
                 fill(layout.content, part),
                 layout.valid_when,
                 has_identities,
                 parameters,
-                form_key_format(node=str(key_index)),
+                fk,
             )
 
         elif isinstance(layout, ak.layout.BitMaskedArray):
-            key = key_format(node=str(key_index), attribute="mask", partition=str(part))
+            fk = form_key(id=str(key_index), layout=layout)
+            key = key_format(form_key=fk, attribute="mask", partition=str(part))
             container[key] = little_endian(numpy.asarray(layout.mask))
-
             return ak.forms.BitMaskedForm(
                 index_form(layout.mask),
                 fill(layout.content, part),
@@ -3178,7 +2942,7 @@ def fill(layout, part):
                 layout.lsb_order,
                 has_identities,
                 parameters,
-                form_key_format(node=str(key_index)),
+                fk,
             )
 
         elif isinstance(layout, ak.layout.UnmaskedArray):
@@ -3186,30 +2950,25 @@ def fill(layout, part):
                 fill(layout.content, part),
                 has_identities,
                 parameters,
-                form_key_format(node=str(key_index)),
+                form_key(id=str(key_index), layout=layout),
             )
 
         elif isinstance(
             layout,
             (ak.layout.ListArray32, ak.layout.ListArrayU32, ak.layout.ListArray64),
         ):
-            key = key_format(
-                node=str(key_index), attribute="starts", partition=str(part)
-            )
+            fk = form_key(id=str(key_index), layout=layout)
+            key = key_format(form_key=fk, attribute="starts", partition=str(part))
             container[key] = little_endian(numpy.asarray(layout.starts))
-
-            key = key_format(
-                node=str(key_index), attribute="stops", partition=str(part)
-            )
+            key = key_format(form_key=fk, attribute="stops", partition=str(part))
             container[key] = little_endian(numpy.asarray(layout.stops))
-
             return ak.forms.ListForm(
                 index_form(layout.starts),
                 index_form(layout.stops),
                 fill(layout.content, part),
                 has_identities,
                 parameters,
-                form_key_format(node=str(key_index)),
+                fk,
             )
 
         elif isinstance(
@@ -3220,24 +2979,24 @@ def fill(layout, part):
                 ak.layout.ListOffsetArray64,
             ),
         ):
+            fk = form_key(id=str(key_index), layout=layout)
             key = key_format(
-                node=str(key_index), attribute="offsets", partition=str(part)
+                form_key=fk, attribute="offsets", partition=str(part)
             )
             container[key] = little_endian(numpy.asarray(layout.offsets))
-
             return ak.forms.ListOffsetForm(
                 index_form(layout.offsets),
                 fill(layout.content, part),
                 has_identities,
                 parameters,
-                form_key_format(node=str(key_index)),
+                fk,
             )
 
         elif isinstance(layout, ak.layout.NumpyArray):
+            fk = form_key(id=str(key_index), layout=layout)
+            key = key_format(form_key=fk, attribute="data", partition=str(part))
             array = numpy.asarray(layout)
-            key = key_format(node=str(key_index), attribute="data", partition=str(part))
             container[key] = little_endian(array)
-
             form = ak.forms.Form.from_numpy(array.dtype)
             return ak.forms.NumpyForm(
                 layout.shape[1:],
@@ -3245,7 +3004,7 @@ def fill(layout, part):
                 form.format,
                 has_identities,
                 parameters,
-                form_key_format(node=str(key_index)),
+                fk,
             )
 
         elif isinstance(layout, ak.layout.RecordArray):
@@ -3264,7 +3023,7 @@ def fill(layout, part):
                 keys,
                 has_identities,
                 parameters,
-                form_key_format(node=str(key_index)),
+                form_key(id=str(key_index), layout=layout),
             )
 
         elif isinstance(layout, ak.layout.RegularArray):
@@ -3273,7 +3032,7 @@ def fill(layout, part):
                 layout.size,
                 has_identities,
                 parameters,
-                form_key_format(node=str(key_index)),
+                form_key(id=str(key_index), layout=layout),
             )
 
         elif isinstance(
@@ -3288,21 +3047,18 @@ def fill(layout, part):
             for x in layout.contents:
                 forms.append(fill(x, part))
 
-            key = key_format(node=str(key_index), attribute="tags", partition=str(part))
+            fk = form_key(id=str(key_index), layout=layout)
+            key = key_format(form_key=fk, attribute="tags", partition=str(part))
             container[key] = little_endian(numpy.asarray(layout.tags))
-
-            key = key_format(
-                node=str(key_index), attribute="index", partition=str(part)
-            )
+            key = key_format(form_key=fk, attribute="index", partition=str(part))
             container[key] = little_endian(numpy.asarray(layout.index))
-
             return ak.forms.UnionForm(
                 index_form(layout.tags),
                 index_form(layout.index),
                 forms,
                 has_identities,
                 parameters,
-                form_key_format(node=str(key_index)),
+                fk,
             )
 
         elif isinstance(layout, ak.layout.VirtualArray):
@@ -3323,7 +3079,7 @@ def fill(layout, part):
         for part, content in enumerate(layout.partitions):
             num_form_keys[0] = 0
 
-            f = fill(content, partition + part)
+            f = fill(content, partition_start + part)
 
             if form is None:
                 form = f
@@ -3336,7 +3092,7 @@ def fill(layout, part):
 differs from the first Form:
 
     {2}""".format(
-                        partition + part,
+                        partition_start + part,
                         f.tojson(True, False),
                         form.tojson(True, False),
                     )
@@ -3345,7 +3101,7 @@ def fill(layout, part):
             length.append(len(content))
 
     else:
-        form = fill(layout, partition)
+        form = fill(layout, partition_start)
         length = len(layout)
 
     return form, length, container
@@ -3357,13 +3113,11 @@ def fill(layout, part):
 def _form_to_layout(
     form,
     container,
-    partition,
-    prefix,
-    sep,
-    partition_first,
-    cache=None,
-    cache_key=None,
-    length=None,
+    partnum,
+    key_format,
+    length,
+    lazy_cache,
+    lazy_cache_key,
 ):
     global _index_form_to_dtype, _index_form_to_index, _form_to_layout_class
 
@@ -3410,14 +3164,11 @@ def _form_to_layout(
         identities = None
 
     parameters = form.parameters
+    fk = form.form_key
 
     if isinstance(form, ak.forms.BitMaskedForm):
         raw_mask = (
-            container[
-                _buffers_key(
-                    form.form_key, "mask", partition, prefix, sep, partition_first,
-                )
-            ]
+            container[key_format(form_key=fk, attribute="mask", partition=partnum)]
             .reshape(-1)
             .view("u1")
         )
@@ -3428,13 +3179,11 @@ def _form_to_layout(
         content = _form_to_layout(
             form.content,
             container,
-            partition,
-            prefix,
-            sep,
-            partition_first,
-            cache,
-            cache_key,
+            partnum,
+            key_format,
             len(mask),
+            lazy_cache,
+            lazy_cache_key,
         )
 
         return ak.layout.BitMaskedArray(
@@ -3449,11 +3198,7 @@ def _form_to_layout(
 
     elif isinstance(form, ak.forms.ByteMaskedForm):
         raw_mask = (
-            container[
-                _buffers_key(
-                    form.form_key, "mask", partition, prefix, sep, partition_first,
-                )
-            ]
+            container[key_format(form_key=fk, attribute="mask", partition=partnum)]
             .reshape(-1)
             .view("u1")
         )
@@ -3464,13 +3209,11 @@ def _form_to_layout(
         content = _form_to_layout(
             form.content,
             container,
-            partition,
-            prefix,
-            sep,
-            partition_first,
-            cache,
-            cache_key,
+            partnum,
+            key_format,
             len(mask),
+            lazy_cache,
+            lazy_cache_key,
         )
 
         return ak.layout.ByteMaskedArray(
@@ -3482,11 +3225,7 @@ def _form_to_layout(
 
     elif isinstance(form, ak.forms.IndexedForm):
         raw_index = (
-            container[
-                _buffers_key(
-                    form.form_key, "index", partition, prefix, sep, partition_first,
-                )
-            ]
+            container[key_format(form_key=fk, attribute="index", partition=partnum)]
             .reshape(-1)
             .view("u1")
         )
@@ -3497,13 +3236,11 @@ def _form_to_layout(
         content = _form_to_layout(
             form.content,
             container,
-            partition,
-            prefix,
-            sep,
-            partition_first,
-            cache,
-            cache_key,
-            numpy.max(index) + 1,
+            partnum,
+            key_format,
+            0 if len(index) == 0 else numpy.max(index) + 1,
+            lazy_cache,
+            lazy_cache_key,
         )
 
         return _form_to_layout_class[type(form), form.index](
@@ -3512,11 +3249,7 @@ def _form_to_layout(
 
     elif isinstance(form, ak.forms.IndexedOptionForm):
         raw_index = (
-            container[
-                _buffers_key(
-                    form.form_key, "index", partition, prefix, sep, partition_first,
-                )
-            ]
+            container[key_format(form_key=fk, attribute="index", partition=partnum)]
             .reshape(-1)
             .view("u1")
         )
@@ -3527,13 +3260,11 @@ def _form_to_layout(
         content = _form_to_layout(
             form.content,
             container,
-            partition,
-            prefix,
-            sep,
-            partition_first,
-            cache,
-            cache_key,
-            numpy.max(index) + 1,
+            partnum,
+            key_format,
+            0 if len(index) == 0 else max(0, numpy.max(index) + 1),
+            lazy_cache,
+            lazy_cache_key,
         )
 
         return _form_to_layout_class[type(form), form.index](
@@ -3542,11 +3273,7 @@ def _form_to_layout(
 
     elif isinstance(form, ak.forms.ListForm):
         raw_starts = (
-            container[
-                _buffers_key(
-                    form.form_key, "starts", partition, prefix, sep, partition_first,
-                )
-            ]
+            container[key_format(form_key=fk, attribute="starts", partition=partnum)]
             .reshape(-1)
             .view("u1")
         )
@@ -3554,11 +3281,7 @@ def _form_to_layout(
             raw_starts.view(_index_form_to_dtype[form.starts])
         )
         raw_stops = (
-            container[
-                _buffers_key(
-                    form.form_key, "stops", partition, prefix, sep, partition_first,
-                )
-            ]
+            container[key_format(form_key=fk, attribute="stops", partition=partnum)]
             .reshape(-1)
             .view("u1")
         )
@@ -3566,16 +3289,17 @@ def _form_to_layout(
             raw_stops.view(_index_form_to_dtype[form.stops])
         )
 
+        array_starts = numpy.asarray(starts)
+        array_stops = numpy.asarray(stops)[:len(array_starts)]
+        array_stops = array_stops[array_starts != array_stops]
         content = _form_to_layout(
             form.content,
             container,
-            partition,
-            prefix,
-            sep,
-            partition_first,
-            cache,
-            cache_key,
-            stops[-1],
+            partnum,
+            key_format,
+            0 if len(array_stops) == 0 else numpy.max(array_stops),
+            lazy_cache,
+            lazy_cache_key,
         )
 
         return _form_to_layout_class[type(form), form.starts](
@@ -3584,11 +3308,7 @@ def _form_to_layout(
 
     elif isinstance(form, ak.forms.ListOffsetForm):
         raw_offsets = (
-            container[
-                _buffers_key(
-                    form.form_key, "offsets", partition, prefix, sep, partition_first,
-                )
-            ]
+            container[key_format(form_key=fk, attribute="offsets", partition=partnum)]
             .reshape(-1)
             .view("u1")
         )
@@ -3599,13 +3319,11 @@ def _form_to_layout(
         content = _form_to_layout(
             form.content,
             container,
-            partition,
-            prefix,
-            sep,
-            partition_first,
-            cache,
-            cache_key,
+            partnum,
+            key_format,
             offsets[-1],
+            lazy_cache,
+            lazy_cache_key,
         )
 
         return _form_to_layout_class[type(form), form.offsets](
@@ -3614,11 +3332,7 @@ def _form_to_layout(
 
     elif isinstance(form, ak.forms.NumpyForm):
         raw_array = (
-            container[
-                _buffers_key(
-                    form.form_key, None, partition, prefix, sep, partition_first,
-                )
-            ]
+            container[key_format(form_key=fk, attribute="data", partition=partnum)]
             .reshape(-1)
             .view("u1")
         )
@@ -3646,13 +3360,11 @@ def _form_to_layout(
             content = _form_to_layout(
                 content_form,
                 container,
-                partition,
-                prefix,
-                sep,
-                partition_first,
-                cache,
-                cache_key,
+                partnum,
+                key_format,
                 length,
+                lazy_cache,
+                lazy_cache_key,
             )
             if minlength is None:
                 minlength = len(content)
@@ -3665,16 +3377,17 @@ def _form_to_layout(
         )
 
     elif isinstance(form, ak.forms.RegularForm):
+        if length is None:
+            length = 0
+
         content = _form_to_layout(
             form.content,
             container,
-            partition,
-            prefix,
-            sep,
-            partition_first,
-            cache,
-            cache_key,
+            partnum,
+            key_format,
             length * form.size,
+            lazy_cache,
+            lazy_cache_key,
         )
 
         return ak.layout.RegularArray(
@@ -3683,11 +3396,7 @@ def _form_to_layout(
 
     elif isinstance(form, ak.forms.UnionForm):
         raw_tags = (
-            container[
-                _buffers_key(
-                    form.form_key, "tags", partition, prefix, sep, partition_first,
-                )
-            ]
+            container[key_format(form_key=fk, attribute="tags", partition=partnum)]
             .reshape(-1)
             .view("u1")
         )
@@ -3695,11 +3404,7 @@ def _form_to_layout(
             raw_tags.view(_index_form_to_dtype[form.tags])
         )
         raw_index = (
-            container[
-                _buffers_key(
-                    form.form_key, "index", partition, prefix, sep, partition_first,
-                )
-            ]
+            container[key_format(form_key=fk, attribute="index", partition=partnum)]
             .reshape(-1)
             .view("u1")
         )
@@ -3708,19 +3413,17 @@ def _form_to_layout(
         )
 
         contents = []
-        for i, x in enumerate(form.contents):
-            applicable_indices = numpy.array(index)[numpy.equal(tags, i)]
+        for i, content_form in enumerate(form.contents):
+            mine = numpy.array(index)[numpy.equal(tags, i)]
             contents.append(
                 _form_to_layout(
-                    x,
+                    content_form,
                     container,
-                    partition,
-                    prefix,
-                    sep,
-                    partition_first,
-                    cache,
-                    cache_key,
-                    numpy.max(applicable_indices) + 1,
+                    partnum,
+                    key_format,
+                    0 if len(mine) == 0 else numpy.max(mine) + 1,
+                    lazy_cache,
+                    lazy_cache_key,
                 )
             )
 
@@ -3732,13 +3435,11 @@ def _form_to_layout(
         content = _form_to_layout(
             form.content,
             container,
-            partition,
-            prefix,
-            sep,
-            partition_first,
-            cache,
-            cache_key,
+            partnum,
+            key_format,
             length,
+            lazy_cache,
+            lazy_cache_key,
         )
 
         return ak.layout.UnmaskedArray(content, identities, parameters)
@@ -3747,22 +3448,18 @@ def _form_to_layout(
         args = (
             form.form,
             container,
-            partition,
-            prefix,
-            sep,
-            partition_first,
-            cache,
-            cache_key,
+            partnum,
+            key_format,
             length,
+            lazy_cache,
+            lazy_cache_key,
         )
         generator = ak.layout.ArrayGenerator(
             _form_to_layout, args, form=form.form, length=length,
         )
-        node_cache_key = _buffers_key(
-            form.form.form_key, "virtual", partition, prefix, sep, partition_first,
-        )
+        node_cache_key = key_format(form_key=form.form.form_key, attribute="virtual", partition=partnum)
         return ak.layout.VirtualArray(
-            generator, cache, cache_key + sep + node_cache_key
+            generator, lazy_cache, "{0}({1})".format(lazy_cache_key, node_cache_key)
         )
 
     else:
@@ -3809,6 +3506,354 @@ def modify(form):
     return ak.forms.Form.fromjson(json.dumps(form))
 
 
+def from_buffers(
+    form,
+    length,
+    container,
+    partition_start=0,
+    key_format="part{partition}-{form_key}-{attribute}",
+    lazy=False,
+    lazy_cache="new",
+    lazy_cache_key=None,
+    highlevel=True,
+    behavior=None,
+):
+    u"""
+    Args:
+        form (#ak.forms.Form or str/dict equivalent): The form of the Awkward
+            Array to reconstitute from named buffers.
+        length (int or iterable of int): Length of the array to reconstitute as a
+            non-partitioned array or the lengths (plural) of partitions in a
+            partitioned array.
+        container (Mapping, such as dict): The str \u2192 Python buffers that
+            represent the decomposed Awkward Array. This `container` is only
+            assumed to have a `__getitem__` method that accepts strings as keys.
+        partition_start (int): First (or only) partition number to get from the
+            `container`.
+        key_format (str or callable): Python format string containing
+            `"{partition}"`, `"{form_key}"`, and/or `"{attribute}"` or a function
+            that takes these as keyword arguments and returns a string to use
+            as keys for buffers in the `container`. The `partition` is a
+            partition number (non-negative integer, passed as a string), the
+            `form_key` is a string associated with each node in the Form, and the
+            `attribute` is a hard-coded string representing the buffer's function
+            (e.g. `"data"`, `"offsets"`, `"index"`).
+        lazy (bool): If True, read the array or its partitions on demand (as
+            #ak.layout.VirtualArray, possibly in #ak.partition.PartitionedArray
+            if `length` is an iterable); if False, read all requested data
+            immediately. Any RecordArray child nodes will additionally be
+            read on demand.
+        lazy_cache (None, "new", or MutableMapping): If lazy, pass this
+            cache to the VirtualArrays. If "new", a new dict (keep-forever cache)
+            is created. If None, no cache is used.
+        lazy_cache_key (None or str): If lazy, pass this cache_key to the
+            VirtualArrays. If None, a process-unique string is constructed.
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.layout.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Reconstitutes an Awkward Array from a Form, length(s), and named buffers:
+    an integer `length` gives a single array, an iterable of lengths gives an
+    #ak.partition.IrregularlyPartitionedArray. See also #ak.to_buffers.
+    """
+
+    if isinstance(form, str) or (ak._util.py27 and isinstance(form, ak._util.unicode)):
+        form = ak.forms.Form.fromjson(form)
+    elif isinstance(form, dict):
+        form = ak.forms.Form.fromjson(json.dumps(form))
+
+    if isinstance(key_format, str):
+        key_format = key_format.format  # bound method, not a self-capturing lambda
+
+    hold_cache = None  # NOTE(review): assigned below but never read here; confirm
+    if lazy:
+        form = _wrap_record_with_virtual(form)
+
+        if lazy_cache == "new":
+            hold_cache = ak._util.MappingProxy({})
+            lazy_cache = ak.layout.ArrayCache(hold_cache)
+        elif lazy_cache is not None and not isinstance(
+            lazy_cache, ak.layout.ArrayCache
+        ):
+            hold_cache = ak._util.MappingProxy.maybe_wrap(lazy_cache)
+            if not isinstance(hold_cache, MutableMapping):
+                raise TypeError("lazy_cache must be a MutableMapping")
+            lazy_cache = ak.layout.ArrayCache(hold_cache)
+
+        if lazy_cache_key is None:
+            lazy_cache_key = "ak.from_buffers:{0}".format(_from_buffers_key())
+
+    if length is None or isinstance(length, (numbers.Integral, np.integer)):
+        if length is None:
+            print("FIXME: remember to deprecate")
+
+        args = (form, container, str(partition_start), key_format, length)
+
+        if lazy:
+            generator = ak.layout.ArrayGenerator(
+                _form_to_layout,
+                args + (lazy_cache, lazy_cache_key),
+                form=form,
+                length=length,
+            )
+            out = ak.layout.VirtualArray(generator, lazy_cache, lazy_cache_key)
+
+        else:
+            out = _form_to_layout(*(args + (None, None)))
+
+    elif isinstance(length, Iterable):
+        partitions = []
+        offsets = [0]
+        for part, partlen in enumerate(length):
+            partnum = str(partition_start + part)
+            args = (form, container, partnum, key_format)
+            if lazy:
+                lazy_cache_key_part = "{0}[{1}]".format(lazy_cache_key, partnum)
+                generator = ak.layout.ArrayGenerator(
+                    _form_to_layout,
+                    args + (partlen, lazy_cache, lazy_cache_key_part),
+                    form=form,
+                    length=partlen,  # not length[part]: iterables need not be indexable
+                )
+                partitions.append(
+                    ak.layout.VirtualArray(generator, lazy_cache, lazy_cache_key_part)
+                )
+                offsets.append(offsets[-1] + partlen)
+            else:
+                partitions.append(_form_to_layout(*(args + (partlen, None, None))))
+                offsets.append(offsets[-1] + len(partitions[-1]))
+
+        out = ak.partition.IrregularlyPartitionedArray(partitions, offsets[1:])
+
+    else:
+        raise TypeError(
+            "length must be an integer or an iterable of integers, not "
+            + repr(length)
+            + ak._util.exception_suffix(__file__)
+        )
+
+    if highlevel:
+        return ak._util.wrap(out, behavior)
+    else:
+        return out
+
+
+def to_arrayset(
+    array,
+    container=None,
+    partition=None,
+    prefix=None,
+    node_format="node{0}",
+    partition_format="part{0}",
+    sep="-",
+    partition_first=False,
+):
+    u"""
+    Args:
+        array: Data to decompose into an arrayset.
+        container (None or MutableMapping): The str \u2192 NumPy arrays (or
+            Python buffers) that represent the decomposed Awkward Array. This
+            `container` is only assumed to have a `__setitem__` method that
+            accepts strings as keys.
+        partition (None or non-negative int): If None and `array` is not
+            partitioned, keys written to the container have no reference to
+            partitioning; if an integer and `array` is not partitioned, keys
+            use this as their partition number; if `array` is partitioned, the
+            `partition` argument must be None and keys are written with the
+            array's own internal partition numbers.
+        prefix (None or str): If None, keys only contain node and partition
+            information; if a string, keys are all prepended by `prefix + sep`.
+        node_format (str or callable): Python format string or function
+            (returning str) of the node part of keys written to the container
+            and the `form_key` values in the output Form. Its only argument
+            (`{0}` in the format string) is the node number, unique within the
+            `array`.
+        partition_format (str or callable): Python format string or function
+            (returning str) of the partition part of keys written to the
+            container (if any). Its only argument (`{0}` in the format string)
+            is the partition number.
+        sep (str): Separates the prefix, node part, array attribute (e.g.
+            `"starts"`, `"stops"`, `"mask"`), and partition part of the
+            keys written to the container.
+        partition_first (bool): If True, the partition part appears immediately
+            after the prefix (if any); if False, the partition part appears
+            at the end of the keys. This can be relevant if the `container`
+            is sorted or lookup performance depends on alphabetical order.
+
+    **Deprecated:** this will be removed in `awkward>=1.1.0` after January 1,
+    2021. Use #ak.to_buffers instead: the arguments and return values have changed.
+
+    Decomposes an Awkward Array into a Form and a collection of arrays, so
+    that data can be losslessly written to file formats and storage devices
+    that only understand named arrays (or binary blobs).
+
+    This function returns a 3-tuple:
+
+        (form, container, num_partitions)
+
+    where the `form` is a #ak.forms.Form (which can be converted to JSON
+    with `tojson`), the `container` is either the MutableMapping you passed in
+    or a new dict containing the NumPy arrays, and `num_partitions` is None
+    if `array` was not partitioned or the number of partitions if it was.
+
+    These are also the first three arguments of #ak.from_arrayset, so a full
+    round-trip is
+
+        >>> reconstituted = ak.from_arrayset(*ak.to_arrayset(original))
+
+    The `container` argument lets you specify your own MutableMapping, which
+    might be an interface to some storage format or device (e.g. h5py). It's
+    okay if the `container` drops NumPy's `dtype` and `shape` information,
+    leaving raw bytes, since `dtype` and `shape` can be reconstituted from
+    the #ak.forms.NumpyForm.
+
+    The `partition` argument lets you fill the `container` one partition at a
+    time using unpartitioned arrays.
+
+    The rest of the arguments determine the format of the keys written to the
+    `container` (which might be restrictive if it represents a storage device).
+
+    Here is a simple example:
+
+        >>> original = ak.Array([[1, 2, 3], [], [4, 5]])
+        >>> form, container, num_partitions = ak.to_arrayset(original)
+        >>> form
+        {
+            "class": "ListOffsetArray64",
+            "offsets": "i64",
+            "content": {
+                "class": "NumpyArray",
+                "itemsize": 8,
+                "format": "l",
+                "primitive": "int64",
+                "form_key": "node1"
+            },
+            "form_key": "node0"
+        }
+        >>> container
+        {'node0-offsets': array([0, 3, 3, 5], dtype=int64),
+         'node1': array([1, 2, 3, 4, 5])}
+        >>> print(num_partitions)
+        None
+
+    which may be read back with
+
+        >>> ak.from_arrayset(form, container)
+        <Array [[1, 2, 3], [], [4, 5]] type='3 * var * int64'>
+
+    (the third argument of #ak.from_arrayset defaults to None).
+
+    Here is an example of building up a partitioned array:
+
+        >>> container = {}
+        >>> form, _, _ = ak.to_arrayset(ak.Array([[1, 2, 3], [], [4, 5]]), container, 0)
+        >>> form, _, _ = ak.to_arrayset(ak.Array([[6, 7, 8, 9]]), container, 1)
+        >>> form, _, _ = ak.to_arrayset(ak.Array([[], [], []]), container, 2)
+        >>> form, _, _ = ak.to_arrayset(ak.Array([[10]]), container, 3)
+        >>> form
+        {
+            "class": "ListOffsetArray64",
+            "offsets": "i64",
+            "content": {
+                "class": "NumpyArray",
+                "itemsize": 8,
+                "format": "l",
+                "primitive": "int64",
+                "form_key": "node1"
+            },
+            "form_key": "node0"
+        }
+        >>> container
+        {'node0-offsets-part0': array([0, 3, 3, 5], dtype=int64),
+         'node1-part0': array([1, 2, 3, 4, 5]),
+         'node0-offsets-part1': array([0, 4], dtype=int64),
+         'node1-part1': array([6, 7, 8, 9]),
+         'node0-offsets-part2': array([0, 0, 0, 0], dtype=int64),
+         'node1-part2': array([], dtype=float64),
+         'node0-offsets-part3': array([0, 1], dtype=int64),
+         'node1-part3': array([10])}
+
+    The object returned by #ak.from_arrayset is now a partitioned array:
+
+        >>> ak.from_arrayset(form, container, 4)
+        <Array [[1, 2, 3], [], [4, ... [], [], [10]] type='8 * var * int64'>
+        >>> ak.partitions(ak.from_arrayset(form, container, 4))
+        [3, 1, 3, 1]
+
+    See also #ak.from_arrayset.
+    """
+
+    print("FIXME: remember to deprecate")
+
+    layout = to_layout(array, allow_record=False, allow_other=False)
+
+    if isinstance(layout, ak.partition.PartitionedArray):
+        show_partition = True
+        if partition is not None:
+            raise ValueError(
+                "array is partitioned; an explicit 'partition' should not be "
+                "assigned" + ak._util.exception_suffix(__file__)
+            )
+    else:
+        if partition is None:
+            show_partition = False
+        else:
+            show_partition = True
+
+    if partition is None:  # to_buffers always needs a starting partition number
+        partition_start = 0
+    else:
+        partition_start = partition
+
+    def form_key(**v):  # NOTE(review): `node_format` is never consulted; confirm
+        return "node{id}".format(**v)
+
+    def key_format(**v):  # NOTE(review): `partition_format` is never consulted; confirm
+        v["sep"] = sep
+        if prefix is None:
+            v["prefix"] = ""
+        else:
+            v["prefix"] = prefix + sep
+
+        if not show_partition:
+            if v["attribute"] == "data":  # "data" keys carry no attribute suffix
+                return "{prefix}{form_key}".format(**v)
+            else:
+                return "{prefix}{form_key}{sep}{attribute}".format(**v)
+
+        elif partition_first:
+            if v["attribute"] == "data":
+                return "{prefix}part{partition}{sep}{form_key}".format(**v)
+            else:
+                return "{prefix}part{partition}{sep}{form_key}{sep}{attribute}".format(
+                    **v
+                )
+
+        else:
+            if v["attribute"] == "data":
+                return "{prefix}{form_key}{sep}part{partition}".format(**v)
+            else:
+                return "{prefix}{form_key}{sep}{attribute}{sep}part{partition}".format(
+                    **v
+                )
+
+    form, length, container = to_buffers(
+        layout,
+        container=container,
+        partition_start=partition_start,
+        form_key=form_key,
+        key_format=key_format,
+    )
+
+    if isinstance(length, (numbers.Integral, np.integer)):  # presumably unpartitioned
+        num_partitions = None
+    else:
+        num_partitions = len(length)
+
+    return form, container, num_partitions
+
+
 def from_arrayset(
     form,
     container,
@@ -3872,6 +3917,9 @@ def from_arrayset(
         behavior (bool): Custom #ak.behavior for the output array, if
             high-level.
 
+    **Deprecated:** this will be removed in `awkward>=1.1.0` after January 1,
+    2021. Use #ak.from_buffers instead: the arguments have changed.
+
     Reconstructs an Awkward Array from a Form and a collection of arrays, so
     that data can be losslessly read from file formats and storage devices that
     only understand named arrays (or binary blobs).
@@ -3903,15 +3951,7 @@ def from_arrayset(
     See #ak.to_arrayset for examples.
     """
 
-    if isinstance(form, str) or (ak._util.py27 and isinstance(form, ak._util.unicode)):
-        form = ak.forms.Form.fromjson(form)
-    elif isinstance(form, dict):
-        form = ak.forms.Form.fromjson(json.dumps(form))
-
-    if prefix is None:
-        prefix = ""
-    else:
-        prefix = prefix + sep
+    print("FIXME: remember to deprecate")
 
     if isinstance(partition_format, str) or (
         ak._util.py27 and isinstance(partition_format, ak._util.unicode)
@@ -3921,102 +3961,90 @@ def from_arrayset(
         def partition_format(x):
             return tmp2.format(x)
 
-    hold_cache = None
-    if lazy:
-        form = _wrap_record_with_virtual(form)
-
-        if lazy_cache == "new":
-            hold_cache = ak._util.MappingProxy({})
-            lazy_cache = ak.layout.ArrayCache(hold_cache)
-        elif lazy_cache == "attach":
-            raise TypeError("lazy_cache must be a MutableMapping")
-            hold_cache = ak._util.MappingProxy({})
-            lazy_cache = ak.layout.ArrayCache(hold_cache)
-        elif lazy_cache is not None and not isinstance(
-            lazy_cache, ak.layout.ArrayCache
-        ):
-            hold_cache = ak._util.MappingProxy.maybe_wrap(lazy_cache)
-            if not isinstance(hold_cache, MutableMapping):
-                raise TypeError("lazy_cache must be a MutableMapping")
-            lazy_cache = ak.layout.ArrayCache(hold_cache)
-
-        if lazy_cache_key is None:
-            lazy_cache_key = "ak.from_arrayset:{0}".format(_from_buffers_key())
-
     if num_partitions is None:
-        args = (form, container, None, prefix, sep, partition_first)
+        show_partition = False
 
-        if lazy:
-            if not isinstance(lazy_lengths, numbers.Integral):
+        if lazy_lengths is None:
+            if lazy:
                 raise TypeError(
                     "for lazy=True and num_partitions=None, lazy_lengths "
                     "must be an integer, not "
                     + repr(lazy_lengths)
                     + ak._util.exception_suffix(__file__)
                 )
+            length = None
 
-            generator = ak.layout.ArrayGenerator(
-                _form_to_layout,
-                args + (lazy_cache, lazy_cache_key, lazy_lengths),
-                form=form,
-                length=lazy_lengths,
-            )
-
-            out = ak.layout.VirtualArray(generator, lazy_cache, lazy_cache_key)
+        elif isinstance(lazy_lengths, (numbers.Integral, np.integer)):
+            length = lazy_lengths
 
         else:
-            out = _form_to_layout(*args)
+            raise TypeError(
+                "for num_partitions=None, lazy_lengths "
+                "must be None or an integer, not "
+                + repr(lazy_lengths)
+                + ak._util.exception_suffix(__file__)
+            )
 
     else:
-        if lazy:
-            if isinstance(lazy_lengths, numbers.Integral):
-                lazy_lengths = [lazy_lengths] * num_partitions
-            elif (
-                isinstance(lazy_lengths, Iterable)
-                and len(lazy_lengths) == num_partitions
-                and all(isinstance(x, numbers.Integral) for x in lazy_lengths)
-            ):
-                pass
-            else:
+        show_partition = True
+
+        if lazy_lengths is None:
+            if lazy:
                 raise TypeError(
-                    "for lazy=True, lazy_lengths must be an integer or "
-                    "iterable of 'num_partitions' integers, not "
+                    "for lazy=True and isinstance(num_partitions, int), lazy_lengths "
+                    "must be an iterable of 'num_partitions' integers, not "
                     + repr(lazy_lengths)
                     + ak._util.exception_suffix(__file__)
                 )
+            length = [None] * num_partitions
 
-        partitions = []
-        offsets = [0]
+        elif isinstance(lazy_lengths, (numbers.Integral, np.integer)):
+            length = [lazy_lengths] * num_partitions
 
-        for part in range(num_partitions):
-            p = partition_format(part)
-            args = (form, container, p, prefix, sep, partition_first)
+        else:
+            length = lazy_lengths
 
-            if lazy:
-                cache_key = "{0}[{1}]".format(lazy_cache_key, part)
+    def key_format(**v):
+        v["sep"] = sep
+        if prefix is None:
+            v["prefix"] = ""
+        else:
+            v["prefix"] = prefix + sep
 
-                generator = ak.layout.ArrayGenerator(
-                    _form_to_layout,
-                    args + (lazy_cache, cache_key, lazy_lengths[part]),
-                    form=form,
-                    length=lazy_lengths[part],
-                )
+        if not show_partition:
+            if v["attribute"] == "data":
+                return "{prefix}{form_key}".format(**v)
+            else:
+                return "{prefix}{form_key}{sep}{attribute}".format(**v)
 
-                partitions.append(
-                    ak.layout.VirtualArray(generator, lazy_cache, cache_key)
+        elif partition_first:
+            if v["attribute"] == "data":
+                return "{prefix}part{partition}{sep}{form_key}".format(**v)
+            else:
+                return "{prefix}part{partition}{sep}{form_key}{sep}{attribute}".format(
+                    **v
                 )
-                offsets.append(offsets[-1] + lazy_lengths[part])
 
+        else:
+            if v["attribute"] == "data":
+                return "{prefix}{form_key}{sep}part{partition}".format(**v)
             else:
-                partitions.append(_form_to_layout(*args))
-                offsets.append(offsets[-1] + len(partitions[-1]))
-
-        out = ak.partition.IrregularlyPartitionedArray(partitions, offsets[1:])
+                return "{prefix}{form_key}{sep}{attribute}{sep}part{partition}".format(
+                    **v
+                )
 
-    if highlevel:
-        return ak._util.wrap(out, behavior)
-    else:
-        return out
+    return from_buffers(
+        form,
+        length,
+        container,
+        partition_start=0,
+        key_format=key_format,
+        lazy=lazy,
+        lazy_cache=lazy_cache,
+        lazy_cache_key=lazy_cache_key,
+        highlevel=highlevel,
+        behavior=behavior,
+    )
 
 
 def to_pandas(
diff --git a/tests/test_0348-form-keys.py b/tests/test_0348-form-keys.py
index e14694f9e2..7334676452 100644
--- a/tests/test_0348-form-keys.py
+++ b/tests/test_0348-form-keys.py
@@ -206,7 +206,6 @@ def test_record():
     ).tolist() == {"x": 2.2, "y": [1, 2]}
 
 
-@pytest.mark.skip(reason="FIXME: arrayset has to be given lengths; it's required")
 def test_regulararray():
     content = ak.Array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]).layout
     regulararray = ak.layout.RegularArray(content, 3, zeros_length=0)
diff --git a/tests/test_0384-lazy-arrayset.py b/tests/test_0384-lazy-arrayset.py
index 43991c8ec2..077e6b6e7b 100644
--- a/tests/test_0384-lazy-arrayset.py
+++ b/tests/test_0384-lazy-arrayset.py
@@ -89,7 +89,7 @@ def test_lazy_arrayset():
 
     assert ak.to_list(ak.num(out.listcollection)) == [3, 3, 3]
     assert set(canary.ops) == {("get", "kitty-node1-offsets")}
-    assert set(cache) == {"hello", "hello-kitty-node1-virtual"}
+    assert set(cache) == {"hello", "hello(kitty-node1-virtual)"}
     canary.ops = []
     cache.clear()
 
@@ -107,16 +107,16 @@ def test_lazy_arrayset():
     }
     assert set(cache) == {
         "hello",
-        "hello-kitty-node11-virtual",
-        "hello-kitty-node13-virtual",
-        "hello-kitty-node16-virtual",
+        "hello(kitty-node11-virtual)",
+        "hello(kitty-node13-virtual)",
+        "hello(kitty-node16-virtual)",
     }
     canary.ops = []
     cache.clear()
 
     assert ak.to_list(out.masked) == [None, 4, 4]
     assert set(canary.ops) == {("get", "kitty-node17-index"), ("get", "kitty-node18")}
-    assert set(cache) == {"hello", "hello-kitty-node17-virtual"}
+    assert set(cache) == {"hello", "hello(kitty-node17-virtual)"}
     canary.ops = []
     cache.clear()
 

From 92495f7fdbf41472f7d369d43adddf21021b2cf7 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Thu, 10 Dec 2020 21:45:38 -0600
Subject: [PATCH 4/8] All the deprecation messages are in place.

---
 src/awkward/operations/convert.py | 117 ++++++++++++++++++++++++------
 tests/test_0348-form-keys.py      |  32 ++++++++
 tests/test_0384-lazy-arrayset.py  |   4 +
 3 files changed, 129 insertions(+), 24 deletions(-)

diff --git a/src/awkward/operations/convert.py b/src/awkward/operations/convert.py
index 821d694fea..86b2ca6c0d 100644
--- a/src/awkward/operations/convert.py
+++ b/src/awkward/operations/convert.py
@@ -2734,7 +2734,7 @@ def to_buffers(
             `attribute` is a hard-coded string representing the buffer's function
             (e.g. `"data"`, `"offsets"`, `"index"`).
 
-    Decomposes an Awkward Array into a Form and a collection of Python buffers,
+    Decomposes an Awkward Array into a Form and a collection of memory buffers,
     so that data can be losslessly written to file formats and storage devices
     that only map names to binary blobs (such as a filesystem directory).
 
@@ -2799,9 +2799,13 @@ def to_buffers(
         >>> container = {}
         >>> lengths = []
         >>> form, length, _ = ak.to_buffers(ak.Array([[1, 2, 3], [], [4, 5]]), container, 0)
+        >>> lengths.append(length)
         >>> form, length, _ = ak.to_buffers(ak.Array([[6, 7, 8, 9]]), container, 1)
+        >>> lengths.append(length)
         >>> form, length, _ = ak.to_buffers(ak.Array([[], [], []]), container, 2)
+        >>> lengths.append(length)
         >>> form, length, _ = ak.to_buffers(ak.Array([[10]]), container, 3)
+        >>> lengths.append(length)
         >>> form
         {
             "class": "ListOffsetArray64",
@@ -2815,6 +2819,8 @@ def to_buffers(
             },
             "form_key": "node0"
         }
+        >>> lengths
+        [3, 1, 3, 1]
         >>> container
         {'part0-node0-offsets': array([0, 3, 3, 5], dtype=int64),
          'part0-node1-data': array([1, 2, 3, 4, 5]),
@@ -2857,10 +2863,24 @@ def index_form(index):
             )
 
     if isinstance(form_key, str):
-        form_key = lambda **v: form_key.format(**v)  # noqa: E731
+
+        def generate_form_key(form_key):
+            def fk(**v):
+                return form_key.format(**v)
+
+            return fk
+
+        form_key = generate_form_key(form_key)
 
     if isinstance(key_format, str):
-        key_format = lambda **v: key_format.format(**v)  # noqa: E731
+
+        def generate_key_format(key_format):
+            def kf(**v):
+                return key_format.format(**v)
+
+            return kf
+
+        key_format = generate_key_format(key_format)
 
     num_form_keys = [0]
 
@@ -2980,9 +3000,7 @@ def fill(layout, part):
             ),
         ):
             fk = form_key(id=str(key_index), layout=layout)
-            key = key_format(
-                form_key=fk, attribute="offsets", partition=str(part)
-            )
+            key = key_format(form_key=fk, attribute="offsets", partition=str(part))
             container[key] = little_endian(numpy.asarray(layout.offsets))
             return ak.forms.ListOffsetForm(
                 index_form(layout.offsets),
@@ -3111,13 +3129,7 @@ def fill(layout, part):
 
 
 def _form_to_layout(
-    form,
-    container,
-    partnum,
-    key_format,
-    length,
-    lazy_cache,
-    lazy_cache_key,
+    form, container, partnum, key_format, length, lazy_cache, lazy_cache_key,
 ):
     global _index_form_to_dtype, _index_form_to_index, _form_to_layout_class
 
@@ -3290,7 +3302,7 @@ def _form_to_layout(
         )
 
         array_starts = numpy.asarray(starts)
-        array_stops = numpy.asarray(stops)[:len(array_starts)]
+        array_stops = numpy.asarray(stops)[: len(array_starts)]
         array_stops = array_stops[array_starts != array_stops]
         content = _form_to_layout(
             form.content,
@@ -3372,8 +3384,10 @@ def _form_to_layout(
                 minlength = min(minlength, len(content))
             contents.append(content)
 
+        if length is None:
+            length = minlength
         return ak.layout.RecordArray(
-            contents, None if form.istuple else keys, minlength, identities, parameters,
+            contents, None if form.istuple else keys, length, identities, parameters,
         )
 
     elif isinstance(form, ak.forms.RegularForm):
@@ -3457,7 +3471,9 @@ def _form_to_layout(
         generator = ak.layout.ArrayGenerator(
             _form_to_layout, args, form=form.form, length=length,
         )
-        node_cache_key = key_format(form_key=form.form.form_key, attribute="virtual", partition=partnum)
+        node_cache_key = key_format(
+            form_key=form.form.form_key, attribute="virtual", partition=partnum
+        )
         return ak.layout.VirtualArray(
             generator, lazy_cache, "{0}({1})".format(lazy_cache_key, node_cache_key)
         )
@@ -3552,6 +3568,30 @@ def from_buffers(
             a low-level #ak.layout.Content subclass.
         behavior (bool): Custom #ak.behavior for the output array, if
             high-level.
+
+    Reconstitutes an Awkward Array from a Form, length, and a collection of memory
+    buffers, so that data can be losslessly read from file formats and storage
+    devices that only map names to binary blobs (such as a filesystem directory).
+
+    The first three arguments of this function are the return values of
+    #ak.to_buffers, so a full round-trip is
+
+        >>> reconstituted = ak.from_buffers(*ak.to_buffers(original))
+
+    The `container` argument lets you specify your own Mapping, which might be
+    an interface to some storage format or device (e.g. h5py). It's okay if
+    the `container` dropped NumPy's `dtype` and `shape` information, leaving
+    raw bytes, since `dtype` and `shape` can be reconstituted from the
+    #ak.forms.NumpyForm.
+
+    The `key_format` should be the same as the one used in #ak.to_buffers.
+
+    The arguments that begin with `lazy_` are only needed if `lazy` is True.
+    The `lazy_cache` and `lazy_cache_key` determine how the array or its
+    partitions are cached after being read from the `container` (in a no-eviction
+    dict attached to the output #ak.Array as `cache` if not specified).
+
+    See #ak.to_buffers for examples.
     """
 
     if isinstance(form, str) or (ak._util.py27 and isinstance(form, ak._util.unicode)):
@@ -3560,7 +3600,14 @@ def from_buffers(
         form = ak.forms.Form.fromjson(json.dumps(form))
 
     if isinstance(key_format, str):
-        key_format = lambda **v: key_format.format(**v)  # noqa: E731
+
+        def generate_key_format(key_format):
+            def kf(**v):
+                return key_format.format(**v)
+
+            return kf
+
+        key_format = generate_key_format(key_format)
 
     hold_cache = None
     if lazy:
@@ -3582,7 +3629,14 @@ def from_buffers(
 
     if length is None or isinstance(length, (numbers.Integral, np.integer)):
         if length is None:
-            print("FIXME: remember to deprecate")
+            ak._util.deprecate(
+                TypeError(
+                    "length must be an integer or an iterable of integers"
+                    + ak._util.exception_suffix(__file__)
+                ),
+                "1.1.0",
+                "January 1, 2021",
+            )
 
         args = (form, container, str(partition_start), key_format, length)
 
@@ -3681,8 +3735,9 @@ def to_arrayset(
             at the end of the keys. This can be relevant if the `container`
             is sorted or lookup performance depends on alphabetical order.
 
-    **Deprecated:** this will be removed in `awkward>=1.1.0` after January 1,
-    2021. Use #ak.to_buffers instead: the arguments and return values have changed.
+    **Deprecated:** This will be removed in `awkward>=1.1.0` (target date:
+    January 1, 2021). Use #ak.to_buffers instead: the arguments and return
+    values have changed.
 
     Decomposes an Awkward Array into a Form and a collection of arrays, so
     that data can be losslessly written to file formats and storage devices
@@ -3784,7 +3839,14 @@ def to_arrayset(
     See also #ak.from_arrayset.
     """
 
-    print("FIXME: remember to deprecate")
+    ak._util.deprecate(
+        TypeError(
+            "ak.to_arrayset is deprecated; use ak.to_buffers instead"
+            + ak._util.exception_suffix(__file__)
+        ),
+        "1.1.0",
+        "January 1, 2021",
+    )
 
     layout = to_layout(array, allow_record=False, allow_other=False)
 
@@ -3917,8 +3979,8 @@ def from_arrayset(
         behavior (bool): Custom #ak.behavior for the output array, if
             high-level.
 
-    **Deprecated:** this will be removed in `awkward>=1.1.0` after January 1,
-    2021. Use #ak.from_buffers instead: the arguments have changed.
+    **Deprecated:** This will be removed in `awkward>=1.1.0` (target date:
+    January 1, 2021). Use #ak.from_buffers instead: the arguments have changed.
 
     Reconstructs an Awkward Array from a Form and a collection of arrays, so
     that data can be losslessly read from file formats and storage devices that
@@ -3951,7 +4013,14 @@ def from_arrayset(
     See #ak.to_arrayset for examples.
     """
 
-    print("FIXME: remember to deprecate")
+    ak._util.deprecate(
+        TypeError(
+            "ak.from_arrayset is deprecated; use ak.from_buffers instead"
+            + ak._util.exception_suffix(__file__)
+        ),
+        "1.1.0",
+        "January 1, 2021",
+    )
 
     if isinstance(partition_format, str) or (
         ak._util.py27 and isinstance(partition_format, ak._util.unicode)
diff --git a/tests/test_0348-form-keys.py b/tests/test_0348-form-keys.py
index 7334676452..13fe5f5f30 100644
--- a/tests/test_0348-form-keys.py
+++ b/tests/test_0348-form-keys.py
@@ -15,6 +15,8 @@
 
 
 def test_numpyarray():
+    ak.deprecations_as_errors = False
+
     assert ak.from_arrayset(*ak.to_arrayset([1, 2, 3, 4, 5])).tolist() == [
         1,
         2,
@@ -32,6 +34,8 @@ def test_numpyarray():
 
 
 def test_listoffsetarray():
+    ak.deprecations_as_errors = False
+
     assert ak.from_arrayset(*ak.to_arrayset([[1, 2, 3], [], [4, 5]])).tolist() == [
         [1, 2, 3],
         [],
@@ -49,6 +53,8 @@ def test_listoffsetarray():
 
 
 def test_listarray():
+    ak.deprecations_as_errors = False
+
     listoffsetarray = ak.Array([[1, 2, 3], [], [4, 5]]).layout
     listarray = ak.layout.ListArray64(
         listoffsetarray.starts, listoffsetarray.stops, listoffsetarray.content
@@ -66,6 +72,8 @@ def test_listarray():
 
 
 def test_indexedoptionarray():
+    ak.deprecations_as_errors = False
+
     assert ak.from_arrayset(*ak.to_arrayset([1, 2, 3, None, None, 5])).tolist() == [
         1,
         2,
@@ -80,6 +88,8 @@ def test_indexedoptionarray():
 
 
 def test_indexedarray():
+    ak.deprecations_as_errors = False
+
     content = ak.Array([0.0, 1.1, 2.2, 3.3, 4.4]).layout
     index = ak.layout.Index64(np.array([3, 1, 1, 4, 2], dtype=np.int64))
     indexedarray = ak.layout.IndexedArray64(index, content)
@@ -100,6 +110,8 @@ def test_indexedarray():
 
 
 def test_emptyarray():
+    ak.deprecations_as_errors = False
+
     assert ak.from_arrayset(*ak.to_arrayset([])).tolist() == []
     assert ak.from_arrayset(*ak.to_arrayset([[], [], []])).tolist() == [[], [], []]
 
@@ -112,6 +124,8 @@ def test_emptyarray():
 
 
 def test_bytemaskedarray():
+    ak.deprecations_as_errors = False
+
     content = ak.Array([0.0, 1.1, 2.2, 3.3, 4.4]).layout
     mask = ak.layout.Index8(np.array([False, True, True, False, False], dtype=np.int8))
     bytemaskedarray = ak.layout.ByteMaskedArray(mask, content, True)
@@ -132,6 +146,8 @@ def test_bytemaskedarray():
 
 
 def test_bitmaskedarray():
+    ak.deprecations_as_errors = False
+
     content = ak.Array([0.0, 1.1, 2.2, 3.3, 4.4]).layout
     mask = ak.layout.IndexU8(
         np.packbits(np.array([False, True, True, False, False], dtype=np.int8))
@@ -154,6 +170,8 @@ def test_bitmaskedarray():
 
 
 def test_recordarray():
+    ak.deprecations_as_errors = False
+
     assert ak.from_arrayset(
         *ak.to_arrayset([(1.1, [1]), (2.2, [1, 2]), (3.3, [1, 2, 3])])
     ).tolist() == [(1.1, [1]), (2.2, [1, 2]), (3.3, [1, 2, 3])]
@@ -189,6 +207,8 @@ def test_recordarray():
 
 
 def test_record():
+    ak.deprecations_as_errors = False
+
     assert pickle.loads(
         pickle.dumps(ak.Record({"x": 2.2, "y": [1, 2]}), -1)
     ).tolist() == {"x": 2.2, "y": [1, 2]}
@@ -207,6 +227,8 @@ def test_record():
 
 
 def test_regulararray():
+    ak.deprecations_as_errors = False
+
     content = ak.Array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]).layout
     regulararray = ak.layout.RegularArray(content, 3, zeros_length=0)
     assert ak.from_arrayset(*ak.to_arrayset(regulararray)).tolist() == [
@@ -224,6 +246,8 @@ def test_regulararray():
 
 
 def test_unionarray():
+    ak.deprecations_as_errors = False
+
     assert ak.from_arrayset(*ak.to_arrayset([[1, 2, 3], [], 4, 5])).tolist() == [
         [1, 2, 3],
         [],
@@ -239,6 +263,8 @@ def test_unionarray():
 
 
 def test_unmaskedarray():
+    ak.deprecations_as_errors = False
+
     content = ak.Array([1, 2, 3, 4, 5]).layout
     unmaskedarray = ak.layout.UnmaskedArray(content)
     assert ak.from_arrayset(*ak.to_arrayset(unmaskedarray)).tolist() == [1, 2, 3, 4, 5]
@@ -252,6 +278,8 @@ def test_unmaskedarray():
 
 
 def test_partitioned():
+    ak.deprecations_as_errors = False
+
     array = ak.repartition(ak.Array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), 3)
 
     form, container, num_partitions = ak.to_arrayset(array, partition_first=True)
@@ -298,6 +326,8 @@ def test_partitioned():
 
 
 def test_lazy():
+    ak.deprecations_as_errors = False
+
     array = ak.Array([1, 2, 3, 4, 5])
 
     form, container, num_partitions = ak.to_arrayset(array)
@@ -308,6 +338,8 @@ def test_lazy():
 
 
 def test_lazy_partitioned():
+    ak.deprecations_as_errors = False
+
     array = ak.repartition(ak.Array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), 3)
     form, container, num_partitions = ak.to_arrayset(array)
     assert num_partitions == 4
diff --git a/tests/test_0384-lazy-arrayset.py b/tests/test_0384-lazy-arrayset.py
index 077e6b6e7b..beefa51456 100644
--- a/tests/test_0384-lazy-arrayset.py
+++ b/tests/test_0384-lazy-arrayset.py
@@ -22,6 +22,8 @@ def __setitem__(self, key, value):
 
 
 def test_lazy_arrayset():
+    ak.deprecations_as_errors = False
+
     array = ak.from_json(
         """
     [
@@ -122,6 +124,8 @@ def test_lazy_arrayset():
 
 
 def test_longer_than_expected():
+    ak.deprecations_as_errors = False
+
     array = ak.Array(
         ak.layout.ListOffsetArray64(
             ak.layout.Index64([0, 2, 4]),

From acc21376edbc4b24ded265df7bc4557433f94bd4 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 11 Dec 2020 09:46:30 -0600
Subject: [PATCH 5/8] Pickle uses the new to_buffers/from_buffers, but old
 pickle files can still be read.

---
 src/awkward/_util.py              | 28 +++++++++++++++++++++++++
 src/awkward/highlevel.py          | 34 +++++++++++++++++++++++--------
 src/awkward/operations/convert.py |  8 --------
 3 files changed, 53 insertions(+), 17 deletions(-)

diff --git a/src/awkward/_util.py b/src/awkward/_util.py
index bf1ec4d56d..3c1ca31b27 100644
--- a/src/awkward/_util.py
+++ b/src/awkward/_util.py
@@ -1697,3 +1697,31 @@ def union_to_record(unionarray, anonymous):
             )
 
         return ak.layout.RecordArray(all_fields, all_names, len(unionarray))
+
+
+def adjust_old_pickle(form, container, num_partitions, behavior):  # old-format pickles
+    def key_format(**v):  # buffer names as laid out in the old pickle's container
+        if num_partitions is None:  # unpartitioned: no "part<N>" suffix in keys
+            if v["attribute"] == "data":  # data buffers sit under the bare form_key
+                return "{form_key}".format(**v)
+            else:
+                return "{form_key}-{attribute}".format(**v)
+
+        else:  # partitioned: every key ends in "-part<partition>"
+            if v["attribute"] == "data":
+                return "{form_key}-part{partition}".format(**v)
+            else:
+                return "{form_key}-{attribute}-part{partition}".format(**v)
+
+    return ak.operations.convert.from_buffers(
+        form,
+        None,  # no length in old pickles; from_buffers deprecates None
+        container,
+        partition_start=0,  # NOTE(review): with length=None, only part0 is read?
+        key_format=key_format,
+        lazy=False,
+        lazy_cache="new",
+        lazy_cache_key=None,
+        highlevel=False,  # callers expect a low-level layout
+        behavior=behavior,
+    )
diff --git a/src/awkward/highlevel.py b/src/awkward/highlevel.py
index a75a245108..5996ad6505 100644
--- a/src/awkward/highlevel.py
+++ b/src/awkward/highlevel.py
@@ -1386,16 +1386,24 @@ def numba_type(self):
         return numba.typeof(self._numbaview)
 
     def __getstate__(self):
-        form, container, num_partitions = ak.to_arrayset(self)
+        form, length, container = ak.operations.convert.to_buffers(self._layout)
         if self._behavior is ak.behavior:
             behavior = None
         else:
             behavior = self._behavior
-        return form, container, num_partitions, behavior
+        return form, length, container, behavior
 
     def __setstate__(self, state):
-        form, container, num_partitions, behavior = state
-        layout = ak.from_arrayset(form, container, num_partitions, highlevel=False)
+        if isinstance(state[1], dict):
+            form, container, num_partitions, behavior = state
+            layout = ak._util.adjust_old_pickle(
+                form, container, num_partitions, behavior
+            )
+        else:
+            form, length, container, behavior = state
+            layout = ak.operations.convert.from_buffers(
+                form, length, container, highlevel=False, behavior=behavior
+            )
         if self.__class__ is Array:
             self.__class__ = ak._util.arrayclass(layout, behavior)
         self.layout = layout
@@ -1975,17 +1983,25 @@ def numba_type(self):
         return numba.typeof(self._numbaview)
 
     def __getstate__(self):
-        form, container, num_partitions = ak.to_arrayset(self._layout.array)
+        form, length, container = ak.operations.convert.to_buffers(self._layout.array)
         if self._behavior is ak.behavior:
             behavior = None
         else:
             behavior = self._behavior
-        return form, container, num_partitions, behavior, self._layout.at
+        return form, length, container, behavior, self._layout.at
 
     def __setstate__(self, state):
-        form, container, num_partitions, behavior, at = state
-        array = ak.from_arrayset(form, container, num_partitions, highlevel=False)
-        layout = ak.layout.Record(array, at)
+        if isinstance(state[1], dict):
+            form, container, num_partitions, behavior, at = state
+            layout = ak._util.adjust_old_pickle(
+                form, container, num_partitions, behavior
+            )
+        else:
+            form, length, container, behavior, at = state
+            layout = ak.operations.convert.from_buffers(
+                form, length, container, highlevel=False, behavior=behavior
+            )
+        layout = ak.layout.Record(layout, at)
         if self.__class__ is Record:
             self.__class__ = ak._util.recordclass(layout, behavior)
         self.layout = layout
diff --git a/src/awkward/operations/convert.py b/src/awkward/operations/convert.py
index 86b2ca6c0d..0f355051fa 100644
--- a/src/awkward/operations/convert.py
+++ b/src/awkward/operations/convert.py
@@ -4022,14 +4022,6 @@ def from_arrayset(
         "January 1, 2021",
     )
 
-    if isinstance(partition_format, str) or (
-        ak._util.py27 and isinstance(partition_format, ak._util.unicode)
-    ):
-        tmp2 = partition_format
-
-        def partition_format(x):
-            return tmp2.format(x)
-
     if num_partitions is None:
         show_partition = False
 

From 8cc01334d206254891d0d8141e37ba0d03675668 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 11 Dec 2020 13:33:15 -0600
Subject: [PATCH 6/8] Eliminated all warnings and references to 'arrayset'
 from the tests.

---
 tests/test_0348-form-keys.py     | 144 ++++++++++++++++---------------
 tests/test_0384-lazy-arrayset.py |  24 +++---
 2 files changed, 84 insertions(+), 84 deletions(-)

diff --git a/tests/test_0348-form-keys.py b/tests/test_0348-form-keys.py
index 13fe5f5f30..79a2570cae 100644
--- a/tests/test_0348-form-keys.py
+++ b/tests/test_0348-form-keys.py
@@ -15,9 +15,7 @@
 
 
 def test_numpyarray():
-    ak.deprecations_as_errors = False
-
-    assert ak.from_arrayset(*ak.to_arrayset([1, 2, 3, 4, 5])).tolist() == [
+    assert ak.from_buffers(*ak.to_buffers([1, 2, 3, 4, 5])).tolist() == [
         1,
         2,
         3,
@@ -34,18 +32,16 @@ def test_numpyarray():
 
 
 def test_listoffsetarray():
-    ak.deprecations_as_errors = False
-
-    assert ak.from_arrayset(*ak.to_arrayset([[1, 2, 3], [], [4, 5]])).tolist() == [
+    assert ak.from_buffers(*ak.to_buffers([[1, 2, 3], [], [4, 5]])).tolist() == [
         [1, 2, 3],
         [],
         [4, 5],
     ]
-    assert ak.from_arrayset(
-        *ak.to_arrayset(["one", "two", "three", "four", "five"])
+    assert ak.from_buffers(
+        *ak.to_buffers(["one", "two", "three", "four", "five"])
     ).tolist() == ["one", "two", "three", "four", "five"]
-    assert ak.from_arrayset(
-        *ak.to_arrayset([["one", "two", "three"], [], ["four", "five"]])
+    assert ak.from_buffers(
+        *ak.to_buffers([["one", "two", "three"], [], ["four", "five"]])
     ).tolist() == [["one", "two", "three"], [], ["four", "five"]]
     assert pickle.loads(
         pickle.dumps(ak.Array([[1, 2, 3], [], [4, 5]]), -1)
@@ -53,13 +49,11 @@ def test_listoffsetarray():
 
 
 def test_listarray():
-    ak.deprecations_as_errors = False
-
     listoffsetarray = ak.Array([[1, 2, 3], [], [4, 5]]).layout
     listarray = ak.layout.ListArray64(
         listoffsetarray.starts, listoffsetarray.stops, listoffsetarray.content
     )
-    assert ak.from_arrayset(*ak.to_arrayset(listarray)).tolist() == [
+    assert ak.from_buffers(*ak.to_buffers(listarray)).tolist() == [
         [1, 2, 3],
         [],
         [4, 5],
@@ -72,9 +66,7 @@ def test_listarray():
 
 
 def test_indexedoptionarray():
-    ak.deprecations_as_errors = False
-
-    assert ak.from_arrayset(*ak.to_arrayset([1, 2, 3, None, None, 5])).tolist() == [
+    assert ak.from_buffers(*ak.to_buffers([1, 2, 3, None, None, 5])).tolist() == [
         1,
         2,
         3,
@@ -88,12 +80,10 @@ def test_indexedoptionarray():
 
 
 def test_indexedarray():
-    ak.deprecations_as_errors = False
-
     content = ak.Array([0.0, 1.1, 2.2, 3.3, 4.4]).layout
     index = ak.layout.Index64(np.array([3, 1, 1, 4, 2], dtype=np.int64))
     indexedarray = ak.layout.IndexedArray64(index, content)
-    assert ak.from_arrayset(*ak.to_arrayset(indexedarray)).tolist() == [
+    assert ak.from_buffers(*ak.to_buffers(indexedarray)).tolist() == [
         3.3,
         1.1,
         1.1,
@@ -110,10 +100,8 @@ def test_indexedarray():
 
 
 def test_emptyarray():
-    ak.deprecations_as_errors = False
-
-    assert ak.from_arrayset(*ak.to_arrayset([])).tolist() == []
-    assert ak.from_arrayset(*ak.to_arrayset([[], [], []])).tolist() == [[], [], []]
+    assert ak.from_buffers(*ak.to_buffers([])).tolist() == []
+    assert ak.from_buffers(*ak.to_buffers([[], [], []])).tolist() == [[], [], []]
 
     assert pickle.loads(pickle.dumps(ak.Array([]), -1)).tolist() == []
     assert pickle.loads(pickle.dumps(ak.Array([[], [], []]), -1)).tolist() == [
@@ -124,12 +112,10 @@ def test_emptyarray():
 
 
 def test_bytemaskedarray():
-    ak.deprecations_as_errors = False
-
     content = ak.Array([0.0, 1.1, 2.2, 3.3, 4.4]).layout
     mask = ak.layout.Index8(np.array([False, True, True, False, False], dtype=np.int8))
     bytemaskedarray = ak.layout.ByteMaskedArray(mask, content, True)
-    assert ak.from_arrayset(*ak.to_arrayset(bytemaskedarray)).tolist() == [
+    assert ak.from_buffers(*ak.to_buffers(bytemaskedarray)).tolist() == [
         None,
         1.1,
         2.2,
@@ -146,14 +132,12 @@ def test_bytemaskedarray():
 
 
 def test_bitmaskedarray():
-    ak.deprecations_as_errors = False
-
     content = ak.Array([0.0, 1.1, 2.2, 3.3, 4.4]).layout
     mask = ak.layout.IndexU8(
         np.packbits(np.array([False, True, True, False, False], dtype=np.int8))
     )
     bitmaskedarray = ak.layout.BitMaskedArray(mask, content, True, 5, False)
-    assert ak.from_arrayset(*ak.to_arrayset(bitmaskedarray)).tolist() == [
+    assert ak.from_buffers(*ak.to_buffers(bitmaskedarray)).tolist() == [
         None,
         1.1,
         2.2,
@@ -170,13 +154,11 @@ def test_bitmaskedarray():
 
 
 def test_recordarray():
-    ak.deprecations_as_errors = False
-
-    assert ak.from_arrayset(
-        *ak.to_arrayset([(1.1, [1]), (2.2, [1, 2]), (3.3, [1, 2, 3])])
+    assert ak.from_buffers(
+        *ak.to_buffers([(1.1, [1]), (2.2, [1, 2]), (3.3, [1, 2, 3])])
     ).tolist() == [(1.1, [1]), (2.2, [1, 2]), (3.3, [1, 2, 3])]
-    assert ak.from_arrayset(
-        *ak.to_arrayset(
+    assert ak.from_buffers(
+        *ak.to_buffers(
             [{"x": 1.1, "y": [1]}, {"x": 2.2, "y": [1, 2]}, {"x": 3.3, "y": [1, 2, 3]}]
         )
     ).tolist() == [
@@ -207,8 +189,6 @@ def test_recordarray():
 
 
 def test_record():
-    ak.deprecations_as_errors = False
-
     assert pickle.loads(
         pickle.dumps(ak.Record({"x": 2.2, "y": [1, 2]}), -1)
     ).tolist() == {"x": 2.2, "y": [1, 2]}
@@ -227,11 +207,9 @@ def test_record():
 
 
 def test_regulararray():
-    ak.deprecations_as_errors = False
-
     content = ak.Array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]).layout
     regulararray = ak.layout.RegularArray(content, 3, zeros_length=0)
-    assert ak.from_arrayset(*ak.to_arrayset(regulararray)).tolist() == [
+    assert ak.from_buffers(*ak.to_buffers(regulararray)).tolist() == [
         [1, 2, 3],
         [4, 5, 6],
         [7, 8, 9],
@@ -246,9 +224,7 @@ def test_regulararray():
 
 
 def test_unionarray():
-    ak.deprecations_as_errors = False
-
-    assert ak.from_arrayset(*ak.to_arrayset([[1, 2, 3], [], 4, 5])).tolist() == [
+    assert ak.from_buffers(*ak.to_buffers([[1, 2, 3], [], 4, 5])).tolist() == [
         [1, 2, 3],
         [],
         4,
@@ -263,11 +239,9 @@ def test_unionarray():
 
 
 def test_unmaskedarray():
-    ak.deprecations_as_errors = False
-
     content = ak.Array([1, 2, 3, 4, 5]).layout
     unmaskedarray = ak.layout.UnmaskedArray(content)
-    assert ak.from_arrayset(*ak.to_arrayset(unmaskedarray)).tolist() == [1, 2, 3, 4, 5]
+    assert ak.from_buffers(*ak.to_buffers(unmaskedarray)).tolist() == [1, 2, 3, 4, 5]
     assert pickle.loads(pickle.dumps(ak.Array(unmaskedarray), -1)).tolist() == [
         1,
         2,
@@ -278,28 +252,47 @@ def test_unmaskedarray():
 
 
 def test_partitioned():
-    ak.deprecations_as_errors = False
-
     array = ak.repartition(ak.Array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), 3)
 
-    form, container, num_partitions = ak.to_arrayset(array, partition_first=True)
-    assert ak.from_arrayset(
-        form, container, num_partitions, partition_first=True
-    ).tolist() == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+    form, length, container = ak.to_buffers(array)
+    assert ak.from_buffers(form, length, container).tolist() == [
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10,
+    ]
 
-    form, container, num_partitions = ak.to_arrayset(array, partition_first=False)
-    assert ak.from_arrayset(
-        form, container, num_partitions, partition_first=False
-    ).tolist() == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+    form, length, container = ak.to_buffers(array)
+    assert ak.from_buffers(form, length, container).tolist() == [
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10,
+    ]
 
     one = ak.Array([1, 2, 3, 4, 5])
     two = ak.Array([6, 7, 8, 9, 10])
     container = {}
-    form1, _, _ = ak.to_arrayset(one, container, 0)
-    form2, _, _ = ak.to_arrayset(two, container, 1)
+    lengths = []
+    form1, length, _ = ak.to_buffers(one, container, 0)
+    lengths.append(length)
+    form2, length, _ = ak.to_buffers(two, container, 1)
+    lengths.append(length)
     assert form1 == form2
 
-    assert ak.from_arrayset(form1, container, 2).tolist() == [
+    assert ak.from_buffers(form1, lengths, container).tolist() == [
         1,
         2,
         3,
@@ -326,24 +319,33 @@ def test_partitioned():
 
 
 def test_lazy():
-    ak.deprecations_as_errors = False
-
     array = ak.Array([1, 2, 3, 4, 5])
 
-    form, container, num_partitions = ak.to_arrayset(array)
+    form, length, container = ak.to_buffers(array)
 
-    assert ak.from_arrayset(
-        form, container, num_partitions, lazy=True, lazy_lengths=5
-    ).tolist() == [1, 2, 3, 4, 5]
+    assert ak.from_buffers(form, length, container, lazy=True).tolist() == [
+        1,
+        2,
+        3,
+        4,
+        5,
+    ]
 
 
 def test_lazy_partitioned():
-    ak.deprecations_as_errors = False
-
     array = ak.repartition(ak.Array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), 3)
-    form, container, num_partitions = ak.to_arrayset(array)
-    assert num_partitions == 4
+    form, length, container = ak.to_buffers(array)
+    assert length == [3, 3, 3, 1]
 
-    assert ak.from_arrayset(
-        form, container, num_partitions, lazy=True, lazy_lengths=[3, 3, 3, 1]
-    ).tolist() == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+    assert ak.from_buffers(form, length, container, lazy=True).tolist() == [
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10,
+    ]
diff --git a/tests/test_0384-lazy-arrayset.py b/tests/test_0384-lazy-arrayset.py
index beefa51456..c13ec64662 100644
--- a/tests/test_0384-lazy-arrayset.py
+++ b/tests/test_0384-lazy-arrayset.py
@@ -21,9 +21,7 @@ def __setitem__(self, key, value):
         return super(Canary, self).__setitem__(key, value)
 
 
-def test_lazy_arrayset():
-    ak.deprecations_as_errors = False
-
+def test_lazy_buffers():
     array = ak.from_json(
         """
     [
@@ -67,19 +65,19 @@ def test_lazy_arrayset():
     )
 
     canary = Canary()
-    prefix = "kitty"
-    form, container, npart = ak.to_arrayset(array, container=canary, prefix=prefix)
+    key_format = "kitty-{form_key}-{attribute}"
+    form, length, container = ak.to_buffers(array, container=canary, key_format=key_format)
     assert not any(op[0] == "get" for op in canary.ops)
     canary.ops = []
 
     cache = {}
-    out = ak.from_arrayset(
+    out = ak.from_buffers(
         form,
+        length,
         container,
+        key_format=key_format,
         lazy=True,
         lazy_cache=cache,
-        lazy_lengths=3,
-        prefix=prefix,
         lazy_cache_key="hello",
     )
     assert len(canary.ops) == 0
@@ -104,8 +102,8 @@ def test_lazy_arrayset():
         ("get", "kitty-node11-tags"),
         ("get", "kitty-node11-index"),
         ("get", "kitty-node14-offsets"),
-        ("get", "kitty-node13"),
-        ("get", "kitty-node16"),
+        ("get", "kitty-node13-data"),
+        ("get", "kitty-node16-data"),
     }
     assert set(cache) == {
         "hello",
@@ -117,15 +115,13 @@ def test_lazy_arrayset():
     cache.clear()
 
     assert ak.to_list(out.masked) == [None, 4, 4]
-    assert set(canary.ops) == {("get", "kitty-node17-index"), ("get", "kitty-node18")}
+    assert set(canary.ops) == {("get", "kitty-node17-index"), ("get", "kitty-node18-data")}
     assert set(cache) == {"hello", "hello(kitty-node17-virtual)"}
     canary.ops = []
     cache.clear()
 
 
 def test_longer_than_expected():
-    ak.deprecations_as_errors = False
-
     array = ak.Array(
         ak.layout.ListOffsetArray64(
             ak.layout.Index64([0, 2, 4]),
@@ -137,7 +133,7 @@ def test_longer_than_expected():
             ),
         )
     )
-    out = ak.from_arrayset(*ak.to_arrayset(array), lazy=True, lazy_lengths=2)
+    out = ak.from_buffers(*ak.to_buffers(array), lazy=True)
     assert ak.to_list(out) == [
         [{"item1": 0, "longitem": 0}, {"item1": 1, "longitem": 1}],
         [{"item1": 2, "longitem": 2}, {"item1": 3, "longitem": 3}],

From 3ffe3ec8a084decda6ed410a172cd21ec8fc49db Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 11 Dec 2020 14:13:45 -0600
Subject: [PATCH 7/8] Updated the documentation, too.

---
 docs-src/_toc.yml                             |  4 +-
 ...-arrayset.md => how-to-convert-buffers.md} | 74 +++++++------------
 docs-src/how-to-convert.md                    |  2 +-
 3 files changed, 31 insertions(+), 49 deletions(-)
 rename docs-src/{how-to-convert-arrayset.md => how-to-convert-buffers.md} (59%)

diff --git a/docs-src/_toc.yml b/docs-src/_toc.yml
index cff21ffcdc..e05f4b5b5c 100644
--- a/docs-src/_toc.yml
+++ b/docs-src/_toc.yml
@@ -20,8 +20,8 @@
       title: "Arrow and Parquet"
     - file: how-to-convert-pandas
       title: "Pandas"
-    - file: how-to-convert-arrayset
-      title: "Generic array-sets"
+    - file: how-to-convert-buffers
+      title: "Generic buffers"
 
 - file: how-to-create
   title: "Creating new arrays"
diff --git a/docs-src/how-to-convert-arrayset.md b/docs-src/how-to-convert-buffers.md
similarity index 59%
rename from docs-src/how-to-convert-arrayset.md
rename to docs-src/how-to-convert-buffers.md
index d94730df0c..f34d93cae9 100644
--- a/docs-src/how-to-convert-arrayset.md
+++ b/docs-src/how-to-convert-buffers.md
@@ -11,10 +11,10 @@ kernelspec:
   name: python3
 ---
 
-Generic array-sets
-==================
+Generic buffers
+===============
 
-Most of the conversion functions target a particular library: NumPy, Arrow, Pandas, or Python itself. As a catch-all for other storage formats, Awkward Arrays can be converted to and from "array-sets," sets of named arrays with a schema that can be used to reconstruct the original array. This section will demonstrate how an array-set can be used to store an Awkward Array in an HDF5 file, which ordinarily wouldn't be able to represent nested, irregular data structures.
+Most of the conversion functions target a particular library: NumPy, Arrow, Pandas, or Python itself. As a catch-all for other storage formats, Awkward Arrays can be converted to and from sets of named buffers. The buffers are not (usually) intelligible on their own; the length of the array and a JSON document are needed to reconstitute the original structure. This section will demonstrate how these buffers can be used to store an Awkward Array in an HDF5 file, which ordinarily wouldn't be able to represent nested, irregular data structures.
 
 ```{code-cell} ipython3
 import awkward as ak
@@ -23,8 +23,8 @@ import h5py
 import json
 ```
 
-From Awkward to an array-set
-----------------------------
+From Awkward to buffers
+-----------------------
 
 Consider the following complex array:
 
@@ -37,18 +37,17 @@ ak_array = ak.Array([
 ak_array
 ```
 
-The [ak.to_arrayset](https://awkward-array.readthedocs.io/en/latest/_auto/ak.to_arrayset.html) function decomposes it into a set of one-dimensional arrays (a zero-copy operation).
+The [ak.to_buffers](https://awkward-array.readthedocs.io/en/latest/_auto/ak.to_buffers.html) function decomposes it into a set of one-dimensional arrays (a zero-copy operation).
 
 ```{code-cell} ipython3
-form, container, num_partitions = ak.to_arrayset(ak_array)
+form, length, container = ak.to_buffers(ak_array)
 ```
 
 The pieces needed to reconstitute this array are:
 
    * the [Form](https://awkward-array.readthedocs.io/en/latest/ak.forms.Form.html), which defines how structure is built from one-dimensional arrays,
-   * the one-dimensional arrays in the `container` (a [MutableMapping](https://docs.python.org/3/library/collections.abc.html#collections-abstract-base-classes)),
-   * the number of partitions, if any,
-   * the length of the original array or lengths of all partitions ([ak.partitions](https://awkward-array.readthedocs.io/en/latest/_auto/ak.partitions.html)) are needed if we wish to read it back _lazily_ (more on that below).
+   * the length of the original array or lengths of all of its partitions ([ak.partitions](https://awkward-array.readthedocs.io/en/latest/_auto/ak.partitions.html)),
+   * the one-dimensional arrays in the `container` (a [MutableMapping](https://docs.python.org/3/library/collections.abc.html#collections-abstract-base-classes)).
 
 The [Form](https://awkward-array.readthedocs.io/en/latest/ak.forms.Form.html) is like an Awkward [Type](https://awkward-array.readthedocs.io/en/latest/ak.types.Type.html) in that it describes how the data are structured, but with more detail: it includes distinctions such as the difference between [ListArray](https://awkward-array.readthedocs.io/en/latest/ak.layout.ListArray.html) and [ListOffsetArray](https://awkward-array.readthedocs.io/en/latest/ak.layout.ListOffsetArray.html), as well as the integer types of structural [Indexes](https://awkward-array.readthedocs.io/en/latest/ak.layout.Index.html).
 
@@ -58,37 +57,31 @@ It is usually presented as JSON, and has a compact JSON format (when [Form.tojso
 form
 ```
 
-This `container` is a new dict, but it could have been a user-specified [MutableMapping](https://docs.python.org/3/library/collections.abc.html#collections-abstract-base-classes).
+In this case, the `length` is just an integer. It would be a list of integers if `ak_array` were partitioned.
 
 ```{code-cell} ipython3
-container
+length
 ```
 
-This array has no partitions.
+This `container` is a new dict, but it could have been a user-specified [MutableMapping](https://docs.python.org/3/library/collections.abc.html#collections-abstract-base-classes) if passed into [ak.to_buffers](https://awkward-array.readthedocs.io/en/latest/_auto/ak.to_buffers.html) as an argument.
 
 ```{code-cell} ipython3
-num_partitions is None
-```
-
-This is also what we find from [ak.partitions](https://awkward-array.readthedocs.io/en/latest/_auto/ak.partitions.html).
-
-```{code-cell} ipython3
-ak.partitions(ak_array) is None
+container
 ```
 
-From array-set to Awkward
--------------------------
+From buffers to Awkward
+-----------------------
 
-The function that reverses [ak.to_arrayset](https://awkward-array.readthedocs.io/en/latest/_auto/ak.to_arrayset.html) is [ak.from_arrayset](https://awkward-array.readthedocs.io/en/latest/_auto/ak.from_arrayset.html). Its first three arguments are `form`, `container`, and `num_partitions`.
+The function that reverses [ak.to_buffers](https://awkward-array.readthedocs.io/en/latest/_auto/ak.to_buffers.html) is [ak.from_buffers](https://awkward-array.readthedocs.io/en/latest/_auto/ak.from_buffers.html). Its first three arguments are `form`, `length`, and `container`.
 
 ```{code-cell} ipython3
-ak.from_arrayset(form, container, num_partitions)
+ak.from_buffers(form, length, container)
 ```
 
 Saving Awkward Arrays to HDF5
 -----------------------------
 
-The [h5py](https://www.h5py.org/) library presents each group in an HDF5 file as a [MutableMapping](https://docs.python.org/3/library/collections.abc.html#collections-abstract-base-classes), which we can use as a container for an array-set. We must also save the `form`, `num_partitions`, and `length` as metadata for the array to be retrievable.
+The [h5py](https://www.h5py.org/) library presents each group in an HDF5 file as a [MutableMapping](https://docs.python.org/3/library/collections.abc.html#collections-abstract-base-classes), which we can use as a container for the buffers. We must also save the `form` and `length` as metadata for the array to be retrievable.
 
 ```{code-cell} ipython3
 file = h5py.File("/tmp/example.hdf5", "w")
@@ -96,10 +89,10 @@ group = file.create_group("awkward")
 group
 ```
 
-We can fill this `group` as a `container` by passing it in to [ak.to_arrayset](https://awkward-array.readthedocs.io/en/latest/_auto/ak.to_arrayset.html).
+We can fill this `group` as a `container` by passing it in to [ak.to_buffers](https://awkward-array.readthedocs.io/en/latest/_auto/ak.to_buffers.html).
 
 ```{code-cell} ipython3
-form, container, num_partitions = ak.to_arrayset(ak_array, container=group)
+form, length, container = ak.to_buffers(ak_array, container=group)
 ```
 
 ```{code-cell} ipython3
@@ -115,7 +108,7 @@ container.keys()
 Here's one.
 
 ```{code-cell} ipython3
-np.asarray(container["node0-offsets"])
+np.asarray(container["part0-node0-offsets"])
 ```
 
 Now we need to add the other information to the group as metadata. Since HDF5 accepts string-valued metadata, we can put it all in as JSON or numbers.
@@ -126,38 +119,27 @@ group.attrs["form"]
 ```
 
 ```{code-cell} ipython3
-group.attrs["num_partitions"] = json.dumps(num_partitions)
-group.attrs["num_partitions"]
-```
-
-```{code-cell} ipython3
-group.attrs["partition_lengths"] = json.dumps(ak.partitions(ak_array))
-group.attrs["partition_lengths"]
-```
-
-```{code-cell} ipython3
-group.attrs["length"] = len(ak_array)
+group.attrs["length"] = json.dumps(length)   # JSON-encode it because it might be a list
 group.attrs["length"]
 ```
 
 Reading Awkward Arrays from HDF5
 --------------------------------
 
-With that, we can reconstitute the array by supplying [ak.from_arrayset](https://awkward-array.readthedocs.io/en/latest/_auto/ak.from_arrayset.html) the right arguments from the group and metadata.
+With that, we can reconstitute the array by supplying [ak.from_buffers](https://awkward-array.readthedocs.io/en/latest/_auto/ak.from_buffers.html) the right arguments from the group and metadata.
 
 The group can't be used as a `container` as-is, since subscripting it returns `h5py.Dataset` objects, rather than arrays.
 
 ```{code-cell} ipython3
-reconstituted = ak.from_arrayset(
+reconstituted = ak.from_buffers(
     ak.forms.Form.fromjson(group.attrs["form"]),
+    json.loads(group.attrs["length"]),
     {k: np.asarray(v) for k, v in group.items()},
 )
 reconstituted
 ```
 
-Like [ak.from_parquet](https://awkward-array.readthedocs.io/en/latest/_auto/ak.from_parquet.html), [ak.from_arrayset](https://awkward-array.readthedocs.io/en/latest/_auto/ak.from_arrayset.html) has the option to read lazily, only accessing record fields and partitions that are accessed.
-
-To do so, we need to pass `lazy=True`, but also the total length of the array (if not partitioned) or the lengths of all the partitions (if partitioned).
+Like [ak.from_parquet](https://awkward-array.readthedocs.io/en/latest/_auto/ak.from_parquet.html), [ak.from_buffers](https://awkward-array.readthedocs.io/en/latest/_auto/ak.from_buffers.html) has the option to read lazily, reading only the record fields and partitions that are accessed.
 
 ```{code-cell} ipython3
 class LazyGet:
@@ -168,11 +150,11 @@ class LazyGet:
         print(key)
         return np.asarray(self.group[key])
 
-lazy = ak.from_arrayset(
+lazy = ak.from_buffers(
     ak.forms.Form.fromjson(group.attrs["form"]),
+    json.loads(group.attrs["length"]),
     LazyGet(group),
     lazy=True,
-    lazy_lengths = group.attrs["length"],
 )
 ```
 
diff --git a/docs-src/how-to-convert.md b/docs-src/how-to-convert.md
index 02610f36de..62b3a13719 100644
--- a/docs-src/how-to-convert.md
+++ b/docs-src/how-to-convert.md
@@ -20,4 +20,4 @@ Converting arrays
    * **[ROOT via Uproot](how-to-convert-uproot)**
    * **[Arrow and Parquet](how-to-convert-arrow)**
    * **[Pandas](how-to-convert-pandas)**
-   * **[Generic array-sets](how-to-convert-arrayset)**
+   * **[Generic buffers](how-to-convert-buffers)**

From f068e1d4204d43380febf4a8cde2980f643c0b2b Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 11 Dec 2020 14:58:35 -0600
Subject: [PATCH 8/8] Last touches: length sanity-checks at all levels.

---
 src/awkward/operations/convert.py     | 100 +++++++++++++++++++++++---
 src/libawkward/array/RegularArray.cpp |   4 ++
 tests/test_0384-lazy-arrayset.py      |   9 ++-
 3 files changed, 102 insertions(+), 11 deletions(-)

diff --git a/src/awkward/operations/convert.py b/src/awkward/operations/convert.py
index 0f355051fa..5454ae8c8b 100644
--- a/src/awkward/operations/convert.py
+++ b/src/awkward/operations/convert.py
@@ -3193,16 +3193,19 @@ def _form_to_layout(
             container,
             partnum,
             key_format,
-            len(mask),
+            length,
             lazy_cache,
             lazy_cache_key,
         )
 
+        if length is None:
+            length = len(content)
+
         return ak.layout.BitMaskedArray(
             mask,
             content,
             form.valid_when,
-            len(content),
+            length,
             form.lsb_order,
             identities,
             parameters,
@@ -3218,12 +3221,22 @@ def _form_to_layout(
             raw_mask.view(_index_form_to_dtype[form.mask])
         )
 
+        if length is None:
+            length = len(mask)
+        elif length != len(mask):
+            raise ValueError(
+                "ByteMaskedArray length mismatch: expected {0}, observed {1}".format(
+                    length, len(mask)
+                )
+                + ak._util.exception_suffix(__file__)
+            )
+
         content = _form_to_layout(
             form.content,
             container,
             partnum,
             key_format,
-            len(mask),
+            length,
             lazy_cache,
             lazy_cache_key,
         )
@@ -3233,6 +3246,13 @@ def _form_to_layout(
         )
 
     elif isinstance(form, ak.forms.EmptyForm):
+        if length is not None and length != 0:
+            raise ValueError(
+                "EmptyArray length mismatch: expected {0}, observed {1}".format(
+                    length, 0
+                )
+                + ak._util.exception_suffix(__file__)
+            )
         return ak.layout.EmptyArray(identities, parameters)
 
     elif isinstance(form, ak.forms.IndexedForm):
@@ -3245,6 +3265,16 @@ def _form_to_layout(
             raw_index.view(_index_form_to_dtype[form.index])
         )
 
+        if length is None:
+            length = len(index)
+        elif length != len(index):
+            raise ValueError(
+                "IndexedArray length mismatch: expected {0}, observed {1}".format(
+                    length, len(index)
+                )
+                + ak._util.exception_suffix(__file__)
+            )
+
         content = _form_to_layout(
             form.content,
             container,
@@ -3269,6 +3299,16 @@ def _form_to_layout(
             raw_index.view(_index_form_to_dtype[form.index])
         )
 
+        if length is None:
+            length = len(index)
+        elif length != len(index):
+            raise ValueError(
+                "IndexedOptionArray length mismatch: expected {0}, observed {1}".format(
+                    length, len(index)
+                )
+                + ak._util.exception_suffix(__file__)
+            )
+
         content = _form_to_layout(
             form.content,
             container,
@@ -3301,6 +3341,16 @@ def _form_to_layout(
             raw_stops.view(_index_form_to_dtype[form.stops])
         )
 
+        if length is None:
+            length = len(starts)
+        elif length != len(starts):
+            raise ValueError(
+                "ListArray length mismatch: expected {0}, observed {1}".format(
+                    length, len(starts)
+                )
+                + ak._util.exception_suffix(__file__)
+            )
+
         array_starts = numpy.asarray(starts)
         array_stops = numpy.asarray(stops)[: len(array_starts)]
         array_stops = array_stops[array_starts != array_stops]
@@ -3328,6 +3378,16 @@ def _form_to_layout(
             raw_offsets.view(_index_form_to_dtype[form.offsets])
         )
 
+        if length is None:
+            length = len(offsets) - 1
+        elif length != len(offsets) - 1:
+            raise ValueError(
+                "ListOffsetArray length mismatch: expected {0}, observed {1}".format(
+                    length, len(offsets) - 1
+                )
+                + ak._util.exception_suffix(__file__)
+            )
+
         content = _form_to_layout(
             form.content,
             container,
@@ -3354,7 +3414,11 @@ def _form_to_layout(
             dtype, inner_shape = dtype_inner_shape, ()
         else:
             dtype, inner_shape = dtype_inner_shape.subdtype
-        shape = (-1,) + inner_shape
+
+        if length is None:
+            shape = (-1,) + inner_shape
+        else:
+            shape = (length,) + inner_shape
 
         array = raw_array.view(dtype).reshape(shape)
 
@@ -3386,6 +3450,14 @@ def _form_to_layout(
 
         if length is None:
             length = minlength
+        elif minlength is not None and length > minlength:
+            raise ValueError(
+                "RecordArray length mismatch: expected {0}, minimum content is {1}".format(
+                    length, minlength
+                )
+                + ak._util.exception_suffix(__file__)
+            )
+
         return ak.layout.RecordArray(
             contents, None if form.istuple else keys, length, identities, parameters,
         )
@@ -3426,6 +3498,16 @@ def _form_to_layout(
             raw_index.view(_index_form_to_dtype[form.index])
         )
 
+        if length is None:
+            length = len(tags)
+        elif length != len(tags):
+            raise ValueError(
+                "UnionArray length mismatch: expected {0}, observed {1}".format(
+                    length, len(tags)
+                )
+                + ak._util.exception_suffix(__file__)
+            )
+
         contents = []
         for i, content_form in enumerate(form.contents):
             mine = numpy.array(index)[numpy.equal(tags, i)]
@@ -3635,7 +3717,7 @@ def kf(**v):
                     + ak._util.exception_suffix(__file__)
                 ),
                 "1.1.0",
-                "January 1, 2021",
+                "February 1, 2021",
             )
 
         args = (form, container, str(partition_start), key_format, length)
@@ -3736,7 +3818,7 @@ def to_arrayset(
             is sorted or lookup performance depends on alphabetical order.
 
     **Deprecated:** This will be removed in `awkward>=1.1.0` (target date:
-    January 1, 2021). Use #ak.to_buffers instead: the arguments and return
+    February 1, 2021). Use #ak.to_buffers instead: the arguments and return
     values have changed.
 
     Decomposes an Awkward Array into a Form and a collection of arrays, so
@@ -3845,7 +3927,7 @@ def to_arrayset(
             + ak._util.exception_suffix(__file__)
         ),
         "1.1.0",
-        "January 1, 2021",
+        "February 1, 2021",
     )
 
     layout = to_layout(array, allow_record=False, allow_other=False)
@@ -3980,7 +4062,7 @@ def from_arrayset(
             high-level.
 
     **Deprecated:** This will be removed in `awkward>=1.1.0` (target date:
-    January 1, 2021). Use #ak.from_buffers instead: the arguments have changed.
+    February 1, 2021). Use #ak.from_buffers instead: the arguments have changed.
 
     Reconstructs an Awkward Array from a Form and a collection of arrays, so
     that data can be losslessly read from file formats and storage devices that
@@ -4019,7 +4101,7 @@ def from_arrayset(
             + ak._util.exception_suffix(__file__)
         ),
         "1.1.0",
-        "January 1, 2021",
+        "February 1, 2021",
     )
 
     if num_partitions is None:
diff --git a/src/libawkward/array/RegularArray.cpp b/src/libawkward/array/RegularArray.cpp
index a9f1bef99f..c63aee7b78 100644
--- a/src/libawkward/array/RegularArray.cpp
+++ b/src/libawkward/array/RegularArray.cpp
@@ -474,6 +474,10 @@ namespace awkward {
     std::stringstream out;
     out << indent << pre << "<" << classname() << " size=\"" << size_
         << "\">\n";
+    if (size_ == 0) {
+      out << indent << pre << "<" << classname() << " length=\"" << length_
+          << "\">\n";
+    }
     if (identities_.get() != nullptr) {
       out << identities_.get()->tostring_part(
                indent + std::string("    "), "", "\n");
diff --git a/tests/test_0384-lazy-arrayset.py b/tests/test_0384-lazy-arrayset.py
index c13ec64662..0cfccd86a9 100644
--- a/tests/test_0384-lazy-arrayset.py
+++ b/tests/test_0384-lazy-arrayset.py
@@ -66,7 +66,9 @@ def test_lazy_buffers():
 
     canary = Canary()
     key_format = "kitty-{form_key}-{attribute}"
-    form, length, container = ak.to_buffers(array, container=canary, key_format=key_format)
+    form, length, container = ak.to_buffers(
+        array, container=canary, key_format=key_format
+    )
     assert not any(op[0] == "get" for op in canary.ops)
     canary.ops = []
 
@@ -115,7 +117,10 @@ def test_lazy_buffers():
     cache.clear()
 
     assert ak.to_list(out.masked) == [None, 4, 4]
-    assert set(canary.ops) == {("get", "kitty-node17-index"), ("get", "kitty-node18-data")}
+    assert set(canary.ops) == {
+        ("get", "kitty-node17-index"),
+        ("get", "kitty-node18-data"),
+    }
     assert set(cache) == {"hello", "hello(kitty-node17-virtual)"}
     canary.ops = []
     cache.clear()