Skip to content

Commit

Permalink
Convert Arrow <--> Parquet, and hence Awkward <--> Parquet. (#343)
Browse files Browse the repository at this point in the history
* Convert Arrow <--> Parquet, and hence Awkward <--> Parquet.

* Completely implemented Parquet reading and writing, with tests of eagerness and laziness. Added 'metadata' on ak.Array and ak.Record (for VirtualArray cache).

* Add old OAMap Parquet samples and remember to pytest.importorskip.

* Add an 'explode_records' option to make it easier to write structured data.
  • Loading branch information
jpivarski authored Jul 17, 2020
1 parent 833eb49 commit f5d3282
Show file tree
Hide file tree
Showing 20 changed files with 536 additions and 8 deletions.
6 changes: 3 additions & 3 deletions src/awkward1/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,16 +334,16 @@ def behaviorof(*arrays):
return behavior


def wrap(content, behavior):
def wrap(content, behavior, metadata=None):
import awkward1.highlevel

if isinstance(
content, (awkward1.layout.Content, awkward1.partition.PartitionedArray)
):
return awkward1.highlevel.Array(content, behavior=behavior)
return awkward1.highlevel.Array(content, behavior=behavior, metadata=metadata)

elif isinstance(content, awkward1.layout.Record):
return awkward1.highlevel.Record(content, behavior=behavior)
return awkward1.highlevel.Record(content, behavior=behavior, metadata=metadata)

else:
return content
Expand Down
50 changes: 46 additions & 4 deletions src/awkward1/highlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@

try:
from collections.abc import Iterable
from collections.abc import MutableMapping
except ImportError:
from collections import Iterable
from collections import MutableMapping

import numpy

Expand Down Expand Up @@ -38,6 +40,9 @@ class Array(
with_name (None or str): Gives tuples and records a name that can be
used to override their behavior (see below).
check_valid (bool): If True, verify that the #layout is valid.
metadata (None or MutableMapping): Optional metadata, attached to this
array, which persists through `__getitem__` but not any other
operations.
High-level array that can contain data of any type.
Expand Down Expand Up @@ -201,7 +206,9 @@ class Array(
derivatives elementwise on each numeric value in an ak.Array.
"""

def __init__(self, data, behavior=None, with_name=None, check_valid=False):
def __init__(
self, data, behavior=None, with_name=None, check_valid=False, metadata=None
):
if isinstance(
data, (awkward1.layout.Content, awkward1.partition.PartitionedArray)
):
Expand Down Expand Up @@ -240,6 +247,8 @@ def __init__(self, data, behavior=None, with_name=None, check_valid=False):
if check_valid:
awkward1.operations.describe.validity_error(self, exception=True)

self.metadata = metadata

@property
def layout(self):
"""
Expand Down Expand Up @@ -320,6 +329,17 @@ def behavior(self, behavior):
else:
raise TypeError("behavior must be None or a dict")

# Read/write `metadata` property of the high-level array: the getter exposes
# the private `_metadata` attribute and the setter validates before storing.
@property
def metadata(self):
# Return whatever the setter last stored (None or a MutableMapping).
return self._metadata

@metadata.setter
def metadata(self, value):
# Only None or a MutableMapping instance is accepted; any other value is
# rejected with a TypeError and `_metadata` is left unchanged.
if value is None or isinstance(value, MutableMapping):
self._metadata = value
else:
raise TypeError("metadata must be None or a MutableMapping")

class Mask(object):
def __init__(self, array, valid_when):
self._array = array
Expand Down Expand Up @@ -857,7 +877,9 @@ def __getitem__(self, where):
acting at the last level, while the higher levels of the indexer all
have the same dimension as the array being indexed.
"""
return awkward1._util.wrap(self._layout[where], self._behavior)
return awkward1._util.wrap(
self._layout[where], self._behavior, metadata=self._metadata
)

def __setitem__(self, where, what):
"""
Expand Down Expand Up @@ -1332,6 +1354,9 @@ class Record(awkward1._connect._numpy.NDArrayOperatorsMixin):
with_name (None or str): Gives the record type a name that can be
used to override its behavior (see below).
check_valid (bool): If True, verify that the #layout is valid.
metadata (None or MutableMapping): Optional metadata, attached to this
array, which persists through `__getitem__` but not any other
operations.
High-level record that can contain fields of any type.
Expand All @@ -1347,7 +1372,9 @@ class Record(awkward1._connect._numpy.NDArrayOperatorsMixin):
See also #ak.Array and #ak.behavior.
"""

def __init__(self, data, behavior=None, with_name=None, check_valid=False):
def __init__(
self, data, behavior=None, with_name=None, check_valid=False, metadata=None
):
if isinstance(data, awkward1.layout.Record):
layout = data
elif isinstance(data, Record):
Expand Down Expand Up @@ -1379,6 +1406,8 @@ def __init__(self, data, behavior=None, with_name=None, check_valid=False):
if check_valid:
awkward1.operations.describe.validity_error(self, exception=True)

self.metadata = metadata

@property
def layout(self):
"""
Expand Down Expand Up @@ -1453,6 +1482,17 @@ def behavior(self, behavior):
else:
raise TypeError("behavior must be None or a dict")

# Read/write `metadata` property of the high-level record: the getter exposes
# the private `_metadata` attribute and the setter validates before storing.
@property
def metadata(self):
# Return whatever the setter last stored (None or a MutableMapping).
return self._metadata

@metadata.setter
def metadata(self, value):
# Only None or a MutableMapping instance is accepted; any other value is
# rejected with a TypeError and `_metadata` is left unchanged.
if value is None or isinstance(value, MutableMapping):
self._metadata = value
else:
raise TypeError("metadata must be None or a MutableMapping")

def tolist(self):
"""
Converts this Record into Python objects.
Expand Down Expand Up @@ -1562,7 +1602,9 @@ def __getitem__(self, where):
>>> record["y", 1]
2
"""
return awkward1._util.wrap(self._layout[where], self._behavior)
return awkward1._util.wrap(
self._layout[where], self._behavior, metadata=self._metadata
)

def __setitem__(self, where, what):
"""
Expand Down
Loading

0 comments on commit f5d3282

Please sign in to comment.