Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add new "fields" key to RecordForm #1773

Merged
merged 8 commits into from
Oct 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 34 additions & 22 deletions src/awkward/forms/form.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
from __future__ import annotations

import json
from typing import Any, Mapping
from collections.abc import Mapping
from typing import Any

import awkward as ak
from awkward import _errors

np = ak.nplikes.NumpyMetadata.instance()

Expand Down Expand Up @@ -65,12 +67,22 @@ def from_dict(input: dict) -> Form:
)

elif input["class"] == "RecordArray":
if isinstance(input["contents"], dict):
# New serialisation
if "fields" in input:
if isinstance(input["contents"], Mapping):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: since this is coming from json.loads or equivalent, we can make a hard assumption that JSON objects are dicts. Being more general doesn't hurt, but it's why the rest of this function is checking isinstance(_, list) and isinstance(_, dict) everywhere.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This warning is different to the rest of the function body, which (as you point out) is much stricter with types.

In reality, this is a high-level function that users can call. My temptation is to relax the constraints from dict to Mapping, and list to sized_iterable. How do you feel about that?

raise _errors.wrap_error(
TypeError("new-style RecordForm contents must not be mappings")
)
contents = [from_dict(content) for content in input["contents"]]
fields = input["fields"]
# Old style record
elif isinstance(input["contents"], dict):
contents = []
fields = []
for key, content in input["contents"].items():
contents.append(from_dict(content))
fields.append(key)
# Old style tuple
else:
contents = [from_dict(content) for content in input["contents"]]
fields = None
Expand Down Expand Up @@ -154,14 +166,14 @@ def from_dict(input: dict) -> Form:
)

elif input["class"] == "VirtualArray":
raise ak._errors.wrap_error(
raise _errors.wrap_error(
ValueError("Awkward 1.x VirtualArrays are not supported")
)

else:
raise ak._errors.wrap_error(
raise _errors.wrap_error(
ValueError(
"Input class: {} was not recognised".format(repr(input["class"]))
"input class: {} was not recognised".format(repr(input["class"]))
)
)

Expand Down Expand Up @@ -265,23 +277,23 @@ class Form:

def _init(self, has_identifier, parameters, form_key):
if not isinstance(has_identifier, bool):
raise ak._errors.wrap_error(
raise _errors.wrap_error(
TypeError(
"{} 'has_identifier' must be of type bool, not {}".format(
type(self).__name__, repr(has_identifier)
)
)
)
if parameters is not None and not isinstance(parameters, dict):
raise ak._errors.wrap_error(
raise _errors.wrap_error(
TypeError(
"{} 'parameters' must be of type dict or None, not {}".format(
type(self).__name__, repr(parameters)
)
)
)
if form_key is not None and not ak._util.isstr(form_key):
raise ak._errors.wrap_error(
raise _errors.wrap_error(
TypeError(
"{} 'form_key' must be of type string or None, not {}".format(
type(self).__name__, repr(form_key)
Expand All @@ -306,7 +318,7 @@ def parameters(self):
@property
def is_identity_like(self):
"""Return True if the content or its non-list descendents are an identity"""
raise ak._errors.wrap_error(NotImplementedError)
raise _errors.wrap_error(NotImplementedError)

def parameter(self, key):
if self._parameters is None:
Expand All @@ -315,31 +327,31 @@ def parameter(self, key):
return self._parameters.get(key)

def purelist_parameter(self, key):
raise ak._errors.wrap_error(NotImplementedError)
raise _errors.wrap_error(NotImplementedError)

@property
def purelist_isregular(self):
raise ak._errors.wrap_error(NotImplementedError)
raise _errors.wrap_error(NotImplementedError)

@property
def purelist_depth(self):
raise ak._errors.wrap_error(NotImplementedError)
raise _errors.wrap_error(NotImplementedError)

@property
def minmax_depth(self):
raise ak._errors.wrap_error(NotImplementedError)
raise _errors.wrap_error(NotImplementedError)

@property
def branch_depth(self):
raise ak._errors.wrap_error(NotImplementedError)
raise _errors.wrap_error(NotImplementedError)

@property
def fields(self):
raise ak._errors.wrap_error(NotImplementedError)
raise _errors.wrap_error(NotImplementedError)

@property
def is_tuple(self):
raise ak._errors.wrap_error(NotImplementedError)
raise _errors.wrap_error(NotImplementedError)

@property
def form_key(self):
Expand Down Expand Up @@ -396,7 +408,7 @@ def select_columns(self, specifier, expand_braces=True):

for item in specifier:
if not ak._util.isstr(item):
raise ak._errors.wrap_error(
raise _errors.wrap_error(
TypeError("a column-selection specifier must be a list of strings")
)

Expand All @@ -417,16 +429,16 @@ def column_types(self):
return self._column_types()

def _columns(self, path, output, list_indicator):
raise ak._errors.wrap_error(NotImplementedError)
raise _errors.wrap_error(NotImplementedError)

def _select_columns(self, index, specifier, matches, output):
raise ak._errors.wrap_error(NotImplementedError)
raise _errors.wrap_error(NotImplementedError)

def _column_types(self):
raise ak._errors.wrap_error(NotImplementedError)
raise _errors.wrap_error(NotImplementedError)

def _to_dict_part(self, verbose, toplevel):
raise ak._errors._errors(NotImplementedError)
raise _errors._errors(NotImplementedError)

def _type(self, typestrs):
raise ak._errors.wrap_error(NotImplementedError)
raise _errors.wrap_error(NotImplementedError)
5 changes: 3 additions & 2 deletions src/awkward/forms/recordform.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,10 +139,11 @@ def _to_dict_part(self, verbose, toplevel):
content._to_dict_part(verbose, toplevel=False) for content in self._contents
]
if self._fields is not None:
out["contents"] = dict(zip(self._fields, contents_tolist))
out["fields"] = list(self._fields)
else:
out["contents"] = contents_tolist
out["fields"] = None

out["contents"] = contents_tolist
return self._to_dict_extra(out, verbose)

def _type(self, typestrs):
Expand Down
26 changes: 14 additions & 12 deletions tests/test_0025-record-array.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,17 @@ def test_basic():
assert json.loads(ak.forms.form.Form.to_json(recordarray.form)) == (
{
"class": "RecordArray",
"contents": {
"one": {
"fields": ["one", "two", "2", "wonky"],
"contents": [
{
"class": "NumpyArray",
"primitive": "int64",
"inner_shape": [],
"has_identifier": False,
"parameters": {},
"form_key": None,
},
"two": {
{
"class": "ListOffsetArray",
"offsets": "i64",
"content": {
Expand All @@ -67,23 +68,23 @@ def test_basic():
"parameters": {},
"form_key": None,
},
"2": {
{
"class": "NumpyArray",
"primitive": "float64",
"inner_shape": [],
"has_identifier": False,
"parameters": {},
"form_key": None,
},
"wonky": {
{
"class": "NumpyArray",
"primitive": "int64",
"inner_shape": [],
"has_identifier": False,
"parameters": {},
"form_key": None,
},
},
],
"has_identifier": False,
"parameters": {},
"form_key": None,
Expand Down Expand Up @@ -135,16 +136,17 @@ def test_basic():

assert json.loads(ak.forms.form.Form.to_json(recordarray.form)) == {
"class": "RecordArray",
"contents": {
"one": {
"fields": ["one", "two", "2", "wonky"],
"contents": [
{
"class": "NumpyArray",
"primitive": "int64",
"inner_shape": [],
"has_identifier": False,
"parameters": {},
"form_key": None,
},
"two": {
{
"class": "ListOffsetArray",
"offsets": "i64",
"content": {
Expand All @@ -159,23 +161,23 @@ def test_basic():
"parameters": {},
"form_key": None,
},
"2": {
{
"class": "NumpyArray",
"primitive": "float64",
"inner_shape": [],
"has_identifier": False,
"parameters": {},
"form_key": None,
},
"wonky": {
{
"class": "NumpyArray",
"primitive": "int64",
"inner_shape": [],
"has_identifier": False,
"parameters": {},
"form_key": None,
},
},
],
"has_identifier": False,
"parameters": {},
"form_key": None,
Expand Down
20 changes: 12 additions & 8 deletions tests/test_0057-introducing-forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,34 +265,36 @@ def test_forms():
assert ak.forms.from_json(form.to_json()) == form
assert json.loads(form.to_json()) == {
"class": "RecordArray",
"contents": {
"one": {
"fields": ["one", "two"],
"contents": [
{
"class": "NumpyArray",
"inner_shape": [],
"primitive": "float64",
"has_identifier": False,
"parameters": {},
"form_key": None,
},
"two": {
{
"class": "NumpyArray",
"inner_shape": [],
"primitive": "bool",
"has_identifier": False,
"parameters": {},
"form_key": None,
},
},
],
"has_identifier": False,
"parameters": {"hey": ["you"]},
"form_key": "yowzers",
}
assert json.loads(str(form)) == {
"class": "RecordArray",
"contents": {
"one": "float64",
"two": "bool",
},
"fields": ["one", "two"],
"contents": [
"float64",
"bool",
],
"parameters": {"hey": ["you"]},
"form_key": "yowzers",
}
Expand All @@ -308,6 +310,7 @@ def test_forms():
assert ak.forms.from_json(form.to_json()) == form
assert json.loads(form.to_json()) == {
"class": "RecordArray",
"fields": None,
"contents": [
{
"class": "NumpyArray",
Expand All @@ -332,6 +335,7 @@ def test_forms():
}
assert json.loads(str(form)) == {
"class": "RecordArray",
"fields": None,
"contents": ["float64", "bool"],
"parameters": {"hey": ["you"]},
"form_key": "yowzers",
Expand Down
Loading