Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revamping the to_json/from_json interface. #1449

Merged
merged 13 commits into from
May 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 48 additions & 16 deletions include/awkward/io/json.h
Original file line number Diff line number Diff line change
Expand Up @@ -329,14 +329,13 @@ namespace awkward {
/// ArrayBuilder.
///
/// @param source Null-terminated string containing any valid JSON data.
/// @param options Configuration options for building an array with an
/// ArrayBuilder.
/// @param nan_string user-defined string for a not-a-number (NaN) value
/// representation in JSON format
/// @param infinity_string user-defined string for a positive infinity
/// representation in JSON format
/// @param minus_infinity_string user-defined string for a negative
/// infinity representation in JSON format
/// @param builder To build the array.
/// @param nan_string User-defined string for a not-a-number (NaN) value
/// representation in JSON format.
/// @param infinity_string User-defined string for a positive infinity
/// representation in JSON format.
/// @param minus_infinity_string User-defined string for a negative
/// infinity representation in JSON format.
LIBAWKWARD_EXPORT_SYMBOL int64_t
FromJsonString(const char* source,
ArrayBuilder& builder,
Expand All @@ -348,15 +347,14 @@ namespace awkward {
/// ArrayBuilder.
///
/// @param source C file handle to a file containing any valid JSON data.
/// @param options Configuration options for building an array with an
/// ArrayBuilder.
/// @param builder To build the array.
/// @param buffersize Number of bytes for an intermediate buffer.
/// @param nan_string user-defined string for a not-a-number (NaN) value
/// representation in JSON format
/// @param infinity_string user-defined string for a positive infinity
/// representation in JSON format
/// @param minus_infinity_string user-defined string for a negative
/// infinity representation in JSON format
/// @param nan_string User-defined string for a not-a-number (NaN) value
/// representation in JSON format.
/// @param infinity_string User-defined string for a positive infinity
/// representation in JSON format.
/// @param minus_infinity_string User-defined string for a negative
/// infinity representation in JSON format.
LIBAWKWARD_EXPORT_SYMBOL int64_t
FromJsonFile(FILE* source,
ArrayBuilder& builder,
Expand All @@ -365,6 +363,40 @@ namespace awkward {
const char* infinity_string = nullptr,
const char* minus_infinity_string = nullptr);

/// @class FileLikeObject
///
/// @brief Abstract class to represent a file-like object, something with
/// a `read(num_bytes)` method.
///
/// Concrete sources derive from this class and override #read.
///
/// NOTE(review): this class exposes only `read`, so on its own it does not
/// provide the members of RapidJSON's Stream concept (`Peek`, `Take`,
/// `Tell`, ...); presumably the parser wraps it in an adapter that does --
/// confirm against the implementation.
class FileLikeObject {
public:
  /// @brief Virtual destructor, so that a derived object may be destroyed
  /// through a `FileLikeObject*` without undefined behavior.
  virtual ~FileLikeObject() = default;

  /// @brief Reads data from the underlying source into a caller-supplied
  /// buffer.
  ///
  /// @param num_bytes Number of bytes requested.
  /// @param buffer Destination for the data; NOTE(review): presumably must
  /// have room for at least `num_bytes` bytes -- confirm with callers.
  /// @return NOTE(review): presumably the number of bytes actually written
  /// to `buffer` -- confirm against the implementations.
  virtual int64_t read(int64_t num_bytes, char* buffer) = 0;
};

/// @brief Parses a JSON-encoded file-like object using an
/// ArrayBuilder.
///
/// Unlike the three string options, `buffersize` and `read_one` have no
/// default value and must always be supplied by the caller.
///
/// @param source File-like object wrapped with the FileLikeObject
/// abstraction (borrowed reference).
/// @param builder To build the array.
/// @param buffersize Number of bytes for an intermediate buffer.
/// @param read_one If true, read only one JSON object (with an error if
/// there's more); otherwise, read a stream of concatenated objects (may
/// be separated by newlines, but we don't check).
/// @param nan_string User-defined string for a not-a-number (NaN) value
/// representation in JSON format. Defaults to `nullptr`.
/// @param infinity_string User-defined string for a positive infinity
/// representation in JSON format. Defaults to `nullptr`.
/// @param minus_infinity_string User-defined string for a negative
/// infinity representation in JSON format. Defaults to `nullptr`.
/// @return NOTE(review): the meaning of the returned `int64_t` is not
/// stated in this header -- confirm against the implementation.
LIBAWKWARD_EXPORT_SYMBOL int64_t
FromJsonObject(FileLikeObject* source,
ArrayBuilder& builder,
int64_t buffersize,
bool read_one,
const char* nan_string = nullptr,
const char* infinity_string = nullptr,
const char* minus_infinity_string = nullptr);

}

#endif // AWKWARD_IO_JSON_H_
3 changes: 3 additions & 0 deletions include/awkward/python/io.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ make_fromjson(py::module& m, const std::string& name);
void
make_fromjsonfile(py::module& m, const std::string& name);

void
make_fromjsonobj(py::module& m, const std::string& name);

void
make_uproot_issue_90(py::module& m);

Expand Down
1 change: 0 additions & 1 deletion src/awkward/_v2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
import awkward._v2.behaviors.string # noqa: F401

# operations
from awkward._v2.operations.io import * # noqa: F401, F403
from awkward._v2.operations.convert import * # noqa: F401, F403
from awkward._v2.operations.describe import * # noqa: F401, F403
from awkward._v2.operations.structure import * # noqa: F401, F403
Expand Down
34 changes: 5 additions & 29 deletions src/awkward/_v2/contents/bitmaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -628,13 +628,15 @@ def packed(self):
self._nplike,
)

def _to_list(self, behavior):
out = self._to_list_custom(behavior)
def _to_list(self, behavior, json_conversions):
out = self._to_list_custom(behavior, json_conversions)
if out is not None:
return out

mask = self.mask_as_bool(valid_when=True, nplike=self.nplike)[: self._length]
out = self._content._getitem_range(slice(0, self._length))._to_list(behavior)
out = self._content._getitem_range(slice(0, self._length))._to_list(
behavior, json_conversions
)

for i, isvalid in enumerate(mask):
if not isvalid:
Expand All @@ -655,29 +657,3 @@ def _to_nplike(self, nplike):
parameters=self._parameters,
nplike=nplike,
)

def _to_json(
self,
nan_string,
infinity_string,
minus_infinity_string,
complex_real_string,
complex_imag_string,
):
out = self._to_json_custom()
if out is not None:
return out

mask = self.mask_as_bool(valid_when=True, nplike=self.nplike)[: self._length]
content = self._content._to_json(
nan_string,
infinity_string,
minus_infinity_string,
complex_real_string,
complex_imag_string,
)
out = [None] * self._length
for i, isvalid in enumerate(mask):
if isvalid:
out[i] = content[i]
return out
36 changes: 5 additions & 31 deletions src/awkward/_v2/contents/bytemaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1009,13 +1009,15 @@ def packed(self):
self._nplike,
)

def _to_list(self, behavior):
out = self._to_list_custom(behavior)
def _to_list(self, behavior, json_conversions):
out = self._to_list_custom(behavior, json_conversions)
if out is not None:
return out

mask = self.mask_as_bool(valid_when=True, nplike=self.nplike)
out = self._content._getitem_range(slice(0, len(mask)))._to_list(behavior)
out = self._content._getitem_range(slice(0, len(mask)))._to_list(
behavior, json_conversions
)

for i, isvalid in enumerate(mask):
if not isvalid:
Expand All @@ -1034,31 +1036,3 @@ def _to_nplike(self, nplike):
parameters=self._parameters,
nplike=nplike,
)

def _to_json(
self,
behavior,
nan_string,
infinity_string,
minus_infinity_string,
complex_real_string,
complex_imag_string,
):
out = self._to_list_custom(behavior)
if out is not None:
return out

mask = self.mask_as_bool(valid_when=True, nplike=self.nplike)
content = self._content._to_json(
behavior,
nan_string,
infinity_string,
minus_infinity_string,
complex_real_string,
complex_imag_string,
)
out = [None] * self._mask.length
for i, isvalid in enumerate(mask):
if isvalid:
out[i] = content[i]
return out
151 changes: 108 additions & 43 deletions src/awkward/_v2/contents/content.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE

import numbers
import math
import copy
from collections.abc import Iterable

Expand Down Expand Up @@ -1329,19 +1331,123 @@ def recursively_apply(
},
)

def to_json(
    self,
    nan_string=None,
    infinity_string=None,
    minus_infinity_string=None,
    complex_record_fields=None,
    convert_bytes=None,
    behavior=None,
):
    """Convert this layout to Python objects using JSON conversion options.

    The array is packed and then converted with ``_to_list``, which receives
    ``behavior`` and a dict holding the conversion options.

    Args:
        nan_string: Forwarded as ``"nan_string"``.
        infinity_string: Forwarded as ``"infinity_string"``.
        minus_infinity_string: Forwarded as ``"minus_infinity_string"``.
        complex_record_fields: ``None``, or a 2-tuple of strings that is
            unpacked into ``"complex_real_string"`` and
            ``"complex_imag_string"``.
        convert_bytes: Forwarded as ``"convert_bytes"``.
        behavior: Passed through to ``_to_list`` unchanged.

    Returns:
        Whatever ``self.packed()._to_list(...)`` returns.

    Raises:
        TypeError: If ``complex_record_fields`` is neither ``None`` nor a
            2-tuple of strings.
    """
    if complex_record_fields is None:
        complex_real_string = None
        complex_imag_string = None
    elif (
        isinstance(complex_record_fields, tuple)
        and len(complex_record_fields) == 2
        and isinstance(complex_record_fields[0], str)
        and isinstance(complex_record_fields[1], str)
    ):
        complex_real_string, complex_imag_string = complex_record_fields
    else:
        # Without this branch the two names below would be unbound and the
        # call would fail with an opaque UnboundLocalError.
        raise TypeError(
            "complex_record_fields must be None or a pair of strings, not "
            + repr(complex_record_fields)
        )

    return self.packed()._to_list(
        behavior,
        {
            "nan_string": nan_string,
            "infinity_string": infinity_string,
            "minus_infinity_string": minus_infinity_string,
            "complex_real_string": complex_real_string,
            "complex_imag_string": complex_imag_string,
            "convert_bytes": convert_bytes,
        },
    )

def tolist(self, behavior=None):
    """Alias of ``to_list``: forwards ``behavior`` and returns its result."""
    as_list = self.to_list(behavior)
    return as_list

def to_list(self, behavior=None):
return self.packed()._to_list(behavior)
return self.packed()._to_list(behavior, None)

def _to_list_custom(self, behavior):
def _to_list_custom(self, behavior, json_conversions):
cls = ak._v2._util.arrayclass(self, behavior)
if cls.__getitem__ is not ak._v2.highlevel.Array.__getitem__:
array = cls(self)
out = [None] * self.length
for i in range(self.length):
out[i] = array[i]

if json_conversions is not None:
convert_bytes = json_conversions["convert_bytes"]
if convert_bytes is not None:
for i, x in enumerate(out):
if isinstance(x, bytes):
out[i] = convert_bytes(x)

outimag = None
complex_real_string = json_conversions["complex_real_string"]
complex_imag_string = json_conversions["complex_imag_string"]
if complex_real_string is not None:
Real = numbers.Real
Complex = numbers.Complex
if any(
not isinstance(x, Real) and isinstance(x, Complex) for x in out
):
outimag = [None] * len(out)
for i, x in enumerate(out):
if isinstance(x, Complex):
out[i] = x.real
outimag[i] = x.imag
else:
out[i] = x
outimag[i] = None

filters = []

nan_string = json_conversions["nan_string"]
if nan_string is not None:
isnan = math.isnan
filters.append(lambda x: nan_string if isnan(x) else x)

infinity_string = json_conversions["infinity_string"]
if infinity_string is not None:
inf = float("inf")
filters.append(lambda x: infinity_string if x == inf else x)

minus_infinity_string = json_conversions["minus_infinity_string"]
if minus_infinity_string is not None:
minf = float("-inf")
filters.append(lambda x: minus_infinity_string if x == minf else x)

if len(filters) == 1:
f0 = filters[0]
for i, x in enumerate(out):
out[i] = f0(x)
if outimag is not None:
for i, x in enumerate(outimag):
outimag[i] = f0(x)
elif len(filters) == 2:
f0 = filters[0]
f1 = filters[1]
for i, x in enumerate(out):
out[i] = f1(f0(x))
if outimag is not None:
for i, x in enumerate(outimag):
outimag[i] = f1(f0(x))
elif len(filters) == 3:
f0 = filters[0]
f1 = filters[1]
f2 = filters[2]
for i, x in enumerate(out):
out[i] = f2(f1(f0(x)))
if outimag is not None:
for i, x in enumerate(outimag):
outimag[i] = f2(f1(f0(x)))

if outimag is not None:
for i, (real, imag) in enumerate(zip(out, outimag)):
out[i] = {complex_real_string: real, complex_imag_string: imag}

return out

def flatten(self, axis=1, depth=0):
Expand All @@ -1354,47 +1460,6 @@ def to_backend(self, backend):
else:
return self._to_nplike(ak._v2._util.regularize_backend(backend))

def _to_json_custom(self):
cls = ak._v2._util.arrayclass(self, None)
if cls.__getitem__ is not ak._v2.highlevel.Array.__getitem__:
array = cls(self)
out = [None] * self.length
for i in range(self.length):
out[i] = array[i]
return out

def tojson(
self,
nan_string=None,
infinity_string=None,
minus_infinity_string=None,
complex_real_string=None,
complex_imag_string=None,
):
return self.to_json(
nan_string,
infinity_string,
minus_infinity_string,
complex_real_string,
complex_imag_string,
)

def to_json(
self,
nan_string,
infinity_string,
minus_infinity_string,
complex_real_string,
complex_imag_string,
):
return self.packed()._to_json(
nan_string,
infinity_string,
minus_infinity_string,
complex_real_string,
complex_imag_string,
)

def withparameter(self, key, value):
out = copy.copy(self)

Expand Down
Loading