Skip to content

Commit

Permalink
Revamping the to_json/from_json interface. (#1449)
Browse files Browse the repository at this point in the history
* First, get rid of the 'io' submodule.

* Put all of the ._to_json functionality into ._to_list.

* Tests pass again.

* Remove old '_to_json' methods, which are now dead code.

* Black formatting.

* Easier error string match.

* ak._v2.to_json now takes ak._v2.to_json_file's role, dropping the now-unnecessary function.

* Test all of the 'ak._v2.to_json' options.

* Satisfy pylint.

* Fix file -> openfile bug, introduced in last commit.

* Implemented 'ak._ext.fromjsonobj' to parse JSON from a file-like object.

* Implemented JSON from file-like objects for no schema. (No tests yet.)

* Satisfy pylint (we do use a 'with' statement, later on).
  • Loading branch information
jpivarski authored May 3, 2022
1 parent cf42ab4 commit 3b225f9
Show file tree
Hide file tree
Showing 32 changed files with 1,186 additions and 739 deletions.
64 changes: 48 additions & 16 deletions include/awkward/io/json.h
Original file line number Diff line number Diff line change
Expand Up @@ -329,14 +329,13 @@ namespace awkward {
/// ArrayBuilder.
///
/// @param source Null-terminated string containing any valid JSON data.
/// @param options Configuration options for building an array with an
/// ArrayBuilder.
/// @param nan_string user-defined string for a not-a-number (NaN) value
/// representation in JSON format
/// @param infinity_string user-defined string for a positive infinity
/// representation in JSON format
/// @param minus_infinity_string user-defined string for a negative
/// infinity representation in JSON format
/// @param builder To build the array.
/// @param nan_string User-defined string for a not-a-number (NaN) value
/// representation in JSON format.
/// @param infinity_string User-defined string for a positive infinity
/// representation in JSON format.
/// @param minus_infinity_string User-defined string for a negative
/// infinity representation in JSON format.
LIBAWKWARD_EXPORT_SYMBOL int64_t
FromJsonString(const char* source,
ArrayBuilder& builder,
Expand All @@ -348,15 +347,14 @@ namespace awkward {
/// ArrayBuilder.
///
/// @param source C file handle to a file containing any valid JSON data.
/// @param options Configuration options for building an array with an
/// ArrayBuilder.
/// @param builder To build the array.
/// @param buffersize Number of bytes for an intermediate buffer.
/// @param nan_string user-defined string for a not-a-number (NaN) value
/// representation in JSON format
/// @param infinity_string user-defined string for a positive infinity
/// representation in JSON format
/// @param minus_infinity_string user-defined string for a negative
/// infinity representation in JSON format
/// @param nan_string User-defined string for a not-a-number (NaN) value
/// representation in JSON format.
/// @param infinity_string User-defined string for a positive infinity
/// representation in JSON format.
/// @param minus_infinity_string User-defined string for a negative
/// infinity representation in JSON format.
LIBAWKWARD_EXPORT_SYMBOL int64_t
FromJsonFile(FILE* source,
ArrayBuilder& builder,
Expand All @@ -365,6 +363,40 @@ namespace awkward {
const char* infinity_string = nullptr,
const char* minus_infinity_string = nullptr);

/// @class FileLikeObject
///
/// @brief Abstract class to represent a file-like object, something with
/// a `read(num_bytes)` method. Satisfies RapidJSON's Stream interface.
///
/// The destructor is virtual so that a concrete implementation can be
/// destroyed safely through a `FileLikeObject*`.
class FileLikeObject {
public:
  /// @brief Virtual destructor: this class is a polymorphic base.
  virtual ~FileLikeObject() = default;

  /// @brief Reads data from the underlying object into `buffer`.
  ///
  /// @param num_bytes Number of bytes requested.
  /// @param buffer Destination for the data. NOTE(review): presumably must
  /// have room for at least `num_bytes` bytes; confirm with implementers.
  /// @return NOTE(review): presumably the number of bytes actually
  /// written to `buffer`; confirm against the implementations.
  virtual int64_t read(int64_t num_bytes, char* buffer) = 0;
};

/// @brief Parses a JSON-encoded file-like object using an
/// ArrayBuilder.
///
/// @param source File-like object wrapped with the FileLikeObject
/// abstraction (borrowed reference).
/// @param builder To build the array.
/// @param buffersize Number of bytes for an intermediate buffer.
/// @param read_one If true, read only one JSON object (with an error if
/// there's more); otherwise, read a stream of concatenated objects (they
/// may be separated by newlines, but this is not checked).
/// @param nan_string User-defined string for a not-a-number (NaN) value
/// representation in JSON format. Defaults to `nullptr`.
/// @param infinity_string User-defined string for a positive infinity
/// representation in JSON format. Defaults to `nullptr`.
/// @param minus_infinity_string User-defined string for a negative
/// infinity representation in JSON format. Defaults to `nullptr`.
/// @return NOTE(review): the meaning of the returned `int64_t` is not
/// documented in this header; confirm against the implementation.
LIBAWKWARD_EXPORT_SYMBOL int64_t
FromJsonObject(FileLikeObject* source,
ArrayBuilder& builder,
int64_t buffersize,
bool read_one,
const char* nan_string = nullptr,
const char* infinity_string = nullptr,
const char* minus_infinity_string = nullptr);

}

#endif // AWKWARD_IO_JSON_H_
3 changes: 3 additions & 0 deletions include/awkward/python/io.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ make_fromjson(py::module& m, const std::string& name);
void
make_fromjsonfile(py::module& m, const std::string& name);

void
make_fromjsonobj(py::module& m, const std::string& name);

void
make_uproot_issue_90(py::module& m);

Expand Down
1 change: 0 additions & 1 deletion src/awkward/_v2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
import awkward._v2.behaviors.string # noqa: F401

# operations
from awkward._v2.operations.io import * # noqa: F401, F403
from awkward._v2.operations.convert import * # noqa: F401, F403
from awkward._v2.operations.describe import * # noqa: F401, F403
from awkward._v2.operations.structure import * # noqa: F401, F403
Expand Down
34 changes: 5 additions & 29 deletions src/awkward/_v2/contents/bitmaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -633,13 +633,15 @@ def packed(self):
self._nplike,
)

def _to_list(self, behavior):
out = self._to_list_custom(behavior)
def _to_list(self, behavior, json_conversions):
out = self._to_list_custom(behavior, json_conversions)
if out is not None:
return out

mask = self.mask_as_bool(valid_when=True, nplike=self.nplike)[: self._length]
out = self._content._getitem_range(slice(0, self._length))._to_list(behavior)
out = self._content._getitem_range(slice(0, self._length))._to_list(
behavior, json_conversions
)

for i, isvalid in enumerate(mask):
if not isvalid:
Expand All @@ -660,29 +662,3 @@ def _to_nplike(self, nplike):
parameters=self._parameters,
nplike=nplike,
)

def _to_json(
self,
nan_string,
infinity_string,
minus_infinity_string,
complex_real_string,
complex_imag_string,
):
out = self._to_json_custom()
if out is not None:
return out

mask = self.mask_as_bool(valid_when=True, nplike=self.nplike)[: self._length]
content = self._content._to_json(
nan_string,
infinity_string,
minus_infinity_string,
complex_real_string,
complex_imag_string,
)
out = [None] * self._length
for i, isvalid in enumerate(mask):
if isvalid:
out[i] = content[i]
return out
36 changes: 5 additions & 31 deletions src/awkward/_v2/contents/bytemaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1014,13 +1014,15 @@ def packed(self):
self._nplike,
)

def _to_list(self, behavior):
out = self._to_list_custom(behavior)
def _to_list(self, behavior, json_conversions):
out = self._to_list_custom(behavior, json_conversions)
if out is not None:
return out

mask = self.mask_as_bool(valid_when=True, nplike=self.nplike)
out = self._content._getitem_range(slice(0, len(mask)))._to_list(behavior)
out = self._content._getitem_range(slice(0, len(mask)))._to_list(
behavior, json_conversions
)

for i, isvalid in enumerate(mask):
if not isvalid:
Expand All @@ -1039,31 +1041,3 @@ def _to_nplike(self, nplike):
parameters=self._parameters,
nplike=nplike,
)

def _to_json(
self,
behavior,
nan_string,
infinity_string,
minus_infinity_string,
complex_real_string,
complex_imag_string,
):
out = self._to_list_custom(behavior)
if out is not None:
return out

mask = self.mask_as_bool(valid_when=True, nplike=self.nplike)
content = self._content._to_json(
behavior,
nan_string,
infinity_string,
minus_infinity_string,
complex_real_string,
complex_imag_string,
)
out = [None] * self._mask.length
for i, isvalid in enumerate(mask):
if isvalid:
out[i] = content[i]
return out
151 changes: 108 additions & 43 deletions src/awkward/_v2/contents/content.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE

import numbers
import math
import copy
from collections.abc import Iterable

Expand Down Expand Up @@ -1331,19 +1333,123 @@ def recursively_apply(
},
)

def to_json(
    self,
    nan_string=None,
    infinity_string=None,
    minus_infinity_string=None,
    complex_record_fields=None,
    convert_bytes=None,
    behavior=None,
):
    """Convert this layout to Python objects, applying JSON conversions.

    The layout is packed and then handed to ``_to_list`` together with a
    dict of conversion settings built from the arguments.

    Args:
        nan_string: Passed through as ``"nan_string"``.
        infinity_string: Passed through as ``"infinity_string"``.
        minus_infinity_string: Passed through as ``"minus_infinity_string"``.
        complex_record_fields: ``None``, or a 2-tuple of strings that is
            unpacked into ``"complex_real_string"`` and
            ``"complex_imag_string"`` (in that order).
        convert_bytes: Passed through as ``"convert_bytes"``.
        behavior: Forwarded unchanged as the first ``_to_list`` argument.

    Returns:
        Whatever ``self.packed()._to_list(behavior, conversions)`` returns.

    Raises:
        TypeError: If ``complex_record_fields`` is neither ``None`` nor a
            2-tuple of strings.
    """
    if complex_record_fields is None:
        complex_real_string = None
        complex_imag_string = None
    elif (
        isinstance(complex_record_fields, tuple)
        and len(complex_record_fields) == 2
        and isinstance(complex_record_fields[0], str)
        and isinstance(complex_record_fields[1], str)
    ):
        complex_real_string, complex_imag_string = complex_record_fields
    else:
        # Without this branch the two names would be unbound below and the
        # caller would see an unhelpful UnboundLocalError.
        raise TypeError(
            "complex_record_fields must be None or a 2-tuple of strings, not "
            + repr(complex_record_fields)
        )

    return self.packed()._to_list(
        behavior,
        {
            "nan_string": nan_string,
            "infinity_string": infinity_string,
            "minus_infinity_string": minus_infinity_string,
            "complex_real_string": complex_real_string,
            "complex_imag_string": complex_imag_string,
            "convert_bytes": convert_bytes,
        },
    )

def tolist(self, behavior=None):
return self.to_list(behavior)

def to_list(self, behavior=None):
return self.packed()._to_list(behavior)
return self.packed()._to_list(behavior, None)

def _to_list_custom(self, behavior):
def _to_list_custom(self, behavior, json_conversions):
    """Build a Python list through a custom ``__getitem__``, if there is one.

    The array class is looked up with ``ak._v2._util.arrayclass``. If that
    class does not override ``ak._v2.highlevel.Array.__getitem__``, nothing
    is done and ``None`` is returned. Otherwise each item is fetched through
    the custom ``__getitem__`` and, when ``json_conversions`` is given,
    post-processed in this order:

    1. ``bytes`` items are passed through ``convert_bytes``;
    2. if any item is a non-real complex number, every item is split into
       a real and an imaginary part;
    3. NaN, +infinity and -infinity are replaced by the configured strings;
    4. if step 2 split the items, each one becomes a dict keyed by
       ``complex_real_string`` and ``complex_imag_string``.

    Args:
        behavior: Forwarded to ``ak._v2._util.arrayclass``.
        json_conversions: ``None`` for no conversions, or a dict with the
            keys ``"convert_bytes"``, ``"complex_real_string"``,
            ``"complex_imag_string"``, ``"nan_string"``,
            ``"infinity_string"`` and ``"minus_infinity_string"``.

    Returns:
        A list of items, or ``None`` when no custom ``__getitem__`` exists.
    """
    cls = ak._v2._util.arrayclass(self, behavior)
    if cls.__getitem__ is ak._v2.highlevel.Array.__getitem__:
        # No custom item access: return None, as the original did implicitly.
        return None

    array = cls(self)
    out = [array[i] for i in range(self.length)]

    if json_conversions is None:
        return out

    convert_bytes = json_conversions["convert_bytes"]
    if convert_bytes is not None:
        for i, x in enumerate(out):
            if isinstance(x, bytes):
                out[i] = convert_bytes(x)

    Real = numbers.Real
    Complex = numbers.Complex

    outimag = None
    complex_real_string = json_conversions["complex_real_string"]
    complex_imag_string = json_conversions["complex_imag_string"]
    if complex_real_string is not None:
        if any(not isinstance(x, Real) and isinstance(x, Complex) for x in out):
            outimag = [None] * len(out)
            for i, x in enumerate(out):
                # Real numbers are Complex too, so they get an imaginary
                # part as well; non-numeric items keep a None placeholder.
                if isinstance(x, Complex):
                    out[i] = x.real
                    outimag[i] = x.imag

    filters = []

    nan_string = json_conversions["nan_string"]
    if nan_string is not None:
        isnan = math.isnan
        # math.isnan raises TypeError for non-numeric items (str, None, ...),
        # so only real numbers are tested.
        filters.append(
            lambda x: nan_string if isinstance(x, Real) and isnan(x) else x
        )

    infinity_string = json_conversions["infinity_string"]
    if infinity_string is not None:
        inf = float("inf")
        filters.append(lambda x: infinity_string if x == inf else x)

    minus_infinity_string = json_conversions["minus_infinity_string"]
    if minus_infinity_string is not None:
        minf = float("-inf")
        filters.append(lambda x: minus_infinity_string if x == minf else x)

    if filters:

        def apply_filters(x):
            # Filters run in the order they were registered: NaN first,
            # then +infinity, then -infinity.
            for f in filters:
                x = f(x)
            return x

        out = [apply_filters(x) for x in out]
        if outimag is not None:
            outimag = [apply_filters(x) for x in outimag]

    if outimag is not None:
        out = [
            {complex_real_string: real, complex_imag_string: imag}
            for real, imag in zip(out, outimag)
        ]

    return out

def flatten(self, axis=1, depth=0):
Expand All @@ -1356,47 +1462,6 @@ def to_backend(self, backend):
else:
return self._to_nplike(ak._v2._util.regularize_backend(backend))

def _to_json_custom(self):
cls = ak._v2._util.arrayclass(self, None)
if cls.__getitem__ is not ak._v2.highlevel.Array.__getitem__:
array = cls(self)
out = [None] * self.length
for i in range(self.length):
out[i] = array[i]
return out

def tojson(
self,
nan_string=None,
infinity_string=None,
minus_infinity_string=None,
complex_real_string=None,
complex_imag_string=None,
):
return self.to_json(
nan_string,
infinity_string,
minus_infinity_string,
complex_real_string,
complex_imag_string,
)

def to_json(
self,
nan_string,
infinity_string,
minus_infinity_string,
complex_real_string,
complex_imag_string,
):
return self.packed()._to_json(
nan_string,
infinity_string,
minus_infinity_string,
complex_real_string,
complex_imag_string,
)

def withparameter(self, key, value):
out = copy.copy(self)

Expand Down
Loading

0 comments on commit 3b225f9

Please sign in to comment.