Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update type-parser for v2 #1514

Merged
merged 13 commits into from
Jun 23, 2022
Merged
283 changes: 0 additions & 283 deletions src/awkward/_typeparser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,286 +316,3 @@ def toast_v1(ptnode, highlevel, categorical):
def from_datashape_v1(typestr, highlevel=False):
    """Parse ``typestr`` and convert the result with ``toast_v1``.

    Args:
        typestr: The type string handed to the standalone Lark parser.
        highlevel: Forwarded unchanged to ``toast_v1``.

    Returns:
        Whatever ``toast_v1`` builds from the parsed tree.
    """
    parser = Lark_StandAlone(transformer=TreeToJson())
    tree = parser.parse(typestr)
    # Conversion always starts outside of any categorical wrapper.
    return toast_v1(tree, highlevel, False)


# Nodes that only wrap a single alternative: convert the first child and
# keep the pending ``categorical`` flag.
_WRAPPER_NODES = ("start", "listtype", "uniontype", "optiontype", "record")

# Same as above, but these nodes must have exactly one child.
_SINGLE_CHILD_WRAPPER_NODES = ("input", "regular", "options")

# (typestr, "__array__" parameter, "__array__"/typestr of the uint8 item type
# when the predefined type is a list of such items, else None).
_PREDEFINED_TYPES = (
    ("string", "string", "char"),
    ("char", "char", None),
    ("byte", "byte", None),
    ("bytes", "bytestring", "byte"),
)


def _with_categorical(parms, categorical):
    """Return ``parms``, plus ``"__categorical__": True`` when requested.

    A new dict is built when the flag has to be added, so a parameter dict
    that came out of the parse tree is never modified in place.
    """
    if categorical:
        return {**parms, "__categorical__": True}
    return parms


def _toast_each(nodes, highlevel):
    """Convert every node in ``nodes`` with the categorical flag cleared."""
    return [toast(node, highlevel, False) for node in nodes]


def _split_record_children(nodes, highlevel):
    """Split record children into ``(field names, converted field types)``.

    Children that are ``str`` instances are taken as field names; all other
    children are converted with ``toast``.
    """
    keys = []
    contents = []
    for node in nodes:
        if isinstance(node, str):
            keys.append(node)
        else:
            contents.append(toast(node, highlevel, False))
    return keys, contents


def _predefined_type(name, categorical):
    """Build the type for a predefined typestring (string/char/byte/bytes)."""
    for typestr, array, item in _PREDEFINED_TYPES:
        if name == typestr:
            break
    else:
        raise AssertionError(f"unhandled typestring {name}")

    parms = _with_categorical({"__array__": array}, categorical)
    if item is None:
        return ak._v2.types.NumpyType("uint8", parameters=parms, typestr=typestr)
    return ak._v2.types.ListType(
        ak._v2.types.NumpyType(
            "uint8", parameters={"__array__": item}, typestr=item
        ),
        parameters=parms,
        typestr=typestr,
    )


def toast(ptnode, highlevel, categorical):
    """Recursively convert a parse-tree node into an ``ak._v2.types`` object.

    Args:
        ptnode: Either an object whose class is named ``Token`` (its
            ``.value`` is returned) or a tree node exposing ``.data`` (the
            rule name) and ``.children``.
            NOTE(review): presumably Lark ``Token``/``Tree`` objects -- confirm.
        highlevel: Must be truthy for ``option_highlevel`` and
            ``record_highlevel`` nodes and exactly ``True`` for ``categories``
            nodes; also makes ``regular_inparm`` produce an ``ArrayType``
            instead of a ``RegularType``.
        categorical: When true, the next type that is built gets
            ``"__categorical__": True`` added to its parameters.

    Returns:
        The token value, the raw child of a ``def_option`` node, or the
        constructed type object.

    Raises:
        AssertionError: For unknown nodes or typestrings, nodes with an
            unexpected number of children, and high-level-only nodes reached
            with ``highlevel`` unset.
    """
    if ptnode.__class__.__name__ == "Token":
        return ptnode.value

    kind = ptnode.data
    children = ptnode.children

    if kind in _SINGLE_CHILD_WRAPPER_NODES:
        assert len(children) == 1
        return toast(children[0], highlevel, categorical)

    if kind in _WRAPPER_NODES:
        return toast(children[0], highlevel, categorical)

    if kind == "predefined_typestr":
        return _predefined_type(children[0], categorical)

    if kind == "primitive":
        if len(children) == 1:
            parms = {}
        elif len(children) == 2:
            parms = toast(children[1], highlevel, False)
        else:
            raise AssertionError("unhandled NumpyType node")
        parms = _with_categorical(parms, categorical)
        return ak._v2.types.NumpyType(
            toast(children[0], highlevel, False), parameters=parms
        )

    if kind == "categories":
        # Explicit raise instead of ``assert``: this guards a caller-supplied
        # flag and must not disappear under ``python -O``.
        if highlevel is not True:
            raise AssertionError("categorical types require highlevel=True")
        return toast(children[0], highlevel, True)

    if kind == "unknown":
        if len(children) == 0:
            parms = {}
        elif len(children) == 1:
            parms = toast(children[0], highlevel, False)
        else:
            raise AssertionError("unhandled UnknownType node")
        return ak._v2.types.UnknownType(
            parameters=_with_categorical(parms, categorical)
        )

    if kind == "list_single":
        parms = _with_categorical({}, categorical)
        return ak._v2.types.ListType(
            toast(children[0], highlevel, False), parameters=parms
        )

    if kind == "list_parm":
        parms = _with_categorical(toast(children[1], highlevel, False), categorical)
        return ak._v2.types.ListType(
            toast(children[0], highlevel, False), parameters=parms
        )

    if kind == "union_single":
        parms = _with_categorical({}, categorical)
        return ak._v2.types.UnionType(
            _toast_each(children, highlevel), parameters=parms
        )

    if kind == "union_parm":
        # The trailing child holds the parameters; the rest are the contents.
        parms = _with_categorical(toast(children[-1], highlevel, False), categorical)
        return ak._v2.types.UnionType(
            _toast_each(children[:-1], highlevel), parameters=parms
        )

    if kind == "option_single":
        parms = _with_categorical({}, categorical)
        return ak._v2.types.OptionType(
            toast(children[0], highlevel, False), parameters=parms
        )

    if kind == "option_parm":
        parms = _with_categorical(toast(children[1], highlevel, False), categorical)
        return ak._v2.types.OptionType(
            toast(children[0], highlevel, False), parameters=parms
        )

    if kind == "option_highlevel":
        if not highlevel:
            raise AssertionError("high-level option syntax requires highlevel")
        parms = _with_categorical({}, categorical)
        return ak._v2.types.OptionType(
            toast(children[0], highlevel, False), parameters=parms
        )

    if kind == "record_tuple":
        parms = _with_categorical({}, categorical)
        return ak._v2.types.RecordType(
            _toast_each(children, highlevel), None, parameters=parms
        )

    if kind == "record_dict":
        parms = _with_categorical({}, categorical)
        content_keys = []
        content_types = []
        # Children alternate key, type, key, type, ...; keys are used as-is.
        for i in range(0, len(children), 2):
            content_keys.append(children[i])
            content_types.append(toast(children[i + 1], highlevel, False))
        return ak._v2.types.RecordType(content_types, content_keys, parameters=parms)

    if kind == "record_tuple_param":
        parms = _with_categorical(toast(children[-1], highlevel, False), categorical)
        return ak._v2.types.RecordType(
            _toast_each(children[:-1], highlevel), None, parameters=parms
        )

    if kind == "record_struct":
        parms = _with_categorical(toast(children[-1], highlevel, False), categorical)
        content_keys, content_list = _split_record_children(children[:-1], highlevel)
        return ak._v2.types.RecordType(content_list, content_keys, parameters=parms)

    if kind == "record_highlevel":
        if not highlevel:
            raise AssertionError("high-level record syntax requires highlevel")
        # The first child is the record name; the rest are fields.
        parms = _with_categorical({"__record__": children[0]}, categorical)
        content_keys, content_list = _split_record_children(children[1:], highlevel)
        return ak._v2.types.RecordType(content_list, content_keys, parameters=parms)

    if kind == "regular_inparm":
        assert len(children) == 2
        # No parameters of its own: the categorical flag is handed down to
        # the content type.
        content = toast(children[1], highlevel, categorical)
        if highlevel:
            return ak._v2.types.ArrayType(content, children[0])
        return ak._v2.types.RegularType(content, children[0])

    if kind == "regular_outparm":
        assert len(children) == 3
        parms = _with_categorical(toast(children[2], highlevel, False), categorical)
        return ak._v2.types.RegularType(
            toast(children[1], highlevel, False),
            children[0],
            parms,
        )

    if kind == "def_option":
        assert len(children) == 1
        return children[0]

    raise AssertionError("unhandled node")


def from_datashape(typestr, highlevel=False):
    """Parse ``typestr`` and convert the result with ``toast``.

    Args:
        typestr: The type string handed to the standalone Lark parser.
        highlevel: Forwarded unchanged to ``toast``.

    Returns:
        Whatever ``toast`` builds from the parsed tree.
    """
    parser = Lark_StandAlone(transformer=TreeToJson())
    tree = parser.parse(typestr)
    # Conversion always starts outside of any categorical wrapper.
    return toast(tree, highlevel, False)
4 changes: 1 addition & 3 deletions src/awkward/_v2/types/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE

from awkward._typeparser.parser import from_datashape # noqa: F401

from awkward._v2.types.type import Type # noqa: F401
from awkward._v2.types.type import Type, from_datashape # noqa: F401
from awkward._v2.types.unknowntype import UnknownType # noqa: F401
from awkward._v2.types.numpytype import NumpyType # noqa: F401
from awkward._v2.types.regulartype import RegularType # noqa: F401
Expand Down
Loading