From 2bcd44b8bacc24fba5fa793e9d84319c851834ac Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Wed, 22 Jun 2022 17:43:21 -0500 Subject: [PATCH 01/12] New grammar for v2 type strings; start with JSON again. --- studies/type-parser-2/parser.py | 69 +++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 studies/type-parser-2/parser.py diff --git a/studies/type-parser-2/parser.py b/studies/type-parser-2/parser.py new file mode 100644 index 0000000000..b5b1a838ca --- /dev/null +++ b/studies/type-parser-2/parser.py @@ -0,0 +1,69 @@ +import json + +import lark + +grammar = """ +start: json + + + +json: json_object + | json_array + | ESCAPED_STRING -> string + | SIGNED_NUMBER -> number + | "true" -> true + | "false" -> false + | "null" -> null + +json_array: "[" [json ("," json)*] "]" +json_object: "{" [json_pair ("," json_pair)*] "}" +json_pair: ESCAPED_STRING ":" json + +%import common.ESCAPED_STRING +%import common.SIGNED_NUMBER +%import common.WS + +%ignore WS +""" + +class Transformer: + def start(self, args): + return args[0] + + def json(self, args): + return args[0] + + def json_object(self, args): + return dict(args) + + def json_pair(self, args): + return (json.loads(args[0]), args[1]) + + def json_array(self, args): + return list(args) + + def string(self, args): + return json.loads(args[0]) + + def number(self, args): + try: + return int(args[0]) + except ValueError: + return float(args[0]) + + def true(self, args): + return True + + def false(self, args): + return False + + def null(self, args): + return None + +parser = lark.Lark(grammar, parser="lalr", transformer=Transformer()) + +datashape_string = """ +{"this": ["is", "json", true, false, null, 3.14, 123]} +""" + +print(parser.parse(datashape_string)) From 99c8378749d23b74ef30a2f29051555a4f283263 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Wed, 22 Jun 2022 18:54:01 -0500 Subject: [PATCH 02/12] Type.parameter should not return None. --- src/awkward/_v2/types/type.py | 2 + studies/type-parser-2/parser.py | 178 ++++++++++++++++++++-- tests/v2/test_0032-replace-dressedtype.py | 2 +- 3 files changed, 171 insertions(+), 11 deletions(-) diff --git a/src/awkward/_v2/types/type.py b/src/awkward/_v2/types/type.py index 4a2d005286..a728fe0556 100644 --- a/src/awkward/_v2/types/type.py +++ b/src/awkward/_v2/types/type.py @@ -11,6 +11,8 @@ class Type: @property def parameters(self): + if self._parameters is None: + self._parameters = {} return self._parameters def parameter(self, key): diff --git a/studies/type-parser-2/parser.py b/studies/type-parser-2/parser.py index b5b1a838ca..8ce9d003a6 100644 --- a/studies/type-parser-2/parser.py +++ b/studies/type-parser-2/parser.py @@ -2,34 +2,118 @@ import lark -grammar = """ -start: json +import awkward as ak +from awkward._v2.types.numpytype import NumpyType +from awkward._v2.types.unknowntype import UnknownType +from awkward._v2.types.regulartype import RegularType +from awkward._v2.types.listtype import ListType +from awkward._v2.types.recordtype import RecordType +from awkward._v2.types.optiontype import OptionType +from awkward._v2.types.uniontype import UnionType +from awkward._v2.types.arraytype import ArrayType -json: json_object - | json_array - | ESCAPED_STRING -> string +grammar = r''' +start: type + +type: numpytype + | unknowntype + | regulartype + | list_parameters + | categorical + +numpytype: numpytype_name ("[" "parameters" "=" json_object "]")? 
+ +numpytype_name: DTYPE + | DATETIME64 + | TIMEDELTA64 + +DTYPE: "bool" + | "int8" + | "uint8" + | "int16" + | "uint16" + | "int32" + | "uint32" + | "int64" + | "uint64" + | "float32" + | "float64" + | "complex64" + | "complex128" + +DATETIME64: /datetime64(\[(\s*-?[0-9]*)?(Y|M|W|D|h|m|s|ms|us|\u03bc|ns|ps|fs|as)\])?/ +TIMEDELTA64: /timedelta64(\[(\s*-?[0-9]*)?(Y|M|W|D|h|m|s|ms|us|\u03bc|ns|ps|fs|as)\])?/ + +unknowntype: "unknown" ("[" "parameters" "=" json_object "]")? + +regulartype: INT "*" type + + + + +list_parameters: "[" type "," "parameters" "=" json_object "]" + +categorical: "categorical" "[" "type" "=" type "]" + +json: ESCAPED_STRING -> string | SIGNED_NUMBER -> number | "true" -> true | "false" -> false | "null" -> null + | json_array + | json_object json_array: "[" [json ("," json)*] "]" json_object: "{" [json_pair ("," json_pair)*] "}" json_pair: ESCAPED_STRING ":" json +%import common.INT %import common.ESCAPED_STRING %import common.SIGNED_NUMBER %import common.WS %ignore WS -""" +''' class Transformer: + @staticmethod + def _parameters(args, i): + if i < len(args): + return args[i] + else: + return None + def start(self, args): return args[0] + def type(self, args): + return args[0] + + def numpytype(self, args): + return ak._v2.types.NumpyType(args[0], parameters=self._parameters(args, 1)) + + def numpytype_name(self, args): + return str(args[0]) + + def unknowntype(self, args): + return ak._v2.types.UnknownType(parameters=self._parameters(args, 0)) + + def regulartype(self, args): + return ak._v2.types.RegularType(args[1], int(args[0])) + + + + + def list_parameters(self, args): + args[0].parameters.update(args[1]) # modify recently created type object + return args[0] + + def categorical(self, args): + args[0].parameters["__categorical__"] = True # modify recently created type object + return args[0] + def json(self, args): return args[0] @@ -62,8 +146,82 @@ def null(self, args): parser = lark.Lark(grammar, parser="lalr", transformer=Transformer()) -datashape_string = """ -{"this": ["is", "json", true, false, null, 3.14, 123]} -""" -print(parser.parse(datashape_string)) +def test_numpytype_int32(): + t = NumpyType("int32") + assert str(parser.parse(str(t))) == str(t) + + +def test_numpytype_datetime64(): + t = NumpyType("datetime64") + assert str(parser.parse(str(t))) == str(t) + + +def test_numpytype_datetime64_10s(): + t = NumpyType("datetime64[10s]") + assert str(parser.parse(str(t))) == str(t) + + +def test_numpytype_int32_parameter(): + t = NumpyType("int32", {"__array__": "Something"}) + assert str(parser.parse(str(t))) == str(t) + + +def test_numpytype_datetime64_parameter(): + t = NumpyType("datetime64", {"__array__": "Something"}) + assert str(parser.parse(str(t))) == str(t) + + +def test_numpytype_datetime64_10s_parameter(): + t = NumpyType("datetime64[10s]", {"__array__": "Something"}) + assert str(parser.parse(str(t))) == str(t) + + +def test_numpytype_int32_categorical(): + t = NumpyType("int32", {"__categorical__": True}) + assert str(parser.parse(str(t))) == str(t) + + +def test_numpytype_int32_parameters_categorical(): + t = NumpyType("int32", {"__array__": "Something", "__categorical__": True}) + assert str(parser.parse(str(t))) == str(t) + + +def test_unknowntype(): + t = UnknownType() + assert str(parser.parse(str(t))) == str(t) + + +def test_unknowntype_parameter(): + t = UnknownType({"__array__": "Something"}) + assert str(parser.parse(str(t))) == str(t) + + +def test_unknowntype_categorical(): + t = UnknownType({"__categorical__": True}) + assert 
str(parser.parse(str(t))) == str(t) + + +def test_unknowntype_categorical_parameter(): + t = UnknownType({"__array__": "Something", "__categorical__": True}) + assert str(parser.parse(str(t))) == str(t) + + +def test_regulartype_numpytype(): + t = RegularType(NumpyType("int32"), 5) + assert str(parser.parse(str(t))) == str(t) + + +def test_regulartype_numpytype_parameter(): + t = RegularType(NumpyType("int32"), 5, {"__array__": "Something"}) + assert str(parser.parse(str(t))) == str(t) + + +def test_regulartype_numpytype_categorical(): + t = RegularType(NumpyType("int32"), 5, {"__categorical__": True}) + assert str(parser.parse(str(t))) == str(t) + + +def test_regulartype_numpytype_categorical_parameter(): + t = RegularType(NumpyType("int32"), 5, {"__categorical__": True, "__array__": "Something"}) + assert str(parser.parse(str(t))) == str(t) diff --git a/tests/v2/test_0032-replace-dressedtype.py b/tests/v2/test_0032-replace-dressedtype.py index 5262d8c32c..6b6894e4f7 100644 --- a/tests/v2/test_0032-replace-dressedtype.py +++ b/tests/v2/test_0032-replace-dressedtype.py @@ -9,7 +9,7 @@ def test_types_with_parameters(): t = ak._v2.types.UnknownType() - assert t.parameters is None + assert t.parameters == {} t = ak._v2.types.UnknownType(parameters={"__array__": ["val", "ue"]}) assert t.parameters == {"__array__": ["val", "ue"]} From c16c9e2e3763754aa851ccdc12b9c1d80392be4e Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Wed, 22 Jun 2022 19:19:05 -0500 Subject: [PATCH 03/12] Developing type parser: through OptionType. --- studies/type-parser-2/parser.py | 171 ++++++++++++++++++++++++++++++-- 1 file changed, 165 insertions(+), 6 deletions(-) diff --git a/studies/type-parser-2/parser.py b/studies/type-parser-2/parser.py index 8ce9d003a6..0a4ef08908 100644 --- a/studies/type-parser-2/parser.py +++ b/studies/type-parser-2/parser.py @@ -8,18 +8,25 @@ from awkward._v2.types.unknowntype import UnknownType from awkward._v2.types.regulartype import RegularType from awkward._v2.types.listtype import ListType -from awkward._v2.types.recordtype import RecordType from awkward._v2.types.optiontype import OptionType +from awkward._v2.types.recordtype import RecordType from awkward._v2.types.uniontype import UnionType from awkward._v2.types.arraytype import ArrayType -grammar = r''' +grammar = r""" start: type type: numpytype | unknowntype | regulartype + | listtype + | varlen_string + | fixedlen_string + | char + | byte + | option1 + | option2 | list_parameters | categorical @@ -50,6 +57,21 @@ regulartype: INT "*" type +listtype: "var" "*" type + +varlen_string: "string" -> varlen_string + | "bytes" -> varlen_bytestring + +fixedlen_string: ("string" "[" INT "]") -> fixedlen_string + | ("bytes" "[" INT "]") -> fixedlen_bytestring + +char: "char" + +byte: "byte" + +option1: "?" type + +option2: "option" "[" type ("," "parameters" "=" json_object)? 
"]" @@ -75,7 +97,8 @@ %import common.WS %ignore WS -''' +""" + class Transformer: @staticmethod @@ -103,15 +126,55 @@ def unknowntype(self, args): def regulartype(self, args): return ak._v2.types.RegularType(args[1], int(args[0])) + def listtype(self, args): + return ak._v2.types.ListType(args[0]) + + def varlen_string(self, args): + return ak._v2.types.ListType( + ak._v2.types.NumpyType("uint8", {"__array__": "char"}), + {"__array__": "string"}, + ) + + def varlen_bytestring(self, args): + return ak._v2.types.ListType( + ak._v2.types.NumpyType("uint8", {"__array__": "byte"}), + {"__array__": "bytestring"}, + ) + + def fixedlen_string(self, args): + return ak._v2.types.RegularType( + ak._v2.types.NumpyType("uint8", {"__array__": "char"}), + int(args[0]), + {"__array__": "string"}, + ) + def fixedlen_bytestring(self, args): + return ak._v2.types.RegularType( + ak._v2.types.NumpyType("uint8", {"__array__": "byte"}), + int(args[0]), + {"__array__": "bytestring"}, + ) + def char(self, args): + return ak._v2.types.NumpyType("uint8", {"__array__": "char"}) + + def byte(self, args): + return ak._v2.types.NumpyType("uint8", {"__array__": "byte"}) + + def option1(self, args): + return ak._v2.types.OptionType(args[0]) + + def option2(self, args): + return ak._v2.types.OptionType(args[0], parameters=self._parameters(args, 1)) def list_parameters(self, args): - args[0].parameters.update(args[1]) # modify recently created type object + # modify recently created type object + args[0].parameters.update(args[1]) return args[0] def categorical(self, args): - args[0].parameters["__categorical__"] = True # modify recently created type object + # modify recently created type object + args[0].parameters["__categorical__"] = True return args[0] def json(self, args): @@ -144,6 +207,7 @@ def false(self, args): def null(self, args): return None + parser = lark.Lark(grammar, parser="lalr", transformer=Transformer()) @@ -223,5 +287,100 @@ def test_regulartype_numpytype_categorical(): def test_regulartype_numpytype_categorical_parameter(): - t = RegularType(NumpyType("int32"), 5, {"__categorical__": True, "__array__": "Something"}) + t = RegularType( + NumpyType("int32"), 5, {"__categorical__": True, "__array__": "Something"} + ) + assert str(parser.parse(str(t))) == str(t) + + +def test_listtype_numpytype(): + t = ListType(NumpyType("int32")) + assert str(parser.parse(str(t))) == str(t) + + +def test_listtype_numpytype_parameter(): + t = ListType(NumpyType("int32"), {"__array__": "Something"}) + assert str(parser.parse(str(t))) == str(t) + + +def test_listtype_numpytype_categorical(): + t = ListType(NumpyType("int32"), {"__categorical__": True}) + assert str(parser.parse(str(t))) == str(t) + + +def test_listtype_numpytype_categorical_parameter(): + t = ListType( + NumpyType("int32"), {"__categorical__": True, "__array__": "Something"} + ) + assert str(parser.parse(str(t))) == str(t) + + +def test_varlen_string(): + t = ListType(NumpyType("uint8", {"__array__": "char"}), {"__array__": "string"}) + assert str(parser.parse(str(t))) == str(t) + + +def test_varlen_bytestring(): + t = ListType(NumpyType("uint8", {"__array__": "char"}), {"__array__": "bytestring"}) + assert str(parser.parse(str(t))) == str(t) + + +def test_fixedlen_string(): + t = RegularType( + NumpyType("uint8", {"__array__": "char"}), 5, {"__array__": "string"} + ) + assert str(parser.parse(str(t))) == str(t) + + +def test_fixedlen_bytestring(): + t = RegularType( + NumpyType("uint8", {"__array__": "byte"}), 5, {"__array__": "bytestring"} + ) + assert 
str(parser.parse(str(t))) == str(t) + + +def test_char(): + t = NumpyType("uint8", {"__array__": "char"}) + assert str(parser.parse(str(t))) == str(t) + + +def test_byte(): + t = NumpyType("uint8", {"__array__": "byte"}) + assert str(parser.parse(str(t))) == str(t) + + +def test_optiontype_numpytype_int32(): + t = OptionType(NumpyType("int32")) + assert str(parser.parse(str(t))) == str(t) + + +def test_optiontype_numpytype_int32_parameters(): + t = OptionType(NumpyType("int32"), {"__array__": "Something"}) + assert str(parser.parse(str(t))) == str(t) + + +def test_optiontype_numpytype_int32_categorical(): + t = OptionType(NumpyType("int32"), {"__categorical__": True}) + assert str(parser.parse(str(t))) == str(t) + + +def test_optiontype_numpytype_int32_categorical_parameters(): + t = OptionType( + NumpyType("int32"), {"__array__": "Something", "__categorical__": True} + ) + assert str(parser.parse(str(t))) == str(t) + + +def test_option_varlen_string(): + t = OptionType( + ListType(NumpyType("uint8", {"__array__": "char"}), {"__array__": "string"}) + ) + assert str(parser.parse(str(t))) == str(t) + + +def test_option_varlen_string_parameters(): + t = OptionType( + ListType(NumpyType("uint8", {"__array__": "char"}), {"__array__": "string"}), + {"__array__": "Something"}, + ) assert str(parser.parse(str(t))) == str(t) From f0d99709bb95df994414d0d38f8e8e19d671517c Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Wed, 22 Jun 2022 21:25:30 -0500 Subject: [PATCH 04/12] Fix empty record type string. --- src/awkward/_v2/types/recordtype.py | 6 ++++-- src/awkward/_v2/types/type.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/awkward/_v2/types/recordtype.py b/src/awkward/_v2/types/recordtype.py index 96652bbf3b..342035f16a 100644 --- a/src/awkward/_v2/types/recordtype.py +++ b/src/awkward/_v2/types/recordtype.py @@ -133,8 +133,9 @@ def _str(self, indent, compact): + [post, "], ", params, "]"] ) else: + c = "" if len(self._contents) == 0 else ", " out = ( - [name, "[", pre] + flat_children + [", ", post, params, "]"] + [name, "[", pre] + flat_children + [c, post, params, "]"] ) else: if name is None: @@ -142,7 +143,8 @@ def _str(self, indent, compact): ["struct[{", pre] + flat_pairs + [post, "}, ", params, "]"] ) else: - out = [name, "[", pre] + flat_pairs + [", ", post, params, "]"] + c = "" if len(self._contents) == 0 else ", " + out = [name, "[", pre] + flat_pairs + [c, post, params, "]"] return [self._str_categorical_begin()] + out + [self._str_categorical_end()] diff --git a/src/awkward/_v2/types/type.py b/src/awkward/_v2/types/type.py index a728fe0556..da9a00c7c1 100644 --- a/src/awkward/_v2/types/type.py +++ b/src/awkward/_v2/types/type.py @@ -11,7 +11,7 @@ class Type: @property def parameters(self): - if self._parameters is None: + if self._parameters is None: # pylint: disable=E0203 self._parameters = {} return self._parameters From 1e3c2f4e8215b7109eefdbb3cf28e6f2730940c0 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Wed, 22 Jun 2022 21:25:52 -0500 Subject: [PATCH 05/12] Named record parsing is working. 
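
As a reference for what this commit enables, here is a minimal round
trip through the study parser. This is a sketch, not part of the
patch: `parser` is the Lark object defined in
studies/type-parser-2/parser.py, and the expected string form
`Name[one: int32]` is taken from the commented examples and tests in
the diff below.

    from awkward._v2.types.numpytype import NumpyType
    from awkward._v2.types.recordtype import RecordType

    # a named record renders in the compact Name[...] form ...
    t = RecordType([NumpyType("int32")], ["one"], {"__record__": "Name"})
    assert str(t) == "Name[one: int32]"

    # ... and parsing that string reconstructs an equivalent type
    assert str(parser.parse(str(t))) == str(t)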
--- studies/type-parser-2/parser.py | 169 +++++++++++++++++++++++++++++--- 1 file changed, 156 insertions(+), 13 deletions(-) diff --git a/studies/type-parser-2/parser.py b/studies/type-parser-2/parser.py index 0a4ef08908..2492a8ab17 100644 --- a/studies/type-parser-2/parser.py +++ b/studies/type-parser-2/parser.py @@ -27,6 +27,8 @@ | byte | option1 | option2 + | named0 + | named | list_parameters | categorical @@ -36,19 +38,19 @@ | DATETIME64 | TIMEDELTA64 -DTYPE: "bool" - | "int8" - | "uint8" - | "int16" - | "uint16" - | "int32" - | "uint32" - | "int64" - | "uint64" - | "float32" - | "float64" - | "complex64" - | "complex128" +DTYPE.2: "bool" + | "int8" + | "uint8" + | "int16" + | "uint16" + | "int32" + | "uint32" + | "int64" + | "uint64" + | "float32" + | "float64" + | "complex64" + | "complex128" DATETIME64: /datetime64(\[(\s*-?[0-9]*)?(Y|M|W|D|h|m|s|ms|us|\u03bc|ns|ps|fs|as)\])?/ TIMEDELTA64: /timedelta64(\[(\s*-?[0-9]*)?(Y|M|W|D|h|m|s|ms|us|\u03bc|ns|ps|fs|as)\])?/ @@ -73,7 +75,15 @@ option2: "option" "[" type ("," "parameters" "=" json_object)? "]" +named0: CNAME "[" ("parameters" "=" json_object)? "]" +named: CNAME "[" (named_tuple | named_pairs) "]" +named_tuple: type ("," (named_tuple | "parameters" "=" json_object))? +named_pairs: named_pair ("," (named_pairs | "parameters" "=" json_object))? + +named_pair: named_key ":" type +named_key: ESCAPED_STRING -> string + | CNAME -> identifier list_parameters: "[" type "," "parameters" "=" json_object "]" @@ -92,6 +102,7 @@ json_pair: ESCAPED_STRING ":" json %import common.INT +%import common.CNAME %import common.ESCAPED_STRING %import common.SIGNED_NUMBER %import common.WS @@ -167,6 +178,48 @@ def option1(self, args): def option2(self, args): return ak._v2.types.OptionType(args[0], parameters=self._parameters(args, 1)) + def named0(self, args): + parameters = {"__record__": str(args[0])} + if 1 < len(args): + parameters.update(args[1]) + return ak._v2.types.RecordType([], None, parameters) + + def named(self, args): + parameters = {"__record__": str(args[0])} + + if isinstance(args[1][-1], dict): + arguments = args[1][:-1] + parameters.update(args[1][-1]) + else: + arguments = args[1] + + if any(isinstance(x, tuple) for x in arguments): + fields = [x[0] for x in arguments] + contents = [x[1] for x in arguments] + else: + fields = None + contents = arguments + + return ak._v2.types.RecordType(contents, fields, parameters) + + def named_tuple(self, args): + if len(args) == 2 and isinstance(args[1], list): + return args[:1] + args[1] + else: + return args + + def named_pairs(self, args): + if len(args) == 2 and isinstance(args[1], list): + return args[:1] + args[1] + else: + return args + + def named_pair(self, args): + return tuple(args) + + def identifier(self, args): + return str(args[0]) + def list_parameters(self, args): # modify recently created type object args[0].parameters.update(args[1]) @@ -384,3 +437,93 @@ def test_option_varlen_string_parameters(): {"__array__": "Something"}, ) assert str(parser.parse(str(t))) == str(t) + + +# >>> print(RecordType([], None)) +# () +# >>> print(RecordType([NumpyType("int32")], None)) +# (int32) +# >>> print(RecordType([NumpyType("int32"), NumpyType("float64")], None)) +# (int32, float64) +# >>> print(RecordType([], [])) +# {} +# >>> print(RecordType([NumpyType("int32")], ["one"])) +# {one: int32} +# >>> print(RecordType([NumpyType("int32"), NumpyType("float64")], ["one", "t w o"])) +# {one: int32, "t w o": float64} + +# >>> print(RecordType([], None, {"__record__": "Name"})) +# Name[] +# >>> 
print(RecordType([NumpyType("int32")], None, {"__record__": "Name"})) +# Name[int32] +# >>> print(RecordType([NumpyType("int32"), NumpyType("float64")], None, {"__record__": "Name"})) +# Name[int32, float64] +# >>> print(RecordType([], [], {"__record__": "Name"})) +# Name[] +# >>> print(RecordType([NumpyType("int32")], ["one"], {"__record__": "Name"})) +# Name[one: int32] +# >>> print(RecordType([NumpyType("int32"), NumpyType("float64")], ["one", "t w o"], {"__record__": "Name"})) +# Name[one: int32, "t w o": float64] + +# >>> print(RecordType([], None, {"p": [123]})) +# tuple[[], parameters={"p": [123]}] +# >>> print(RecordType([NumpyType("int32")], None, {"p": [123]})) +# tuple[[int32], parameters={"p": [123]}] +# >>> print(RecordType([NumpyType("int32"), NumpyType("float64")], None, {"p": [123]})) +# tuple[[int32, float64], parameters={"p": [123]}] +# >>> print(RecordType([], [], {"p": [123]})) +# struct[{}, parameters={"p": [123]}] +# >>> print(RecordType([NumpyType("int32")], ["one"], {"p": [123]})) +# struct[{one: int32}, parameters={"p": [123]}] +# >>> print(RecordType([NumpyType("int32"), NumpyType("float64")], ["one", "t w o"], {"p": [123]})) +# struct[{one: int32, "t w o": float64}, parameters={"p": [123]}] + + +def test_named_record_empty(): + t = RecordType([], None, {"__record__": "Name"}) + assert str(parser.parse(str(t))) == str(t) + + +def test_named_record_int32(): + t = RecordType([NumpyType("int32")], None, {"__record__": "Name"}) + assert str(parser.parse(str(t))) == str(t) + + +def test_named_record_int32_float64(): + t = RecordType([NumpyType("int32"), NumpyType("float64")], None, {"__record__": "Name"}) + assert str(parser.parse(str(t))) == str(t) + + +def test_named_record_fields_int32(): + t = RecordType([NumpyType("int32")], ["one"], {"__record__": "Name"}) + assert str(parser.parse(str(t))) == str(t) + + +def test_named_record_fields_int32_float64(): + t = RecordType([NumpyType("int32"), NumpyType("float64")], ["one", "t w o"], {"__record__": "Name"}) + assert str(parser.parse(str(t))) == str(t) + + +def test_named_record_empty_parameters(): + t = RecordType([], None, {"__record__": "Name", "p": [123]}) + assert str(parser.parse(str(t))) == str(t) + + +def test_named_record_int32_parameters(): + t = RecordType([NumpyType("int32")], None, {"__record__": "Name", "p": [123]}) + assert str(parser.parse(str(t))) == str(t) + + +def test_named_record_int32_float64_parameters(): + t = RecordType([NumpyType("int32"), NumpyType("float64")], None, {"__record__": "Name", "p": [123]}) + assert str(parser.parse(str(t))) == str(t) + + +def test_named_record_fields_int32_parameters(): + t = RecordType([NumpyType("int32")], ["one"], {"__record__": "Name", "p": [123]}) + assert str(parser.parse(str(t))) == str(t) + + +def test_named_record_fields_int32_float64_parameters(): + t = RecordType([NumpyType("int32"), NumpyType("float64")], ["one", "t w o"], {"__record__": "Name", "p": [123]}) + assert str(parser.parse(str(t))) == str(t) From ffc2102e490c8c8ff76fc7c1610a8359b1fa9037 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Wed, 22 Jun 2022 22:17:01 -0500 Subject: [PATCH 06/12] Restrictions on record name in the type string. 
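
The problem this addresses: a record name that is not a valid
identifier, or that collides with a word the type grammar reserves
("unknown", "string", "bytes", "option", "tuple", "struct", "union",
"categorical") or with a primitive name such as "int32", would render
as an ambiguous type string. The check added below demotes such names
into the explicit parameter spelling instead. A sketch of the two
renderings (the fallback form follows the struct[...] examples used
elsewhere in this study; treat it as illustrative):

    # an ordinary name uses the compact named form
    RecordType([NumpyType("int32")], ["one"], {"__record__": "Name"})
    # -> Name[one: int32]

    # a reserved name falls back to parameters={"__record__": ...}
    RecordType([NumpyType("int32")], ["one"], {"__record__": "string"})
    # -> struct[{one: int32}, parameters={"__record__": "string"}]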
--- src/awkward/_v2/types/recordtype.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/awkward/_v2/types/recordtype.py b/src/awkward/_v2/types/recordtype.py index 342035f16a..f72788c361 100644 --- a/src/awkward/_v2/types/recordtype.py +++ b/src/awkward/_v2/types/recordtype.py @@ -100,6 +100,23 @@ def _str(self, indent, compact): params = self._str_parameters() name = self.parameter("__record__") + if name is not None: + if not ak._v2._prettyprint.is_identifier.match(name) or name in ( + "unknown", + "string", + "bytes", + "option", + "tuple", + "struct", + "union", + "categorical", + ) or name in ak._v2.types.numpytype._primitive_to_dtype_dict: + if params is None: + params = 'parameters={"__record__": ' + json.dumps(name) + "}" + else: + params = 'parameters={"__record__": ' + json.dumps(name) + ", " + params[12:] + name = None + if not self.is_tuple: pairs = [] for k, v in zip(self._fields, children): From a3c008138d612fe156713812b1cb4d392fe29d13 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Wed, 22 Jun 2022 22:17:13 -0500 Subject: [PATCH 07/12] Parsing records works. --- studies/type-parser-2/parser.py | 198 ++++++++++++++++++++++++-------- 1 file changed, 151 insertions(+), 47 deletions(-) diff --git a/studies/type-parser-2/parser.py b/studies/type-parser-2/parser.py index 2492a8ab17..aa095e9e60 100644 --- a/studies/type-parser-2/parser.py +++ b/studies/type-parser-2/parser.py @@ -27,6 +27,10 @@ | byte | option1 | option2 + | tuple + | tuple_parameters + | record + | record_parameters | named0 | named | list_parameters @@ -75,12 +79,23 @@ option2: "option" "[" type ("," "parameters" "=" json_object)? "]" -named0: CNAME "[" ("parameters" "=" json_object)? "]" -named: CNAME "[" (named_tuple | named_pairs) "]" +tuple: "(" types? ")" +types: type ("," type)* -named_tuple: type ("," (named_tuple | "parameters" "=" json_object))? -named_pairs: named_pair ("," (named_pairs | "parameters" "=" json_object))? +tuple_parameters: "tuple" "[" "[" types? "]" ("," "parameters" "=" json_object)? "]" + +record: "{" pairs? "}" +pairs: pair ("," pair)* +pair: key ":" type +key: ESCAPED_STRING -> string + | CNAME -> identifier + +record_parameters: "struct" "[" "{" pairs? "}" ("," "parameters" "=" json_object)? "]" +named0: CNAME "[" ("parameters" "=" json_object)? "]" +named: CNAME "[" (named_types | named_pairs) "]" +named_types: type ("," (named_types | "parameters" "=" json_object))? +named_pairs: named_pair ("," (named_pairs | "parameters" "=" json_object))? 
named_pair: named_key ":" type named_key: ESCAPED_STRING -> string | CNAME -> identifier @@ -178,6 +193,59 @@ def option1(self, args): def option2(self, args): return ak._v2.types.OptionType(args[0], parameters=self._parameters(args, 1)) + def tuple(self, args): + if len(args) == 0: + types = [] + else: + types = args[0] + return ak._v2.types.RecordType(types, None) + + def types(self, args): + return args + + def tuple_parameters(self, args): + if len(args) != 0 and isinstance(args[0], list): + types = args[0] + else: + types = [] + + if len(args) != 0 and isinstance(args[-1], dict): + parameters = args[-1] + else: + parameters = {} + + return ak._v2.types.RecordType(types, None, parameters) + + def record(self, args): + if len(args) == 0: + fields = [] + types = [] + else: + fields = [x[0] for x in args[0]] + types = [x[1] for x in args[0]] + return ak._v2.types.RecordType(types, fields) + + def pairs(self, args): + return args + + def pair(self, args): + return tuple(args) + + def record_parameters(self, args): + if len(args) != 0 and isinstance(args[0], list): + fields = [x[0] for x in args[0]] + types = [x[1] for x in args[0]] + else: + fields = [] + types = [] + + if len(args) != 0 and isinstance(args[-1], dict): + parameters = args[-1] + else: + parameters = {} + + return ak._v2.types.RecordType(types, fields, parameters) + def named0(self, args): parameters = {"__record__": str(args[0])} if 1 < len(args): @@ -202,7 +270,7 @@ def named(self, args): return ak._v2.types.RecordType(contents, fields, parameters) - def named_tuple(self, args): + def named_types(self, args): if len(args) == 2 and isinstance(args[1], list): return args[:1] + args[1] else: @@ -439,44 +507,66 @@ def test_option_varlen_string_parameters(): assert str(parser.parse(str(t))) == str(t) -# >>> print(RecordType([], None)) -# () -# >>> print(RecordType([NumpyType("int32")], None)) -# (int32) -# >>> print(RecordType([NumpyType("int32"), NumpyType("float64")], None)) -# (int32, float64) -# >>> print(RecordType([], [])) -# {} -# >>> print(RecordType([NumpyType("int32")], ["one"])) -# {one: int32} -# >>> print(RecordType([NumpyType("int32"), NumpyType("float64")], ["one", "t w o"])) -# {one: int32, "t w o": float64} - -# >>> print(RecordType([], None, {"__record__": "Name"})) -# Name[] -# >>> print(RecordType([NumpyType("int32")], None, {"__record__": "Name"})) -# Name[int32] -# >>> print(RecordType([NumpyType("int32"), NumpyType("float64")], None, {"__record__": "Name"})) -# Name[int32, float64] -# >>> print(RecordType([], [], {"__record__": "Name"})) -# Name[] -# >>> print(RecordType([NumpyType("int32")], ["one"], {"__record__": "Name"})) -# Name[one: int32] -# >>> print(RecordType([NumpyType("int32"), NumpyType("float64")], ["one", "t w o"], {"__record__": "Name"})) -# Name[one: int32, "t w o": float64] - -# >>> print(RecordType([], None, {"p": [123]})) -# tuple[[], parameters={"p": [123]}] -# >>> print(RecordType([NumpyType("int32")], None, {"p": [123]})) -# tuple[[int32], parameters={"p": [123]}] -# >>> print(RecordType([NumpyType("int32"), NumpyType("float64")], None, {"p": [123]})) -# tuple[[int32, float64], parameters={"p": [123]}] -# >>> print(RecordType([], [], {"p": [123]})) -# struct[{}, parameters={"p": [123]}] -# >>> print(RecordType([NumpyType("int32")], ["one"], {"p": [123]})) -# struct[{one: int32}, parameters={"p": [123]}] -# >>> print(RecordType([NumpyType("int32"), NumpyType("float64")], ["one", "t w o"], {"p": [123]})) -# struct[{one: int32, "t w o": float64}, parameters={"p": [123]}] +def 
test_record_empty(): + t = RecordType([], None) + assert str(parser.parse(str(t))) == str(t) + + +def test_record_fields_empty(): + t = RecordType([], []) + assert str(parser.parse(str(t))) == str(t) + + +def test_record_int32(): + t = RecordType([NumpyType("int32")], None) + assert str(parser.parse(str(t))) == str(t) + + +def test_record_int32_float64(): + t = RecordType([NumpyType("int32"), NumpyType("float64")], None) + assert str(parser.parse(str(t))) == str(t) + + +def test_record_fields_int32(): + t = RecordType([NumpyType("int32")], ["one"]) + assert str(parser.parse(str(t))) == str(t) + + +def test_record_fields_int32_float64(): + t = RecordType([NumpyType("int32"), NumpyType("float64")], ["one", "t w o"]) + assert str(parser.parse(str(t))) == str(t) + + +def test_record_empty_parameters(): + t = RecordType([], None, {"p": [123]}) + assert str(parser.parse(str(t))) == str(t) + + +def test_record_fields_empty_parameters(): + t = RecordType([], [], {"p": [123]}) + assert str(parser.parse(str(t))) == str(t) + + +def test_record_int32_parameters(): + t = RecordType([NumpyType("int32")], None, {"p": [123]}) + assert str(parser.parse(str(t))) == str(t) + + +def test_record_int32_float64_parameters(): + t = RecordType([NumpyType("int32"), NumpyType("float64")], None, {"p": [123]}) + assert str(parser.parse(str(t))) == str(t) + + +def test_record_fields_int32_parameters(): + t = RecordType([NumpyType("int32")], ["one"], {"p": [123]}) + assert str(parser.parse(str(t))) == str(t) + + +def test_record_fields_int32_float64_parameters(): + t = RecordType( + [NumpyType("int32"), NumpyType("float64")], ["one", "t w o"], {"p": [123]} + ) + assert str(parser.parse(str(t))) == str(t) def test_named_record_empty(): @@ -490,7 +580,9 @@ def test_named_record_int32(): def test_named_record_int32_float64(): - t = RecordType([NumpyType("int32"), NumpyType("float64")], None, {"__record__": "Name"}) + t = RecordType( + [NumpyType("int32"), NumpyType("float64")], None, {"__record__": "Name"} + ) assert str(parser.parse(str(t))) == str(t) @@ -500,7 +592,11 @@ def test_named_record_fields_int32(): def test_named_record_fields_int32_float64(): - t = RecordType([NumpyType("int32"), NumpyType("float64")], ["one", "t w o"], {"__record__": "Name"}) + t = RecordType( + [NumpyType("int32"), NumpyType("float64")], + ["one", "t w o"], + {"__record__": "Name"}, + ) assert str(parser.parse(str(t))) == str(t) @@ -515,7 +611,11 @@ def test_named_record_int32_parameters(): def test_named_record_int32_float64_parameters(): - t = RecordType([NumpyType("int32"), NumpyType("float64")], None, {"__record__": "Name", "p": [123]}) + t = RecordType( + [NumpyType("int32"), NumpyType("float64")], + None, + {"__record__": "Name", "p": [123]}, + ) assert str(parser.parse(str(t))) == str(t) @@ -525,5 +625,9 @@ def test_named_record_fields_int32_parameters(): def test_named_record_fields_int32_float64_parameters(): - t = RecordType([NumpyType("int32"), NumpyType("float64")], ["one", "t w o"], {"__record__": "Name", "p": [123]}) + t = RecordType( + [NumpyType("int32"), NumpyType("float64")], + ["one", "t w o"], + {"__record__": "Name", "p": [123]}, + ) assert str(parser.parse(str(t))) == str(t) From b30ce29064e9fcba9b356dc25e82d84642329a6d Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Wed, 22 Jun 2022 22:28:18 -0500 Subject: [PATCH 08/12] Prevent record type string from using reserved words in name. 
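
This is the same name restriction as the previous recordtype.py
change, reformatted. The reason it matters on the parsing side: the
grammar claims these words for itself, so a bare record named
"string" could never round-trip; for example, string[5] already means
a fixed-length string in this grammar, and option[...] is an option
type. A quick consistency check (a sketch assuming the `parser`
object from studies/type-parser-2/parser.py; the exact fallback
string is illustrative):

    t = RecordType([], None, {"__record__": "option"})
    s = str(t)  # renders via the parameters fallback:
                # tuple[[], parameters={"__record__": "option"}]
    assert str(parser.parse(s)) == s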
--- src/awkward/_v2/types/recordtype.py | 36 ++++++++++++++++++----------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/src/awkward/_v2/types/recordtype.py b/src/awkward/_v2/types/recordtype.py index f72788c361..85a5bbdd04 100644 --- a/src/awkward/_v2/types/recordtype.py +++ b/src/awkward/_v2/types/recordtype.py @@ -101,20 +101,30 @@ def _str(self, indent, compact): name = self.parameter("__record__") if name is not None: - if not ak._v2._prettyprint.is_identifier.match(name) or name in ( - "unknown", - "string", - "bytes", - "option", - "tuple", - "struct", - "union", - "categorical", - ) or name in ak._v2.types.numpytype._primitive_to_dtype_dict: + if ( + not ak._v2._prettyprint.is_identifier.match(name) + or name + in ( + "unknown", + "string", + "bytes", + "option", + "tuple", + "struct", + "union", + "categorical", + ) + or name in ak._v2.types.numpytype._primitive_to_dtype_dict + ): if params is None: params = 'parameters={"__record__": ' + json.dumps(name) + "}" else: - params = 'parameters={"__record__": ' + json.dumps(name) + ", " + params[12:] + params = ( + 'parameters={"__record__": ' + + json.dumps(name) + + ", " + + params[12:] + ) name = None if not self.is_tuple: @@ -151,9 +161,7 @@ def _str(self, indent, compact): ) else: c = "" if len(self._contents) == 0 else ", " - out = ( - [name, "[", pre] + flat_children + [c, post, params, "]"] - ) + out = [name, "[", pre] + flat_children + [c, post, params, "]"] else: if name is None: out = ( From 65bf4ad46de4f63e6f3bb9fba39837671dea5903 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Wed, 22 Jun 2022 22:28:40 -0500 Subject: [PATCH 09/12] Tests are done; this is a working v2 type parser. --- studies/type-parser-2/parser.py | 46 +++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/studies/type-parser-2/parser.py b/studies/type-parser-2/parser.py index aa095e9e60..d494848fc8 100644 --- a/studies/type-parser-2/parser.py +++ b/studies/type-parser-2/parser.py @@ -33,6 +33,7 @@ | record_parameters | named0 | named + | union | list_parameters | categorical @@ -100,6 +101,8 @@ named_key: ESCAPED_STRING -> string | CNAME -> identifier +union: "union" "[" named_types? 
"]" + list_parameters: "[" type "," "parameters" "=" json_object "]" categorical: "categorical" "[" "type" "=" type "]" @@ -288,6 +291,19 @@ def named_pair(self, args): def identifier(self, args): return str(args[0]) + def union(self, args): + if len(args) == 0: + arguments = [] + parameters = None + elif isinstance(args[0][-1], dict): + arguments = args[0][:-1] + parameters = args[0][-1] + else: + arguments = args[0] + parameters = None + + return ak._v2.types.UnionType(arguments, parameters) + def list_parameters(self, args): # modify recently created type object args[0].parameters.update(args[1]) @@ -631,3 +647,33 @@ def test_named_record_fields_int32_float64_parameters(): {"__record__": "Name", "p": [123]}, ) assert str(parser.parse(str(t))) == str(t) + + +def test_union_empty(): + t = UnionType([]) + assert str(parser.parse(str(t))) == str(t) + + +def test_union_float64(): + t = UnionType([NumpyType("float64")]) + assert str(parser.parse(str(t))) == str(t) + + +def test_union_float64_datetime64(): + t = UnionType( + [NumpyType("float64"), NumpyType("datetime64")], + ) + assert str(parser.parse(str(t))) == str(t) + + +def test_union_float64_parameters(): + t = UnionType([NumpyType("float64")], {"__array__": "Something"}) + assert str(parser.parse(str(t))) == str(t) + + +def test_union_float64_datetime64_parameters(): + t = UnionType( + [NumpyType("float64"), NumpyType("datetime64")], + {"__array__": "Something"}, + ) + assert str(parser.parse(str(t))) == str(t) From a3504a172f3793635d50bc67dc8f51fb6c0a2ce4 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Thu, 23 Jun 2022 15:27:13 -0500 Subject: [PATCH 10/12] Add a standalone v2 types parser to the codebase. --- .../_v2/types/_awkward_datashape_parser.py | 2905 +++++++++++++++++ .../type-parser-2/awkward_datashape_grammar.g | 111 + 2 files changed, 3016 insertions(+) create mode 100644 src/awkward/_v2/types/_awkward_datashape_parser.py create mode 100644 studies/type-parser-2/awkward_datashape_grammar.g diff --git a/src/awkward/_v2/types/_awkward_datashape_parser.py b/src/awkward/_v2/types/_awkward_datashape_parser.py new file mode 100644 index 0000000000..affe03272e --- /dev/null +++ b/src/awkward/_v2/types/_awkward_datashape_parser.py @@ -0,0 +1,2905 @@ +# flake8: noqa +# fmt: off + +# The file was automatically generated by Lark v0.12.0 +__version__ = "0.12.0" + +# +# +# Lark Stand-alone Generator Tool +# ---------------------------------- +# Generates a stand-alone LALR(1) parser with a standard lexer +# +# Git: https://github.com/erezsh/lark +# Author: Erez Shinan (erezshin@gmail.com) +# +# +# >>> LICENSE +# +# This tool and its generated code use a separate license from Lark, +# and are subject to the terms of the Mozilla Public License, v. 2.0. +# If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# +# If you wish to purchase a commercial license for this tool and its +# generated code, you may contact me via email or otherwise. +# +# If MPL2 is incompatible with your free or open-source project, +# contact me and we'll work it out. 
+# +# + + + + +class LarkError(Exception): + pass + + +class ConfigurationError(LarkError, ValueError): + pass + + +def assert_config(value, options, msg='Got %r, expected one of %s'): + if value not in options: + raise ConfigurationError(msg % (value, options)) + + +class GrammarError(LarkError): + pass + + +class ParseError(LarkError): + pass + + +class LexError(LarkError): + pass + + +class UnexpectedInput(LarkError): + #-- + pos_in_stream = None + _terminals_by_name = None + + def get_context(self, text, span=40): + #-- + assert self.pos_in_stream is not None, self + pos = self.pos_in_stream + start = max(pos - span, 0) + end = pos + span + if not isinstance(text, bytes): + before = text[start:pos].rsplit('\n', 1)[-1] + after = text[pos:end].split('\n', 1)[0] + return before + after + '\n' + ' ' * len(before.expandtabs()) + '^\n' + else: + before = text[start:pos].rsplit(b'\n', 1)[-1] + after = text[pos:end].split(b'\n', 1)[0] + return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace") + + def match_examples(self, parse_fn, examples, token_type_match_fallback=False, use_accepts=False): + #-- + assert self.state is not None, "Not supported for this exception" + + if isinstance(examples, dict): + examples = examples.items() + + candidate = (None, False) + for i, (label, example) in enumerate(examples): + assert not isinstance(example, STRING_TYPE) + + for j, malformed in enumerate(example): + try: + parse_fn(malformed) + except UnexpectedInput as ut: + if ut.state == self.state: + if use_accepts and hasattr(self, 'accepts') and ut.accepts != self.accepts: + logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" % + (self.state, self.accepts, ut.accepts, i, j)) + continue + try: + if ut.token == self.token: ## + + logger.debug(f"Exact Match at example [{i}][{j}]") + return label + + if token_type_match_fallback: + ## + + if (ut.token.type == self.token.type) and not candidate[-1]: + logger.debug(f"Token Type Fallback at example [{i}][{j}]") + candidate = label, True + + except AttributeError: + pass + if candidate[0] is None: + logger.debug(f"Same State match at example [{i}][{j}]") + candidate = label, False + + return candidate[0] + + def _format_expected(self, expected): + if self._terminals_by_name: + d = self._terminals_by_name + expected = [d[t_name].user_repr() if t_name in d else t_name for t_name in expected] + return "Expected one of: \n\t* %s\n" % '\n\t* '.join(expected) + + +class UnexpectedEOF(ParseError, UnexpectedInput): + #-- + + def __init__(self, expected, state=None, terminals_by_name=None): + super().__init__() + + self.expected = expected + self.state = state + from .lexer import Token + self.token = Token("", "") ## + + self.pos_in_stream = -1 + self.line = -1 + self.column = -1 + self._terminals_by_name = terminals_by_name + + + def __str__(self): + message = "Unexpected end-of-input. 
" + message += self._format_expected(self.expected) + return message + + +class UnexpectedCharacters(LexError, UnexpectedInput): + #-- + + def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None, + terminals_by_name=None, considered_rules=None): + super().__init__() + + ## + + self.line = line + self.column = column + self.pos_in_stream = lex_pos + self.state = state + self._terminals_by_name = terminals_by_name + + self.allowed = allowed + self.considered_tokens = considered_tokens + self.considered_rules = considered_rules + self.token_history = token_history + + if isinstance(seq, bytes): + self.char = seq[lex_pos:lex_pos + 1].decode("ascii", "backslashreplace") + else: + self.char = seq[lex_pos] + self._context = self.get_context(seq) + + + def __str__(self): + message = "No terminal matches '%s' in the current parser context, at line %d col %d" % (self.char, self.line, self.column) + message += '\n\n' + self._context + if self.allowed: + message += self._format_expected(self.allowed) + if self.token_history: + message += '\nPrevious tokens: %s\n' % ', '.join(repr(t) for t in self.token_history) + return message + + +class UnexpectedToken(ParseError, UnexpectedInput): + #-- + + def __init__(self, token, expected, considered_rules=None, state=None, interactive_parser=None, terminals_by_name=None, token_history=None): + super().__init__() + + ## + + self.line = getattr(token, 'line', '?') + self.column = getattr(token, 'column', '?') + self.pos_in_stream = getattr(token, 'start_pos', None) + self.state = state + + self.token = token + self.expected = expected ## + + self._accepts = NO_VALUE + self.considered_rules = considered_rules + self.interactive_parser = interactive_parser + self._terminals_by_name = terminals_by_name + self.token_history = token_history + + + @property + def accepts(self): + if self._accepts is NO_VALUE: + self._accepts = self.interactive_parser and self.interactive_parser.accepts() + return self._accepts + + def __str__(self): + message = ("Unexpected token %r at line %s, column %s.\n%s" + % (self.token, self.line, self.column, self._format_expected(self.accepts or self.expected))) + if self.token_history: + message += "Previous tokens: %r\n" % self.token_history + + return message + + @property + def puppet(self): + warn("UnexpectedToken.puppet attribute has been renamed to interactive_parser", DeprecationWarning) + return self.interactive_parser + + + +class VisitError(LarkError): + #-- + + def __init__(self, rule, obj, orig_exc): + message = f'Error trying to process rule "{rule}":\n\n{orig_exc}' + super().__init__(message) + + self.rule = rule + self.obj = obj + self.orig_exc = orig_exc + + +import sys, re +import logging +logger = logging.getLogger("lark") +logger.addHandler(logging.StreamHandler()) +## + +## + +logger.setLevel(logging.CRITICAL) + +if sys.version_info[0]>2: + from abc import ABC, abstractmethod +else: + from abc import ABCMeta, abstractmethod + class ABC: ## + + __slots__ = () + __metclass__ = ABCMeta + + +Py36 = (sys.version_info[:2] >= (3, 6)) + +NO_VALUE = object() + + +def classify(seq, key=None, value=None): + d = {} + for item in seq: + k = key(item) if (key is not None) else item + v = value(item) if (value is not None) else item + if k in d: + d[k].append(v) + else: + d[k] = [v] + return d + + +def _deserialize(data, namespace, memo): + if isinstance(data, dict): + if '__type__' in data: ## + + class_ = namespace[data['__type__']] + return class_.deserialize(data, memo) + 
elif '@' in data: + return memo[data['@']] + return {key:_deserialize(value, namespace, memo) for key, value in data.items()} + elif isinstance(data, list): + return [_deserialize(value, namespace, memo) for value in data] + return data + + +class Serialize: + #-- + + def memo_serialize(self, types_to_memoize): + memo = SerializeMemoizer(types_to_memoize) + return self.serialize(memo), memo.serialize() + + def serialize(self, memo=None): + if memo and memo.in_types(self): + return {'@': memo.memoized.get(self)} + + fields = getattr(self, '__serialize_fields__') + res = {f: _serialize(getattr(self, f), memo) for f in fields} + res['__type__'] = type(self).__name__ + if hasattr(self, '_serialize'): + self._serialize(res, memo) + return res + + @classmethod + def deserialize(cls, data, memo): + namespace = getattr(cls, '__serialize_namespace__', []) + namespace = {c.__name__:c for c in namespace} + + fields = getattr(cls, '__serialize_fields__') + + if '@' in data: + return memo[data['@']] + + inst = cls.__new__(cls) + for f in fields: + try: + setattr(inst, f, _deserialize(data[f], namespace, memo)) + except KeyError as e: + raise KeyError("Cannot find key for class", cls, e) + + if hasattr(inst, '_deserialize'): + inst._deserialize() + + return inst + + +class SerializeMemoizer(Serialize): + #-- + + __serialize_fields__ = 'memoized', + + def __init__(self, types_to_memoize): + self.types_to_memoize = tuple(types_to_memoize) + self.memoized = Enumerator() + + def in_types(self, value): + return isinstance(value, self.types_to_memoize) + + def serialize(self): + return _serialize(self.memoized.reversed(), None) + + @classmethod + def deserialize(cls, data, namespace, memo): + return _deserialize(data, namespace, memo) + + +try: + STRING_TYPE = basestring +except NameError: ## + + STRING_TYPE = str + + +import types +from functools import wraps, partial +from contextlib import contextmanager + +Str = str +try: + classtype = types.ClassType ## + +except AttributeError: + classtype = type ## + + + +def smart_decorator(f, create_decorator): + if isinstance(f, types.FunctionType): + return wraps(f)(create_decorator(f, True)) + + elif isinstance(f, (classtype, type, types.BuiltinFunctionType)): + return wraps(f)(create_decorator(f, False)) + + elif isinstance(f, types.MethodType): + return wraps(f)(create_decorator(f.__func__, True)) + + elif isinstance(f, partial): + ## + + return wraps(f.func)(create_decorator(lambda *args, **kw: f(*args[1:], **kw), True)) + + else: + return create_decorator(f.__func__.__call__, True) + + +try: + import regex +except ImportError: + regex = None + +import sre_parse +import sre_constants +categ_pattern = re.compile(r'\\p{[A-Za-z_]+}') + +def get_regexp_width(expr): + if regex: + ## + + ## + + ## + + regexp_final = re.sub(categ_pattern, 'A', expr) + else: + if re.search(categ_pattern, expr): + raise ImportError('`regex` module must be installed in order to use Unicode categories.', expr) + regexp_final = expr + try: + return [int(x) for x in sre_parse.parse(regexp_final).getwidth()] + except sre_constants.error: + if not regex: + raise ValueError(expr) + else: + ## + + ## + + c = regex.compile(regexp_final) + if c.match('') is None: + return 1, sre_constants.MAXREPEAT + else: + return 0, sre_constants.MAXREPEAT + + +from collections import OrderedDict + + +class Meta: + def __init__(self): + self.empty = True + + +class Tree: + #-- + def __init__(self, data, children, meta=None): + self.data = data + self.children = children + self._meta = meta + + @property + def 
meta(self): + if self._meta is None: + self._meta = Meta() + return self._meta + + def __repr__(self): + return f'Tree({self.data!r}, {self.children!r})' + + def _pretty_label(self): + return self.data + + def _pretty(self, level, indent_str): + if len(self.children) == 1 and not isinstance(self.children[0], Tree): + return [indent_str*level, self._pretty_label(), '\t', f'{self.children[0]}', '\n'] + + l = [indent_str*level, self._pretty_label(), '\n'] + for n in self.children: + if isinstance(n, Tree): + l += n._pretty(level+1, indent_str) + else: + l += [indent_str*(level+1), f'{n}', '\n'] + + return l + + def pretty(self, indent_str=' '): + #-- + return ''.join(self._pretty(0, indent_str)) + + def __eq__(self, other): + try: + return self.data == other.data and self.children == other.children + except AttributeError: + return False + + def __ne__(self, other): + return not (self == other) + + def __hash__(self): + return hash((self.data, tuple(self.children))) + + def iter_subtrees(self): + #-- + queue = [self] + subtrees = OrderedDict() + for subtree in queue: + subtrees[id(subtree)] = subtree + queue += [c for c in reversed(subtree.children) + if isinstance(c, Tree) and id(c) not in subtrees] + + del queue + return reversed(list(subtrees.values())) + + def find_pred(self, pred): + #-- + return filter(pred, self.iter_subtrees()) + + def find_data(self, data): + #-- + return self.find_pred(lambda t: t.data == data) + + +from inspect import getmembers, getmro + + +class Discard(Exception): + #-- + pass + +## + + + +class _Decoratable: + #-- + + @classmethod + def _apply_decorator(cls, decorator, **kwargs): + mro = getmro(cls) + assert mro[0] is cls + libmembers = {name for _cls in mro[1:] for name, _ in getmembers(_cls)} + for name, value in getmembers(cls): + + ## + + if name.startswith('_') or (name in libmembers and name not in cls.__dict__): + continue + if not callable(value): + continue + + ## + + if hasattr(cls.__dict__[name], 'vargs_applied') or hasattr(value, 'vargs_applied'): + continue + + static = isinstance(cls.__dict__[name], (staticmethod, classmethod)) + setattr(cls, name, decorator(value, static=static, **kwargs)) + return cls + + def __class_getitem__(cls, _): + return cls + + +class Transformer(_Decoratable): + #-- + __visit_tokens__ = True ## + + + def __init__(self, visit_tokens=True): + self.__visit_tokens__ = visit_tokens + + def _call_userfunc(self, tree, new_children=None): + ## + + children = new_children if new_children is not None else tree.children + try: + f = getattr(self, tree.data) + except AttributeError: + return self.__default__(tree.data, children, tree.meta) + else: + try: + wrapper = getattr(f, 'visit_wrapper', None) + if wrapper is not None: + return f.visit_wrapper(f, tree.data, children, tree.meta) + else: + return f(children) + except (GrammarError, Discard): + raise + except Exception as e: + raise VisitError(tree.data, tree, e) + + def _call_userfunc_token(self, token): + try: + f = getattr(self, token.type) + except AttributeError: + return self.__default_token__(token) + else: + try: + return f(token) + except (GrammarError, Discard): + raise + except Exception as e: + raise VisitError(token.type, token, e) + + def _transform_children(self, children): + for c in children: + try: + if isinstance(c, Tree): + yield self._transform_tree(c) + elif self.__visit_tokens__ and isinstance(c, Token): + yield self._call_userfunc_token(c) + else: + yield c + except Discard: + pass + + def _transform_tree(self, tree): + children = 
list(self._transform_children(tree.children)) + return self._call_userfunc(tree, children) + + def transform(self, tree): + #-- + return self._transform_tree(tree) + + def __mul__(self, other): + #-- + return TransformerChain(self, other) + + def __default__(self, data, children, meta): + #-- + return Tree(data, children, meta) + + def __default_token__(self, token): + #-- + return token + + +def merge_transformers(base_transformer=None, **transformers_to_merge): + #-- + if base_transformer is None: + base_transformer = Transformer() + for prefix, transformer in transformers_to_merge.items(): + for method_name in dir(transformer): + method = getattr(transformer, method_name) + if not callable(method): + continue + if method_name.startswith("_") or method_name == "transform": + continue + prefixed_method = prefix + "__" + method_name + if hasattr(base_transformer, prefixed_method): + raise AttributeError("Cannot merge: method '%s' appears more than once" % prefixed_method) + + setattr(base_transformer, prefixed_method, method) + + return base_transformer + + +class InlineTransformer(Transformer): ## + + def _call_userfunc(self, tree, new_children=None): + ## + + children = new_children if new_children is not None else tree.children + try: + f = getattr(self, tree.data) + except AttributeError: + return self.__default__(tree.data, children, tree.meta) + else: + return f(*children) + + +class TransformerChain: + def __init__(self, *transformers): + self.transformers = transformers + + def transform(self, tree): + for t in self.transformers: + tree = t.transform(tree) + return tree + + def __mul__(self, other): + return TransformerChain(*self.transformers + (other,)) + + +class Transformer_InPlace(Transformer): + #-- + def _transform_tree(self, tree): ## + + return self._call_userfunc(tree) + + def transform(self, tree): + for subtree in tree.iter_subtrees(): + subtree.children = list(self._transform_children(subtree.children)) + + return self._transform_tree(tree) + + +class Transformer_NonRecursive(Transformer): + #-- + + def transform(self, tree): + ## + + rev_postfix = [] + q = [tree] + while q: + t = q.pop() + rev_postfix.append(t) + if isinstance(t, Tree): + q += t.children + + ## + + stack = [] + for x in reversed(rev_postfix): + if isinstance(x, Tree): + size = len(x.children) + if size: + args = stack[-size:] + del stack[-size:] + else: + args = [] + stack.append(self._call_userfunc(x, args)) + elif self.__visit_tokens__ and isinstance(x, Token): + stack.append(self._call_userfunc_token(x)) + else: + stack.append(x) + + t ,= stack ## + + return t + + +class Transformer_InPlaceRecursive(Transformer): + #-- + def _transform_tree(self, tree): + tree.children = list(self._transform_children(tree.children)) + return self._call_userfunc(tree) + + +## + + +class VisitorBase: + def _call_userfunc(self, tree): + return getattr(self, tree.data, self.__default__)(tree) + + def __default__(self, tree): + #-- + return tree + + def __class_getitem__(cls, _): + return cls + + +class Visitor(VisitorBase): + #-- + + def visit(self, tree): + #-- + for subtree in tree.iter_subtrees(): + self._call_userfunc(subtree) + return tree + + def visit_topdown(self,tree): + #-- + for subtree in tree.iter_subtrees_topdown(): + self._call_userfunc(subtree) + return tree + + +class Visitor_Recursive(VisitorBase): + #-- + + def visit(self, tree): + #-- + for child in tree.children: + if isinstance(child, Tree): + self.visit(child) + + self._call_userfunc(tree) + return tree + + def visit_topdown(self,tree): + #-- + 
self._call_userfunc(tree) + + for child in tree.children: + if isinstance(child, Tree): + self.visit_topdown(child) + + return tree + + +def visit_children_decor(func): + #-- + @wraps(func) + def inner(cls, tree): + values = cls.visit_children(tree) + return func(cls, values) + return inner + + +class Interpreter(_Decoratable): + #-- + + def visit(self, tree): + f = getattr(self, tree.data) + wrapper = getattr(f, 'visit_wrapper', None) + if wrapper is not None: + return f.visit_wrapper(f, tree.data, tree.children, tree.meta) + else: + return f(tree) + + def visit_children(self, tree): + return [self.visit(child) if isinstance(child, Tree) else child + for child in tree.children] + + def __getattr__(self, name): + return self.__default__ + + def __default__(self, tree): + return self.visit_children(tree) + + +## + + +def _apply_decorator(obj, decorator, **kwargs): + try: + _apply = obj._apply_decorator + except AttributeError: + return decorator(obj, **kwargs) + else: + return _apply(decorator, **kwargs) + + +def _inline_args__func(func): + @wraps(func) + def create_decorator(_f, with_self): + if with_self: + def f(self, children): + return _f(self, *children) + else: + def f(self, children): + return _f(*children) + return f + + return smart_decorator(func, create_decorator) + + +def inline_args(obj): ## + + return _apply_decorator(obj, _inline_args__func) + + +def _visitor_args_func_dec(func, visit_wrapper=None, static=False): + def create_decorator(_f, with_self): + if with_self: + def f(self, *args, **kwargs): + return _f(self, *args, **kwargs) + else: + def f(self, *args, **kwargs): + return _f(*args, **kwargs) + return f + + if static: + f = wraps(func)(create_decorator(func, False)) + else: + f = smart_decorator(func, create_decorator) + f.vargs_applied = True + f.visit_wrapper = visit_wrapper + return f + + +def _vargs_inline(f, _data, children, _meta): + return f(*children) +def _vargs_meta_inline(f, _data, children, meta): + return f(meta, *children) +def _vargs_meta(f, _data, children, meta): + return f(children, meta) ## + +def _vargs_tree(f, data, children, meta): + return f(Tree(data, children, meta)) + + +def v_args(inline=False, meta=False, tree=False, wrapper=None): + #-- + if tree and (meta or inline): + raise ValueError("Visitor functions cannot combine 'tree' with 'meta' or 'inline'.") + + func = None + if meta: + if inline: + func = _vargs_meta_inline + else: + func = _vargs_meta + elif inline: + func = _vargs_inline + elif tree: + func = _vargs_tree + + if wrapper is not None: + if func is not None: + raise ValueError("Cannot use 'wrapper' along with 'tree', 'meta' or 'inline'.") + func = wrapper + + def _visitor_args_dec(obj): + return _apply_decorator(obj, _visitor_args_func_dec, visit_wrapper=func) + return _visitor_args_dec + + + + +class Symbol(Serialize): + __slots__ = ('name',) + + is_term = NotImplemented + + def __init__(self, name): + self.name = name + + def __eq__(self, other): + assert isinstance(other, Symbol), other + return self.is_term == other.is_term and self.name == other.name + + def __ne__(self, other): + return not (self == other) + + def __hash__(self): + return hash(self.name) + + def __repr__(self): + return f'{type(self).__name__}({self.name!r})' + + fullrepr = property(__repr__) + + +class Terminal(Symbol): + __serialize_fields__ = 'name', 'filter_out' + + is_term = True + + def __init__(self, name, filter_out=False): + self.name = name + self.filter_out = filter_out + + @property + def fullrepr(self): + return 
f'{type(self).__name__}({self.name!r}, {self.filter_out!r})' + + +class NonTerminal(Symbol): + __serialize_fields__ = 'name', + + is_term = False + + +class RuleOptions(Serialize): + __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices' + + def __init__(self, keep_all_tokens=False, expand1=False, priority=None, template_source=None, empty_indices=()): + self.keep_all_tokens = keep_all_tokens + self.expand1 = expand1 + self.priority = priority + self.template_source = template_source + self.empty_indices = empty_indices + + def __repr__(self): + return 'RuleOptions({!r}, {!r}, {!r}, {!r})'.format( + self.keep_all_tokens, + self.expand1, + self.priority, + self.template_source + ) + + +class Rule(Serialize): + #-- + __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash') + + __serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options' + __serialize_namespace__ = Terminal, NonTerminal, RuleOptions + + def __init__(self, origin, expansion, order=0, alias=None, options=None): + self.origin = origin + self.expansion = expansion + self.alias = alias + self.order = order + self.options = options or RuleOptions() + self._hash = hash((self.origin, tuple(self.expansion))) + + def _deserialize(self): + self._hash = hash((self.origin, tuple(self.expansion))) + + def __str__(self): + return '<{} : {}>'.format(self.origin.name, ' '.join(x.name for x in self.expansion)) + + def __repr__(self): + return f'Rule({self.origin!r}, {self.expansion!r}, {self.alias!r}, {self.options!r})' + + def __hash__(self): + return self._hash + + def __eq__(self, other): + if not isinstance(other, Rule): + return False + return self.origin == other.origin and self.expansion == other.expansion + + + +from warnings import warn +from copy import copy + + +class Pattern(Serialize): + raw = None + type = None + + def __init__(self, value, flags=(), raw=None): + self.value = value + self.flags = frozenset(flags) + self.raw = raw + + def __repr__(self): + return repr(self.to_regexp()) + + ## + + def __hash__(self): + return hash((type(self), self.value, self.flags)) + + def __eq__(self, other): + return type(self) == type(other) and self.value == other.value and self.flags == other.flags + + def to_regexp(self): + raise NotImplementedError() + + def min_width(self): + raise NotImplementedError() + + def max_width(self): + raise NotImplementedError() + + if Py36: + ## + + def _get_flags(self, value): + for f in self.flags: + value = (f'(?{f}:{value})') + return value + + else: + def _get_flags(self, value): + for f in self.flags: + value = ('(?%s)' % f) + value + return value + + + +class PatternStr(Pattern): + __serialize_fields__ = 'value', 'flags' + + type = "str" + + def to_regexp(self): + return self._get_flags(re.escape(self.value)) + + @property + def min_width(self): + return len(self.value) + max_width = min_width + + +class PatternRE(Pattern): + __serialize_fields__ = 'value', 'flags', '_width' + + type = "re" + + def to_regexp(self): + return self._get_flags(self.value) + + _width = None + def _get_width(self): + if self._width is None: + self._width = get_regexp_width(self.to_regexp()) + return self._width + + @property + def min_width(self): + return self._get_width()[0] + + @property + def max_width(self): + return self._get_width()[1] + + +class TerminalDef(Serialize): + __serialize_fields__ = 'name', 'pattern', 'priority' + __serialize_namespace__ = PatternStr, PatternRE + + def __init__(self, name, pattern, priority=1): + assert 
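A stdlib-only sketch of the flag composition that Pattern._get_flags performs above: on Python 3.6+ each flag is scoped inline around the pattern, e.g. flag "i" becomes (?i:...), so per-terminal flags never leak into the combined regex.

# Stdlib sketch of the scoped-flag wrapping done by Pattern._get_flags.
import re

def with_flags(pattern, flags):
    for f in flags:
        pattern = f"(?{f}:{pattern})"
    return pattern

rx = re.compile(with_flags(re.escape("select"), {"i"}))
assert rx.match("SELECT")  # case-insensitivity applies only to this group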
isinstance(pattern, Pattern), pattern + self.name = name + self.pattern = pattern + self.priority = priority + + def __repr__(self): + return f'{type(self).__name__}({self.name!r}, {self.pattern!r})' + + def user_repr(self): + if self.name.startswith('__'): ## + + return self.pattern.raw or self.name + else: + return self.name + + +class Token(Str): + #-- + __slots__ = ('type', 'start_pos', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos') + + def __new__(cls, type_, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None, pos_in_stream=None): + try: + inst = super().__new__(cls, value) + except UnicodeDecodeError: + value = value.decode('latin1') + inst = super().__new__(cls, value) + + inst.type = type_ + inst.start_pos = start_pos if start_pos is not None else pos_in_stream + inst.value = value + inst.line = line + inst.column = column + inst.end_line = end_line + inst.end_column = end_column + inst.end_pos = end_pos + return inst + + @property + def pos_in_stream(self): + warn("Attribute Token.pos_in_stream was renamed to Token.start_pos", DeprecationWarning, 2) + return self.start_pos + + def update(self, type_=None, value=None): + return Token.new_borrow_pos( + type_ if type_ is not None else self.type, + value if value is not None else self.value, + self + ) + + @classmethod + def new_borrow_pos(cls, type_, value, borrow_t): + return cls(type_, value, borrow_t.start_pos, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos) + + def __reduce__(self): + return (self.__class__, (self.type, self.value, self.start_pos, self.line, self.column)) + + def __repr__(self): + return f'Token({self.type!r}, {self.value!r})' + + def __deepcopy__(self, memo): + return Token(self.type, self.value, self.start_pos, self.line, self.column) + + def __eq__(self, other): + if isinstance(other, Token) and self.type != other.type: + return False + + return Str.__eq__(self, other) + + __hash__ = Str.__hash__ + + +class LineCounter: + __slots__ = 'char_pos', 'line', 'column', 'line_start_pos', 'newline_char' + + def __init__(self, newline_char): + self.newline_char = newline_char + self.char_pos = 0 + self.line = 1 + self.column = 1 + self.line_start_pos = 0 + + def __eq__(self, other): + if not isinstance(other, LineCounter): + return NotImplemented + + return self.char_pos == other.char_pos and self.newline_char == other.newline_char + + def feed(self, token, test_newline=True): + #-- + if test_newline: + newlines = token.count(self.newline_char) + if newlines: + self.line += newlines + self.line_start_pos = self.char_pos + token.rindex(self.newline_char) + 1 + + self.char_pos += len(token) + self.column = self.char_pos - self.line_start_pos + 1 + + +class UnlessCallback: + def __init__(self, scanner): + self.scanner = scanner + + def __call__(self, t): + res = self.scanner.match(t.value, 0) + if res: + _value, t.type = res + return t + + +class CallChain: + def __init__(self, callback1, callback2, cond): + self.callback1 = callback1 + self.callback2 = callback2 + self.cond = cond + + def __call__(self, t): + t2 = self.callback1(t) + return self.callback2(t) if self.cond(t2) else t2 + + +def _get_match(re_, regexp, s, flags): + m = re_.match(regexp, s, flags) + if m: + return m.group(0) + +def _create_unless(terminals, g_regex_flags, re_, use_bytes): + tokens_by_type = classify(terminals, lambda t: type(t.pattern)) + assert len(tokens_by_type) <= 2, tokens_by_type.keys() + embedded_strs = set() + callback = {} + for retok 
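A sketch of how Token behaves in practice (assuming the regular lark package): it is a str subclass, so it compares like plain text, while carrying the type and position attributes set in __new__ above; update() produces a new token that borrows the original's positions.

# Illustrative sketch of Token as a str subclass with position metadata.
from lark import Token

t = Token("NAME", "width", start_pos=10, line=2, column=5)
assert t == "width"                  # compares like a plain string
assert t.type == "NAME" and t.line == 2
t2 = t.update(value="height")        # new token, positions borrowed from t
assert t2.line == 2 and t2 == "height"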
in tokens_by_type.get(PatternRE, []): + unless = [] + for strtok in tokens_by_type.get(PatternStr, []): + if strtok.priority > retok.priority: + continue + s = strtok.pattern.value + if s == _get_match(re_, retok.pattern.to_regexp(), s, g_regex_flags): + unless.append(strtok) + if strtok.pattern.flags <= retok.pattern.flags: + embedded_strs.add(strtok) + if unless: + callback[retok.name] = UnlessCallback(Scanner(unless, g_regex_flags, re_, match_whole=True, use_bytes=use_bytes)) + + new_terminals = [t for t in terminals if t not in embedded_strs] + return new_terminals, callback + + + +class Scanner: + def __init__(self, terminals, g_regex_flags, re_, use_bytes, match_whole=False): + self.terminals = terminals + self.g_regex_flags = g_regex_flags + self.re_ = re_ + self.use_bytes = use_bytes + self.match_whole = match_whole + + self.allowed_types = {t.name for t in self.terminals} + + self._mres = self._build_mres(terminals, len(terminals)) + + def _build_mres(self, terminals, max_size): + ## + + ## + + ## + + postfix = '$' if self.match_whole else '' + mres = [] + while terminals: + pattern = '|'.join(f'(?P<{t.name}>{t.pattern.to_regexp() + postfix})' for t in terminals[:max_size]) + if self.use_bytes: + pattern = pattern.encode('latin-1') + try: + mre = self.re_.compile(pattern, self.g_regex_flags) + except AssertionError: ## + + return self._build_mres(terminals, max_size//2) + + mres.append((mre, {i: n for n, i in mre.groupindex.items()})) + terminals = terminals[max_size:] + return mres + + def match(self, text, pos): + for mre, type_from_index in self._mres: + m = mre.match(text, pos) + if m: + return m.group(0), type_from_index[m.lastindex] + + +def _regexp_has_newline(r): + #-- + return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r) + + +class Lexer: + #-- + lex = NotImplemented + + def make_lexer_state(self, text): + line_ctr = LineCounter(b'\n' if isinstance(text, bytes) else '\n') + return LexerState(text, line_ctr) + + +class TraditionalLexer(Lexer): + + def __init__(self, conf): + terminals = list(conf.terminals) + assert all(isinstance(t, TerminalDef) for t in terminals), terminals + + self.re = conf.re_module + + if not conf.skip_validation: + ## + + for t in terminals: + try: + self.re.compile(t.pattern.to_regexp(), conf.g_regex_flags) + except self.re.error: + raise LexError(f"Cannot compile token {t.name}: {t.pattern}") + + if t.pattern.min_width == 0: + raise LexError(f"Lexer does not allow zero-width terminals. 
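A stdlib sketch of the core Scanner idea above: all terminals are fused into a single alternation of named groups, and m.lastindex is mapped back through groupindex to recover which terminal matched, exactly as _build_mres and match() do.

# Stdlib sketch of Scanner's one-big-regex dispatch.
import re

terminals = [("NUMBER", r"[0-9]+"), ("NAME", r"[a-z]+")]
mre = re.compile("|".join(f"(?P<{name}>{rx})" for name, rx in terminals))
type_from_index = {i: n for n, i in mre.groupindex.items()}

m = mre.match("foo123", 0)
assert (m.group(0), type_from_index[m.lastindex]) == ("foo", "NAME")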
({t.name}: {t.pattern})") + + if not (set(conf.ignore) <= {t.name for t in terminals}): + raise LexError("Ignore terminals are not defined: %s" % (set(conf.ignore) - {t.name for t in terminals})) + + ## + + self.newline_types = frozenset(t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp())) + self.ignore_types = frozenset(conf.ignore) + + terminals.sort(key=lambda x: (-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name)) + self.terminals = terminals + self.user_callbacks = conf.callbacks + self.g_regex_flags = conf.g_regex_flags + self.use_bytes = conf.use_bytes + self.terminals_by_name = conf.terminals_by_name + + self._scanner = None + + def _build_scanner(self): + terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, self.re, self.use_bytes) + assert all(self.callback.values()) + + for type_, f in self.user_callbacks.items(): + if type_ in self.callback: + ## + + self.callback[type_] = CallChain(self.callback[type_], f, lambda t: t.type == type_) + else: + self.callback[type_] = f + + self._scanner = Scanner(terminals, self.g_regex_flags, self.re, self.use_bytes) + + @property + def scanner(self): + if self._scanner is None: + self._build_scanner() + return self._scanner + + def match(self, text, pos): + return self.scanner.match(text, pos) + + def lex(self, state, parser_state): + with suppress(EOFError): + while True: + yield self.next_token(state, parser_state) + + def next_token(self, lex_state, parser_state=None): + line_ctr = lex_state.line_ctr + while line_ctr.char_pos < len(lex_state.text): + res = self.match(lex_state.text, line_ctr.char_pos) + if not res: + allowed = self.scanner.allowed_types - self.ignore_types + if not allowed: + allowed = {""} + raise UnexpectedCharacters(lex_state.text, line_ctr.char_pos, line_ctr.line, line_ctr.column, + allowed=allowed, token_history=lex_state.last_token and [lex_state.last_token], + state=parser_state, terminals_by_name=self.terminals_by_name) + + value, type_ = res + + if type_ not in self.ignore_types: + t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column) + line_ctr.feed(value, type_ in self.newline_types) + t.end_line = line_ctr.line + t.end_column = line_ctr.column + t.end_pos = line_ctr.char_pos + if t.type in self.callback: + t = self.callback[t.type](t) + if not isinstance(t, Token): + raise LexError("Callbacks must return a token (returned %r)" % t) + lex_state.last_token = t + return t + else: + if type_ in self.callback: + t2 = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column) + self.callback[type_](t2) + line_ctr.feed(value, type_ in self.newline_types) + + ## + + raise EOFError(self) + + +class LexerState: + __slots__ = 'text', 'line_ctr', 'last_token' + + def __init__(self, text, line_ctr, last_token=None): + self.text = text + self.line_ctr = line_ctr + self.last_token = last_token + + def __eq__(self, other): + if not isinstance(other, LexerState): + return NotImplemented + + return self.text is other.text and self.line_ctr == other.line_ctr and self.last_token == other.last_token + + def __copy__(self): + return type(self)(self.text, copy(self.line_ctr), self.last_token) + + +class ContextualLexer(Lexer): + + def __init__(self, conf, states, always_accept=()): + terminals = list(conf.terminals) + terminals_by_name = conf.terminals_by_name + + trad_conf = copy(conf) + trad_conf.terminals = terminals + + lexer_by_tokens = {} + self.lexers = {} + for state, accepts in states.items(): + key = frozenset(accepts) + try: + lexer 
= lexer_by_tokens[key] + except KeyError: + accepts = set(accepts) | set(conf.ignore) | set(always_accept) + lexer_conf = copy(trad_conf) + lexer_conf.terminals = [terminals_by_name[n] for n in accepts if n in terminals_by_name] + lexer = TraditionalLexer(lexer_conf) + lexer_by_tokens[key] = lexer + + self.lexers[state] = lexer + + assert trad_conf.terminals is terminals + self.root_lexer = TraditionalLexer(trad_conf) + + def make_lexer_state(self, text): + return self.root_lexer.make_lexer_state(text) + + def lex(self, lexer_state, parser_state): + try: + while True: + lexer = self.lexers[parser_state.position] + yield lexer.next_token(lexer_state, parser_state) + except EOFError: + pass + except UnexpectedCharacters as e: + ## + + ## + + try: + last_token = lexer_state.last_token ## + + token = self.root_lexer.next_token(lexer_state, parser_state) + raise UnexpectedToken(token, e.allowed, state=parser_state, token_history=[last_token], terminals_by_name=self.root_lexer.terminals_by_name) + except UnexpectedCharacters: + raise e ## + + +class LexerThread: + #-- + + def __init__(self, lexer, text): + self.lexer = lexer + self.state = lexer.make_lexer_state(text) + + def lex(self, parser_state): + return self.lexer.lex(self.state, parser_state) + + def __copy__(self): + copied = object.__new__(LexerThread) + copied.lexer = self.lexer + copied.state = copy(self.state) + return copied + + + +class LexerConf(Serialize): + __serialize_fields__ = 'terminals', 'ignore', 'g_regex_flags', 'use_bytes', 'lexer_type' + __serialize_namespace__ = TerminalDef, + + def __init__(self, terminals, re_module, ignore=(), postlex=None, callbacks=None, g_regex_flags=0, skip_validation=False, use_bytes=False): + self.terminals = terminals + self.terminals_by_name = {t.name: t for t in self.terminals} + assert len(self.terminals) == len(self.terminals_by_name) + self.ignore = ignore + self.postlex = postlex + self.callbacks = callbacks or {} + self.g_regex_flags = g_regex_flags + self.re_module = re_module + self.skip_validation = skip_validation + self.use_bytes = use_bytes + self.lexer_type = None + + @property + def tokens(self): + warn("LexerConf.tokens is deprecated. 
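A sketch of what the ContextualLexer above buys (assuming the regular lark package): each parser state gets a sub-lexer restricted to the terminals that can actually follow, so a keyword can be re-used as an identifier where the grammar allows only an identifier. The standard lexer would reject this input, because the string terminal outranks the regex everywhere.

# Illustrative sketch of state-dependent lexing.
import lark

ctx = lark.Lark("""
start: "print" NAME
NAME: /[a-z]+/
%ignore " "
""", parser="lalr", lexer="contextual")

print(ctx.parse("print print"))  # after "print", only NAME is expected,
                                 # so the second "print" lexes as NAME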
Use LexerConf.terminals instead", DeprecationWarning) + return self.terminals + + def _deserialize(self): + self.terminals_by_name = {t.name: t for t in self.terminals} + + def __deepcopy__(self, memo=None): + return type(self)( + deepcopy(self.terminals, memo), + self.re_module, + deepcopy(self.ignore, memo), + deepcopy(self.postlex, memo), + deepcopy(self.callbacks, memo), + deepcopy(self.g_regex_flags, memo), + deepcopy(self.skip_validation, memo), + deepcopy(self.use_bytes, memo), + ) + + +class ParserConf(Serialize): + __serialize_fields__ = 'rules', 'start', 'parser_type' + + def __init__(self, rules, callbacks, start): + assert isinstance(start, list) + self.rules = rules + self.callbacks = callbacks + self.start = start + + self.parser_type = None + + +from functools import partial, wraps +from itertools import repeat, product + + +class ExpandSingleChild: + def __init__(self, node_builder): + self.node_builder = node_builder + + def __call__(self, children): + if len(children) == 1: + return children[0] + else: + return self.node_builder(children) + + + +class PropagatePositions: + def __init__(self, node_builder, node_filter=None): + self.node_builder = node_builder + self.node_filter = node_filter + + def __call__(self, children): + res = self.node_builder(children) + + if isinstance(res, Tree): + ## + + ## + + ## + + ## + + + res_meta = res.meta + + first_meta = self._pp_get_meta(children) + if first_meta is not None: + if not hasattr(res_meta, 'line'): + ## + + res_meta.line = getattr(first_meta, 'container_line', first_meta.line) + res_meta.column = getattr(first_meta, 'container_column', first_meta.column) + res_meta.start_pos = getattr(first_meta, 'container_start_pos', first_meta.start_pos) + res_meta.empty = False + + res_meta.container_line = getattr(first_meta, 'container_line', first_meta.line) + res_meta.container_column = getattr(first_meta, 'container_column', first_meta.column) + + last_meta = self._pp_get_meta(reversed(children)) + if last_meta is not None: + if not hasattr(res_meta, 'end_line'): + res_meta.end_line = getattr(last_meta, 'container_end_line', last_meta.end_line) + res_meta.end_column = getattr(last_meta, 'container_end_column', last_meta.end_column) + res_meta.end_pos = getattr(last_meta, 'container_end_pos', last_meta.end_pos) + res_meta.empty = False + + res_meta.container_end_line = getattr(last_meta, 'container_end_line', last_meta.end_line) + res_meta.container_end_column = getattr(last_meta, 'container_end_column', last_meta.end_column) + + return res + + def _pp_get_meta(self, children): + for c in children: + if self.node_filter is not None and not self.node_filter(c): + continue + if isinstance(c, Tree): + if not c.meta.empty: + return c.meta + elif isinstance(c, Token): + return c + +def make_propagate_positions(option): + if callable(option): + return partial(PropagatePositions, node_filter=option) + elif option is True: + return PropagatePositions + elif option is False: + return None + + raise ConfigurationError('Invalid option for propagate_positions: %r' % option) + + +class ChildFilter: + def __init__(self, to_include, append_none, node_builder): + self.node_builder = node_builder + self.to_include = to_include + self.append_none = append_none + + def __call__(self, children): + filtered = [] + + for i, to_expand, add_none in self.to_include: + if add_none: + filtered += [None] * add_none + if to_expand: + filtered += children[i].children + else: + filtered.append(children[i]) + + if self.append_none: + filtered += [None] * 
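A sketch of the propagate_positions option implemented by PropagatePositions above (assuming the regular lark package): token line/column information is copied up into every Tree's meta, first token forward and last token backward.

# Illustrative sketch of position propagation.
import lark

p = lark.Lark("""
start: NAME NAME
NAME: /[a-z]+/
%ignore " "
""", propagate_positions=True)

tree = p.parse("foo bar")
print(tree.meta.line, tree.meta.column, tree.meta.end_column)  # e.g. 1 1 8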
self.append_none + + return self.node_builder(filtered) + + +class ChildFilterLALR(ChildFilter): + #-- + + def __call__(self, children): + filtered = [] + for i, to_expand, add_none in self.to_include: + if add_none: + filtered += [None] * add_none + if to_expand: + if filtered: + filtered += children[i].children + else: ## + + filtered = children[i].children + else: + filtered.append(children[i]) + + if self.append_none: + filtered += [None] * self.append_none + + return self.node_builder(filtered) + + +class ChildFilterLALR_NoPlaceholders(ChildFilter): + #-- + def __init__(self, to_include, node_builder): + self.node_builder = node_builder + self.to_include = to_include + + def __call__(self, children): + filtered = [] + for i, to_expand in self.to_include: + if to_expand: + if filtered: + filtered += children[i].children + else: ## + + filtered = children[i].children + else: + filtered.append(children[i]) + return self.node_builder(filtered) + + +def _should_expand(sym): + return not sym.is_term and sym.name.startswith('_') + + +def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices): + ## + + if _empty_indices: + assert _empty_indices.count(False) == len(expansion) + s = ''.join(str(int(b)) for b in _empty_indices) + empty_indices = [len(ones) for ones in s.split('0')] + assert len(empty_indices) == len(expansion)+1, (empty_indices, len(expansion)) + else: + empty_indices = [0] * (len(expansion)+1) + + to_include = [] + nones_to_add = 0 + for i, sym in enumerate(expansion): + nones_to_add += empty_indices[i] + if keep_all_tokens or not (sym.is_term and sym.filter_out): + to_include.append((i, _should_expand(sym), nones_to_add)) + nones_to_add = 0 + + nones_to_add += empty_indices[len(expansion)] + + if _empty_indices or len(to_include) < len(expansion) or any(to_expand for i, to_expand,_ in to_include): + if _empty_indices or ambiguous: + return partial(ChildFilter if ambiguous else ChildFilterLALR, to_include, nones_to_add) + else: + ## + + return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i,x,_ in to_include]) + + +class AmbiguousExpander: + #-- + def __init__(self, to_expand, tree_class, node_builder): + self.node_builder = node_builder + self.tree_class = tree_class + self.to_expand = to_expand + + def __call__(self, children): + def _is_ambig_tree(t): + return hasattr(t, 'data') and t.data == '_ambig' + + ## + + ## + + ## + + ## + + ambiguous = [] + for i, child in enumerate(children): + if _is_ambig_tree(child): + if i in self.to_expand: + ambiguous.append(i) + + child.expand_kids_by_data('_ambig') + + if not ambiguous: + return self.node_builder(children) + + expand = [iter(child.children) if i in ambiguous else repeat(child) for i, child in enumerate(children)] + return self.tree_class('_ambig', [self.node_builder(list(f[0])) for f in product(zip(*expand))]) + + +def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens): + to_expand = [i for i, sym in enumerate(expansion) + if keep_all_tokens or ((not (sym.is_term and sym.filter_out)) and _should_expand(sym))] + if to_expand: + return partial(AmbiguousExpander, to_expand, tree_class) + + +class AmbiguousIntermediateExpander: + #-- + + def __init__(self, tree_class, node_builder): + self.node_builder = node_builder + self.tree_class = tree_class + + def __call__(self, children): + def _is_iambig_tree(child): + return hasattr(child, 'data') and child.data == '_iambig' + + def _collapse_iambig(children): + #-- + + ## + + ## + + if children and _is_iambig_tree(children[0]): + 
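A sketch of the maybe_placeholders behaviour that the child filters above implement (assuming the regular lark package): with the option on, an unmatched [item] shows up as an explicit None child instead of being dropped, which is what the add_none bookkeeping is for.

# Illustrative sketch of maybe_placeholders=True.
import lark

p = lark.Lark("""
start: "a" [NUMBER] "b"
%import common.NUMBER
""", maybe_placeholders=True)

print(p.parse("ab").children)    # [None]
print(p.parse("a1b").children)   # [Token('NUMBER', '1')]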
iambig_node = children[0] + result = [] + for grandchild in iambig_node.children: + collapsed = _collapse_iambig(grandchild.children) + if collapsed: + for child in collapsed: + child.children += children[1:] + result += collapsed + else: + new_tree = self.tree_class('_inter', grandchild.children + children[1:]) + result.append(new_tree) + return result + + collapsed = _collapse_iambig(children) + if collapsed: + processed_nodes = [self.node_builder(c.children) for c in collapsed] + return self.tree_class('_ambig', processed_nodes) + + return self.node_builder(children) + + +def ptb_inline_args(func): + @wraps(func) + def f(children): + return func(*children) + return f + + +def inplace_transformer(func): + @wraps(func) + def f(children): + ## + + tree = Tree(func.__name__, children) + return func(tree) + return f + + +def apply_visit_wrapper(func, name, wrapper): + if wrapper is _vargs_meta or wrapper is _vargs_meta_inline: + raise NotImplementedError("Meta args not supported for internal transformer") + + @wraps(func) + def f(children): + return wrapper(func, name, children, None) + return f + + +class ParseTreeBuilder: + def __init__(self, rules, tree_class, propagate_positions=False, ambiguous=False, maybe_placeholders=False): + self.tree_class = tree_class + self.propagate_positions = propagate_positions + self.ambiguous = ambiguous + self.maybe_placeholders = maybe_placeholders + + self.rule_builders = list(self._init_builders(rules)) + + def _init_builders(self, rules): + propagate_positions = make_propagate_positions(self.propagate_positions) + + for rule in rules: + options = rule.options + keep_all_tokens = options.keep_all_tokens + expand_single_child = options.expand1 + + wrapper_chain = list(filter(None, [ + (expand_single_child and not rule.alias) and ExpandSingleChild, + maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None), + propagate_positions, + self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens), + self.ambiguous and partial(AmbiguousIntermediateExpander, self.tree_class) + ])) + + yield rule, wrapper_chain + + def create_callback(self, transformer=None): + callbacks = {} + + for rule, wrapper_chain in self.rule_builders: + + user_callback_name = rule.alias or rule.options.template_source or rule.origin.name + try: + f = getattr(transformer, user_callback_name) + ## + + wrapper = getattr(f, 'visit_wrapper', None) + if wrapper is not None: + f = apply_visit_wrapper(f, user_callback_name, wrapper) + else: + if isinstance(transformer, InlineTransformer): + f = ptb_inline_args(f) + elif isinstance(transformer, Transformer_InPlace): + f = inplace_transformer(f) + except AttributeError: + f = partial(self.tree_class, user_callback_name) + + for w in wrapper_chain: + f = w(f) + + if rule in callbacks: + raise GrammarError(f"Rule '{rule}' already exists") + + callbacks[rule] = f + + return callbacks + + + +class LALR_Parser(Serialize): + def __init__(self, parser_conf, debug=False): + analysis = LALR_Analyzer(parser_conf, debug=debug) + analysis.compute_lalr() + callbacks = parser_conf.callbacks + + self._parse_table = analysis.parse_table + self.parser_conf = parser_conf + self.parser = _Parser(analysis.parse_table, callbacks, debug) + + @classmethod + def deserialize(cls, data, memo, callbacks, debug=False): + inst = cls.__new__(cls) + inst._parse_table = IntParseTable.deserialize(data, memo) + inst.parser = _Parser(inst._parse_table, callbacks, 
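A sketch of what the ambiguity machinery above produces when it is exposed to the user (assuming the regular lark package, Earley parser): with ambiguity="explicit", all derivations are kept and wrapped in _ambig nodes rather than being resolved away.

# Illustrative sketch of explicit ambiguity.
import lark

p = lark.Lark("""
start: ab
ab: "a" "b"
  | a "b"
a: "a"
""", ambiguity="explicit")

print(p.parse("ab").pretty())  # both derivations appear under an _ambig node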
debug) + return inst + + def serialize(self, memo): + return self._parse_table.serialize(memo) + + def parse_interactive(self, lexer, start): + return self.parser.parse(lexer, start, start_interactive=True) + + def parse(self, lexer, start, on_error=None): + try: + return self.parser.parse(lexer, start) + except UnexpectedInput as e: + if on_error is None: + raise + + while True: + if isinstance(e, UnexpectedCharacters): + s = e.interactive_parser.lexer_state.state + p = s.line_ctr.char_pos + + if not on_error(e): + raise e + + if isinstance(e, UnexpectedCharacters): + ## + + if p == s.line_ctr.char_pos: + s.line_ctr.feed(s.text[p:p+1]) + + try: + return e.interactive_parser.resume_parse() + except UnexpectedToken as e2: + if (isinstance(e, UnexpectedToken) + and e.token.type == e2.token.type == '$END' + and e.interactive_parser == e2.interactive_parser): + ## + + raise e2 + e = e2 + except UnexpectedCharacters as e2: + e = e2 + + +class ParseConf: + __slots__ = 'parse_table', 'callbacks', 'start', 'start_state', 'end_state', 'states' + + def __init__(self, parse_table, callbacks, start): + self.parse_table = parse_table + + self.start_state = self.parse_table.start_states[start] + self.end_state = self.parse_table.end_states[start] + self.states = self.parse_table.states + + self.callbacks = callbacks + self.start = start + + +class ParserState: + __slots__ = 'parse_conf', 'lexer', 'state_stack', 'value_stack' + + def __init__(self, parse_conf, lexer, state_stack=None, value_stack=None): + self.parse_conf = parse_conf + self.lexer = lexer + self.state_stack = state_stack or [self.parse_conf.start_state] + self.value_stack = value_stack or [] + + @property + def position(self): + return self.state_stack[-1] + + ## + + def __eq__(self, other): + if not isinstance(other, ParserState): + return NotImplemented + return len(self.state_stack) == len(other.state_stack) and self.position == other.position + + def __copy__(self): + return type(self)( + self.parse_conf, + self.lexer, ## + + copy(self.state_stack), + deepcopy(self.value_stack), + ) + + def copy(self): + return copy(self) + + def feed_token(self, token, is_end=False): + state_stack = self.state_stack + value_stack = self.value_stack + states = self.parse_conf.states + end_state = self.parse_conf.end_state + callbacks = self.parse_conf.callbacks + + while True: + state = state_stack[-1] + try: + action, arg = states[state][token.type] + except KeyError: + expected = {s for s in states[state].keys() if s.isupper()} + raise UnexpectedToken(token, expected, state=self, interactive_parser=None) + + assert arg != end_state + + if action is Shift: + ## + + assert not is_end + state_stack.append(arg) + value_stack.append(token if token.type not in callbacks else callbacks[token.type](token)) + return + else: + ## + + rule = arg + size = len(rule.expansion) + if size: + s = value_stack[-size:] + del state_stack[-size:] + del value_stack[-size:] + else: + s = [] + + value = callbacks[rule](s) + + _action, new_state = states[state_stack[-1]][rule.origin.name] + assert _action is Shift + state_stack.append(new_state) + value_stack.append(value) + + if is_end and state_stack[-1] == end_state: + return value_stack[-1] + +class _Parser: + def __init__(self, parse_table, callbacks, debug=False): + self.parse_table = parse_table + self.callbacks = callbacks + self.debug = debug + + def parse(self, lexer, start, value_stack=None, state_stack=None, start_interactive=False): + parse_conf = ParseConf(self.parse_table, self.callbacks, start) + parser_state 
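A sketch of the on_error hook handled by LALR_Parser.parse above (assuming the regular lark package): returning True from the callback makes the parser skip past the offending input, one character at a time for UnexpectedCharacters, and resume from the interactive-parser checkpoint.

# Illustrative sketch of error recovery with on_error.
import lark

p = lark.Lark("""
start: NAME+
NAME: /[a-z]+/
%ignore " "
""", parser="lalr")

def keep_going(e):
    # e is an UnexpectedInput subclass; True means "skip and resume".
    print("recovering from", type(e).__name__)
    return True

print(p.parse("abc 123 def", on_error=keep_going))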
= ParserState(parse_conf, lexer, state_stack, value_stack) + if start_interactive: + return InteractiveParser(self, parser_state, parser_state.lexer) + return self.parse_from_state(parser_state) + + + def parse_from_state(self, state): + ## + + try: + token = None + for token in state.lexer.lex(state): + state.feed_token(token) + + end_token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1) + return state.feed_token(end_token, True) + except UnexpectedInput as e: + try: + e.interactive_parser = InteractiveParser(self, state, state.lexer) + except NameError: + pass + raise e + except Exception as e: + if self.debug: + print("") + print("STATE STACK DUMP") + print("----------------") + for i, s in enumerate(state.state_stack): + print('%d)' % i , s) + print("") + + raise + + +class Action: + def __init__(self, name): + self.name = name + def __str__(self): + return self.name + def __repr__(self): + return str(self) + +Shift = Action('Shift') +Reduce = Action('Reduce') + + +class ParseTable: + def __init__(self, states, start_states, end_states): + self.states = states + self.start_states = start_states + self.end_states = end_states + + def serialize(self, memo): + tokens = Enumerator() + rules = Enumerator() + + states = { + state: {tokens.get(token): ((1, arg.serialize(memo)) if action is Reduce else (0, arg)) + for token, (action, arg) in actions.items()} + for state, actions in self.states.items() + } + + return { + 'tokens': tokens.reversed(), + 'states': states, + 'start_states': self.start_states, + 'end_states': self.end_states, + } + + @classmethod + def deserialize(cls, data, memo): + tokens = data['tokens'] + states = { + state: {tokens[token]: ((Reduce, Rule.deserialize(arg, memo)) if action==1 else (Shift, arg)) + for token, (action, arg) in actions.items()} + for state, actions in data['states'].items() + } + return cls(states, data['start_states'], data['end_states']) + + +class IntParseTable(ParseTable): + + @classmethod + def from_ParseTable(cls, parse_table): + enum = list(parse_table.states) + state_to_idx = {s:i for i,s in enumerate(enum)} + int_states = {} + + for s, la in parse_table.states.items(): + la = {k:(v[0], state_to_idx[v[1]]) if v[0] is Shift else v + for k,v in la.items()} + int_states[ state_to_idx[s] ] = la + + + start_states = {start:state_to_idx[s] for start, s in parse_table.start_states.items()} + end_states = {start:state_to_idx[s] for start, s in parse_table.end_states.items()} + return cls(int_states, start_states, end_states) + + + +def _wrap_lexer(lexer_class): + future_interface = getattr(lexer_class, '__future_interface__', False) + if future_interface: + return lexer_class + else: + class CustomLexerWrapper(Lexer): + def __init__(self, lexer_conf): + self.lexer = lexer_class(lexer_conf) + def lex(self, lexer_state, parser_state): + return self.lexer.lex(lexer_state.text) + return CustomLexerWrapper + + +class MakeParsingFrontend: + def __init__(self, parser_type, lexer_type): + self.parser_type = parser_type + self.lexer_type = lexer_type + + def __call__(self, lexer_conf, parser_conf, options): + assert isinstance(lexer_conf, LexerConf) + assert isinstance(parser_conf, ParserConf) + parser_conf.parser_type = self.parser_type + lexer_conf.lexer_type = self.lexer_type + return ParsingFrontend(lexer_conf, parser_conf, options) + + def deserialize(self, data, memo, lexer_conf, callbacks, options): + parser_conf = ParserConf.deserialize(data['parser_conf'], memo) + parser = LALR_Parser.deserialize(data['parser'], 
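A sketch of what the ParseTable serialize/deserialize pair above is ultimately used for (assuming the regular lark package): Lark.save and Lark.load, defined further down, round-trip a compiled LALR parser through pickle so the grammar analysis is not repeated.

# Illustrative sketch of saving and reloading a compiled parser.
import io
import lark

p = lark.Lark("start: NAME\nNAME: /[a-z]+/", parser="lalr")
buf = io.BytesIO()
p.save(buf)

buf.seek(0)
clone = lark.Lark.load(buf)
print(clone.parse("abc"))  # same behaviour, no re-analysis of the grammar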
memo, callbacks, options.debug) + parser_conf.callbacks = callbacks + return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser) + + + + +class ParsingFrontend(Serialize): + __serialize_fields__ = 'lexer_conf', 'parser_conf', 'parser', 'options' + + def __init__(self, lexer_conf, parser_conf, options, parser=None): + self.parser_conf = parser_conf + self.lexer_conf = lexer_conf + self.options = options + + ## + + if parser: ## + + self.parser = parser + else: + create_parser = { + 'lalr': create_lalr_parser, + 'earley': create_earley_parser, + 'cyk': CYK_FrontEnd, + }[parser_conf.parser_type] + self.parser = create_parser(lexer_conf, parser_conf, options) + + ## + + lexer_type = lexer_conf.lexer_type + self.skip_lexer = False + if lexer_type in ('dynamic', 'dynamic_complete'): + assert lexer_conf.postlex is None + self.skip_lexer = True + return + + try: + create_lexer = { + 'standard': create_traditional_lexer, + 'contextual': create_contextual_lexer, + }[lexer_type] + except KeyError: + assert issubclass(lexer_type, Lexer), lexer_type + self.lexer = _wrap_lexer(lexer_type)(lexer_conf) + else: + self.lexer = create_lexer(lexer_conf, self.parser, lexer_conf.postlex) + + if lexer_conf.postlex: + self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex) + + def _verify_start(self, start=None): + if start is None: + start_decls = self.parser_conf.start + if len(start_decls) > 1: + raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start_decls) + start ,= start_decls + elif start not in self.parser_conf.start: + raise ConfigurationError(f"Unknown start rule {start}. Must be one of {self.parser_conf.start!r}") + return start + + def parse(self, text, start=None, on_error=None): + chosen_start = self._verify_start(start) + stream = text if self.skip_lexer else LexerThread(self.lexer, text) + kw = {} if on_error is None else {'on_error': on_error} + return self.parser.parse(stream, chosen_start, **kw) + + def parse_interactive(self, text=None, start=None): + chosen_start = self._verify_start(start) + if self.parser_conf.parser_type != 'lalr': + raise ConfigurationError("parse_interactive() currently only works with parser='lalr' ") + stream = text if self.skip_lexer else LexerThread(self.lexer, text) + return self.parser.parse_interactive(stream, chosen_start) + + +def get_frontend(parser, lexer): + assert_config(parser, ('lalr', 'earley', 'cyk')) + if not isinstance(lexer, type): ## + + expected = { + 'lalr': ('standard', 'contextual'), + 'earley': ('standard', 'dynamic', 'dynamic_complete'), + 'cyk': ('standard', ), + }[parser] + assert_config(lexer, expected, 'Parser %r does not support lexer %%r, expected one of %%s' % parser) + + return MakeParsingFrontend(parser, lexer) + + +def _get_lexer_callbacks(transformer, terminals): + result = {} + for terminal in terminals: + callback = getattr(transformer, terminal.name, None) + if callback is not None: + result[terminal.name] = callback + return result + +class PostLexConnector: + def __init__(self, lexer, postlexer): + self.lexer = lexer + self.postlexer = postlexer + + def make_lexer_state(self, text): + return self.lexer.make_lexer_state(text) + + def lex(self, lexer_state, parser_state): + i = self.lexer.lex(lexer_state, parser_state) + return self.postlexer.process(i) + + + +def create_traditional_lexer(lexer_conf, parser, postlex): + return TraditionalLexer(lexer_conf) + +def create_contextual_lexer(lexer_conf, parser, postlex): + states = 
{idx:list(t.keys()) for idx, t in parser._parse_table.states.items()} + always_accept = postlex.always_accept if postlex else () + return ContextualLexer(lexer_conf, states, always_accept=always_accept) + +def create_lalr_parser(lexer_conf, parser_conf, options=None): + debug = options.debug if options else False + return LALR_Parser(parser_conf, debug=debug) + + +create_earley_parser = NotImplemented +CYK_FrontEnd = NotImplemented + + + +class LarkOptions(Serialize): + #-- + OPTIONS_DOC = """ + **=== General Options ===** + + start + The start symbol. Either a string, or a list of strings for multiple possible starts (Default: "start") + debug + Display debug information and extra warnings. Use only when debugging (default: False) + When used with Earley, it generates a forest graph as "sppf.png", if 'dot' is installed. + transformer + Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster) + propagate_positions + Propagates (line, column, end_line, end_column) attributes into all tree branches. + Accepts ``False``, ``True``, or a callable, which will filter which nodes to ignore when propagating. + maybe_placeholders + When ``True``, the ``[]`` operator returns ``None`` when not matched. + + When ``False``, ``[]`` behaves like the ``?`` operator, and returns no value at all. + (default= ``False``. Recommended to set to ``True``) + cache + Cache the results of the Lark grammar analysis, for x2 to x3 faster loading. LALR only for now. + + - When ``False``, does nothing (default) + - When ``True``, caches to a temporary file in the local directory + - When given a string, caches to the path pointed by the string + regex + When True, uses the ``regex`` module instead of the stdlib ``re``. + g_regex_flags + Flags that are applied to all terminals (both regex and strings) + keep_all_tokens + Prevent the tree builder from automagically removing "punctuation" tokens (default: False) + tree_class + Lark will produce trees comprised of instances of this class instead of the default ``lark.Tree``. + + **=== Algorithm Options ===** + + parser + Decides which parser engine to use. Accepts "earley" or "lalr". (Default: "earley"). + (there is also a "cyk" option for legacy) + lexer + Decides whether or not to use a lexer stage + + - "auto" (default): Choose for me based on the parser + - "standard": Use a standard lexer + - "contextual": Stronger lexer (only works with parser="lalr") + - "dynamic": Flexible and powerful (only with parser="earley") + - "dynamic_complete": Same as dynamic, but tries *every* variation of tokenizing possible. + ambiguity + Decides how to handle ambiguity in the parse. Only relevant if parser="earley" + + - "resolve": The parser will automatically choose the simplest derivation + (it chooses consistently: greedy for tokens, non-greedy for rules) + - "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest). + - "forest": The parser will return the root of the shared packed parse forest. + + **=== Misc. / Domain Specific Options ===** + + postlex + Lexer post-processing (Default: None) Only works with the standard and contextual lexers. + priority + How priorities should be evaluated - auto, none, normal, invert (Default: auto) + lexer_callbacks + Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution. + use_bytes + Accept an input of type ``bytes`` instead of ``str`` (Python 3 only). + edit_terminals + A callback for editing the terminals before parse. 
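A sketch of the cache option documented above (assuming the regular lark package; LALR only): the first construction writes the analysed grammar to a temp file keyed by an md5 of the grammar and options, and later constructions load it instead of recomputing the parse table.

# Illustrative sketch of grammar caching.
import lark

p = lark.Lark("start: NAME\nNAME: /[a-z]+/", parser="lalr", cache=True)
print(p.parse("abc"))  # subsequent runs reload the cached parse table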
+ import_paths + A List of either paths or loader functions to specify from where grammars are imported + source_path + Override the source of from where the grammar was loaded. Useful for relative imports and unconventional grammar loading + **=== End of Options ===** + """ + if __doc__: + __doc__ += OPTIONS_DOC + + + ## + + ## + + ## + + ## + + ## + + ## + + ## + + ## + + _defaults = { + 'debug': False, + 'keep_all_tokens': False, + 'tree_class': None, + 'cache': False, + 'postlex': None, + 'parser': 'earley', + 'lexer': 'auto', + 'transformer': None, + 'start': 'start', + 'priority': 'auto', + 'ambiguity': 'auto', + 'regex': False, + 'propagate_positions': False, + 'lexer_callbacks': {}, + 'maybe_placeholders': False, + 'edit_terminals': None, + 'g_regex_flags': 0, + 'use_bytes': False, + 'import_paths': [], + 'source_path': None, + } + + def __init__(self, options_dict): + o = dict(options_dict) + + options = {} + for name, default in self._defaults.items(): + if name in o: + value = o.pop(name) + if isinstance(default, bool) and name not in ('cache', 'use_bytes', 'propagate_positions'): + value = bool(value) + else: + value = default + + options[name] = value + + if isinstance(options['start'], STRING_TYPE): + options['start'] = [options['start']] + + self.__dict__['options'] = options + + + assert_config(self.parser, ('earley', 'lalr', 'cyk', None)) + + if self.parser == 'earley' and self.transformer: + raise ConfigurationError('Cannot specify an embedded transformer when using the Earley algorithm. ' + 'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)') + + if o: + raise ConfigurationError("Unknown options: %s" % o.keys()) + + def __getattr__(self, name): + try: + return self.__dict__['options'][name] + except KeyError as e: + raise AttributeError(e) + + def __setattr__(self, name, value): + assert_config(name, self.options.keys(), "%r isn't a valid option. Expected one of: %s") + self.options[name] = value + + def serialize(self, memo): + return self.options + + @classmethod + def deserialize(cls, data, memo): + return cls(data) + + +## + +## + +_LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'lexer_callbacks', 'use_bytes', 'debug', 'g_regex_flags', 'regex', 'propagate_positions', 'tree_class'} + +_VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None) +_VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest') + + +class PostLex(ABC): + @abstractmethod + def process(self, stream): + return stream + + always_accept = () + + +class Lark(Serialize): + #-- + def __init__(self, grammar, **options): + self.options = LarkOptions(options) + + ## + + use_regex = self.options.regex + if use_regex: + if regex: + re_module = regex + else: + raise ImportError('`regex` module must be installed if calling `Lark(regex=True)`.') + else: + re_module = re + + ## + + if self.options.source_path is None: + try: + self.source_path = grammar.name + except AttributeError: + self.source_path = '' + else: + self.source_path = self.options.source_path + + ## + + try: + read = grammar.read + except AttributeError: + pass + else: + grammar = read() + + cache_fn = None + cache_md5 = None + if isinstance(grammar, STRING_TYPE): + self.source_grammar = grammar + if self.options.use_bytes: + if not isascii(grammar): + raise ConfigurationError("Grammar must be ascii only, when use_bytes=True") + if sys.version_info[0] == 2 and self.options.use_bytes != 'force': + raise ConfigurationError("`use_bytes=True` may have issues on python2." 
+ "Use `use_bytes='force'` to use it at your own risk.") + + if self.options.cache: + if self.options.parser != 'lalr': + raise ConfigurationError("cache only works with parser='lalr' for now") + + unhashable = ('transformer', 'postlex', 'lexer_callbacks', 'edit_terminals') + options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable) + from . import __version__ + s = grammar + options_str + __version__ + str(sys.version_info[:2]) + cache_md5 = hashlib.md5(s.encode('utf8')).hexdigest() + + if isinstance(self.options.cache, STRING_TYPE): + cache_fn = self.options.cache + else: + if self.options.cache is not True: + raise ConfigurationError("cache argument must be bool or str") + ## + + cache_fn = tempfile.gettempdir() + '/.lark_cache_%s_%s_%s.tmp' % ((cache_md5,) + sys.version_info[:2]) + + if FS.exists(cache_fn): + logger.debug('Loading grammar from cache: %s', cache_fn) + ## + + for name in (set(options) - _LOAD_ALLOWED_OPTIONS): + del options[name] + with FS.open(cache_fn, 'rb') as f: + old_options = self.options + try: + file_md5 = f.readline().rstrip(b'\n') + cached_used_files = pickle.load(f) + if file_md5 == cache_md5.encode('utf8') and verify_used_files(cached_used_files): + cached_parser_data = pickle.load(f) + self._load(cached_parser_data, **options) + return + except Exception: ## + + logger.exception("Failed to load Lark from cache: %r. We will try to carry on." % cache_fn) + + ## + + ## + + self.options = old_options + + + ## + + self.grammar, used_files = load_grammar(grammar, self.source_path, self.options.import_paths, self.options.keep_all_tokens) + else: + assert isinstance(grammar, Grammar) + self.grammar = grammar + + + if self.options.lexer == 'auto': + if self.options.parser == 'lalr': + self.options.lexer = 'contextual' + elif self.options.parser == 'earley': + if self.options.postlex is not None: + logger.info("postlex can't be used with the dynamic lexer, so we use standard instead. " + "Consider using lalr with contextual instead of earley") + self.options.lexer = 'standard' + else: + self.options.lexer = 'dynamic' + elif self.options.parser == 'cyk': + self.options.lexer = 'standard' + else: + assert False, self.options.parser + lexer = self.options.lexer + if isinstance(lexer, type): + assert issubclass(lexer, Lexer) ## + + else: + assert_config(lexer, ('standard', 'contextual', 'dynamic', 'dynamic_complete')) + if self.options.postlex is not None and 'dynamic' in lexer: + raise ConfigurationError("Can't use postlex with a dynamic lexer. Use standard or contextual instead") + + if self.options.ambiguity == 'auto': + if self.options.parser == 'earley': + self.options.ambiguity = 'resolve' + else: + assert_config(self.options.parser, ('earley', 'cyk'), "%r doesn't support disambiguation. Use one of these parsers instead: %s") + + if self.options.priority == 'auto': + self.options.priority = 'normal' + + if self.options.priority not in _VALID_PRIORITY_OPTIONS: + raise ConfigurationError(f"invalid priority option: {self.options.priority!r}. Must be one of {_VALID_PRIORITY_OPTIONS!r}") + assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"' + if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS: + raise ConfigurationError(f"invalid ambiguity option: {self.options.ambiguity!r}. 
Must be one of {_VALID_AMBIGUITY_OPTIONS!r}") + + if self.options.parser is None: + terminals_to_keep = '*' + elif self.options.postlex is not None: + terminals_to_keep = set(self.options.postlex.always_accept) + else: + terminals_to_keep = set() + + ## + + self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start, terminals_to_keep) + + if self.options.edit_terminals: + for t in self.terminals: + self.options.edit_terminals(t) + + self._terminals_dict = {t.name: t for t in self.terminals} + + ## + + ## + + if self.options.priority == 'invert': + for rule in self.rules: + if rule.options.priority is not None: + rule.options.priority = -rule.options.priority + ## + + ## + + ## + + elif self.options.priority is None: + for rule in self.rules: + if rule.options.priority is not None: + rule.options.priority = None + + ## + + self.lexer_conf = LexerConf( + self.terminals, re_module, self.ignore_tokens, self.options.postlex, + self.options.lexer_callbacks, self.options.g_regex_flags, use_bytes=self.options.use_bytes + ) + + if self.options.parser: + self.parser = self._build_parser() + elif lexer: + self.lexer = self._build_lexer() + + if cache_fn: + logger.debug('Saving grammar to cache: %s', cache_fn) + with FS.open(cache_fn, 'wb') as f: + f.write(cache_md5.encode('utf8') + b'\n') + pickle.dump(used_files, f) + self.save(f) + + if __doc__: + __doc__ += "\n\n" + LarkOptions.OPTIONS_DOC + + __serialize_fields__ = 'parser', 'rules', 'options' + + def _build_lexer(self, dont_ignore=False): + lexer_conf = self.lexer_conf + if dont_ignore: + from copy import copy + lexer_conf = copy(lexer_conf) + lexer_conf.ignore = () + return TraditionalLexer(lexer_conf) + + def _prepare_callbacks(self): + self._callbacks = {} + ## + + if self.options.ambiguity != 'forest': + self._parse_tree_builder = ParseTreeBuilder( + self.rules, + self.options.tree_class or Tree, + self.options.propagate_positions, + self.options.parser != 'lalr' and self.options.ambiguity == 'explicit', + self.options.maybe_placeholders + ) + self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer) + self._callbacks.update(_get_lexer_callbacks(self.options.transformer, self.terminals)) + + def _build_parser(self): + self._prepare_callbacks() + parser_class = get_frontend(self.options.parser, self.options.lexer) + parser_conf = ParserConf(self.rules, self._callbacks, self.options.start) + return parser_class(self.lexer_conf, parser_conf, options=self.options) + + def save(self, f): + #-- + data, m = self.memo_serialize([TerminalDef, Rule]) + pickle.dump({'data': data, 'memo': m}, f, protocol=pickle.HIGHEST_PROTOCOL) + + @classmethod + def load(cls, f): + #-- + inst = cls.__new__(cls) + return inst._load(f) + + def _deserialize_lexer_conf(self, data, memo, options): + lexer_conf = LexerConf.deserialize(data['lexer_conf'], memo) + lexer_conf.callbacks = options.lexer_callbacks or {} + lexer_conf.re_module = regex if options.regex else re + lexer_conf.use_bytes = options.use_bytes + lexer_conf.g_regex_flags = options.g_regex_flags + lexer_conf.skip_validation = True + lexer_conf.postlex = options.postlex + return lexer_conf + + def _load(self, f, **kwargs): + if isinstance(f, dict): + d = f + else: + d = pickle.load(f) + memo_json = d['memo'] + data = d['data'] + + assert memo_json + memo = SerializeMemoizer.deserialize(memo_json, {'Rule': Rule, 'TerminalDef': TerminalDef}, {}) + options = dict(data['options']) + if (set(kwargs) - _LOAD_ALLOWED_OPTIONS) & set(LarkOptions._defaults): + 
raise ConfigurationError("Some options are not allowed when loading a Parser: {}" + .format(set(kwargs) - _LOAD_ALLOWED_OPTIONS)) + options.update(kwargs) + self.options = LarkOptions.deserialize(options, memo) + self.rules = [Rule.deserialize(r, memo) for r in data['rules']] + self.source_path = '' + parser_class = get_frontend(self.options.parser, self.options.lexer) + self.lexer_conf = self._deserialize_lexer_conf(data['parser'], memo, self.options) + self.terminals = self.lexer_conf.terminals + self._prepare_callbacks() + self._terminals_dict = {t.name: t for t in self.terminals} + self.parser = parser_class.deserialize( + data['parser'], + memo, + self.lexer_conf, + self._callbacks, + self.options, ## + + ) + return self + + @classmethod + def _load_from_dict(cls, data, memo, **kwargs): + inst = cls.__new__(cls) + return inst._load({'data': data, 'memo': memo}, **kwargs) + + @classmethod + def open(cls, grammar_filename, rel_to=None, **options): + #-- + if rel_to: + basepath = os.path.dirname(rel_to) + grammar_filename = os.path.join(basepath, grammar_filename) + with open(grammar_filename, encoding='utf8') as f: + return cls(f, **options) + + @classmethod + def open_from_package(cls, package, grammar_path, search_paths=("",), **options): + #-- + package_loader = FromPackageLoader(package, search_paths) + full_path, text = package_loader(None, grammar_path) + options.setdefault('source_path', full_path) + options.setdefault('import_paths', []) + options['import_paths'].append(package_loader) + return cls(text, **options) + + def __repr__(self): + return f'Lark(open({self.source_path!r}), parser={self.options.parser!r}, lexer={self.options.lexer!r}, ...)' + + + def lex(self, text, dont_ignore=False): + #-- + if not hasattr(self, 'lexer') or dont_ignore: + lexer = self._build_lexer(dont_ignore) + else: + lexer = self.lexer + lexer_thread = LexerThread(lexer, text) + stream = lexer_thread.lex(None) + if self.options.postlex: + return self.options.postlex.process(stream) + return stream + + def get_terminal(self, name): + #-- + return self._terminals_dict[name] + + def parse_interactive(self, text=None, start=None): + #-- + return self.parser.parse_interactive(text, start=start) + + def parse(self, text, start=None, on_error=None): + #-- + return self.parser.parse(text, start=start, on_error=on_error) + + @property + def source(self): + warn("Attribute Lark.source was renamed to Lark.source_path", DeprecationWarning) + return self.source_path + + @source.setter + def source(self, value): + self.source_path = value + + @property + def grammar_source(self): + warn("Attribute Lark.grammar_source was renamed to Lark.source_grammar", DeprecationWarning) + return self.source_grammar + + @grammar_source.setter + def grammar_source(self, value): + self.source_grammar = value + + + +class DedentError(LarkError): + pass + +class Indenter(PostLex): + def __init__(self): + self.paren_level = None + self.indent_level = None + assert self.tab_len > 0 + + def handle_NL(self, token): + if self.paren_level > 0: + return + + yield token + + indent_str = token.rsplit('\n', 1)[1] ## + + indent = indent_str.count(' ') + indent_str.count('\t') * self.tab_len + + if indent > self.indent_level[-1]: + self.indent_level.append(indent) + yield Token.new_borrow_pos(self.INDENT_type, indent_str, token) + else: + while indent < self.indent_level[-1]: + self.indent_level.pop() + yield Token.new_borrow_pos(self.DEDENT_type, indent_str, token) + + if indent != self.indent_level[-1]: + raise DedentError(f'Unexpected 
dedent to column {indent}. Expected dedent to {self.indent_level[-1]}') + + def _process(self, stream): + for token in stream: + if token.type == self.NL_type: + yield from self.handle_NL(token) + else: + yield token + + if token.type in self.OPEN_PAREN_types: + self.paren_level += 1 + elif token.type in self.CLOSE_PAREN_types: + self.paren_level -= 1 + assert self.paren_level >= 0 + + while len(self.indent_level) > 1: + self.indent_level.pop() + yield Token(self.DEDENT_type, '') + + assert self.indent_level == [0], self.indent_level + + def process(self, stream): + self.paren_level = 0 + self.indent_level = [0] + return self._process(stream) + + ## + + @property + def always_accept(self): + return (self.NL_type,) + + +import pickle, zlib, base64 +DATA = ( +{'parser': {'lexer_conf': {'terminals': [{'@': 0}, {'@': 1}, {'@': 2}, {'@': 3}, {'@': 4}, {'@': 5}, {'@': 6}, {'@': 7}, {'@': 8}, {'@': 9}, {'@': 10}, {'@': 11}, {'@': 12}, {'@': 13}, {'@': 14}, {'@': 15}, {'@': 16}, {'@': 17}, {'@': 18}, {'@': 19}, {'@': 20}, {'@': 21}, {'@': 22}, {'@': 23}, {'@': 24}, {'@': 25}, {'@': 26}, {'@': 27}, {'@': 28}, {'@': 29}, {'@': 30}, {'@': 31}, {'@': 32}, {'@': 33}, {'@': 34}], 'ignore': ['WS'], 'g_regex_flags': 0, 'use_bytes': False, 'lexer_type': 'contextual', '__type__': 'LexerConf'}, 'parser_conf': {'rules': [{'@': 35}, {'@': 36}, {'@': 37}, {'@': 38}, {'@': 39}, {'@': 40}, {'@': 41}, {'@': 42}, {'@': 43}, {'@': 44}, {'@': 45}, {'@': 46}, {'@': 47}, {'@': 48}, {'@': 49}, {'@': 50}, {'@': 51}, {'@': 52}, {'@': 53}, {'@': 54}, {'@': 55}, {'@': 56}, {'@': 57}, {'@': 58}, {'@': 59}, {'@': 60}, {'@': 61}, {'@': 62}, {'@': 63}, {'@': 64}, {'@': 65}, {'@': 66}, {'@': 67}, {'@': 68}, {'@': 69}, {'@': 70}, {'@': 71}, {'@': 72}, {'@': 73}, {'@': 74}, {'@': 75}, {'@': 76}, {'@': 77}, {'@': 78}, {'@': 79}, {'@': 80}, {'@': 81}, {'@': 82}, {'@': 83}, {'@': 84}, {'@': 85}, {'@': 86}, {'@': 87}, {'@': 88}, {'@': 89}, {'@': 90}, {'@': 91}, {'@': 92}, {'@': 93}, {'@': 94}, {'@': 95}, {'@': 96}, {'@': 97}, {'@': 98}, {'@': 99}, {'@': 100}, {'@': 101}, {'@': 102}, {'@': 103}, {'@': 104}, {'@': 105}, {'@': 106}, {'@': 107}, {'@': 108}, {'@': 109}, {'@': 110}, {'@': 111}, {'@': 112}, {'@': 113}, {'@': 114}, {'@': 115}, {'@': 116}, {'@': 117}, {'@': 118}, {'@': 119}, {'@': 120}, {'@': 121}, {'@': 122}, {'@': 123}, {'@': 124}, {'@': 125}, {'@': 126}, {'@': 127}, {'@': 128}, {'@': 129}, {'@': 130}], 'start': ['start'], 'parser_type': 'lalr', '__type__': 'ParserConf'}, 'parser': {'tokens': {0: 'RSQB', 1: 'LSQB', 2: 'COMMA', 3: 'RBRACE', 4: '$END', 5: 'RPAR', 6: 'INT', 7: 'key', 8: 'ESCAPED_STRING', 9: 'CNAME', 10: 'pair', 11: 'LBRACE', 12: 'json_object', 13: 'EQUAL', 14: 'PARAMETERS', 15: 'OPTION', 16: 'tuple', 17: 'numpytype_name', 18: 'type', 19: 'tuple_parameters', 20: 'TUPLE', 21: 'named_pair', 22: 'list_parameters', 23: 'option1', 24: 'BYTE', 25: 'union', 26: 'regulartype', 27: 'unknowntype', 28: 'option2', 29: 'named_pairs', 30: 'varlen_string', 31: 'UNION', 32: 'char', 33: 'VAR', 34: 'record_parameters', 35: 'STRING', 36: 'CATEGORICAL', 37: 'record', 38: 'DTYPE', 39: 'STRUCT', 40: 'categorical', 41: 'UNKNOWN', 42: 'named0', 43: 'named', 44: 'numpytype', 45: 'named_key', 46: 'CHAR', 47: 'LPAR', 48: 'TIMEDELTA64', 49: 'named_types', 50: 'byte', 51: 'listtype', 52: 'DATETIME64', 53: 'BYTES', 54: 'fixedlen_string', 55: 'QMARK', 56: 'types', 57: 'pairs', 58: '__pairs_star_1', 59: 'json', 60: 'json_array', 61: 'FALSE', 62: 'TRUE', 63: 'NULL', 64: 'SIGNED_NUMBER', 65: 'COLON', 66: '__types_star_0', 67: 'json_pair', 68: 
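A sketch of configuring the Indenter post-lexer above: it is abstract, so a subclass must supply the terminal names and tab width it consults (in the regular package it lives at lark.indenter; the terminal names below are the conventional ones and must match your grammar).

# Illustrative sketch of an Indenter subclass (terminal names assumed).
from lark.indenter import Indenter

class PythonishIndenter(Indenter):
    NL_type = "_NEWLINE"
    OPEN_PAREN_types = ["LPAR", "LSQB", "LBRACE"]
    CLOSE_PAREN_types = ["RPAR", "RSQB", "RBRACE"]
    INDENT_type = "_INDENT"
    DEDENT_type = "_DEDENT"
    tab_len = 8

# usage: lark.Lark(grammar, parser="lalr", postlex=PythonishIndenter())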
'STAR', 69: '__json_object_star_3', 70: '__json_array_star_2', 71: 'start', 72: 'TYPE'}, 'states': {0: {0: (1, {'@': 57}), 1: (1, {'@': 57}), 2: (1, {'@': 57}), 3: (1, {'@': 57}), 4: (1, {'@': 57}), 5: (1, {'@': 57})}, 1: {0: (1, {'@': 106}), 2: (1, {'@': 106}), 3: (1, {'@': 106}), 4: (1, {'@': 106}), 5: (1, {'@': 106})}, 2: {0: (0, 165)}, 3: {1: (0, 160)}, 4: {6: (0, 72)}, 5: {7: (0, 52), 8: (0, 46), 9: (0, 47), 10: (0, 42)}, 6: {0: (0, 91)}, 7: {11: (0, 189), 12: (0, 63)}, 8: {11: (0, 189), 12: (0, 59)}, 9: {0: (1, {'@': 119}), 2: (1, {'@': 119}), 3: (1, {'@': 119})}, 10: {11: (0, 189), 12: (0, 65)}, 11: {3: (0, 9), 2: (0, 89)}, 12: {0: (1, {'@': 44}), 2: (1, {'@': 44}), 3: (1, {'@': 44}), 4: (1, {'@': 44}), 5: (1, {'@': 44})}, 13: {13: (0, 138)}, 14: {1: (0, 69), 0: (1, {'@': 61}), 2: (1, {'@': 61}), 3: (1, {'@': 61}), 4: (1, {'@': 61}), 5: (1, {'@': 61})}, 15: {13: (0, 119)}, 16: {1: (0, 164), 14: (0, 182), 15: (0, 140), 16: (0, 147), 17: (0, 197), 18: (0, 177), 19: (0, 174), 20: (0, 3), 21: (0, 201), 0: (0, 114), 22: (0, 22), 23: (0, 12), 9: (0, 123), 24: (0, 145), 25: (0, 116), 26: (0, 96), 27: (0, 200), 28: (0, 137), 29: (0, 112), 30: (0, 153), 31: (0, 120), 32: (0, 146), 11: (0, 32), 33: (0, 64), 34: (0, 186), 35: (0, 51), 36: (0, 143), 37: (0, 166), 38: (0, 0), 39: (0, 45), 40: (0, 40), 6: (0, 75), 41: (0, 14), 42: (0, 55), 43: (0, 128), 44: (0, 131), 45: (0, 98), 46: (0, 107), 47: (0, 181), 48: (0, 159), 49: (0, 104), 50: (0, 133), 51: (0, 198), 8: (0, 126), 52: (0, 141), 53: (0, 180), 54: (0, 167), 55: (0, 191)}, 17: {21: (0, 201), 14: (0, 15), 8: (0, 126), 45: (0, 98), 29: (0, 31), 9: (0, 93)}, 18: {3: (1, {'@': 122}), 2: (1, {'@': 122})}, 19: {1: (0, 164), 15: (0, 140), 36: (0, 143), 18: (0, 60), 16: (0, 147), 17: (0, 197), 37: (0, 166), 19: (0, 174), 20: (0, 3), 38: (0, 0), 39: (0, 45), 56: (0, 155), 33: (0, 64), 40: (0, 40), 0: (0, 130), 6: (0, 75), 9: (0, 29), 35: (0, 51), 22: (0, 22), 23: (0, 12), 41: (0, 14), 11: (0, 32), 42: (0, 55), 24: (0, 145), 43: (0, 128), 25: (0, 116), 26: (0, 96), 44: (0, 131), 46: (0, 107), 27: (0, 200), 47: (0, 181), 48: (0, 159), 50: (0, 133), 51: (0, 198), 28: (0, 137), 30: (0, 153), 31: (0, 120), 32: (0, 146), 52: (0, 141), 53: (0, 180), 54: (0, 167), 55: (0, 191), 34: (0, 186)}, 20: {0: (1, {'@': 112}), 2: (1, {'@': 112}), 3: (1, {'@': 112})}, 21: {0: (1, {'@': 90}), 2: (1, {'@': 90}), 3: (1, {'@': 90}), 4: (1, {'@': 90}), 5: (1, {'@': 90})}, 22: {0: (1, {'@': 53}), 2: (1, {'@': 53}), 3: (1, {'@': 53}), 4: (1, {'@': 53}), 5: (1, {'@': 53})}, 23: {0: (1, {'@': 74}), 2: (1, {'@': 74}), 3: (1, {'@': 74}), 4: (1, {'@': 74}), 5: (1, {'@': 74})}, 24: {14: (0, 179)}, 25: {0: (1, {'@': 109}), 2: (1, {'@': 109}), 3: (1, {'@': 109})}, 26: {0: (1, {'@': 118}), 2: (1, {'@': 118}), 3: (1, {'@': 118})}, 27: {0: (1, {'@': 111}), 2: (1, {'@': 111}), 3: (1, {'@': 111})}, 28: {0: (1, {'@': 128}), 2: (1, {'@': 128})}, 29: {1: (0, 16)}, 30: {3: (0, 170), 7: (0, 52), 57: (0, 193), 10: (0, 33), 8: (0, 46), 9: (0, 47)}, 31: {0: (1, {'@': 99})}, 32: {7: (0, 52), 10: (0, 33), 8: (0, 46), 9: (0, 47), 3: (0, 58), 57: (0, 76)}, 33: {58: (0, 157), 2: (0, 149), 3: (1, {'@': 84})}, 34: {0: (1, {'@': 96})}, 35: {59: (0, 80), 12: (0, 44), 60: (0, 199), 1: (0, 35), 11: (0, 189), 0: (0, 26), 61: (0, 20), 62: (0, 27), 8: (0, 25), 63: (0, 136), 64: (0, 127)}, 36: {0: (0, 152)}, 37: {0: (0, 122)}, 38: {0: (0, 50), 2: (0, 77)}, 39: {0: (1, {'@': 77}), 2: (1, {'@': 77}), 3: (1, {'@': 77}), 4: (1, {'@': 77}), 5: (1, {'@': 77})}, 40: {0: (1, {'@': 54}), 2: (1, {'@': 54}), 3: (1, 
{'@': 54}), 4: (1, {'@': 54}), 5: (1, {'@': 54})}, 41: {0: (1, {'@': 102}), 2: (1, {'@': 102})}, 42: {3: (1, {'@': 126}), 2: (1, {'@': 126})}, 43: {61: (0, 20), 11: (0, 189), 62: (0, 27), 8: (0, 25), 12: (0, 44), 1: (0, 35), 60: (0, 199), 63: (0, 136), 64: (0, 127), 59: (0, 90)}, 44: {0: (1, {'@': 115}), 2: (1, {'@': 115}), 3: (1, {'@': 115})}, 45: {1: (0, 113)}, 46: {65: (1, {'@': 86})}, 47: {65: (1, {'@': 87})}, 48: {1: (0, 164), 15: (0, 140), 36: (0, 143), 16: (0, 147), 17: (0, 197), 37: (0, 166), 19: (0, 174), 20: (0, 3), 38: (0, 0), 39: (0, 45), 40: (0, 40), 9: (0, 29), 6: (0, 75), 35: (0, 51), 22: (0, 22), 23: (0, 12), 41: (0, 14), 11: (0, 32), 42: (0, 55), 24: (0, 145), 43: (0, 128), 44: (0, 131), 26: (0, 96), 25: (0, 116), 46: (0, 107), 27: (0, 200), 47: (0, 181), 48: (0, 159), 50: (0, 133), 51: (0, 198), 28: (0, 137), 30: (0, 153), 31: (0, 120), 34: (0, 186), 18: (0, 6), 32: (0, 146), 52: (0, 141), 53: (0, 180), 54: (0, 167), 55: (0, 191), 33: (0, 64)}, 49: {0: (1, {'@': 120}), 2: (1, {'@': 120}), 3: (1, {'@': 120})}, 50: {0: (1, {'@': 116}), 2: (1, {'@': 116}), 3: (1, {'@': 116})}, 51: {1: (0, 129), 0: (1, {'@': 64}), 2: (1, {'@': 64}), 3: (1, {'@': 64}), 4: (1, {'@': 64}), 5: (1, {'@': 64})}, 52: {65: (0, 74)}, 53: {13: (0, 121)}, 54: {0: (0, 97)}, 55: {0: (1, {'@': 50}), 2: (1, {'@': 50}), 3: (1, {'@': 50}), 4: (1, {'@': 50}), 5: (1, {'@': 50})}, 56: {11: (0, 189), 12: (0, 139)}, 57: {0: (0, 190), 2: (0, 168)}, 58: {0: (1, {'@': 82}), 2: (1, {'@': 82}), 3: (1, {'@': 82}), 4: (1, {'@': 82}), 5: (1, {'@': 82})}, 59: {0: (0, 21)}, 60: {66: (0, 106), 2: (0, 108), 0: (1, {'@': 76}), 5: (1, {'@': 76})}, 61: {0: (1, {'@': 95}), 2: (1, {'@': 95}), 3: (1, {'@': 95}), 4: (1, {'@': 95}), 5: (1, {'@': 95})}, 62: {8: (0, 163), 67: (0, 71)}, 63: {0: (0, 135)}, 64: {68: (0, 148)}, 65: {0: (0, 82)}, 66: {0: (1, {'@': 100})}, 67: {0: (1, {'@': 88}), 2: (1, {'@': 88}), 3: (1, {'@': 88}), 4: (1, {'@': 88}), 5: (1, {'@': 88})}, 68: {0: (0, 1), 1: (0, 164), 15: (0, 140), 36: (0, 143), 16: (0, 147), 17: (0, 197), 37: (0, 166), 18: (0, 177), 19: (0, 174), 20: (0, 3), 39: (0, 45), 49: (0, 54), 40: (0, 40), 6: (0, 75), 9: (0, 29), 38: (0, 0), 35: (0, 51), 22: (0, 22), 23: (0, 12), 41: (0, 14), 11: (0, 32), 42: (0, 55), 24: (0, 145), 43: (0, 128), 44: (0, 131), 26: (0, 96), 25: (0, 116), 46: (0, 107), 27: (0, 200), 47: (0, 181), 48: (0, 159), 50: (0, 133), 51: (0, 198), 28: (0, 137), 30: (0, 153), 31: (0, 120), 34: (0, 186), 32: (0, 146), 52: (0, 141), 53: (0, 180), 54: (0, 167), 55: (0, 191), 33: (0, 64)}, 69: {14: (0, 87)}, 70: {0: (1, {'@': 72}), 2: (1, {'@': 72}), 3: (1, {'@': 72}), 4: (1, {'@': 72}), 5: (1, {'@': 72})}, 71: {3: (1, {'@': 129}), 2: (1, {'@': 129})}, 72: {0: (0, 103)}, 73: {0: (1, {'@': 97})}, 74: {1: (0, 164), 15: (0, 140), 36: (0, 143), 16: (0, 147), 17: (0, 197), 37: (0, 166), 19: (0, 174), 20: (0, 3), 38: (0, 0), 39: (0, 45), 40: (0, 40), 9: (0, 29), 6: (0, 75), 35: (0, 51), 22: (0, 22), 23: (0, 12), 41: (0, 14), 11: (0, 32), 42: (0, 55), 24: (0, 145), 43: (0, 128), 44: (0, 131), 26: (0, 96), 25: (0, 116), 46: (0, 107), 27: (0, 200), 47: (0, 181), 48: (0, 159), 50: (0, 133), 18: (0, 102), 51: (0, 198), 28: (0, 137), 30: (0, 153), 31: (0, 120), 34: (0, 186), 32: (0, 146), 52: (0, 141), 53: (0, 180), 54: (0, 167), 55: (0, 191), 33: (0, 64)}, 75: {68: (0, 175)}, 76: {3: (0, 132)}, 77: {61: (0, 20), 11: (0, 189), 62: (0, 27), 8: (0, 25), 59: (0, 28), 12: (0, 44), 1: (0, 35), 60: (0, 199), 63: (0, 136), 64: (0, 127)}, 78: {0: (1, {'@': 117}), 2: (1, {'@': 117}), 3: (1, {'@': 117})}, 
79: {69: (0, 11), 3: (0, 49), 2: (0, 62)}, 80: {70: (0, 38), 0: (0, 78), 2: (0, 43)}, 81: {0: (1, {'@': 55}), 2: (1, {'@': 55}), 3: (1, {'@': 55}), 4: (1, {'@': 55}), 5: (1, {'@': 55})}, 82: {0: (1, {'@': 71}), 2: (1, {'@': 71}), 3: (1, {'@': 71}), 4: (1, {'@': 71}), 5: (1, {'@': 71})}, 83: {0: (0, 67)}, 84: {0: (0, 70), 2: (0, 24)}, 85: {0: (0, 171)}, 86: {0: (0, 39)}, 87: {13: (0, 173)}, 88: {0: (1, {'@': 91}), 2: (1, {'@': 91}), 3: (1, {'@': 91}), 4: (1, {'@': 91}), 5: (1, {'@': 91})}, 89: {8: (0, 163), 67: (0, 162)}, 90: {0: (1, {'@': 127}), 2: (1, {'@': 127})}, 91: {0: (1, {'@': 108}), 2: (1, {'@': 108}), 3: (1, {'@': 108}), 4: (1, {'@': 108}), 5: (1, {'@': 108})}, 92: {13: (0, 183)}, 93: {65: (1, {'@': 104})}, 94: {1: (0, 164), 15: (0, 140), 36: (0, 143), 16: (0, 147), 17: (0, 197), 37: (0, 166), 18: (0, 177), 19: (0, 174), 20: (0, 3), 39: (0, 45), 40: (0, 40), 9: (0, 29), 6: (0, 75), 38: (0, 0), 35: (0, 51), 22: (0, 22), 23: (0, 12), 41: (0, 14), 11: (0, 32), 42: (0, 55), 24: (0, 145), 43: (0, 128), 44: (0, 131), 26: (0, 96), 25: (0, 116), 46: (0, 107), 27: (0, 200), 47: (0, 181), 49: (0, 34), 48: (0, 159), 50: (0, 133), 14: (0, 53), 51: (0, 198), 28: (0, 137), 30: (0, 153), 31: (0, 120), 34: (0, 186), 32: (0, 146), 52: (0, 141), 53: (0, 180), 54: (0, 167), 55: (0, 191), 33: (0, 64)}, 95: {14: (0, 184)}, 96: {0: (1, {'@': 38}), 2: (1, {'@': 38}), 3: (1, {'@': 38}), 4: (1, {'@': 38}), 5: (1, {'@': 38})}, 97: {0: (1, {'@': 105}), 2: (1, {'@': 105}), 3: (1, {'@': 105}), 4: (1, {'@': 105}), 5: (1, {'@': 105})}, 98: {65: (0, 150)}, 99: {0: (1, {'@': 121}), 2: (1, {'@': 121}), 3: (1, {'@': 121})}, 100: {71: (0, 169), 1: (0, 164), 15: (0, 140), 36: (0, 143), 16: (0, 147), 17: (0, 197), 37: (0, 166), 19: (0, 174), 20: (0, 3), 38: (0, 0), 39: (0, 45), 40: (0, 40), 33: (0, 64), 6: (0, 75), 9: (0, 29), 35: (0, 51), 22: (0, 22), 23: (0, 12), 41: (0, 14), 11: (0, 32), 42: (0, 55), 24: (0, 145), 43: (0, 128), 44: (0, 131), 26: (0, 96), 25: (0, 116), 46: (0, 107), 27: (0, 200), 47: (0, 181), 48: (0, 159), 50: (0, 133), 51: (0, 198), 28: (0, 137), 30: (0, 153), 18: (0, 101), 31: (0, 120), 32: (0, 146), 52: (0, 141), 53: (0, 180), 54: (0, 167), 55: (0, 191), 34: (0, 186)}, 101: {4: (1, {'@': 35})}, 102: {3: (1, {'@': 85}), 2: (1, {'@': 85})}, 103: {0: (1, {'@': 67}), 2: (1, {'@': 67}), 3: (1, {'@': 67}), 4: (1, {'@': 67}), 5: (1, {'@': 67})}, 104: {0: (0, 178)}, 105: {14: (0, 194)}, 106: {2: (0, 115), 0: (1, {'@': 75}), 5: (1, {'@': 75})}, 107: {0: (1, {'@': 68}), 2: (1, {'@': 68}), 3: (1, {'@': 68}), 4: (1, {'@': 68}), 5: (1, {'@': 68})}, 108: {1: (0, 164), 15: (0, 140), 36: (0, 143), 16: (0, 147), 17: (0, 197), 37: (0, 166), 19: (0, 174), 20: (0, 3), 38: (0, 0), 39: (0, 45), 40: (0, 40), 9: (0, 29), 6: (0, 75), 35: (0, 51), 22: (0, 22), 23: (0, 12), 41: (0, 14), 11: (0, 32), 42: (0, 55), 24: (0, 145), 43: (0, 128), 44: (0, 131), 26: (0, 96), 25: (0, 116), 46: (0, 107), 27: (0, 200), 47: (0, 181), 48: (0, 159), 50: (0, 133), 51: (0, 198), 28: (0, 137), 30: (0, 153), 18: (0, 187), 31: (0, 120), 34: (0, 186), 32: (0, 146), 52: (0, 141), 53: (0, 180), 54: (0, 167), 55: (0, 191), 33: (0, 64)}, 109: {3: (1, {'@': 125}), 2: (1, {'@': 125})}, 110: {2: (0, 172)}, 111: {5: (0, 117)}, 112: {0: (0, 61)}, 113: {11: (0, 30)}, 114: {0: (1, {'@': 93}), 2: (1, {'@': 93}), 3: (1, {'@': 93}), 4: (1, {'@': 93}), 5: (1, {'@': 93})}, 115: {1: (0, 164), 15: (0, 140), 36: (0, 143), 18: (0, 134), 16: (0, 147), 17: (0, 197), 37: (0, 166), 19: (0, 174), 20: (0, 3), 38: (0, 0), 39: (0, 45), 40: (0, 40), 9: (0, 29), 6: (0, 
75), 35: (0, 51), 22: (0, 22), 23: (0, 12), 41: (0, 14), 11: (0, 32), 42: (0, 55), 24: (0, 145), 43: (0, 128), 44: (0, 131), 26: (0, 96), 25: (0, 116), 46: (0, 107), 27: (0, 200), 47: (0, 181), 48: (0, 159), 50: (0, 133), 51: (0, 198), 28: (0, 137), 30: (0, 153), 31: (0, 120), 34: (0, 186), 32: (0, 146), 52: (0, 141), 53: (0, 180), 54: (0, 167), 55: (0, 191), 33: (0, 64)}, 116: {0: (1, {'@': 52}), 2: (1, {'@': 52}), 3: (1, {'@': 52}), 4: (1, {'@': 52}), 5: (1, {'@': 52})}, 117: {0: (1, {'@': 73}), 2: (1, {'@': 73}), 3: (1, {'@': 73}), 4: (1, {'@': 73}), 5: (1, {'@': 73})}, 118: {2: (0, 158), 0: (0, 142)}, 119: {12: (0, 66), 11: (0, 189)}, 120: {1: (0, 68)}, 121: {11: (0, 189), 12: (0, 73)}, 122: {0: (1, {'@': 107}), 2: (1, {'@': 107}), 3: (1, {'@': 107}), 4: (1, {'@': 107}), 5: (1, {'@': 107})}, 123: {1: (0, 16), 65: (1, {'@': 104})}, 124: {13: (0, 156)}, 125: {1: (0, 164), 15: (0, 140), 36: (0, 143), 16: (0, 147), 17: (0, 197), 37: (0, 166), 19: (0, 174), 20: (0, 3), 38: (0, 0), 39: (0, 45), 40: (0, 40), 33: (0, 64), 9: (0, 29), 6: (0, 75), 35: (0, 51), 22: (0, 22), 23: (0, 12), 41: (0, 14), 11: (0, 32), 42: (0, 55), 24: (0, 145), 43: (0, 128), 25: (0, 116), 26: (0, 96), 44: (0, 131), 18: (0, 84), 46: (0, 107), 27: (0, 200), 47: (0, 181), 48: (0, 159), 50: (0, 133), 51: (0, 198), 28: (0, 137), 30: (0, 153), 31: (0, 120), 32: (0, 146), 52: (0, 141), 53: (0, 180), 54: (0, 167), 55: (0, 191), 34: (0, 186)}, 126: {65: (1, {'@': 103})}, 127: {0: (1, {'@': 110}), 2: (1, {'@': 110}), 3: (1, {'@': 110})}, 128: {0: (1, {'@': 51}), 2: (1, {'@': 51}), 3: (1, {'@': 51}), 4: (1, {'@': 51}), 5: (1, {'@': 51})}, 129: {6: (0, 85)}, 130: {0: (0, 196), 2: (0, 105)}, 131: {0: (1, {'@': 36}), 2: (1, {'@': 36}), 3: (1, {'@': 36}), 4: (1, {'@': 36}), 5: (1, {'@': 36})}, 132: {0: (1, {'@': 81}), 2: (1, {'@': 81}), 3: (1, {'@': 81}), 4: (1, {'@': 81}), 5: (1, {'@': 81})}, 133: {0: (1, {'@': 43}), 2: (1, {'@': 43}), 3: (1, {'@': 43}), 4: (1, {'@': 43}), 5: (1, {'@': 43})}, 134: {0: (1, {'@': 124}), 5: (1, {'@': 124}), 2: (1, {'@': 124})}, 135: {0: (1, {'@': 79}), 2: (1, {'@': 79}), 3: (1, {'@': 79}), 4: (1, {'@': 79}), 5: (1, {'@': 79})}, 136: {0: (1, {'@': 113}), 2: (1, {'@': 113}), 3: (1, {'@': 113})}, 137: {0: (1, {'@': 45}), 2: (1, {'@': 45}), 3: (1, {'@': 45}), 4: (1, {'@': 45}), 5: (1, {'@': 45})}, 138: {11: (0, 189), 12: (0, 83)}, 139: {0: (0, 81)}, 140: {1: (0, 125)}, 141: {0: (1, {'@': 58}), 1: (1, {'@': 58}), 2: (1, {'@': 58}), 3: (1, {'@': 58}), 4: (1, {'@': 58}), 5: (1, {'@': 58})}, 142: {0: (1, {'@': 78}), 2: (1, {'@': 78}), 3: (1, {'@': 78}), 4: (1, {'@': 78}), 5: (1, {'@': 78})}, 143: {1: (0, 192)}, 144: {13: (0, 56)}, 145: {0: (1, {'@': 69}), 2: (1, {'@': 69}), 3: (1, {'@': 69}), 4: (1, {'@': 69}), 5: (1, {'@': 69})}, 146: {0: (1, {'@': 42}), 2: (1, {'@': 42}), 3: (1, {'@': 42}), 4: (1, {'@': 42}), 5: (1, {'@': 42})}, 147: {0: (1, {'@': 46}), 2: (1, {'@': 46}), 3: (1, {'@': 46}), 4: (1, {'@': 46}), 5: (1, {'@': 46})}, 148: {1: (0, 164), 15: (0, 140), 36: (0, 143), 16: (0, 147), 17: (0, 197), 37: (0, 166), 19: (0, 174), 20: (0, 3), 38: (0, 0), 39: (0, 45), 40: (0, 40), 9: (0, 29), 6: (0, 75), 35: (0, 51), 22: (0, 22), 23: (0, 12), 41: (0, 14), 18: (0, 161), 11: (0, 32), 42: (0, 55), 24: (0, 145), 43: (0, 128), 44: (0, 131), 26: (0, 96), 25: (0, 116), 46: (0, 107), 27: (0, 200), 47: (0, 181), 48: (0, 159), 50: (0, 133), 51: (0, 198), 28: (0, 137), 30: (0, 153), 31: (0, 120), 34: (0, 186), 32: (0, 146), 52: (0, 141), 53: (0, 180), 54: (0, 167), 55: (0, 191), 33: (0, 64)}, 149: {10: (0, 109), 8: (0, 
46), 7: (0, 52), 9: (0, 47)}, 150: {1: (0, 164), 15: (0, 140), 36: (0, 143), 16: (0, 147), 17: (0, 197), 37: (0, 166), 19: (0, 174), 20: (0, 3), 38: (0, 0), 39: (0, 45), 40: (0, 40), 9: (0, 29), 6: (0, 75), 35: (0, 51), 22: (0, 22), 23: (0, 12), 41: (0, 14), 11: (0, 32), 42: (0, 55), 24: (0, 145), 43: (0, 128), 44: (0, 131), 26: (0, 96), 25: (0, 116), 46: (0, 107), 27: (0, 200), 47: (0, 181), 48: (0, 159), 50: (0, 133), 51: (0, 198), 28: (0, 137), 30: (0, 153), 31: (0, 120), 34: (0, 186), 18: (0, 41), 32: (0, 146), 52: (0, 141), 53: (0, 180), 54: (0, 167), 55: (0, 191), 33: (0, 64)}, 151: {13: (0, 48)}, 152: {0: (1, {'@': 92}), 2: (1, {'@': 92}), 3: (1, {'@': 92}), 4: (1, {'@': 92}), 5: (1, {'@': 92})}, 153: {0: (1, {'@': 40}), 2: (1, {'@': 40}), 3: (1, {'@': 40}), 4: (1, {'@': 40}), 5: (1, {'@': 40})}, 154: {11: (0, 189), 12: (0, 36)}, 155: {0: (0, 118)}, 156: {11: (0, 189), 12: (0, 86)}, 157: {2: (0, 5), 3: (1, {'@': 83})}, 158: {14: (0, 124)}, 159: {0: (1, {'@': 59}), 1: (1, {'@': 59}), 2: (1, {'@': 59}), 3: (1, {'@': 59}), 4: (1, {'@': 59}), 5: (1, {'@': 59})}, 160: {1: (0, 19)}, 161: {0: (1, {'@': 63}), 2: (1, {'@': 63}), 3: (1, {'@': 63}), 4: (1, {'@': 63}), 5: (1, {'@': 63})}, 162: {3: (1, {'@': 130}), 2: (1, {'@': 130})}, 163: {65: (0, 176)}, 164: {1: (0, 164), 15: (0, 140), 36: (0, 143), 16: (0, 147), 17: (0, 197), 37: (0, 166), 19: (0, 174), 20: (0, 3), 38: (0, 0), 39: (0, 45), 40: (0, 40), 9: (0, 29), 6: (0, 75), 35: (0, 51), 22: (0, 22), 23: (0, 12), 41: (0, 14), 11: (0, 32), 42: (0, 55), 24: (0, 145), 43: (0, 128), 44: (0, 131), 26: (0, 96), 25: (0, 116), 46: (0, 107), 27: (0, 200), 47: (0, 181), 48: (0, 159), 50: (0, 133), 51: (0, 198), 28: (0, 137), 30: (0, 153), 31: (0, 120), 34: (0, 186), 18: (0, 110), 32: (0, 146), 52: (0, 141), 53: (0, 180), 54: (0, 167), 55: (0, 191), 33: (0, 64)}, 165: {0: (1, {'@': 60}), 2: (1, {'@': 60}), 3: (1, {'@': 60}), 4: (1, {'@': 60}), 5: (1, {'@': 60})}, 166: {0: (1, {'@': 48}), 2: (1, {'@': 48}), 3: (1, {'@': 48}), 4: (1, {'@': 48}), 5: (1, {'@': 48})}, 167: {0: (1, {'@': 41}), 2: (1, {'@': 41}), 3: (1, {'@': 41}), 4: (1, {'@': 41}), 5: (1, {'@': 41})}, 168: {14: (0, 13)}, 169: {}, 170: {2: (0, 95), 0: (0, 88)}, 171: {0: (1, {'@': 66}), 2: (1, {'@': 66}), 3: (1, {'@': 66}), 4: (1, {'@': 66}), 5: (1, {'@': 66})}, 172: {14: (0, 92)}, 173: {12: (0, 2), 11: (0, 189)}, 174: {0: (1, {'@': 47}), 2: (1, {'@': 47}), 3: (1, {'@': 47}), 4: (1, {'@': 47}), 5: (1, {'@': 47})}, 175: {1: (0, 164), 15: (0, 140), 36: (0, 143), 16: (0, 147), 17: (0, 197), 37: (0, 166), 19: (0, 174), 20: (0, 3), 38: (0, 0), 39: (0, 45), 40: (0, 40), 9: (0, 29), 6: (0, 75), 35: (0, 51), 22: (0, 22), 23: (0, 12), 41: (0, 14), 11: (0, 32), 42: (0, 55), 24: (0, 145), 43: (0, 128), 44: (0, 131), 26: (0, 96), 25: (0, 116), 46: (0, 107), 27: (0, 200), 47: (0, 181), 48: (0, 159), 18: (0, 185), 50: (0, 133), 51: (0, 198), 28: (0, 137), 30: (0, 153), 31: (0, 120), 34: (0, 186), 32: (0, 146), 52: (0, 141), 53: (0, 180), 54: (0, 167), 55: (0, 191), 33: (0, 64)}, 176: {59: (0, 18), 61: (0, 20), 11: (0, 189), 62: (0, 27), 8: (0, 25), 12: (0, 44), 1: (0, 35), 60: (0, 199), 63: (0, 136), 64: (0, 127)}, 177: {2: (0, 94), 0: (1, {'@': 98})}, 178: {0: (1, {'@': 94}), 2: (1, {'@': 94}), 3: (1, {'@': 94}), 4: (1, {'@': 94}), 5: (1, {'@': 94})}, 179: {13: (0, 10)}, 180: {1: (0, 4), 0: (1, {'@': 65}), 2: (1, {'@': 65}), 3: (1, {'@': 65}), 4: (1, {'@': 65}), 5: (1, {'@': 65})}, 181: {1: (0, 164), 15: (0, 140), 36: (0, 143), 18: (0, 60), 16: (0, 147), 17: (0, 197), 37: (0, 166), 19: (0, 174), 20: (0, 
3), 5: (0, 23), 38: (0, 0), 39: (0, 45), 40: (0, 40), 6: (0, 75), 9: (0, 29), 35: (0, 51), 22: (0, 22), 23: (0, 12), 41: (0, 14), 11: (0, 32), 42: (0, 55), 24: (0, 145), 43: (0, 128), 44: (0, 131), 26: (0, 96), 25: (0, 116), 46: (0, 107), 27: (0, 200), 47: (0, 181), 48: (0, 159), 50: (0, 133), 56: (0, 111), 51: (0, 198), 28: (0, 137), 30: (0, 153), 31: (0, 120), 34: (0, 186), 32: (0, 146), 52: (0, 141), 53: (0, 180), 54: (0, 167), 55: (0, 191), 33: (0, 64)}, 182: {13: (0, 154)}, 183: {11: (0, 189), 12: (0, 37)}, 184: {13: (0, 8)}, 185: {0: (1, {'@': 62}), 2: (1, {'@': 62}), 3: (1, {'@': 62}), 4: (1, {'@': 62}), 5: (1, {'@': 62})}, 186: {0: (1, {'@': 49}), 2: (1, {'@': 49}), 3: (1, {'@': 49}), 4: (1, {'@': 49}), 5: (1, {'@': 49})}, 187: {0: (1, {'@': 123}), 5: (1, {'@': 123}), 2: (1, {'@': 123})}, 188: {0: (1, {'@': 70}), 2: (1, {'@': 70}), 3: (1, {'@': 70}), 4: (1, {'@': 70}), 5: (1, {'@': 70})}, 189: {67: (0, 79), 3: (0, 99), 8: (0, 163)}, 190: {0: (1, {'@': 89}), 2: (1, {'@': 89}), 3: (1, {'@': 89}), 4: (1, {'@': 89}), 5: (1, {'@': 89})}, 191: {1: (0, 164), 15: (0, 140), 36: (0, 143), 16: (0, 147), 17: (0, 197), 37: (0, 166), 19: (0, 174), 20: (0, 3), 38: (0, 0), 39: (0, 45), 40: (0, 40), 9: (0, 29), 6: (0, 75), 35: (0, 51), 22: (0, 22), 23: (0, 12), 41: (0, 14), 11: (0, 32), 42: (0, 55), 24: (0, 145), 43: (0, 128), 44: (0, 131), 26: (0, 96), 25: (0, 116), 46: (0, 107), 27: (0, 200), 47: (0, 181), 48: (0, 159), 50: (0, 133), 51: (0, 198), 18: (0, 188), 28: (0, 137), 30: (0, 153), 31: (0, 120), 34: (0, 186), 32: (0, 146), 52: (0, 141), 53: (0, 180), 54: (0, 167), 55: (0, 191), 33: (0, 64)}, 192: {72: (0, 151)}, 193: {3: (0, 57)}, 194: {13: (0, 7)}, 195: {14: (0, 144)}, 196: {0: (1, {'@': 80}), 2: (1, {'@': 80}), 3: (1, {'@': 80}), 4: (1, {'@': 80}), 5: (1, {'@': 80})}, 197: {1: (0, 195), 0: (1, {'@': 56}), 2: (1, {'@': 56}), 3: (1, {'@': 56}), 4: (1, {'@': 56}), 5: (1, {'@': 56})}, 198: {0: (1, {'@': 39}), 2: (1, {'@': 39}), 3: (1, {'@': 39}), 4: (1, {'@': 39}), 5: (1, {'@': 39})}, 199: {0: (1, {'@': 114}), 2: (1, {'@': 114}), 3: (1, {'@': 114})}, 200: {0: (1, {'@': 37}), 2: (1, {'@': 37}), 3: (1, {'@': 37}), 4: (1, {'@': 37}), 5: (1, {'@': 37})}, 201: {2: (0, 17), 0: (1, {'@': 101})}}, 'start_states': {'start': 100}, 'end_states': {'start': 169}}, 'options': {'debug': False, 'keep_all_tokens': False, 'tree_class': None, 'cache': False, 'postlex': None, 'parser': 'lalr', 'lexer': 'contextual', 'transformer': None, 'start': ['start'], 'priority': 'normal', 'ambiguity': 'auto', 'regex': False, 'propagate_positions': False, 'lexer_callbacks': {}, 'maybe_placeholders': False, 'edit_terminals': None, 'g_regex_flags': 0, 'use_bytes': False, 'import_paths': [], 'source_path': None}, '__type__': 'ParsingFrontend'}, 'rules': [{'@': 35}, {'@': 36}, {'@': 37}, {'@': 38}, {'@': 39}, {'@': 40}, {'@': 41}, {'@': 42}, {'@': 43}, {'@': 44}, {'@': 45}, {'@': 46}, {'@': 47}, {'@': 48}, {'@': 49}, {'@': 50}, {'@': 51}, {'@': 52}, {'@': 53}, {'@': 54}, {'@': 55}, {'@': 56}, {'@': 57}, {'@': 58}, {'@': 59}, {'@': 60}, {'@': 61}, {'@': 62}, {'@': 63}, {'@': 64}, {'@': 65}, {'@': 66}, {'@': 67}, {'@': 68}, {'@': 69}, {'@': 70}, {'@': 71}, {'@': 72}, {'@': 73}, {'@': 74}, {'@': 75}, {'@': 76}, {'@': 77}, {'@': 78}, {'@': 79}, {'@': 80}, {'@': 81}, {'@': 82}, {'@': 83}, {'@': 84}, {'@': 85}, {'@': 86}, {'@': 87}, {'@': 88}, {'@': 89}, {'@': 90}, {'@': 91}, {'@': 92}, {'@': 93}, {'@': 94}, {'@': 95}, {'@': 96}, {'@': 97}, {'@': 98}, {'@': 99}, {'@': 100}, {'@': 101}, {'@': 102}, {'@': 103}, {'@': 104}, {'@': 105}, 
{'@': 106}, {'@': 107}, {'@': 108}, {'@': 109}, {'@': 110}, {'@': 111}, {'@': 112}, {'@': 113}, {'@': 114}, {'@': 115}, {'@': 116}, {'@': 117}, {'@': 118}, {'@': 119}, {'@': 120}, {'@': 121}, {'@': 122}, {'@': 123}, {'@': 124}, {'@': 125}, {'@': 126}, {'@': 127}, {'@': 128}, {'@': 129}, {'@': 130}], 'options': {'debug': False, 'keep_all_tokens': False, 'tree_class': None, 'cache': False, 'postlex': None, 'parser': 'lalr', 'lexer': 'contextual', 'transformer': None, 'start': ['start'], 'priority': 'normal', 'ambiguity': 'auto', 'regex': False, 'propagate_positions': False, 'lexer_callbacks': {}, 'maybe_placeholders': False, 'edit_terminals': None, 'g_regex_flags': 0, 'use_bytes': False, 'import_paths': [], 'source_path': None}, '__type__': 'Lark'} +) +MEMO = ( +{0: {'name': 'INT', 'pattern': {'value': '(?:[0-9])+', 'flags': [], '_width': [1, 4294967295], '__type__': 'PatternRE'}, 'priority': 1, '__type__': 'TerminalDef'}, 1: {'name': 'SIGNED_NUMBER', 'pattern': {'value': '(?:(?:\\+|\\-))?(?:(?:(?:[0-9])+(?:e|E)(?:(?:\\+|\\-))?(?:[0-9])+|(?:(?:[0-9])+\\.(?:(?:[0-9])+)?|\\.(?:[0-9])+)(?:(?:e|E)(?:(?:\\+|\\-))?(?:[0-9])+)?)|(?:[0-9])+)', 'flags': [], '_width': [1, 4294967295], '__type__': 'PatternRE'}, 'priority': 1, '__type__': 'TerminalDef'}, 2: {'name': 'ESCAPED_STRING', 'pattern': {'value': '".*?(? varlen_string + | "bytes" -> varlen_bytestring + +fixedlen_string: ("string" "[" INT "]") -> fixedlen_string + | ("bytes" "[" INT "]") -> fixedlen_bytestring + +char: "char" + +byte: "byte" + +option1: "?" type + +option2: "option" "[" type ("," "parameters" "=" json_object)? "]" + +tuple: "(" types? ")" +types: type ("," type)* + +tuple_parameters: "tuple" "[" "[" types? "]" ("," "parameters" "=" json_object)? "]" + +record: "{" pairs? "}" +pairs: pair ("," pair)* +pair: key ":" type +key: ESCAPED_STRING -> string + | CNAME -> identifier + +record_parameters: "struct" "[" "{" pairs? "}" ("," "parameters" "=" json_object)? "]" + +named0: CNAME "[" ("parameters" "=" json_object)? "]" +named: CNAME "[" (named_types | named_pairs) "]" +named_types: type ("," (named_types | "parameters" "=" json_object))? +named_pairs: named_pair ("," (named_pairs | "parameters" "=" json_object))? +named_pair: named_key ":" type +named_key: ESCAPED_STRING -> string + | CNAME -> identifier + +union: "union" "[" named_types? "]" + +list_parameters: "[" type "," "parameters" "=" json_object "]" + +categorical: "categorical" "[" "type" "=" type "]" + +json: ESCAPED_STRING -> string + | SIGNED_NUMBER -> number + | "true" -> true + | "false" -> false + | "null" -> null + | json_array + | json_object + +json_array: "[" [json ("," json)*] "]" +json_object: "{" [json_pair ("," json_pair)*] "}" +json_pair: ESCAPED_STRING ":" json + +%import common.INT +%import common.CNAME +%import common.ESCAPED_STRING +%import common.SIGNED_NUMBER +%import common.WS + +%ignore WS From 48d09abe531d922de5be748c03ddcfe88e3a3ce2 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Thu, 23 Jun 2022 16:41:18 -0500 Subject: [PATCH 11/12] New v2 type-parser is done. 
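A quick usage sketch of the new entry point (the exact type strings here are
illustrative, but `from_datashape` and its `highlevel` switch are what this
patch adds to src/awkward/_v2/types/type.py and re-exports from
awkward._v2.types, matching the round-trip tests below):

    import awkward as ak

    # highlevel=False: the string describes a layout, so a leading
    # integer parses to a RegularType, and str() round-trips.
    t = ak._v2.types.from_datashape("5 * int64", highlevel=False)
    assert isinstance(t, ak._v2.types.RegularType)
    assert str(t) == "5 * int64"

    # highlevel=True: a leading integer is instead the array length,
    # so the result is an ArrayType (the type of an
    # ak._v2.highlevel.Array); a named record parses to a RecordType.
    a = ak._v2.types.from_datashape("3 * var * int64", highlevel=True)
    assert isinstance(a, ak._v2.types.ArrayType)
    assert str(a) == "3 * var * int64"

    # Type strings that cannot describe a whole array (e.g. starting
    # with "var *" or "?") raise a ValueError when highlevel=True.
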
--- src/awkward/_typeparser/parser.py | 283 ---------------- src/awkward/_v2/types/__init__.py | 4 +- src/awkward/_v2/types/type.py | 259 +++++++++++++++ tests/v2/test_0773-typeparser.py | 528 +++++++++++++++++++++++++----- 4 files changed, 697 insertions(+), 377 deletions(-) diff --git a/src/awkward/_typeparser/parser.py b/src/awkward/_typeparser/parser.py index 4e60d83db5..924a7d024f 100644 --- a/src/awkward/_typeparser/parser.py +++ b/src/awkward/_typeparser/parser.py @@ -316,286 +316,3 @@ def toast_v1(ptnode, highlevel, categorical): def from_datashape_v1(typestr, highlevel=False): parseobj = Lark_StandAlone(transformer=TreeToJson()) return toast_v1(parseobj.parse(typestr), highlevel, False) - - -def toast(ptnode, highlevel, categorical): - if ptnode.__class__.__name__ == "Token": - return ptnode.value - - elif ptnode.data == "start": - return toast(ptnode.children[0], highlevel, categorical) - - elif ptnode.data == "input": - assert len(ptnode.children) == 1 - return toast(ptnode.children[0], highlevel, categorical) - - elif ptnode.data == "predefined_typestr": - if ptnode.children[0] == "string": - parms = {"__array__": "string"} - if categorical: - parms.update({"__categorical__": True}) - categorical = False - return ak._v2.types.ListType( - ak._v2.types.NumpyType( - "uint8", parameters={"__array__": "char"}, typestr="char" - ), - parameters=parms, - typestr="string", - ) - elif ptnode.children[0] == "char": - parms = {"__array__": "char"} - if categorical: - parms.update({"__categorical__": True}) - categorical = False - return ak._v2.types.NumpyType("uint8", parameters=parms, typestr="char") - elif ptnode.children[0] == "byte": - parms = {"__array__": "byte"} - if categorical: - parms.update({"__categorical__": True}) - categorical = False - return ak._v2.types.NumpyType("uint8", parameters=parms, typestr="byte") - elif ptnode.children[0] == "bytes": - parms = {"__array__": "bytestring"} - if categorical: - parms.update({"__categorical__": True}) - categorical = False - return ak._v2.types.ListType( - ak._v2.types.NumpyType( - "uint8", parameters={"__array__": "byte"}, typestr="byte" - ), - parameters=parms, - typestr="bytes", - ) - else: - raise AssertionError(f"unhandled typestring {ptnode.children[0]}") - - elif ptnode.data == "primitive": - if len(ptnode.children) == 1: - parms = {} - if categorical: - parms.update({"__categorical__": True}) - categorical = False - return ak._v2.types.NumpyType( - toast(ptnode.children[0], highlevel, False), parameters=parms - ) - elif len(ptnode.children) == 2: - parms = toast(ptnode.children[1], highlevel, False) - if categorical: - parms.update({"__categorical__": True}) - categorical = False - return ak._v2.types.NumpyType( - toast(ptnode.children[0], highlevel, categorical), - parms, - ) - else: - raise AssertionError("unhandled NumpyType node") - - elif ptnode.data == "categories": - assert highlevel is True - return toast(ptnode.children[0], highlevel, True) - - elif ptnode.data == "unknown": - if len(ptnode.children) == 0: - parms = {} - if categorical: - parms.update({"__categorical__": True}) - categorical = False - return ak._v2.types.UnknownType(parameters=parms) - elif len(ptnode.children) == 1: - parms = toast(ptnode.children[0], highlevel, False) - if categorical: - parms.update({"__categorical__": True}) - categorical = False - return ak._v2.types.UnknownType(parameters=parms) - else: - raise AssertionError("unhandled UnknownType node") - - elif ptnode.data == "listtype": - return toast(ptnode.children[0], highlevel, 
categorical) - - elif ptnode.data == "list_single": - parms = {} - if categorical: - parms.update({"__categorical__": True}) - categorical = False - return ak._v2.types.ListType( - toast(ptnode.children[0], highlevel, False), parameters=parms - ) - - elif ptnode.data == "list_parm": - parms = toast(ptnode.children[1], highlevel, False) - if categorical: - parms.update({"__categorical__": True}) - categorical = False - return ak._v2.types.ListType( - toast(ptnode.children[0], highlevel, categorical), parms - ) - - elif ptnode.data == "uniontype": - return toast(ptnode.children[0], highlevel, categorical) - - elif ptnode.data == "union_single": - parms = {} - if categorical: - parms.update({"__categorical__": True}) - categorical = False - content_list = [] - for node in ptnode.children: - content_list.append(toast(node, highlevel, False)) - return ak._v2.types.UnionType(content_list, parameters=parms) - - elif ptnode.data == "union_parm": - parms = toast(ptnode.children[-1], highlevel, False) - if categorical: - parms.update({"__categorical__": True}) - categorical = False - content_list = [] - for node in ptnode.children[:-1]: - content_list.append(toast(node, highlevel, False)) - return ak._v2.types.UnionType(content_list, parms) - - elif ptnode.data == "optiontype": - return toast(ptnode.children[0], highlevel, categorical) - - elif ptnode.data == "option_single": - parms = {} - if categorical: - parms.update({"__categorical__": True}) - categorical = False - return ak._v2.types.OptionType( - toast(ptnode.children[0], highlevel, False), parameters=parms - ) - - elif ptnode.data == "option_parm": - parms = toast(ptnode.children[1], highlevel, False) - if categorical: - parms.update({"__categorical__": True}) - categorical = False - return ak._v2.types.OptionType( - toast(ptnode.children[0], highlevel, False), - parameters=parms, - ) - - elif ptnode.data == "option_highlevel": - assert highlevel - parms = {} - if categorical: - parms.update({"__categorical__": True}) - categorical = False - return ak._v2.types.OptionType( - toast(ptnode.children[0], highlevel, False), parameters=parms - ) - - elif ptnode.data == "record": - return toast(ptnode.children[0], highlevel, categorical) - - elif ptnode.data == "record_tuple": - parms = {} - if categorical: - parms.update({"__categorical__": True}) - categorical = False - content_list = [] - for node in ptnode.children: - content_list.append(toast(node, highlevel, categorical)) - return ak._v2.types.RecordType(content_list, None, parameters=parms) - - elif ptnode.data == "record_dict": - parms = {} - if categorical: - parms.update({"__categorical__": True}) - categorical = False - content_types = [] - content_keys = [] - for i in range(0, len(ptnode.children), 2): - content_keys.append(ptnode.children[i]) - content_types.append(toast(ptnode.children[i + 1], highlevel, categorical)) - return ak._v2.types.RecordType(content_types, content_keys, parameters=parms) - - elif ptnode.data == "record_tuple_param": - parms = toast(ptnode.children[-1], highlevel, False) - if categorical: - parms.update({"__categorical__": True}) - categorical = False - content_list = [] - for node in ptnode.children[:-1]: - content_list.append(toast(node, highlevel, False)) - return ak._v2.types.RecordType(content_list, None, parameters=parms) - - elif ptnode.data == "record_struct": - parms = toast(ptnode.children[-1], highlevel, False) - if categorical: - parms.update({"__categorical__": True}) - categorical = False - content_list = [] - content_keys = [] - for node in 
ptnode.children[:-1]: - if isinstance(node, str): - content_keys.append(node) - else: - content_list.append(toast(node, highlevel, False)) - return ak._v2.types.RecordType( - content_list, - content_keys, - parameters=parms, - ) - - elif ptnode.data == "record_highlevel": - assert highlevel - parms = {"__record__": ptnode.children[0]} - if categorical: - parms.update({"__categorical__": True}) - categorical = False - content_list = [] - content_keys = [] - for node in ptnode.children[1:]: - if isinstance(node, str): - content_keys.append(node) - else: - content_list.append(toast(node, highlevel, False)) - return ak._v2.types.RecordType( - content_list, - content_keys, - parameters=parms, - ) - - elif ptnode.data == "regular": - assert (len(ptnode.children)) == 1 - return toast(ptnode.children[0], highlevel, categorical) - - elif ptnode.data == "regular_inparm": - assert len(ptnode.children) == 2 - if highlevel: - return ak._v2.types.ArrayType( - toast(ptnode.children[1], highlevel, categorical), ptnode.children[0] - ) - return ak._v2.types.RegularType( - toast(ptnode.children[1], highlevel, categorical), ptnode.children[0] - ) - - elif ptnode.data == "regular_outparm": - assert len(ptnode.children) == 3 - parms = toast(ptnode.children[2], highlevel, False) - if categorical: - parms.update({"__categorical__": True}) - categorical = False - return ak._v2.types.RegularType( - toast(ptnode.children[1], highlevel, False), - ptnode.children[0], - parms, - ) - - elif ptnode.data == "def_option": - assert len(ptnode.children) == 1 - return ptnode.children[0] - - elif ptnode.data == "options": - assert len(ptnode.children) == 1 - return toast(ptnode.children[0], highlevel, categorical) - - else: - raise AssertionError("unhandled node") - - -def from_datashape(typestr, highlevel=False): - parseobj = Lark_StandAlone(transformer=TreeToJson()) - return toast(parseobj.parse(typestr), highlevel, False) diff --git a/src/awkward/_v2/types/__init__.py b/src/awkward/_v2/types/__init__.py index c137398824..3c2836e603 100644 --- a/src/awkward/_v2/types/__init__.py +++ b/src/awkward/_v2/types/__init__.py @@ -1,8 +1,6 @@ # BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE -from awkward._typeparser.parser import from_datashape # noqa: F401 - -from awkward._v2.types.type import Type # noqa: F401 +from awkward._v2.types.type import Type, from_datashape # noqa: F401 from awkward._v2.types.unknowntype import UnknownType # noqa: F401 from awkward._v2.types.numpytype import NumpyType # noqa: F401 from awkward._v2.types.regulartype import RegularType # noqa: F401 diff --git a/src/awkward/_v2/types/type.py b/src/awkward/_v2/types/type.py index da9a00c7c1..1f636c0729 100644 --- a/src/awkward/_v2/types/type.py +++ b/src/awkward/_v2/types/type.py @@ -4,6 +4,8 @@ import sys import awkward as ak +from awkward._v2.types._awkward_datashape_parser import Lark_StandAlone, Transformer + np = ak.nplike.NumpyMetadata.instance() @@ -67,3 +69,260 @@ def _repr_args(self): out.append("typestr=" + repr(self._typestr)) return out + + +class _DataShapeTransformer(Transformer): + @staticmethod + def _parameters(args, i): + if i < len(args): + return args[i] + else: + return None + + def start(self, args): + return args[0] + + def type(self, args): + return args[0] + + def numpytype(self, args): + return ak._v2.types.NumpyType(args[0], parameters=self._parameters(args, 1)) + + def numpytype_name(self, args): + return str(args[0]) + + def unknowntype(self, args): + return 
ak._v2.types.UnknownType(parameters=self._parameters(args, 0)) + + def regulartype(self, args): + return ak._v2.types.RegularType(args[1], int(args[0])) + + def listtype(self, args): + return ak._v2.types.ListType(args[0]) + + def varlen_string(self, args): + return ak._v2.types.ListType( + ak._v2.types.NumpyType("uint8", {"__array__": "char"}), + {"__array__": "string"}, + ) + + def varlen_bytestring(self, args): + return ak._v2.types.ListType( + ak._v2.types.NumpyType("uint8", {"__array__": "byte"}), + {"__array__": "bytestring"}, + ) + + def fixedlen_string(self, args): + return ak._v2.types.RegularType( + ak._v2.types.NumpyType("uint8", {"__array__": "char"}), + int(args[0]), + {"__array__": "string"}, + ) + + def fixedlen_bytestring(self, args): + return ak._v2.types.RegularType( + ak._v2.types.NumpyType("uint8", {"__array__": "byte"}), + int(args[0]), + {"__array__": "bytestring"}, + ) + + def char(self, args): + return ak._v2.types.NumpyType("uint8", {"__array__": "char"}) + + def byte(self, args): + return ak._v2.types.NumpyType("uint8", {"__array__": "byte"}) + + def option1(self, args): + return ak._v2.types.OptionType(args[0]) + + def option2(self, args): + return ak._v2.types.OptionType(args[0], parameters=self._parameters(args, 1)) + + def tuple(self, args): + if len(args) == 0: + types = [] + else: + types = args[0] + return ak._v2.types.RecordType(types, None) + + def types(self, args): + return args + + def tuple_parameters(self, args): + if len(args) != 0 and isinstance(args[0], list): + types = args[0] + else: + types = [] + + if len(args) != 0 and isinstance(args[-1], dict): + parameters = args[-1] + else: + parameters = {} + + return ak._v2.types.RecordType(types, None, parameters) + + def record(self, args): + if len(args) == 0: + fields = [] + types = [] + else: + fields = [x[0] for x in args[0]] + types = [x[1] for x in args[0]] + return ak._v2.types.RecordType(types, fields) + + def pairs(self, args): + return args + + def pair(self, args): + return tuple(args) + + def record_parameters(self, args): + if len(args) != 0 and isinstance(args[0], list): + fields = [x[0] for x in args[0]] + types = [x[1] for x in args[0]] + else: + fields = [] + types = [] + + if len(args) != 0 and isinstance(args[-1], dict): + parameters = args[-1] + else: + parameters = {} + + return ak._v2.types.RecordType(types, fields, parameters) + + def named0(self, args): + parameters = {"__record__": str(args[0])} + if 1 < len(args): + parameters.update(args[1]) + return ak._v2.types.RecordType([], None, parameters) + + def named(self, args): + parameters = {"__record__": str(args[0])} + + if isinstance(args[1][-1], dict): + arguments = args[1][:-1] + parameters.update(args[1][-1]) + else: + arguments = args[1] + + if any(isinstance(x, tuple) for x in arguments): + fields = [x[0] for x in arguments] + contents = [x[1] for x in arguments] + else: + fields = None + contents = arguments + + return ak._v2.types.RecordType(contents, fields, parameters) + + def named_types(self, args): + if len(args) == 2 and isinstance(args[1], list): + return args[:1] + args[1] + else: + return args + + def named_pairs(self, args): + if len(args) == 2 and isinstance(args[1], list): + return args[:1] + args[1] + else: + return args + + def named_pair(self, args): + return tuple(args) + + def identifier(self, args): + return str(args[0]) + + def union(self, args): + if len(args) == 0: + arguments = [] + parameters = None + elif isinstance(args[0][-1], dict): + arguments = args[0][:-1] + parameters = args[0][-1] + 
else: + arguments = args[0] + parameters = None + + return ak._v2.types.UnionType(arguments, parameters) + + def list_parameters(self, args): + # modify recently created type object + args[0].parameters.update(args[1]) + return args[0] + + def categorical(self, args): + # modify recently created type object + args[0].parameters["__categorical__"] = True + return args[0] + + def json(self, args): + return args[0] + + def json_object(self, args): + return dict(args) + + def json_pair(self, args): + return (json.loads(args[0]), args[1]) + + def json_array(self, args): + return list(args) + + def string(self, args): + return json.loads(args[0]) + + def number(self, args): + try: + return int(args[0]) + except ValueError: + return float(args[0]) + + def true(self, args): + return True + + def false(self, args): + return False + + def null(self, args): + return None + + +def from_datashape(datashape, highlevel=True): + """ + Parses `datashape` (str) and returns a #ak._v2.types.Type object, the inverse of + calling `str` on a #ak._v2.types.Type. + + If `highlevel=True`, and the type string starts with a number (e.g. '1000 * ...'), + the return type is #ak._v2.types.ArrayType, representing an #ak._v2.highlevel.Array. + + If `highlevel=True` and the type string starts with a record indicator (e.g. `{`), + the return type is #ak._v2.types.RecordType, representing an #ak._v2.highlevel.Record, + rather than an array of them. + + Other strings (e.g. starting with `var *`, `?`, `option`, etc.) are not compatible + with `highlevel=True`; an exception would be raised. + + If `highlevel=False`, the type is assumed to represent a layout (e.g. a number + indicates a #ak._v2.types.RegularType, rather than a #ak._v2.types.ArrayType). + """ + from awkward._v2.types.regulartype import RegularType + from awkward._v2.types.recordtype import RecordType + from awkward._v2.types.arraytype import ArrayType + + parser = Lark_StandAlone(transformer=_DataShapeTransformer()) + out = parser.parse(datashape) + + if highlevel: + if isinstance(out, RegularType): + return ArrayType(out.content, out.size) + elif isinstance(out, RecordType): + return out + else: + raise ak._v2._util.error( + ValueError( + f"type '{type(out).__name__}' is not compatible with highlevel=True" + ) + ) + + else: + return out diff --git a/tests/v2/test_0773-typeparser.py b/tests/v2/test_0773-typeparser.py index 906cc68479..186bf68ef8 100644 --- a/tests/v2/test_0773-typeparser.py +++ b/tests/v2/test_0773-typeparser.py @@ -3,236 +3,235 @@ import pytest # noqa: F401 import awkward as ak # noqa: F401 +from awkward._v2.types.numpytype import NumpyType +from awkward._v2.types.unknowntype import UnknownType +from awkward._v2.types.regulartype import RegularType +from awkward._v2.types.listtype import ListType +from awkward._v2.types.optiontype import OptionType +from awkward._v2.types.recordtype import RecordType +from awkward._v2.types.uniontype import UnionType + def test_primitive_1(): text = "int64" - parsedtype = ak._v2.types.from_datashape(text) + parsedtype = ak._v2.types.from_datashape(text, highlevel=False) assert isinstance(parsedtype, ak._v2.types.NumpyType) assert (str(parsedtype)) == text def test_primitive_2(): text = 'int64[parameters={"wonky": ["parameter", 3.14]}]' - parsedtype = ak._v2.types.from_datashape(text) + parsedtype = ak._v2.types.from_datashape(text, highlevel=False) assert isinstance(parsedtype, ak._v2.types.NumpyType) assert (str(parsedtype)) == text def test_unknown_1(): text = "unknown" - parsedtype = 
ak._v2.types.from_datashape(text) + parsedtype = ak._v2.types.from_datashape(text, highlevel=False) assert isinstance(parsedtype, ak._v2.types.UnknownType) assert (str(parsedtype)) == text def test_unknown_2(): text = 'unknown[parameters={"wonky": ["parameter", 3.14]}]' - parsedtype = ak._v2.types.from_datashape(text) + parsedtype = ak._v2.types.from_datashape(text, highlevel=False) assert isinstance(parsedtype, ak._v2.types.UnknownType) assert str(parsedtype) == text def test_record_tuple_1(): text = "(int64)" - parsedtype = ak._v2.types.from_datashape(text) + parsedtype = ak._v2.types.from_datashape(text, highlevel=False) assert isinstance(parsedtype, ak._v2.types.RecordType) assert str(parsedtype) == text def test_record_tuple_2(): text = '(int64[parameters={"wonky": ["bla", 1, 2]}])' - parsedtype = ak._v2.types.from_datashape(text) + parsedtype = ak._v2.types.from_datashape(text, highlevel=False) assert isinstance(parsedtype, ak._v2.types.RecordType) assert str(parsedtype) == text def test_record_tuple_3(): text = '(int64, int64[parameters={"wonky": ["bla", 1, 2]}])' - parsedtype = ak._v2.types.from_datashape(text) + parsedtype = ak._v2.types.from_datashape(text, highlevel=False) assert isinstance(parsedtype, ak._v2.types.RecordType) assert str(parsedtype) == text def test_record_dict_1(): text = '{"1": int64}' - parsedtype = ak._v2.types.from_datashape(text) + parsedtype = ak._v2.types.from_datashape(text, highlevel=False) assert isinstance(parsedtype, ak._v2.types.RecordType) assert str(parsedtype) == text def test_record_dict_2(): - text = '{"bla": int64[parameters={"wonky": ["bla", 1, 2]}]}' - parsedtype = ak._v2.types.from_datashape(text) + text = '{bla: int64[parameters={"wonky": ["bla", 1, 2]}]}' + parsedtype = ak._v2.types.from_datashape(text, highlevel=False) assert isinstance(parsedtype, ak._v2.types.RecordType) - assert str(parsedtype) == '{bla: int64[parameters={"wonky": ["bla", 1, 2]}]}' + assert str(parsedtype) == text def test_record_dict_3(): - text = '{"bla": int64[parameters={"wonky": ["bla", 1, 2]}], "foo": int64}' - parsedtype = ak._v2.types.from_datashape(text) + text = '{bla: int64[parameters={"wonky": ["bla", 1, 2]}], foo: int64}' + parsedtype = ak._v2.types.from_datashape(text, highlevel=False) assert isinstance(parsedtype, ak._v2.types.RecordType) - assert ( - str(parsedtype) - == '{bla: int64[parameters={"wonky": ["bla", 1, 2]}], foo: int64}' - ) + assert str(parsedtype) == text def test_record_parmtuple_1(): text = 'tuple[[int64[parameters={"xkcd": [11, 12, 13]}]], parameters={"wonky": ["bla", 1, 2]}]' - parsedtype = ak._v2.types.from_datashape(text) + parsedtype = ak._v2.types.from_datashape(text, highlevel=False) assert isinstance(parsedtype, ak._v2.types.RecordType) assert str(parsedtype) == text def test_record_parmtuple_2(): text = 'tuple[[int64, int64], parameters={"wonky": ["bla", 1, 2]}]' - parsedtype = ak._v2.types.from_datashape(text) + parsedtype = ak._v2.types.from_datashape(text, highlevel=False) assert isinstance(parsedtype, ak._v2.types.RecordType) assert str(parsedtype) == text def test_record_struct_1(): - text = 'struct[["1"], [int64[parameters={"xkcd": [11, 12, 13]}]], parameters={"wonky": ["bla", 1, 2]}]' - parsedtype = ak._v2.types.from_datashape(text) + text = 'struct[{"1": int64[parameters={"xkcd": [11, 12, 13]}]}, parameters={"wonky": ["bla", 1, 2]}]' + parsedtype = ak._v2.types.from_datashape(text, highlevel=False) assert isinstance(parsedtype, ak._v2.types.RecordType) - assert ( - str(parsedtype) - == 'struct[{"1": 
int64[parameters={"xkcd": [11, 12, 13]}]}, parameters={"wonky": ["bla", 1, 2]}]' - ) + assert str(parsedtype) == text def test_record_struct_2(): - text = 'struct[["1", "2"], [int64[parameters={"xkcd": [11, 12, 13]}], int64], parameters={"wonky": ["bla", 1, 2]}]' - parsedtype = ak._v2.types.from_datashape(text) + text = 'struct[{"1": int64[parameters={"xkcd": [11, 12, 13]}], "2": int64}, parameters={"wonky": ["bla", 1, 2]}]' + parsedtype = ak._v2.types.from_datashape(text, highlevel=False) assert isinstance(parsedtype, ak._v2.types.RecordType) - assert ( - str(parsedtype) - == 'struct[{"1": int64[parameters={"xkcd": [11, 12, 13]}], "2": int64}, parameters={"wonky": ["bla", 1, 2]}]' - ) + assert str(parsedtype) == text def test_option_numpy_1(): text = "?int64" - parsedtype = ak._v2.types.from_datashape(text) + parsedtype = ak._v2.types.from_datashape(text, highlevel=False) assert isinstance(parsedtype, ak._v2.types.OptionType) assert str(parsedtype) == text def test_option_numpy_2(): text = '?int64[parameters={"wonky": [1, 2, 3]}]' - parsedtype = ak._v2.types.from_datashape(text) + parsedtype = ak._v2.types.from_datashape(text, highlevel=False) assert isinstance(parsedtype, ak._v2.types.OptionType) assert str(parsedtype) == text def test_option_numpy_1_parm(): text = 'option[int64, parameters={"foo": "bar"}]' - parsedtype = ak._v2.types.from_datashape(text) + parsedtype = ak._v2.types.from_datashape(text, highlevel=False) assert isinstance(parsedtype, ak._v2.types.OptionType) assert str(parsedtype) == text def test_option_numpy_2_parm(): text = 'option[int64[parameters={"wonky": [1, 2]}], parameters={"foo": "bar"}]' - parsedtype = ak._v2.types.from_datashape(text) + parsedtype = ak._v2.types.from_datashape(text, highlevel=False) assert isinstance(parsedtype, ak._v2.types.OptionType) assert str(parsedtype) == text def test_option_unknown_1(): text = "?unknown" - parsedtype = ak._v2.types.from_datashape(text) + parsedtype = ak._v2.types.from_datashape(text, highlevel=False) assert isinstance(parsedtype, ak._v2.types.OptionType) assert str(parsedtype) == text def test_option_unknown_2(): text = '?unknown[parameters={"foo": "bar"}]' - parsedtype = ak._v2.types.from_datashape(text) + parsedtype = ak._v2.types.from_datashape(text, highlevel=False) assert isinstance(parsedtype, ak._v2.types.OptionType) assert str(parsedtype) == text def test_option_unknown_1_parm(): text = 'option[unknown, parameters={"foo": "bar"}]' - parsedtype = ak._v2.types.from_datashape(text) + parsedtype = ak._v2.types.from_datashape(text, highlevel=False) assert isinstance(parsedtype, ak._v2.types.OptionType) assert str(parsedtype) == text def test_option_unknown_2_parm(): text = 'option[unknown, parameters={"foo": "bar"}]' - parsedtype = ak._v2.types.from_datashape(text) + parsedtype = ak._v2.types.from_datashape(text, highlevel=False) assert isinstance(parsedtype, ak._v2.types.OptionType) assert str(parsedtype) == text def test_regular_numpy_1(): text = "5 * int64" - parsedtype = ak._v2.types.from_datashape(text) + parsedtype = ak._v2.types.from_datashape(text, highlevel=False) assert isinstance(parsedtype, ak._v2.types.RegularType) assert str(parsedtype) == text def test_regular_numpy_2(): text = '5 * int64[parameters={"bar": "foo"}]' - parsedtype = ak._v2.types.from_datashape(text) + parsedtype = ak._v2.types.from_datashape(text, highlevel=False) assert isinstance(parsedtype, ak._v2.types.RegularType) assert str(parsedtype) == text def test_regular_numpy_2_parm(): text = '[0 * int64[parameters={"foo": "bar"}], 
parameters={"bla": "bloop"}]' - parsedtype = ak._v2.types.from_datashape(text) + parsedtype = ak._v2.types.from_datashape(text, highlevel=False) assert isinstance(parsedtype, ak._v2.types.RegularType) assert str(parsedtype) == text def test_regular_unknown_1_parm(): text = '[0 * unknown, parameters={"foo": "bar"}]' - parsedtype = ak._v2.types.from_datashape(text) + parsedtype = ak._v2.types.from_datashape(text, highlevel=False) assert isinstance(parsedtype, ak._v2.types.RegularType) assert str(parsedtype) == text def test_list_numpy_1(): text = "var * float64" - parsedtype = ak._v2.types.from_datashape(text) + parsedtype = ak._v2.types.from_datashape(text, highlevel=False) assert isinstance(parsedtype, ak._v2.types.ListType) assert str(parsedtype) == text def test_list_numpy_1_parm(): text = '[var * float64[parameters={"wonky": "boop"}], parameters={"foo": "bar"}]' - parsedtype = ak._v2.types.from_datashape(text) + parsedtype = ak._v2.types.from_datashape(text, highlevel=False) assert isinstance(parsedtype, ak._v2.types.ListType) assert str(parsedtype) == text def test_union_numpy_empty_1(): text = 'union[float64[parameters={"wonky": "boop"}], unknown]' - parsedtype = ak._v2.types.from_datashape(text) + parsedtype = ak._v2.types.from_datashape(text, highlevel=False) assert isinstance(parsedtype, ak._v2.types.UnionType) assert str(parsedtype) == text def test_union_numpy_empty_1_parm(): text = 'union[float64[parameters={"wonky": "boop"}], unknown, parameters={"pratyush": "das"}]' - parsedtype = ak._v2.types.from_datashape(text) + parsedtype = ak._v2.types.from_datashape(text, highlevel=False) assert isinstance(parsedtype, ak._v2.types.UnionType) assert str(parsedtype) == text def test_arraytype_string(): text = str(ak._v2.Array([["one", "two", "three"], [], ["four", "five"]]).type) - parsedtype = ak._v2.types.from_datashape(text, True) + parsedtype = ak._v2.types.from_datashape(text, highlevel=True) assert isinstance(parsedtype, ak._v2.types.ArrayType) assert str(parsedtype) == text def test_arraytype_bytestring(): text = str(ak._v2.Array([[b"one", b"two", b"three"], [], [b"four", b"five"]]).type) - parsedtype = ak._v2.types.from_datashape(text, True) + parsedtype = ak._v2.types.from_datashape(text, highlevel=True) assert isinstance(parsedtype, ak._v2.types.ArrayType) assert str(parsedtype) == text @@ -243,7 +242,7 @@ def test_arraytype_categorical_1(): ak._v2.Array(["one", "one", "two", "three", "one", "three"]) ).type ) - parsedtype = ak._v2.types.from_datashape(text, True) + parsedtype = ak._v2.types.from_datashape(text, highlevel=True) assert isinstance(parsedtype, ak._v2.types.ArrayType) assert str(parsedtype) == text @@ -254,179 +253,526 @@ def test_arraytype_categorical_2(): ak._v2.Array([1.1, 1.1, 2.2, 3.3, 1.1, 3.3]) ).type ) - parsedtype = ak._v2.types.from_datashape(text, True) + parsedtype = ak._v2.types.from_datashape(text, highlevel=True) assert isinstance(parsedtype, ak._v2.types.ArrayType) assert str(parsedtype) == text def test_arraytype_record_1(): - text = '3 * Thingy["x": int64, "y": float64]' - parsedtype = ak._v2.types.from_datashape(text, True) + text = str( + ak._v2.Array( + [{"x": 1, "y": 1.1}, {"x": 2, "y": 2.2}, {"x": 3, "y": 3.3}], + with_name="Thingy", + ).type + ) + parsedtype = ak._v2.types.from_datashape(text, highlevel=True) assert isinstance(parsedtype, ak._v2.types.ArrayType) - assert str(parsedtype) == "3 * Thingy[x: int64, y: float64]" + assert str(parsedtype) == text def test_arraytype_record_2(): - text = '3 * var * Thingy["x": int64, "y": float64]' - 
-    parsedtype = ak._v2.types.from_datashape(text, True)
+    text = str(
+        ak._v2.Array(
+            [[{"x": 1, "y": 1.1}, {"x": 2, "y": 2.2}], [], [{"x": 3, "y": 3.3}]],
+            with_name="Thingy",
+        ).type
+    )
+    parsedtype = ak._v2.types.from_datashape(text, highlevel=True)
     assert isinstance(parsedtype, ak._v2.types.ArrayType)
-    assert str(parsedtype) == "3 * var * Thingy[x: int64, y: float64]"
+    assert str(parsedtype) == text


 def test_arraytype_1():
     text = str(ak._v2.Array([[1, 2, 3], None, [4, 5]]).type)
-    parsedtype = ak._v2.types.from_datashape(text, True)
+    parsedtype = ak._v2.types.from_datashape(text, highlevel=True)
     assert isinstance(parsedtype, ak._v2.types.ArrayType)
     assert str(parsedtype) == text


 def test_arraytype_2():
     text = str(
-        ak.with_parameter(ak._v2.Array([[1, 2, 3], [], [4, 5]]), "wonky", "string").type
+        ak._v2.with_parameter(
+            ak._v2.Array([[1, 2, 3], [], [4, 5]]), "wonky", "string"
+        ).type
     )
-    parsedtype = ak._v2.types.from_datashape(text, True)
+    parsedtype = ak._v2.types.from_datashape(text, highlevel=True)
     assert isinstance(parsedtype, ak._v2.types.ArrayType)
     assert str(parsedtype) == text


 def test_arraytype_3():
     text = str(
-        ak.with_parameter(
+        ak._v2.with_parameter(
             ak._v2.Array([[1, 2, 3], [], [4, 5]]), "wonky", {"other": "JSON"}
         ).type
     )
-    parsedtype = ak._v2.types.from_datashape(text)
+    parsedtype = ak._v2.types.from_datashape(text, highlevel=False)
     assert str(parsedtype) == text


 def test_arraytype_4():
     text = str(
-        ak.with_parameter(
+        ak._v2.with_parameter(
             ak._v2.Array([[1, 2, 3], None, [4, 5]]), "wonky", "string"
         ).type
     )
-    parsedtype = ak._v2.types.from_datashape(text)
+    parsedtype = ak._v2.types.from_datashape(text, highlevel=False)
     assert str(parsedtype) == text


 def test_arraytype_5():
     text = str(
-        ak.with_parameter(ak._v2.Array([1, 2, 3, None, 4, 5]), "wonky", "string").type
+        ak._v2.with_parameter(
+            ak._v2.Array([1, 2, 3, None, 4, 5]), "wonky", "string"
+        ).type
     )
-    parsedtype = ak._v2.types.from_datashape(text)
+    parsedtype = ak._v2.types.from_datashape(text, highlevel=False)
     assert str(parsedtype) == text


 def test_arraytype_6():
-    text = str(ak.with_parameter(ak._v2.Array([1, 2, 3, 4, 5]), "wonky", "string").type)
-    parsedtype = ak._v2.types.from_datashape(text)
+    text = str(
+        ak._v2.with_parameter(ak._v2.Array([1, 2, 3, 4, 5]), "wonky", "string").type
+    )
+    parsedtype = ak._v2.types.from_datashape(text, highlevel=False)
     assert str(parsedtype) == text


 def test_arraytype_7():
     text = str(ak._v2.Array([1, 2, 3, None, 4, 5]).type)
-    parsedtype = ak._v2.types.from_datashape(text)
+    parsedtype = ak._v2.types.from_datashape(text, highlevel=False)
     assert str(parsedtype) == text


 def test_arraytype_8():
     text = str(
-        ak.with_parameter(
+        ak._v2.with_parameter(
             ak._v2.Array([{"x": 1, "y": 1.1}, {"x": 2, "y": 2.2}, {"x": 3, "y": 3.3}]),
             "wonky",
             "string",
         ).type
     )
-    parsedtype = ak._v2.types.from_datashape(text)
+    parsedtype = ak._v2.types.from_datashape(text, highlevel=False)
     assert str(parsedtype) == text


 def test_arraytype_9():
     text = str(ak._v2.Array([(1, 1.1), (2, 2.2), (3, 3.3)]).type)
-    parsedtype = ak._v2.types.from_datashape(text)
+    parsedtype = ak._v2.types.from_datashape(text, highlevel=False)
     assert str(parsedtype) == text


 def test_arraytype_10():
     text = str(
-        ak.with_parameter(
+        ak._v2.with_parameter(
             ak._v2.Array([(1, 1.1), (2, 2.2), (3, 3.3)]), "wonky", "string"
         ).type
     )
-    parsedtype = ak._v2.types.from_datashape(text)
+    parsedtype = ak._v2.types.from_datashape(text, highlevel=False)
     assert str(parsedtype) == text


 def test_arraytype_11():
     text = str(ak._v2.Array([[(1, 1.1), (2, 2.2)], [], [(3, 3.3)]]).type)
-    parsedtype = ak._v2.types.from_datashape(text)
+    parsedtype = ak._v2.types.from_datashape(text, highlevel=False)
     assert str(parsedtype) == text


 def test_arraytype_12():
     text = str(ak._v2.to_regular(ak._v2.Array([[1, 2], [3, 4], [5, 6]])).type)
-    parsedtype = ak._v2.types.from_datashape(text)
+    parsedtype = ak._v2.types.from_datashape(text, highlevel=False)
     assert str(parsedtype) == text


 def test_arraytype_13():
     text = str(
-        ak.with_parameter(
+        ak._v2.with_parameter(
             ak._v2.to_regular(ak._v2.Array([[1, 2], [3, 4], [5, 6]])), "wonky", "string"
         ).type
     )
-    parsedtype = ak._v2.types.from_datashape(text)
+    parsedtype = ak._v2.types.from_datashape(text, highlevel=False)
     assert str(parsedtype) == text


 def test_arraytype_14():
     text = str(
-        ak.with_parameter(
+        ak._v2.with_parameter(
             ak._v2.Array([1, 2, 3, [1], [1, 2], [1, 2, 3]]), "wonky", "string"
         ).type
     )
-    parsedtype = ak._v2.types.from_datashape(text)
+    parsedtype = ak._v2.types.from_datashape(text, highlevel=False)
     assert str(parsedtype) == text


 def test_arraytype_15():
     text = str(
-        ak.with_parameter(
+        ak._v2.with_parameter(
             ak._v2.Array([1, 2, 3, None, [1], [1, 2], [1, 2, 3]]), "wonky", "string"
         ).type
     )
-    parsedtype = ak._v2.types.from_datashape(text)
+    parsedtype = ak._v2.types.from_datashape(text, highlevel=False)
     assert str(parsedtype) == text


 def test_arraytype_16():
-    text = "7 * ?union[int64, var * int64]"
-    parsedtype = ak._v2.types.from_datashape(text)
+    text = str(ak._v2.Array([1, 2, 3, None, [1], [1, 2], [1, 2, 3]]).type)
+    parsedtype = ak._v2.types.from_datashape(text, highlevel=False)
     assert str(parsedtype) == text


 def test_arraytype_17():
-    text = "7 * ?union[int64, var * unknown]"
-    parsedtype = ak._v2.types.from_datashape(text)
+    text = str(ak._v2.Array([1, 2, 3, None, [], [], []]).type)
+    parsedtype = ak._v2.types.from_datashape(text, highlevel=False)
     assert str(parsedtype) == text


 def test_string():
     text = "string"
-    parsedtype = ak._v2.types.from_datashape(text)
+    parsedtype = ak._v2.types.from_datashape(text, highlevel=False)
     assert isinstance(parsedtype, ak._v2.types.ListType)
     assert str(parsedtype) == text


 def test_hardcoded():
     text = "var * string"
-    parsedtype = ak._v2.types.from_datashape(text)
+    parsedtype = ak._v2.types.from_datashape(text, highlevel=False)
     assert isinstance(parsedtype, ak._v2.types.ListType)
     assert str(parsedtype) == text


 def test_record_highlevel():
-    text = 'Thingy["x": int64, "y": float64]'
-    parsedtype = ak._v2.types.from_datashape(text, True)
+    text = "Thingy[x: int64, y: float64]"
+    parsedtype = ak._v2.types.from_datashape(text, highlevel=True)
     assert isinstance(parsedtype, ak._v2.types.RecordType)
-    assert str(parsedtype) == "Thingy[x: int64, y: float64]"
+    assert str(parsedtype) == text
+
+
+def test_numpytype_int32():
+    t = NumpyType("int32")
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_numpytype_datetime64():
+    t = NumpyType("datetime64")
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_numpytype_datetime64_10s():
+    t = NumpyType("datetime64[10s]")
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_numpytype_int32_parameter():
+    t = NumpyType("int32", {"__array__": "Something"})
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_numpytype_datetime64_parameter():
+    t = NumpyType("datetime64", {"__array__": "Something"})
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_numpytype_datetime64_10s_parameter():
+    t = NumpyType("datetime64[10s]", {"__array__": "Something"})
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_numpytype_int32_categorical():
+    t = NumpyType("int32", {"__categorical__": True})
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_numpytype_int32_parameters_categorical():
+    t = NumpyType("int32", {"__array__": "Something", "__categorical__": True})
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_unknowntype():
+    t = UnknownType()
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_unknowntype_parameter():
+    t = UnknownType({"__array__": "Something"})
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_unknowntype_categorical():
+    t = UnknownType({"__categorical__": True})
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_unknowntype_categorical_parameter():
+    t = UnknownType({"__array__": "Something", "__categorical__": True})
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_regulartype_numpytype():
+    t = RegularType(NumpyType("int32"), 5)
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_regulartype_numpytype_parameter():
+    t = RegularType(NumpyType("int32"), 5, {"__array__": "Something"})
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_regulartype_numpytype_categorical():
+    t = RegularType(NumpyType("int32"), 5, {"__categorical__": True})
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_regulartype_numpytype_categorical_parameter():
+    t = RegularType(
+        NumpyType("int32"), 5, {"__categorical__": True, "__array__": "Something"}
+    )
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_listtype_numpytype():
+    t = ListType(NumpyType("int32"))
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_listtype_numpytype_parameter():
+    t = ListType(NumpyType("int32"), {"__array__": "Something"})
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_listtype_numpytype_categorical():
+    t = ListType(NumpyType("int32"), {"__categorical__": True})
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_listtype_numpytype_categorical_parameter():
+    t = ListType(
+        NumpyType("int32"), {"__categorical__": True, "__array__": "Something"}
+    )
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_varlen_string():
+    t = ListType(NumpyType("uint8", {"__array__": "char"}), {"__array__": "string"})
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_varlen_bytestring():
+    t = ListType(NumpyType("uint8", {"__array__": "char"}), {"__array__": "bytestring"})
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_fixedlen_string():
+    t = RegularType(
+        NumpyType("uint8", {"__array__": "char"}), 5, {"__array__": "string"}
+    )
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_fixedlen_bytestring():
+    t = RegularType(
+        NumpyType("uint8", {"__array__": "byte"}), 5, {"__array__": "bytestring"}
+    )
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_char():
+    t = NumpyType("uint8", {"__array__": "char"})
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_byte():
+    t = NumpyType("uint8", {"__array__": "byte"})
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_optiontype_numpytype_int32():
+    t = OptionType(NumpyType("int32"))
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_optiontype_numpytype_int32_parameters():
+    t = OptionType(NumpyType("int32"), {"__array__": "Something"})
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_optiontype_numpytype_int32_categorical():
+    t = OptionType(NumpyType("int32"), {"__categorical__": True})
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_optiontype_numpytype_int32_categorical_parameters():
+    t = OptionType(
+        NumpyType("int32"), {"__array__": "Something", "__categorical__": True}
+    )
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_option_varlen_string():
+    t = OptionType(
+        ListType(NumpyType("uint8", {"__array__": "char"}), {"__array__": "string"})
+    )
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_option_varlen_string_parameters():
+    t = OptionType(
+        ListType(NumpyType("uint8", {"__array__": "char"}), {"__array__": "string"}),
+        {"__array__": "Something"},
+    )
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_record_empty():
+    t = RecordType([], None)
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_record_fields_empty():
+    t = RecordType([], [])
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_record_int32():
+    t = RecordType([NumpyType("int32")], None)
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_record_int32_float64():
+    t = RecordType([NumpyType("int32"), NumpyType("float64")], None)
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_record_fields_int32():
+    t = RecordType([NumpyType("int32")], ["one"])
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_record_fields_int32_float64():
+    t = RecordType([NumpyType("int32"), NumpyType("float64")], ["one", "t w o"])
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_record_empty_parameters():
+    t = RecordType([], None, {"p": [123]})
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_record_fields_empty_parameters():
+    t = RecordType([], [], {"p": [123]})
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_record_int32_parameters():
+    t = RecordType([NumpyType("int32")], None, {"p": [123]})
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_record_int32_float64_parameters():
+    t = RecordType([NumpyType("int32"), NumpyType("float64")], None, {"p": [123]})
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_record_fields_int32_parameters():
+    t = RecordType([NumpyType("int32")], ["one"], {"p": [123]})
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_record_fields_int32_float64_parameters():
+    t = RecordType(
+        [NumpyType("int32"), NumpyType("float64")], ["one", "t w o"], {"p": [123]}
+    )
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_named_record_empty():
+    t = RecordType([], None, {"__record__": "Name"})
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_named_record_int32():
+    t = RecordType([NumpyType("int32")], None, {"__record__": "Name"})
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_named_record_int32_float64():
+    t = RecordType(
+        [NumpyType("int32"), NumpyType("float64")], None, {"__record__": "Name"}
+    )
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_named_record_fields_int32():
+    t = RecordType([NumpyType("int32")], ["one"], {"__record__": "Name"})
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_named_record_fields_int32_float64():
+    t = RecordType(
+        [NumpyType("int32"), NumpyType("float64")],
+        ["one", "t w o"],
+        {"__record__": "Name"},
+    )
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_named_record_empty_parameters():
+    t = RecordType([], None, {"__record__": "Name", "p": [123]})
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_named_record_int32_parameters():
+    t = RecordType([NumpyType("int32")], None, {"__record__": "Name", "p": [123]})
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_named_record_int32_float64_parameters():
+    t = RecordType(
+        [NumpyType("int32"), NumpyType("float64")],
+        None,
+        {"__record__": "Name", "p": [123]},
+    )
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_named_record_fields_int32_parameters():
+    t = RecordType([NumpyType("int32")], ["one"], {"__record__": "Name", "p": [123]})
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_named_record_fields_int32_float64_parameters():
+    t = RecordType(
+        [NumpyType("int32"), NumpyType("float64")],
+        ["one", "t w o"],
+        {"__record__": "Name", "p": [123]},
+    )
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_union_empty():
+    t = UnionType([])
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_union_float64():
+    t = UnionType([NumpyType("float64")])
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_union_float64_datetime64():
+    t = UnionType(
+        [NumpyType("float64"), NumpyType("datetime64")],
+    )
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_union_float64_parameters():
+    t = UnionType([NumpyType("float64")], {"__array__": "Something"})
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
+
+
+def test_union_float64_datetime64_parameters():
+    t = UnionType(
+        [NumpyType("float64"), NumpyType("datetime64")],
+        {"__array__": "Something"},
+    )
+    assert str(ak._v2.types.from_datashape(str(t), highlevel=False)) == str(t)
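Every change in the patch above converges on one round-trip property: rendering a type as a datashape string and parsing that string back must reproduce the type exactly, with the old positional True replaced by an explicit highlevel keyword. Below is a minimal sketch of that property outside pytest, assuming only the v2 API the tests themselves call (ak._v2.types.from_datashape and the type classes' str rendering); the particular type built here is illustrative, not taken from the patch.

import awkward as ak
from awkward._v2.types.listtype import ListType
from awkward._v2.types.numpytype import NumpyType

# Build a type object directly, render it as a datashape string, and
# parse that string back into a type object.
t = ListType(NumpyType("int32"), {"__array__": "Something"})
text = str(t)  # the parameters are embedded in the string as JSON
parsed = ak._v2.types.from_datashape(text, highlevel=False)

# highlevel=False yields the bare type, as in the tests above;
# highlevel=True is for strings that describe a whole array,
# such as "3 * var * ...".
assert str(parsed) == text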
From 642e6b90ff2a504e66c828ad50c675ebc3d74957 Mon Sep 17 00:00:00 2001
From: Jim Pivarski
Date: Thu, 23 Jun 2022 17:03:28 -0500
Subject: [PATCH 12/12] Ignore pylint errors for the auto-generated code, too.

---
 src/awkward/_v2/types/_awkward_datashape_parser.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/awkward/_v2/types/_awkward_datashape_parser.py b/src/awkward/_v2/types/_awkward_datashape_parser.py
index affe03272e..657672cf62 100644
--- a/src/awkward/_v2/types/_awkward_datashape_parser.py
+++ b/src/awkward/_v2/types/_awkward_datashape_parser.py
@@ -1,5 +1,6 @@
 # flake8: noqa
 # fmt: off
+# pylint: skip-file

 # The file was automatically generated by Lark v0.12.0
 __version__ = "0.12.0"
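With this last patch, the generated module's header silences all three tools that would otherwise complain about machine-written code. A sketch of the resulting first lines, with explanatory notes folded into the comments (they are not part of the actual file):

# flake8: noqa        # flake8 skips the entire file
# fmt: off            # black leaves the formatting alone
# pylint: skip-file   # pylint skips the entire file (the line this patch adds)

If the module ever has to be regenerated, the "automatically generated by Lark v0.12.0" banner points at Lark's standalone-parser tool, invoked along the lines of python -m lark.tools.standalone grammar.lark > _awkward_datashape_parser.py; the grammar filename is an assumption here, and the three pragma lines would presumably have to be restored by hand afterward.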