-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparser.py
273 lines (227 loc) · 10 KB
/
parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
import os
from lark import Lark, Transformer, Tree, Token
# Resolve the grammar relative to this module so it is found regardless of
# the process's current working directory.
dir_path = os.path.dirname(os.path.realpath(__file__))
GRAMMAR_FILE = os.path.join(dir_path, "grammar.lark")

# Load grammar from file. The encoding is pinned so the grammar is read the
# same way on every platform instead of depending on the locale default.
with open(GRAMMAR_FILE, 'r', encoding='utf-8') as f:
    grammar = f.read()

# Create parser (built once at import time and shared by parse()/get_parser()).
parser = Lark(grammar, start='start', parser='lalr')
def extract_chars(token):
    """Flatten a (possibly nested) parse fragment into text.

    Strings are returned unchanged, lists are flattened element by element
    and concatenated, ``Tree`` nodes contribute the text of their children,
    and ``Token`` objects contribute their ``value``. Anything else yields
    an empty string.
    """
    if isinstance(token, str):
        # NOTE(review): lark's Token appears to subclass str, in which case
        # Token inputs are returned from here and never reach the Token
        # branch below -- confirm before relying on either path.
        return token
    if isinstance(token, list):
        return ''.join(map(extract_chars, token))
    if isinstance(token, Tree):
        return extract_chars(token.children)
    if isinstance(token, Token):
        return token.value
    return ''
class ASTTransformer(Transformer):
    """Build AST nodes for the parsed language.

    Each handler receives the list of already-transformed children
    (``items``) and, in most cases, returns a plain dict tagged with a
    ``"TOKEN"`` key. Handlers are presumably matched by name to the rules in
    grammar.lark (not visible here). Several handlers start reading at
    ``items[1]``; index 0 is presumably the rule's leading keyword token --
    confirm against the grammar.

    Naming guidelines for keys:
    - Constants: UPPERCASE
    - Collated strings: Capitalised
    - Subtokens: lowercase
    Ensure sync with transpiler.py
    """

    # Shared handlers, aliased below for rules that need no node of their own.
    def passthrough(self, items):
        """Return the children list unchanged."""
        return items

    def firstitem(self, items):
        """Return only the first child."""
        return items[0]

    # Root level
    def start(self, items):
        return {"TOKEN": "Program", "body": items}

    # fundamentals
    def imports(self, items):
        return {"TOKEN": "imports", "body": items}

    def unit(self, items):
        return items[0]

    # blocks of code
    def block(self, items):
        return {"TOKEN": "block", "body": items}

    control_block = passthrough

    def if_block(self, items):
        # `or None` normalises any falsy else-branch to None.
        return {"TOKEN": "if_block", "test": items[1], "iftrue": items[2], "iffalse": items[3] or None}

    def else_block(self, items):
        return {"TOKEN": "else_block", "body": items[1]}

    def while_block(self, items):
        return {"TOKEN": "while_block", "test": items[1], "body": items[2]}

    def for_block(self, items):
        return {"TOKEN": "for_block", "identifier": items[1], "range": items[2], "body": items[3]}

    def switch_block(self, items):
        return {"TOKEN": "switch_block", "expression": items[1], "body": items[2]}

    switch_body = passthrough

    def switch_case(self, items):
        return {"TOKEN": "switch_case", "cases": items[1], "body": items[2]}

    def switch_default(self, items):
        return {"TOKEN": "switch_default", "body": items[1]}

    def function_decl(self, items):
        return {"TOKEN": "function_decl", "identifier": items[1], "parameters": items[2], "body": items[3]}

    func_block = passthrough

    def class_decl(self, items):
        return {"TOKEN": "class_decl", "identifier": items[1], "parameters": items[2], "body": items[3]}

    # Methods produce the same node shape (and TOKEN tag) as functions.
    method = function_decl

    # line of code
    def module_statement(self, items):
        return {"TOKEN": "module_statement", "path": items[1]}

    def import_statement(self, items):
        return {"TOKEN": "import_statement", "path": items[1]}

    def statement(self, items):
        return {"TOKEN": "statement", "body": items[0]}

    statement_content = firstitem

    def declaration(self, items):
        # `or None` normalises a falsy body (e.g. an empty list) to None.
        return {"TOKEN": "declaration", "varword": items[0], "identifier": items[1], "body": items[2] or None}

    declaration_contents = passthrough

    # The three declaration forms share one node shape; absent parts are None.
    def typed_decl(self, items):
        return {"TOKEN": "declaration_body", "type": items[0], "value": None}

    def valued_decl(self, items):
        return {"TOKEN": "declaration_body", "type": None, "value": items[0]}

    def typed_valued_decl(self, items):
        return {"TOKEN": "declaration_body", "type": items[0], "value": items[1]}

    def definition(self, items):
        return {"TOKEN": "definition", "identifier": items[0], "value": items[1]}

    def reassignment(self, items):
        return {"TOKEN": "reassignment", "identifier": items[0], "operator": items[1], "value": items[2]}

    def unary_reassignment(self, items):
        return {"TOKEN": "unary_reassignment", "identifier": items[0], "operator": items[1]}

    def throw_statement(self, items):
        return {"TOKEN": "throw_statement", "body": items[1]}

    def return_statement(self, items):
        return {"TOKEN": "return_statement", "value": items[1]}

    # expressions
    expression_list = passthrough

    def expression(self, items):
        return items[0]

    definition_expression = firstitem

    def function_invocation(self, items):
        # `or None` normalises a falsy handler to None.
        return {"TOKEN": "function_invocation", "function": items[0], "args": items[1], "handler": items[2] or None}

    handler = firstitem

    def catcher(self, items):
        return {"TOKEN": "catcher", "identifier": items[1], "body": items[2]}

    def array_getter(self, items):
        return {"TOKEN": "array_getter", "identifier": items[0], "expression": items[1]}

    def map_getter(self, items):
        # A third child (the argument list) turns a key lookup into a
        # method call; without it this is a plain map access.
        if items[2] is None:
            return {"TOKEN": "map_getter", "identifier": items[0], "Key": items[1]}
        return {"TOKEN": "method_call", "identifier": items[0], "Key": items[1], "arguments": items[2]}

    def typed_expression(self, items):
        return {"TOKEN": "typed_expression", "type": items[0], "value": items[1]}

    def unary_expression(self, items):
        return {"TOKEN": "unary_expression", "Operator": items[0], "rhs": items[1]}

    def math_expression(self, items):
        return {"TOKEN": "math_expression", "lhs": items[0], "Operator": items[1], "rhs": items[2]}

    def bitwise_expression(self, items):
        return {"TOKEN": "bitwise_expression", "lhs": items[0], "Operator": items[1], "rhs": items[2]}

    def logical_expression(self, items):
        return {"TOKEN": "logical_expression", "lhs": items[0], "Operator": items[1], "rhs": items[2]}

    def comparison_expression(self, items):
        return {"TOKEN": "comparison_expression", "lhs": items[0], "Operator": items[1], "rhs": items[2]}

    func_expression = passthrough

    def function_expression(self, items):
        return {"TOKEN": "function_expression", "parameters": items[1], "body": items[2]}

    def lambda_expression(self, items):
        return {"TOKEN": "lambda_expression", "parameters": items[1], "body": items[2]}

    def parenth_expression(self, items):
        return items[0]

    # keywords
    def var_keyword(self, items):
        return extract_chars(items)

    # basic elements
    params_list = passthrough

    def function_param(self, items):
        return {"TOKEN": "function_param", "Identifier": items[0], "Type": items[1]}

    def args_list(self, items):
        return items

    def map_key(self, items):
        return items[0]

    def variable_identifier(self, items):
        return {"TOKEN": "identifier", "Name": extract_chars(items)}

    def type_list(self, items):
        return items

    # atomics
    def identifier(self, items):
        return extract_chars(items)

    def number(self, items):
        return {"TOKEN": "number", "value": items[0]}

    def based_number(self, items):
        # The first child is split on '_' into exactly two parts:
        # the value, then the base.
        value, base = items[0].split('_')
        return {"TOKEN": "based_number", "Base": base, "Value": value}

    def numeral(self, items):
        return extract_chars(items)

    digit = firstitem

    def string(self, items):
        return extract_chars(items)

    def boolean(self, items):
        return items[0] == "true"

    def null(self, items):
        return None

    def array(self, items):
        # Children alternate index, value, index, value, ...
        return {"TOKEN": "array", "indices": items[0::2], "values": items[1::2]}

    def map(self, items):
        # Children alternate key, value, key, value, ...
        return {"TOKEN": "map", "keys": items[0::2], "values": items[1::2]}

    def range(self, items):
        return {"TOKEN": "range", "start": items[0], "end": items[1]}

    def type_value(self, items):
        return extract_chars(items[0])

    def lit_type(self, items):
        return {"TOKEN": "lit_type", "type": items[0]}

    # operators
    reassignment_op = firstitem
    unary_reassignment_op = firstitem
    unary_op = firstitem
    bitwise_op = firstitem
    logical_op = firstitem
    comparison_op = firstitem

    # symbols: each handler ignores its children and returns the operator text
    def sym_positive(self, items): return "+"
    def sym_negative(self, items): return "-"
    def sym_lognot(self, items): return "!"
    def sym_bitnot(self, items): return "~"
    def sym_add(self, items): return "+"
    def sym_subtract(self, items): return "-"
    def sym_multiply(self, items): return "*"
    def sym_divide(self, items): return "/"
    def sym_exponent(self, items): return "^"
    def sym_bitand(self, items): return "&"
    def sym_bitor(self, items): return "|"
    def sym_bitxor(self, items): return "><"
    def sym_bitlshift(self, items): return "<<"
    def sym_bitrshift(self, items): return ">>"
    def sym_logand(self, items): return "&&"
    def sym_logor(self, items): return "||"
    def sym_equals(self, items): return "=="
    def sym_nequals(self, items): return "!="
    def sym_less(self, items): return "<"
    def sym_leq(self, items): return "<="
    def sym_greater(self, items): return ">"
    def sym_geq(self, items): return ">="
    def sym_pluseq(self, items): return "+="
    def sym_mineq(self, items): return "-="
    def sym_multeq(self, items): return "*="
    def sym_diveq(self, items): return "/="
    def sym_expeq(self, items): return "^="
    def sym_bitandeq(self, items): return "&="
    def sym_bitoreq(self, items): return "|="
    def sym_bitxoreq(self, items): return "><="
    def sym_bitlshifteq(self, items): return "<<="
    def sym_bitrshifteq(self, items): return ">>="
    def sym_logandeq(self, items): return "&&="
    def sym_logoreq(self, items): return "||="
    def sym_inverteq(self, items): return "=!="
    def sym_private(self, items): return "#"
    def sym_nullable(self, item): return "?"
    def sym_errorable(self, item): return "!"
def parse(code):
    """Parse a code string and return the transformed AST.

    The text is parsed with the module-level ``parser``; the resulting tree
    is then converted by a fresh ``ASTTransformer`` instance.
    """
    parse_tree = parser.parse(code)
    transformer = ASTTransformer()
    return transformer.transform(parse_tree)
def get_parser():
    """Return the module-level ``parser`` object created when this module loads."""
    return parser