-
Notifications
You must be signed in to change notification settings - Fork 0
/
parser.py
434 lines (365 loc) · 18.8 KB
/
parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
# SPDX-FileCopyrightText: © 2021 Jochem van Kranenburg <jochem.vankranenburg@outlook.com>
# PDX-License-Identifier: AGPL-3.0 License
from typing import Callable, Tuple, List, TypeVar
from parse.error import increment
from parse.nodes import *
from lex.token import *
A = TypeVar('A')
B = TypeVar('B')
C = TypeVar('C')
# iterateDecorator :: Callable -> Callable
def iterateDecorator(f: Callable[[int, List[Token], Token, A], Tuple[int, B]]) -> Callable[[int, List[Token], Token, A], Tuple[int, B]]:
""" Iterate decorator
Decorator to use for incrementally calling functions until a separater token is reached.
Parameters:
f (Callable): The function to execute at every index.
Returns:
Callable: The decorator to use on a function.
"""
def iterate(tokenIndex: int, tokenList: List[Token], separateToken: Token, *args: Tuple) -> Tuple[int, B]:
if type(tokenList[tokenIndex]) != separateToken:
return tokenIndex, args[0]
tokenIndex = increment(tokenIndex, tokenList)
tokenIndex, args = f(tokenIndex, tokenList, separateToken, *args)
return iterate(tokenIndex, tokenList, separateToken, args)
return iterate
# factor :: [Token] -> Integer -> Tuple
def factor(tokenList : List[Token], tokenIndex : int) -> Tuple[int, Union[VariableNode, NumberNode]]:
""" Parse factor
This function parses a factor; either a variable or a literal integer or float.
Parameters:
tokenList (List): The list with tokens to parse.
tokenIndex (int): The current index at which we're parsing the tokenList.
Returns:
int: The incremented token index.
Node: A variable- or numbernode.
"""
currentToken = tokenList[tokenIndex]
if type(currentToken) == VariableToken:
tokenIndex = increment(tokenIndex, tokenList)
return tokenIndex, VariableNode(currentToken)
elif type(currentToken) in (IntegerToken, FloatToken):
tokenIndex = increment(tokenIndex, tokenList)
return tokenIndex, NumberNode(currentToken)
# term :: [Token] -> Integer -> Tuple
def term(tokenList : List[Token], tokenIndex : int) -> Tuple[int, Union[VariableNode, NumberNode]]:
""" Parse term
This function parses the result of a term. That's either a variable or a number.
Parameters:
tokenList (List): The list with tokens to parse.
tokenIndex (int): The current index at which we're parsing the tokenList.
Returns:
int: The incremented token index.
Node: The result of a term; a variable- or numbernode.
"""
return binaryOperator(tokenList, factor, (MultiplyToken, DivideToken), tokenIndex)
# comparison :: [Token] -> Integer -> Tuple
def comparison(tokenList : List[Token], tokenIndex : int) -> Tuple[int, Union[VariableNode, NumberNode, OperatorNode]]:
""" Parse comparisong
This function returns (a part of) an arithmetic expression. This can either be a
variable, number or operator node.
Parameters:
tokenList (List): The list with tokens to parse.
tokenIndex (int): The current index at which we're parsing the tokenList.
Returns:
int: The incremented token index.
Node: (A part of) an arithmetic expression; a variable, number or nodes with operator (if partial).
"""
return binaryOperator(tokenList, arithmeticExpression,
(EqualityToken, NonEqualityToken, LessToken, GreaterToken, LessEqualToken, GreaterEqualToken), tokenIndex)
# arithmeticExpression :: [Token] -> Integer -> Tuple
def arithmeticExpression(tokenList : List[Token], tokenIndex : int) -> Tuple[int, Union[VariableNode, NumberNode, OperatorNode]]:
""" Parse arithmetic expression
This function parses an arithmetic expression and returns the result of this expression. This
can either be a variable, number or operator node.
Parameters:
tokenList (List): The list with tokens to parse.
tokenIndex (int): The current index at which we're parsing the tokenList.
Returns:
int: The incremented token index.
Node: (A part of) the result of an arithmetic expression; a variable, number or nodes with operator (if partial).
"""
return binaryOperator(tokenList, term, (AddToken, SubstractToken), tokenIndex)
# expression :: [Token] -> Integer -> Tuple
def expression(tokenList : List[Token], tokenIndex : int) -> Tuple[int, Node]:
""" Parse an expression
This function parses an expression in the broadest term possible. Can be either of a variable,
if-statement, while-loop, for-loop, function definition, function call or an operator.
Parameters:
tokenList (List): The list with tokens to parse.
tokenIndex (int): The current index at which we're parsing the tokenList.
Returns:
int: The incremented token index.
Node: A (partial) result of an expression.
"""
currentToken = tokenList[tokenIndex]
if type(currentToken) == VariableKeywordToken:
tokenIndex, node = variable(tokenList, tokenIndex)
elif type(currentToken) == IfToken:
tokenIndex, node = ifExpr(tokenList, tokenIndex)
elif type(currentToken) == WhileToken:
tokenIndex, node = whileExpr(tokenList, tokenIndex)
elif type(currentToken) == FunctionToken:
tokenIndex, node = functionDef(tokenList, tokenIndex)
elif type(currentToken) == ExecuteToken:
tokenIndex, node = functionCall(tokenList, tokenIndex)
elif type(currentToken) == ForToken:
tokenIndex, node = forExpr(tokenList, tokenIndex)
else:
tokenIndex, node = binaryOperator(tokenList, comparison, (AndToken, OrToken), tokenIndex)
return tokenIndex, node
# ifExpr :: [Token] -> Integer -> Tuple
def ifExpr(tokenList : List[Token], tokenIndex : int) -> Tuple[int, IfNode]:
""" Parse if-statement
This function parses an if-statement and (when provided) also the else-expression.
Parameters:
tokenList (List): The list with tokens to parse.
tokenIndex (int): The current index at which we're parsing the tokenList.
Returns:
int: The incremented token index.
IfNode: An IfNode with the condition, expression and optional else expression.
"""
tokenIndex = increment(tokenIndex, tokenList, IfToken)
tokenIndex, condition = expression(tokenList, tokenIndex)
tokenIndex = increment(tokenIndex, tokenList, ThenToken)
if type(tokenList[tokenIndex]) == NewlineToken:
tokenIndex = increment(tokenIndex, tokenList)
tokenIndex, expr = statements(tokenList, tokenIndex)
else:
tokenIndex, expr = expression(tokenList, tokenIndex)
if type(tokenList[tokenIndex]) == ElseToken:
tokenIndex = increment(tokenIndex, tokenList)
if type(tokenList[tokenIndex]) == NewlineToken:
tokenIndex = increment(tokenIndex, tokenList)
tokenIndex, elseExpr = statements(tokenList, tokenIndex)
else:
tokenIndex, elseExpr = expression(tokenList, tokenIndex)
tokenIndex = increment(tokenIndex, tokenList, FunctionEndToken)
return tokenIndex, IfNode(condition, expr, elseExpr)
tokenIndex = increment(tokenIndex, tokenList, FunctionEndToken)
return tokenIndex, IfNode(condition, expr)
# whileExpr :: [Token] -> Integer -> Tuple
def whileExpr(tokenList : List[Token], tokenIndex : int) -> Tuple[int, WhileNode]:
""" Parse while-loop
This function parses a while-loop with corresponding condition.
Parameters:
tokenList (List): The list with tokens to parse.
tokenIndex (int): The current index at which we're parsing the tokenList.
Returns:
int: The incremented token index.
WhileNode: A WhileNode with the expression to execute as long as the condition is met.
"""
tokenIndex = increment(tokenIndex, tokenList, WhileToken)
tokenIndex, condition = expression(tokenList, tokenIndex)
tokenIndex = increment(tokenIndex, tokenList, ThenToken)
if type(tokenList[tokenIndex]) == NewlineToken:
tokenIndex = increment(tokenIndex, tokenList)
tokenIndex, _statements = statements(tokenList, tokenIndex)
tokenIndex = increment(tokenIndex, tokenList, FunctionEndToken)
return tokenIndex, WhileNode(condition, _statements)
else:
tokenIndex, expr = expression(tokenList, tokenIndex)
tokenIndex = increment(tokenIndex, tokenList, FunctionEndToken)
return tokenIndex, WhileNode(condition, expr)
# forExpr :: [Token] -> Integer -> Tuple
def forExpr(tokenList : List[Token], tokenIndex : int) -> Tuple[int, ForNode]:
""" Parse for-loop
This function parses a for-loop with corresponding intial, end and optional
step-value.
Parameters:
tokenList (List): The list with tokens to parse.
tokenIndex (int): The current index at which we're parsing the tokenList.
Returns:
int: The incremented token index.
ForNode: A ForNode with the expression to execute as long as the condition is met.
"""
tokenIndex = increment(tokenIndex, tokenList, ForToken)
tokenIndex, startNode = variable(tokenList, tokenIndex)
tokenIndex = increment(tokenIndex, tokenList, ToToken)
tokenIndex, endValue = expression(tokenList, tokenIndex)
if type(tokenList[tokenIndex]) == StepToken:
tokenIndex = increment(tokenIndex, tokenList)
tokenIndex, stepValue = expression(tokenList, tokenIndex)
else:
stepValue = None
tokenIndex = increment(tokenIndex, tokenList, ThenToken)
if type(tokenList[tokenIndex]) == NewlineToken:
tokenIndex = increment(tokenIndex, tokenList)
tokenIndex, _statements = statements(tokenList, tokenIndex)
tokenIndex = increment(tokenIndex, tokenList, FunctionEndToken)
return tokenIndex, ForNode(startNode.var_name, startNode, endValue, stepValue, _statements)
else:
tokenIndex, expr = expression(tokenList, tokenIndex)
tokenIndex = increment(tokenIndex, tokenList, FunctionEndToken)
return tokenIndex, ForNode(startNode.var_name, startNode, endValue, stepValue, expr)
# variable :: [Token] -> Integer -> Tuple
def variable(tokenList : List[Token], tokenIndex : int) -> Tuple[int, VariableNode]:
""" Parse variables
This function parses variables and their assignments.
Parameters:
tokenList (List): The list with tokens to parse.
tokenIndex (int): The current index at which we're parsing the tokenList.
Returns:
int: The incremented token index.
VariableNode: a VariableNode with the name and value or expression for determining value. """
tokenIndex = increment(tokenIndex, tokenList)
increment(tokenIndex, tokenList, VariableToken)
variableName = tokenList[tokenIndex].stringToParse
tokenIndex = increment(tokenIndex, tokenList)
tokenIndex = increment(tokenIndex, tokenList, AssignmentToken)
tokenIndex, expr = expression(tokenList, tokenIndex)
return tokenIndex, VariableNode(variableName, expr)
# functionCall :: [Token] -> Integer -> Tuple
def functionCall(tokenList : List[Token], tokenIndex : int) -> Tuple[int, CallNode]:
""" Parse functioncall
This function parses a functioncall with corresponding arguments and
their values.
Parameters:
tokenList (List): The list with tokens to parse.
tokenIndex (int): The current index at which we're parsing the tokenList.
Returns:
int: The incremented token index.
CallNode: A CallNode with the given name and arguments to execute it with.
"""
@iterateDecorator
# parseArguments :: Integer -> [Token] -> Token -> List -> Tuple
def parseArguments(tokenIndex: int, tokenList: List[Token], separateToken: Token, arguments: List[Node] = []) -> Tuple[int, List[Node]]:
tokenIndex, intExpr = expression(tokenList, tokenIndex)
arguments.append(intExpr)
return tokenIndex, arguments
tokenIndex = increment(tokenIndex, tokenList, ExecuteToken)
tokenIndex, name = expression(tokenList, tokenIndex)
if type(tokenList[tokenIndex]) == FunctionParameterToken:
tokenIndex = increment(tokenIndex, tokenList)
if type(tokenList[tokenIndex]) == NowToken: # Function call without parameters.
tokenIndex = increment(tokenIndex, tokenList)
else: # Funcation call with parameters.
arguments = []
tokenIndex, intExpr = expression(tokenList, tokenIndex)
arguments.append(intExpr)
tokenIndex, arguments = parseArguments(tokenIndex, tokenList, CommaToken, arguments)
tokenIndex = increment(tokenIndex, tokenList, NowToken)
return tokenIndex, CallNode(name, arguments)
else:
if type(tokenList[tokenIndex]) == NowToken:
tokenIndex = increment(tokenIndex, tokenList)
return tokenIndex, CallNode(name, None)
# functionDef :: [Token] -> Integer -> Tuple
def functionDef(tokenList : List[Token], tokenIndex : int) -> Tuple[int, FunctionNode]:
""" Parse function definition
This function parses a function definition including their corresponding
argumentnames and codeSequences.
Parameters:
tokenList (List): The list with tokens to parse.
tokenIndex (int): The current index at which we're parsing the tokenList.
Returns:
int: The incremented token index.
FunctionNode: A FunctionNode with name, argument names and sequence to execute on execution.
"""
@iterateDecorator
def parseArguments(tokenIndex: int, tokenList: List[Token], separateToken: Token, arguments: List[Node] = []) -> Tuple[int, List[Node]]:
arguments.append(tokenList[tokenIndex].stringToParse)
tokenIndex = increment(tokenIndex, tokenList)
return tokenIndex, arguments
tokenIndex = increment(tokenIndex, tokenList, FunctionToken)
functionName = tokenList[tokenIndex].stringToParse
tokenIndex = increment(tokenIndex, tokenList)
arguments = []
if type(tokenList[tokenIndex]) == FunctionParameterToken:
tokenIndex = increment(tokenIndex, tokenList)
if type(tokenList[tokenIndex]) == VariableToken:
arguments.append(tokenList[tokenIndex].stringToParse)
tokenIndex = increment(tokenIndex, tokenList)
tokenIndex, arguments = parseArguments(tokenIndex, tokenList, AndToken, arguments)
tokenIndex = increment(tokenIndex, tokenList, FunctionStartToken)
if type(tokenList[tokenIndex]) == NewlineToken:
tokenIndex = increment(tokenIndex, tokenList)
tokenIndex, expr = statements(tokenList, tokenIndex)
else:
tokenIndex, expr = expression(tokenList, tokenIndex)
tokenIndex = increment(tokenIndex, tokenList, FunctionEndToken)
return tokenIndex, FunctionNode(functionName, arguments, expr)
# statements :: [Token] -> Integer -> [Node] -> Tuple
def statements(tokenList : List[Token], tokenIndex : int, statings: List[Node] = None) -> Tuple[int, Node]:
""" Parse statements
This function might be described as the heart of the parser for multi-line statements.
Hence, it's especially useful for interpreting files; it splits the entire file into
multiple multi-line statements. Each of those statements is executed; one by one.
So actually, the parser for files is the same as the parser used for the shell; the file
contains multiple entries that can also be typed in the shell one by one. It keeps parsing
the file until there are no more lines to be skipped.
Parameters:
tokenList (List): The list with tokens to parse.
tokenIndex (int): The current index at which we're parsing the tokenList.
Returns:
int: The incremented token index.
Node: The List(Root)Node with all statements to execute.
"""
@iterateDecorator
def skipLines(tokenIndex: int, tokenList: List[Token], separateToken: Token, skippedLines: int) -> Tuple[int, int]:
skippedLines += 1
return tokenIndex, skippedLines
skippedLines = 0
tokenIndex, skippedLines = skipLines(tokenIndex, tokenList, NewlineToken, skippedLines)
if statings == None:
statings = []
elif not skippedLines:
return tokenIndex, ListNode(statings)
tokenIndex, state = statement(tokenList, tokenIndex)
statings.append(state)
return statements(tokenList, tokenIndex, statings)
# statement :: [Token] -> Integer -> Tuple
def statement(tokenList: List[Token], tokenIndex: int) -> Tuple[int, Node]:
""" Parse statement
Gets called by statements() to get each individual statement.
Parameters:
tokenList (List): The list with tokens to parse.
tokenIndex (int): The current index at which we're parsing the tokenList.
Returns:
int: The incremented token index.
Node: The parsed statement (expression).
"""
if type(tokenList[tokenIndex]) == ReturnToken:
tokenIndex = increment(tokenIndex, tokenList)
tokenIndex, expr = expression(tokenList, tokenIndex)
return tokenIndex, ReturnNode(expr)
else:
tokenIndex, expr = expression(tokenList, tokenIndex)
return tokenIndex, expr
# binaryOperator :: [Token] -> Callable -> [Token] -> Integer -> Tuple
def binaryOperator(tokenList : List[Token], f : Callable[[A, B], C], operations : List[Token], tokenIndex : int) -> Tuple[int, Union[VariableNode, NumberNode, OperatorNode]]:
""" Parse binary operator
Creates OperatorNode with operator and left and right node to execute it on.
Parameters:
tokenList (List): The list with tokens to parse.
f (Callable): The function to execute for getting left and right node on which operator should be executed.
operations (List): The list with operators which this function is 'allowed' to be.
tokenIndex (int): The current index at which we're parsing the tokenList.
Returns:
int: The incremented token index.
Node: The operator-, left- or right node.
"""
try:
tokenIndex, left = f(tokenList, tokenIndex)
except TypeError:
print("Error in expression at line " + str(tokenList[tokenIndex].lineNumber) + "!")
print("Tyring to continue as good as possible... (will most likely fail)")
return tokenIndex, NumberNode(Token(0, tokenList[tokenIndex].lineNumber))
def traverse(tokenIndex : int, left : Node):
if type(tokenList[tokenIndex]) not in operations:
return tokenIndex, left
operatorToken = tokenList[tokenIndex]
tokenIndex = increment(tokenIndex, tokenList)
tokenIndex, right = f(tokenList, tokenIndex)
left = OperatorNode(left, operatorToken, right)
return traverse(tokenIndex, left)
return traverse(tokenIndex, left)
def parse(tokens: List[Token]) -> Node:
""" Parse tokenlist
Parse the passed tokenlist.
Parameters:
tokens (List): The list with tokens to parse.
Returns:
Node: The Root node of the AST.
"""
return statements(tokens, 0)[1]