From e5f6ffa23a2113fad8610ad04d3970d640d08ccf Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 2 Aug 2023 09:49:26 -0400 Subject: [PATCH] Simplify creation of numerics --- config.yml | 16 ++- include/yarp/parser.h | 11 +- lib/yarp/lex_compat.rb | 10 +- src/yarp.c | 285 +++++++++++++++++++++++------------------ test/newline_test.rb | 10 +- 5 files changed, 185 insertions(+), 147 deletions(-) diff --git a/config.yml b/config.yml index c2504130c0e..141e3d54579 100644 --- a/config.yml +++ b/config.yml @@ -88,6 +88,12 @@ tokens: comment: "=~" - name: FLOAT comment: "a floating point number" + - name: FLOAT_IMAGINARY + comment: "a floating point number with an imaginary suffix" + - name: FLOAT_RATIONAL + comment: "a floating point number with a rational suffix" + - name: FLOAT_RATIONAL_IMAGINARY + comment: "a floating point number with a rational and imaginary suffix" - name: GLOBAL_VARIABLE comment: "a global variable" - name: GREATER @@ -106,12 +112,16 @@ tokens: comment: "an identifier" - name: IGNORED_NEWLINE comment: "an ignored newline" - - name: IMAGINARY_NUMBER - comment: "an imaginary number literal" - name: INSTANCE_VARIABLE comment: "an instance variable" - name: INTEGER comment: "an integer (any base)" + - name: INTEGER_IMAGINARY + comment: "an integer with an imaginary suffix" + - name: INTEGER_RATIONAL + comment: "an integer with a rational suffix" + - name: INTEGER_RATIONAL_IMAGINARY + comment: "an integer with a rational and imaginary suffix" - name: KEYWORD_ALIAS comment: "alias" - name: KEYWORD_AND @@ -266,8 +276,6 @@ tokens: comment: "+=" - name: QUESTION_MARK comment: "?" 
- - name: RATIONAL_NUMBER - comment: "a rational number literal" - name: REGEXP_BEGIN comment: "the beginning of a regular expression" - name: REGEXP_END diff --git a/include/yarp/parser.h b/include/yarp/parser.h index a23f8860849..2091be7fd92 100644 --- a/include/yarp/parser.h +++ b/include/yarp/parser.h @@ -96,10 +96,7 @@ typedef struct yp_lex_mode { // This state is used when we are lexing a string or a string-like // token, as in string content with either quote or an xstring. - YP_LEX_STRING, - - // you lexed a number with extra information attached - YP_LEX_NUMERIC, + YP_LEX_STRING } mode; union { @@ -163,12 +160,6 @@ typedef struct yp_lex_mode { char breakpoints[6]; } string; - struct { - yp_token_type_t type; - const char *start; - const char *end; - } numeric; - struct { // These pointers point to the beginning and end of the heredoc // identifier. diff --git a/lib/yarp/lex_compat.rb b/lib/yarp/lex_compat.rb index e15f26ec954..984aa8185eb 100644 --- a/lib/yarp/lex_compat.rb +++ b/lib/yarp/lex_compat.rb @@ -55,6 +55,9 @@ class LexCompat EQUAL_GREATER: :on_op, EQUAL_TILDE: :on_op, FLOAT: :on_float, + FLOAT_IMAGINARY: :on_imaginary, + FLOAT_RATIONAL: :on_rational, + FLOAT_RATIONAL_IMAGINARY: :on_imaginary, GREATER: :on_op, GREATER_EQUAL: :on_op, GREATER_GREATER: :on_op, @@ -64,8 +67,10 @@ class LexCompat HEREDOC_START: :on_heredoc_beg, IDENTIFIER: :on_ident, IGNORED_NEWLINE: :on_ignored_nl, - IMAGINARY_NUMBER: :on_imaginary, INTEGER: :on_int, + INTEGER_IMAGINARY: :on_imaginary, + INTEGER_RATIONAL: :on_rational, + INTEGER_RATIONAL_IMAGINARY: :on_imaginary, INSTANCE_VARIABLE: :on_ivar, INVALID: :INVALID, KEYWORD___ENCODING__: :on_kw, @@ -145,7 +150,8 @@ class LexCompat PLUS: :on_op, PLUS_EQUAL: :on_op, QUESTION_MARK: :on_op, - RATIONAL_NUMBER: :on_rational, + RATIONAL_FLOAT: :on_rational, + RATIONAL_INTEGER: :on_rational, REGEXP_BEGIN: :on_regexp_beg, REGEXP_END: :on_regexp_end, SEMICOLON: :on_semicolon, diff --git a/src/yarp.c b/src/yarp.c index 
1f6bea2eebc..09c39cb424b 100644 --- a/src/yarp.c +++ b/src/yarp.c @@ -111,7 +111,6 @@ debug_lex_mode(yp_parser_t *parser) { case YP_LEX_LIST: fprintf(stderr, "LIST (terminator=%c, interpolation=%d)", lex_mode->as.list.terminator, lex_mode->as.list.interpolation); break; case YP_LEX_REGEXP: fprintf(stderr, "REGEXP (terminator=%c)", lex_mode->as.regexp.terminator); break; case YP_LEX_STRING: fprintf(stderr, "STRING (terminator=%c, interpolation=%d)", lex_mode->as.string.terminator, lex_mode->as.string.interpolation); break; - case YP_LEX_NUMERIC: fprintf(stderr, "NUMERIC (token_type=%s)", yp_token_type_to_str(lex_mode->as.numeric.type)); break; } lex_mode = lex_mode->prev; @@ -1916,6 +1915,69 @@ yp_float_node_create(yp_parser_t *parser, const yp_token_t *token) { return node; } +// Allocate and initialize a new FloatNode node from a FLOAT_IMAGINARY token. +static yp_imaginary_node_t * +yp_float_node_imaginary_create(yp_parser_t *parser, const yp_token_t *token) { + assert(token->type == YP_TOKEN_FLOAT_IMAGINARY); + + yp_imaginary_node_t *node = YP_ALLOC_NODE(parser, yp_imaginary_node_t); + *node = (yp_imaginary_node_t) { + { + .type = YP_NODE_IMAGINARY_NODE, + .location = YP_LOCATION_TOKEN_VALUE(token) + }, + .numeric = (yp_node_t *) yp_float_node_create(parser, &((yp_token_t) { + .type = YP_TOKEN_FLOAT, + .start = token->start, + .end = token->end - 1 + })) + }; + + return node; +} + +// Allocate and initialize a new FloatNode node from a FLOAT_RATIONAL token. 
+static yp_rational_node_t * +yp_float_node_rational_create(yp_parser_t *parser, const yp_token_t *token) { + assert(token->type == YP_TOKEN_FLOAT_RATIONAL); + + yp_rational_node_t *node = YP_ALLOC_NODE(parser, yp_rational_node_t); + *node = (yp_rational_node_t) { + { + .type = YP_NODE_RATIONAL_NODE, + .location = YP_LOCATION_TOKEN_VALUE(token) + }, + .numeric = (yp_node_t *) yp_float_node_create(parser, &((yp_token_t) { + .type = YP_TOKEN_FLOAT, + .start = token->start, + .end = token->end - 1 + })) + }; + + return node; +} + +// Allocate and initialize a new FloatNode node from a FLOAT_RATIONAL_IMAGINARY token. +static yp_imaginary_node_t * +yp_float_node_rational_imaginary_create(yp_parser_t *parser, const yp_token_t *token) { + assert(token->type == YP_TOKEN_FLOAT_RATIONAL_IMAGINARY); + + yp_imaginary_node_t *node = YP_ALLOC_NODE(parser, yp_imaginary_node_t); + *node = (yp_imaginary_node_t) { + { + .type = YP_NODE_IMAGINARY_NODE, + .location = YP_LOCATION_TOKEN_VALUE(token) + }, + .numeric = (yp_node_t *) yp_float_node_rational_create(parser, &((yp_token_t) { + .type = YP_TOKEN_FLOAT_RATIONAL, + .start = token->start, + .end = token->end - 1 + })) + }; + + return node; +} + // Allocate and initialize a new ForNode node. static yp_for_node_t * yp_for_node_create( @@ -2297,90 +2359,66 @@ yp_integer_node_create(yp_parser_t *parser, const yp_token_t *token) { return node; } -// Allocate and initialize a new RationalNode node. +// Allocate and initialize a new IntegerNode node from an INTEGER_IMAGINARY token. 
+static yp_imaginary_node_t * +yp_integer_node_imaginary_create(yp_parser_t *parser, const yp_token_t *token) { + assert(token->type == YP_TOKEN_INTEGER_IMAGINARY); + + yp_imaginary_node_t *node = YP_ALLOC_NODE(parser, yp_imaginary_node_t); + *node = (yp_imaginary_node_t) { + { + .type = YP_NODE_IMAGINARY_NODE, + .location = YP_LOCATION_TOKEN_VALUE(token) + }, + .numeric = (yp_node_t *) yp_integer_node_create(parser, &((yp_token_t) { + .type = YP_TOKEN_INTEGER, + .start = token->start, + .end = token->end - 1 + })) + }; + + return node; +} + +// Allocate and initialize a new IntegerNode node from an INTEGER_RATIONAL token. static yp_rational_node_t * -yp_rational_node_create(yp_parser_t *parser, const yp_token_t *token) { - assert(token->type == YP_TOKEN_RATIONAL_NUMBER); - assert(parser->lex_modes.current->mode == YP_LEX_NUMERIC); - - yp_node_t *numeric_node; - yp_token_t numeric_token = { - .type = parser->lex_modes.current->as.numeric.type, - .start = parser->lex_modes.current->as.numeric.start, - .end = parser->lex_modes.current->as.numeric.end - }; - switch (parser->lex_modes.current->as.numeric.type) { - case YP_TOKEN_INTEGER: { - lex_mode_pop(parser); - numeric_node = (yp_node_t *)yp_integer_node_create(parser, &numeric_token); - break; - } - case YP_TOKEN_FLOAT: { - lex_mode_pop(parser); - numeric_node = (yp_node_t *)yp_float_node_create(parser, &numeric_token); - break; - } - default: { - lex_mode_pop(parser); - numeric_node = (yp_node_t *)yp_missing_node_create(parser, numeric_token.start, numeric_token.end); - (void)numeric_node; // Suppress clang-analyzer-deadcode.DeadStores warning - assert(false && "unreachable"); - } - } +yp_integer_node_rational_create(yp_parser_t *parser, const yp_token_t *token) { + assert(token->type == YP_TOKEN_INTEGER_RATIONAL); yp_rational_node_t *node = YP_ALLOC_NODE(parser, yp_rational_node_t); - *node = (yp_rational_node_t) { - { .type = YP_NODE_RATIONAL_NODE, .location = YP_LOCATION_TOKEN_VALUE(token) }, - .numeric = 
numeric_node, + { + .type = YP_NODE_RATIONAL_NODE, + .location = YP_LOCATION_TOKEN_VALUE(token) + }, + .numeric = (yp_node_t *) yp_integer_node_create(parser, &((yp_token_t) { + .type = YP_TOKEN_INTEGER, + .start = token->start, + .end = token->end - 1 + })) }; - assert(parser->lex_modes.current->mode != YP_LEX_NUMERIC); + return node; } -// Allocate and initialize a new ImaginaryNode node. +// Allocate and initialize a new IntegerNode node from an INTEGER_RATIONAL_IMAGINARY token. static yp_imaginary_node_t * -yp_imaginary_node_create(yp_parser_t *parser, const yp_token_t *token) { - assert(token->type == YP_TOKEN_IMAGINARY_NUMBER); - assert(parser->lex_modes.current->mode == YP_LEX_NUMERIC); - - yp_node_t *numeric_node; - yp_token_t numeric_token = { - .type = parser->lex_modes.current->as.numeric.type, - .start = parser->lex_modes.current->as.numeric.start, - .end = parser->lex_modes.current->as.numeric.end - }; - switch (parser->lex_modes.current->as.numeric.type) { - case YP_TOKEN_INTEGER: { - lex_mode_pop(parser); - numeric_node = (yp_node_t *)yp_integer_node_create(parser, &numeric_token); - break; - } - case YP_TOKEN_FLOAT: { - lex_mode_pop(parser); - numeric_node = (yp_node_t *)yp_float_node_create(parser, &numeric_token); - break; - } - case YP_TOKEN_RATIONAL_NUMBER: { - lex_mode_pop(parser); - numeric_node = (yp_node_t *)yp_rational_node_create(parser, &numeric_token); - break; - } - default: { - lex_mode_pop(parser); - numeric_node = (yp_node_t *)yp_missing_node_create(parser, numeric_token.start, numeric_token.end); - (void)numeric_node; // Suppress clang-analyzer-deadcode.DeadStores warning - assert(false && "unreachable"); - } - } +yp_integer_node_rational_imaginary_create(yp_parser_t *parser, const yp_token_t *token) { + assert(token->type == YP_TOKEN_INTEGER_RATIONAL_IMAGINARY); yp_imaginary_node_t *node = YP_ALLOC_NODE(parser, yp_imaginary_node_t); - *node = (yp_imaginary_node_t) { - { .type = YP_NODE_IMAGINARY_NODE, .location = 
YP_LOCATION_TOKEN_VALUE(token) }, - .numeric = numeric_node + { + .type = YP_NODE_IMAGINARY_NODE, + .location = YP_LOCATION_TOKEN_VALUE(token) + }, + .numeric = (yp_node_t *) yp_integer_node_rational_create(parser, &((yp_token_t) { + .type = YP_TOKEN_INTEGER_RATIONAL, + .start = token->start, + .end = token->end - 1 + })) }; - assert(parser->lex_modes.current->mode != YP_LEX_NUMERIC); + return node; } @@ -4802,37 +4840,6 @@ lex_numeric_prefix(yp_parser_t *parser) { return type; } -static yp_token_type_t -lex_finalize_numeric_type(yp_parser_t *parser, yp_token_type_t numeric_type, const char *numeric_end, const char *rational_end, const char *imaginary_end) { - if (rational_end || imaginary_end) { - lex_mode_push(parser, (yp_lex_mode_t) { - .mode = YP_LEX_NUMERIC, - .as.numeric.type = numeric_type, - .as.numeric.start = parser->current.start, - .as.numeric.end = numeric_end - }); - } - - if (rational_end && imaginary_end) { - lex_mode_push(parser, (yp_lex_mode_t) { - .mode = YP_LEX_NUMERIC, - .as.numeric.type = YP_TOKEN_RATIONAL_NUMBER, - .as.numeric.start = parser->current.start, - .as.numeric.end = rational_end - }); - } - - if (imaginary_end) { - return YP_TOKEN_IMAGINARY_NUMBER; - } - - if (rational_end) { - return YP_TOKEN_RATIONAL_NUMBER; - } - - return numeric_type; -} - static yp_token_type_t lex_numeric(yp_parser_t *parser) { yp_token_type_t type = YP_TOKEN_INTEGER; @@ -4841,22 +4848,35 @@ lex_numeric(yp_parser_t *parser) { type = lex_numeric_prefix(parser); const char *end = parser->current.end; - const char *rational_end = NULL; - const char *imaginary_end = NULL; + yp_token_type_t suffix_type = type; - if (match(parser, 'r')) { - rational_end = parser->current.end; - } + if (type == YP_TOKEN_INTEGER) { + if (match(parser, 'r')) { + suffix_type = YP_TOKEN_INTEGER_RATIONAL; - if (match(parser, 'i')) { - imaginary_end = parser->current.end; + if (match(parser, 'i')) { + suffix_type = YP_TOKEN_INTEGER_RATIONAL_IMAGINARY; + } + } else if (match(parser, 'i')) 
{ + suffix_type = YP_TOKEN_INTEGER_IMAGINARY; + } + } else { + if (match(parser, 'r')) { + suffix_type = YP_TOKEN_FLOAT_RATIONAL; + + if (match(parser, 'i')) { + suffix_type = YP_TOKEN_FLOAT_RATIONAL_IMAGINARY; + } + } else if (match(parser, 'i')) { + suffix_type = YP_TOKEN_FLOAT_IMAGINARY; + } } const unsigned char uc = (const unsigned char) peek(parser); if (uc != '\0' && (uc >= 0x80 || ((uc >= 'a' && uc <= 'z') || (uc >= 'A' && uc <= 'Z')) || uc == '_')) { parser->current.end = end; } else { - type = lex_finalize_numeric_type(parser, type, end, rational_end, imaginary_end); + type = suffix_type; } } @@ -5476,7 +5496,6 @@ parser_lex(yp_parser_t *parser) { case YP_LEX_DEFAULT: case YP_LEX_EMBEXPR: case YP_LEX_EMBVAR: - case YP_LEX_NUMERIC: // We have a specific named label here because we are going to jump back to // this location in the event that we have lexed a token that should not be @@ -8965,13 +8984,15 @@ parse_conditional(yp_parser_t *parser, yp_context_t context) { // This macro allows you to define a case statement for all of the token types // that represent the beginning of nodes that are "primitives" in a pattern // matching expression. 
-#define YP_CASE_PRIMITIVE YP_TOKEN_INTEGER: case YP_TOKEN_FLOAT: case YP_TOKEN_RATIONAL_NUMBER: \ - case YP_TOKEN_IMAGINARY_NUMBER: case YP_TOKEN_SYMBOL_BEGIN: case YP_TOKEN_REGEXP_BEGIN: case YP_TOKEN_BACKTICK: \ - case YP_TOKEN_PERCENT_LOWER_X: case YP_TOKEN_PERCENT_LOWER_I: case YP_TOKEN_PERCENT_LOWER_W: \ - case YP_TOKEN_PERCENT_UPPER_I: case YP_TOKEN_PERCENT_UPPER_W: case YP_TOKEN_STRING_BEGIN: case YP_TOKEN_KEYWORD_NIL: \ - case YP_TOKEN_KEYWORD_SELF: case YP_TOKEN_KEYWORD_TRUE: case YP_TOKEN_KEYWORD_FALSE: case YP_TOKEN_KEYWORD___FILE__: \ - case YP_TOKEN_KEYWORD___LINE__: case YP_TOKEN_KEYWORD___ENCODING__: case YP_TOKEN_MINUS_GREATER: \ - case YP_TOKEN_HEREDOC_START: case YP_TOKEN_UMINUS_NUM: case YP_TOKEN_CHARACTER_LITERAL +#define YP_CASE_PRIMITIVE YP_TOKEN_INTEGER: case YP_TOKEN_INTEGER_IMAGINARY: case YP_TOKEN_INTEGER_RATIONAL: \ + case YP_TOKEN_INTEGER_RATIONAL_IMAGINARY: case YP_TOKEN_FLOAT: case YP_TOKEN_FLOAT_IMAGINARY: \ + case YP_TOKEN_FLOAT_RATIONAL: case YP_TOKEN_FLOAT_RATIONAL_IMAGINARY: case YP_TOKEN_SYMBOL_BEGIN: \ + case YP_TOKEN_REGEXP_BEGIN: case YP_TOKEN_BACKTICK: case YP_TOKEN_PERCENT_LOWER_X: case YP_TOKEN_PERCENT_LOWER_I: \ + case YP_TOKEN_PERCENT_LOWER_W: case YP_TOKEN_PERCENT_UPPER_I: case YP_TOKEN_PERCENT_UPPER_W: \ + case YP_TOKEN_STRING_BEGIN: case YP_TOKEN_KEYWORD_NIL: case YP_TOKEN_KEYWORD_SELF: case YP_TOKEN_KEYWORD_TRUE: \ + case YP_TOKEN_KEYWORD_FALSE: case YP_TOKEN_KEYWORD___FILE__: case YP_TOKEN_KEYWORD___LINE__: \ + case YP_TOKEN_KEYWORD___ENCODING__: case YP_TOKEN_MINUS_GREATER: case YP_TOKEN_HEREDOC_START: \ + case YP_TOKEN_UMINUS_NUM: case YP_TOKEN_CHARACTER_LITERAL // This macro allows you to define a case statement for all of the token types // that could begin a parameter. 
@@ -10345,7 +10366,16 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) { } case YP_TOKEN_FLOAT: parser_lex(parser); - return (yp_node_t *)yp_float_node_create(parser, &parser->previous); + return (yp_node_t *) yp_float_node_create(parser, &parser->previous); + case YP_TOKEN_FLOAT_IMAGINARY: + parser_lex(parser); + return (yp_node_t *) yp_float_node_imaginary_create(parser, &parser->previous); + case YP_TOKEN_FLOAT_RATIONAL: + parser_lex(parser); + return (yp_node_t *) yp_float_node_rational_create(parser, &parser->previous); + case YP_TOKEN_FLOAT_RATIONAL_IMAGINARY: + parser_lex(parser); + return (yp_node_t *) yp_float_node_rational_imaginary_create(parser, &parser->previous); case YP_TOKEN_NUMBERED_REFERENCE: { parser_lex(parser); yp_node_t *node = (yp_node_t *) yp_numbered_reference_read_node_create(parser, &parser->previous); @@ -10482,9 +10512,6 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) { return node; } } - case YP_TOKEN_IMAGINARY_NUMBER: - parser_lex(parser); - return (yp_node_t *) yp_imaginary_node_create(parser, &parser->previous); case YP_TOKEN_INSTANCE_VARIABLE: { parser_lex(parser); yp_node_t *node = (yp_node_t *) yp_instance_variable_read_node_create(parser, &parser->previous); @@ -10498,6 +10525,15 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) { case YP_TOKEN_INTEGER: parser_lex(parser); return (yp_node_t *) yp_integer_node_create(parser, &parser->previous); + case YP_TOKEN_INTEGER_IMAGINARY: + parser_lex(parser); + return (yp_node_t *) yp_integer_node_imaginary_create(parser, &parser->previous); + case YP_TOKEN_INTEGER_RATIONAL: + parser_lex(parser); + return (yp_node_t *) yp_integer_node_rational_create(parser, &parser->previous); + case YP_TOKEN_INTEGER_RATIONAL_IMAGINARY: + parser_lex(parser); + return (yp_node_t *) yp_integer_node_rational_imaginary_create(parser, &parser->previous); case YP_TOKEN_KEYWORD___ENCODING__: parser_lex(parser); 
return (yp_node_t *) yp_source_encoding_node_create(parser, &parser->previous); @@ -11658,9 +11694,6 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) { return (yp_node_t *) array; } - case YP_TOKEN_RATIONAL_NUMBER: - parser_lex(parser); - return (yp_node_t *) yp_rational_node_create(parser, &parser->previous); case YP_TOKEN_REGEXP_BEGIN: { yp_token_t opening = parser->current; parser_lex(parser); diff --git a/test/newline_test.rb b/test/newline_test.rb index 29c3b7f017a..5a81f52637e 100644 --- a/test/newline_test.rb +++ b/test/newline_test.rb @@ -38,15 +38,15 @@ def visit(node) expected.delete_at actual.index(62) elsif relative == "lib/yarp/lex_compat.rb" # extra flag for: dedent_next =\n ((token.event: due to bytecode order - actual.delete(514) + actual.delete(520) # different line for: token =\n case event: due to bytecode order - actual.delete(571) - expected.delete(572) + actual.delete(577) + expected.delete(578) # extra flag for: lex_state =\n if RIPPER: due to bytecode order - actual.delete(604) + actual.delete(610) # extra flag for: (token[2].start_with?("\#$") || token[2].start_with?("\#@")) # unclear when ParenthesesNode should allow a second flag on the same line or not - actual.delete(731) + actual.delete(737) end assert_equal expected, actual