From eb090d81268ad28a9b05ad4434a55b757bbd9760 Mon Sep 17 00:00:00 2001 From: Aaron Patterson Date: Tue, 18 Jul 2023 14:37:26 -0700 Subject: [PATCH] Fix heredocs inside %W and %w lists The problem was that we were treating heredoc bodies as part of the %W list because we didn't push the scanning cursor past the heredoc body after lexing the heredoc. To fix this, we changed the whitespace scanning function to stop scanning when it reaches a newline, but only in the case that a heredoc is present. Additionally, we need to prevent double-counting newlines in the case of a heredoc. For example: ```ruby %W(< --- include/yarp/util/yp_char.h | 2 +- src/util/yp_char.c | 9 ++++-- src/util/yp_newline_list.c | 6 ++-- src/yarp.c | 14 +++++++++- test/parse_test.rb | 1 - .../seattlerb/pct_w_heredoc_interp_nested.txt | 28 +++++++++++++++++++ 6 files changed, 53 insertions(+), 7 deletions(-) create mode 100644 test/snapshots/seattlerb/pct_w_heredoc_interp_nested.txt diff --git a/include/yarp/util/yp_char.h b/include/yarp/util/yp_char.h index 85e5ce4c656..dcc011f0a12 100644 --- a/include/yarp/util/yp_char.h +++ b/include/yarp/util/yp_char.h @@ -15,7 +15,7 @@ size_t yp_strspn_whitespace(const char *string, ptrdiff_t length); // whitespace while also tracking the location of each newline. Disallows // searching past the given maximum number of characters. size_t -yp_strspn_whitespace_newlines(const char *string, long length, yp_newline_list_t *newline_list); +yp_strspn_whitespace_newlines(const char *string, long length, yp_newline_list_t *newline_list, bool); // Returns the number of characters at the start of the string that are inline // whitespace. Disallows searching past the given maximum number of characters. 
diff --git a/src/util/yp_char.c b/src/util/yp_char.c index 9befcb51053..1c0c20edd92 100644 --- a/src/util/yp_char.c +++ b/src/util/yp_char.c @@ -75,7 +75,7 @@ yp_strspn_whitespace(const char *string, ptrdiff_t length) { // whitespace while also tracking the location of each newline. Disallows // searching past the given maximum number of characters. size_t -yp_strspn_whitespace_newlines(const char *string, long length, yp_newline_list_t *newline_list) { +yp_strspn_whitespace_newlines(const char *string, long length, yp_newline_list_t *newline_list, bool stop_at_newline) { if (length <= 0) return 0; size_t size = 0; @@ -83,7 +83,12 @@ yp_strspn_whitespace_newlines(const char *string, long length, yp_newline_list_t while (size < maximum && (yp_char_table[(unsigned char) string[size]] & YP_CHAR_BIT_WHITESPACE)) { if (string[size] == '\n') { - yp_newline_list_append(newline_list, string + size); + if (stop_at_newline) { + return size + 1; + } + else { + yp_newline_list_append(newline_list, string + size); + } } size++; diff --git a/src/util/yp_newline_list.c b/src/util/yp_newline_list.c index c619e83c92e..8b24f82a024 100644 --- a/src/util/yp_newline_list.c +++ b/src/util/yp_newline_list.c @@ -25,13 +25,15 @@ yp_newline_list_init(yp_newline_list_t *list, const char *start, size_t capacity bool yp_newline_list_append(yp_newline_list_t *list, const char *cursor) { if (list->size == list->capacity) { - list->capacity = list->capacity * 3 / 2; + list->capacity = (list->capacity * 3) / 2; list->offsets = (size_t *) realloc(list->offsets, list->capacity * sizeof(size_t)); if (list->offsets == NULL) return false; } assert(cursor >= list->start); - list->offsets[list->size++] = (size_t) (cursor - list->start + 1); + size_t newline_offset = (size_t) (cursor - list->start + 1); + assert(list->size == 0 || newline_offset > list->offsets[list->size - 1]); + list->offsets[list->size++] = newline_offset; return true; } diff --git a/src/yarp.c b/src/yarp.c index 
c80aa5499a0..2ae3b3ea28f 100644 --- a/src/yarp.c +++ b/src/yarp.c @@ -6505,14 +6505,26 @@ parser_lex(yp_parser_t *parser) { } } case YP_LEX_LIST: + if (parser->next_start != NULL) { + parser->current.end = parser->next_start; + parser->next_start = NULL; + } + // First we'll set the beginning of the token. parser->current.start = parser->current.end; // If there's any whitespace at the start of the list, then we're // going to trim it off the beginning and create a new token. size_t whitespace; - if ((whitespace = yp_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list)) > 0) { + + bool should_stop = parser->heredoc_end; + + if ((whitespace = yp_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list, should_stop)) > 0) { parser->current.end += whitespace; + if (parser->current.end[-1] == '\n') { + // mutates next_start + parser_flush_heredoc_end(parser); + } LEX(YP_TOKEN_WORDS_SEP); } diff --git a/test/parse_test.rb b/test/parse_test.rb index 3eff7d447f2..b6020b82dbc 100644 --- a/test/parse_test.rb +++ b/test/parse_test.rb @@ -28,7 +28,6 @@ def test_empty_string known_failures = %w[ seattlerb/heredoc_nested.txt - seattlerb/pct_w_heredoc_interp_nested.txt ] def find_source_file_node(node) diff --git a/test/snapshots/seattlerb/pct_w_heredoc_interp_nested.txt b/test/snapshots/seattlerb/pct_w_heredoc_interp_nested.txt new file mode 100644 index 00000000000..89ce74ce196 --- /dev/null +++ b/test/snapshots/seattlerb/pct_w_heredoc_interp_nested.txt @@ -0,0 +1,28 @@ +ProgramNode(0...30)( + [], + StatementsNode(0...30)( + [ArrayNode(0...30)( + [StringNode(4...5)(nil, (4...5), nil, "1"), + InterpolatedStringNode(0...12)( + nil, + [EmbeddedStatementsNode(6...12)( + (6...8), + StatementsNode(8...19)( + [InterpolatedStringNode(8...19)( + (8...11), + [StringNode(15...17)(nil, (15...17), nil, "2\n")], + (17...19) + )] + ), + (11...12) + )], + nil + ), + StringNode(13...14)(nil, 
(13...14), nil, "3"), + StringNode(25...26)(nil, (25...26), nil, "4"), + StringNode(27...28)(nil, (27...28), nil, "5")], + (0...3), + (29...30) + )] + ) +)