Skip to content

Commit

Permalink
Merge pull request #1293 from flavorjones/flavorjones-newline-termina…
Browse files Browse the repository at this point in the history
…ted-things

fix: support for newline-terminated `%q`, `%Q`, `%r`
  • Loading branch information
kddnewton authored Aug 21, 2023
2 parents 613c6ff + 1e49408 commit 4f5a6f8
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 19 deletions.
3 changes: 3 additions & 0 deletions include/yarp/util/yp_newline_list.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ bool yp_newline_list_init(yp_newline_list_t *list, const char *start, size_t cap
// the offsets succeeds (if one was necessary), otherwise returns false.
bool yp_newline_list_append(yp_newline_list_t *list, const char *cursor);

// Append a new offset to the newline list only if the character that cursor
// points to is a newline. Returns true without touching the list when it is
// not a newline; otherwise returns the result of yp_newline_list_append.
bool yp_newline_list_check_append(yp_newline_list_t *list, const char *cursor);

// Returns the line and column of the given offset. If the offset is not in the
// list, the line and column of the closest offset less than the given offset
// are returned.
Expand Down
9 changes: 9 additions & 0 deletions src/util/yp_newline_list.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,15 @@ yp_newline_list_append(yp_newline_list_t *list, const char *cursor) {
return true;
}

// Record the offset at cursor in the newline list, but only when the byte that
// cursor points to is a newline. Any other byte leaves the list alone and is
// reported as success; for a newline the result of yp_newline_list_append is
// returned as-is.
bool
yp_newline_list_check_append(yp_newline_list_t *list, const char *cursor) {
    // cursor is dereferenced unconditionally, so it must point at a readable byte.
    const bool at_newline = (*cursor == '\n');

    return at_newline ? yp_newline_list_append(list, cursor) : true;
}

// Returns the line and column of the given offset, assuming we don't have any
// information about the previous index that we found.
static yp_line_column_t
Expand Down
35 changes: 16 additions & 19 deletions src/yarp.c
Original file line number Diff line number Diff line change
Expand Up @@ -6183,9 +6183,7 @@ parser_lex(yp_parser_t *parser) {
parser->current.end++;
}

if (*parser->current.end == '\n') {
yp_newline_list_append(&parser->newline_list, parser->current.end);
}
yp_newline_list_check_append(&parser->newline_list, parser->current.end);

parser->current.end++;
LEX(YP_TOKEN_STRING_BEGIN);
Expand Down Expand Up @@ -6215,6 +6213,7 @@ parser_lex(yp_parser_t *parser) {

if (parser->current.end < parser->end) {
lex_mode_push_regexp(parser, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
yp_newline_list_check_append(&parser->newline_list, parser->current.end);
parser->current.end++;
}

Expand All @@ -6225,6 +6224,7 @@ parser_lex(yp_parser_t *parser) {

if (parser->current.end < parser->end) {
lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
yp_newline_list_check_append(&parser->newline_list, parser->current.end);
parser->current.end++;
}

Expand All @@ -6235,6 +6235,7 @@ parser_lex(yp_parser_t *parser) {

if (parser->current.end < parser->end) {
lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
yp_newline_list_check_append(&parser->newline_list, parser->current.end);
parser->current.end++;
}

Expand Down Expand Up @@ -6462,9 +6463,7 @@ parser_lex(yp_parser_t *parser) {

// If the result is an escaped newline, then we need to
// track that newline.
if (breakpoint[difference - 1] == '\n') {
yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
}
yp_newline_list_check_append(&parser->newline_list, breakpoint + difference - 1);

breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
continue;
Expand Down Expand Up @@ -6526,7 +6525,13 @@ parser_lex(yp_parser_t *parser) {
// If we've hit a newline, then we need to track that in the
// list of newlines.
if (*breakpoint == '\n') {
yp_newline_list_append(&parser->newline_list, breakpoint);
// For the special case of a newline-terminated regular expression, we will pass
// through this branch twice -- once with YP_TOKEN_REGEXP_BEGIN and then again
// with YP_TOKEN_STRING_CONTENT. Let's avoid tracking the newline twice, by
// tracking it only in the REGEXP_BEGIN case.
if (!(lex_mode->as.regexp.terminator == '\n' && parser->current.type != YP_TOKEN_REGEXP_BEGIN)) {
yp_newline_list_append(&parser->newline_list, breakpoint);
}

if (lex_mode->as.regexp.terminator != '\n') {
// If the terminator is not a newline, then we can set
Expand Down Expand Up @@ -6571,9 +6576,7 @@ parser_lex(yp_parser_t *parser) {

// If the result is an escaped newline, then we need to
// track that newline.
if (breakpoint[difference - 1] == '\n') {
yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
}
yp_newline_list_check_append(&parser->newline_list, breakpoint + difference - 1);

breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
continue;
Expand Down Expand Up @@ -6664,9 +6667,7 @@ parser_lex(yp_parser_t *parser) {
parser->current.end = breakpoint + 2;
yp_newline_list_append(&parser->newline_list, breakpoint + 1);
} else {
if (*parser->current.end == '\n') {
yp_newline_list_append(&parser->newline_list, parser->current.end);
}
yp_newline_list_check_append(&parser->newline_list, parser->current.end);

parser->current.end = breakpoint + 1;
}
Expand Down Expand Up @@ -6716,9 +6717,7 @@ parser_lex(yp_parser_t *parser) {

// If the result is an escaped newline, then we need to
// track that newline.
if (breakpoint[difference - 1] == '\n') {
yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
}
yp_newline_list_check_append(&parser->newline_list, breakpoint + difference - 1);

breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
break;
Expand Down Expand Up @@ -6889,9 +6888,7 @@ parser_lex(yp_parser_t *parser) {
yp_unescape_type_t unescape_type = (quote == YP_HEREDOC_QUOTE_SINGLE) ? YP_UNESCAPE_MINIMAL : YP_UNESCAPE_ALL;
size_t difference = yp_unescape_calculate_difference(breakpoint, parser->end, unescape_type, false, &parser->error_list);

if (breakpoint[difference - 1] == '\n') {
yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
}
yp_newline_list_check_append(&parser->newline_list, breakpoint + difference - 1);

breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
}
Expand Down
13 changes: 13 additions & 0 deletions test/fixtures/newline_terminated.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# note that %i, %I, %w, and %W do not support newline termination in CRuby

%
foo

%q
foo

%Q
foo

%r
foo
2 changes: 2 additions & 0 deletions test/parse_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,8 @@ def test_parse_takes_file_path
end

Dir["*.txt", base: base].each do |relative|
next if relative == "newline_terminated.txt"

# We test every snippet (separated by \n\n) in isolation
# to ensure the parser does not try to read bytes further than the end of each snippet
define_method "test_individual_snippets_#{relative}" do
Expand Down
15 changes: 15 additions & 0 deletions test/snapshots/newline_terminated.txt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 4f5a6f8

Please sign in to comment.