Skip to content

Commit

Permalink
Remove string concat in favor of a flat list
Browse files Browse the repository at this point in the history
Right now when you have a lot of string concats it ends up being
difficult to work with because of the depth of the tree. You end
up descending very far for every string literal that is part of the
concat.

There are already times when we use an interpolated string node to
group together two string segments that are part of the same string
(like when they are interrupted by the contents of a heredoc). This
commit takes the same approach and replaces string concats with
interpolated string nodes.

Now that they're a flat list, they should be much easier to work
with. There's still some missing information here that would be
useful to consumers: whether or not there is _actually_ any
interpolation contained in the list. We could remedy this with
another node type that is named something like string list, or we
could add a flag to interpolated string node indicating that there
is interpolation. Either way I want to solve that in a follow-up
commit, since this commit is valuable on its own.
  • Loading branch information
kddnewton committed Nov 21, 2023
1 parent 060bcc8 commit 1e7ae3a
Show file tree
Hide file tree
Showing 11 changed files with 293 additions and 311 deletions.
11 changes: 0 additions & 11 deletions config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2370,17 +2370,6 @@ nodes:
foo; bar; baz
^^^^^^^^^^^^^
- name: StringConcatNode
fields:
- name: left
type: node
- name: right
type: node
comment: |
Represents the use of compile-time string concatenation.
"foo" "bar"
^^^^^^^^^^^
- name: StringNode
fields:
- name: flags
Expand Down
56 changes: 21 additions & 35 deletions src/prism.c
Original file line number Diff line number Diff line change
Expand Up @@ -5127,28 +5127,6 @@ pm_statements_node_body_append(pm_statements_node_t *node, pm_node_t *statement)
statement->flags |= PM_NODE_FLAG_NEWLINE;
}

/**
* Allocate a new StringConcatNode node.
*
* The new node stores `left` and `right` as its two children. Its location
* starts where `left` starts and ends where `right` ends, so it covers both
* operands and whatever source lies between them.
*
* NOTE(review): both `left` and `right` are dereferenced unconditionally, so
* callers presumably never pass NULL for either -- confirm at the call sites.
*/
static pm_string_concat_node_t *
pm_string_concat_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right) {
pm_string_concat_node_t *node = PM_ALLOC_NODE(parser, pm_string_concat_node_t);

*node = (pm_string_concat_node_t) {
{
.type = PM_STRING_CONCAT_NODE,
.location = {
.start = left->location.start,
.end = right->location.end
}
},
.left = left,
.right = right
};

return node;
}

/**
* Allocate a new StringNode node with the current string on the parser.
*/
Expand Down Expand Up @@ -13470,9 +13448,10 @@ parse_strings_empty_content(const uint8_t *location) {
* Parse a set of strings that could be concatenated together.
*/
static inline pm_node_t *
parse_strings(pm_parser_t *parser) {
parse_strings(pm_parser_t *parser, pm_node_t *current) {
assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
pm_node_t *result = NULL;

bool concating = false;
bool state_is_arg_labeled = lex_state_p(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);

while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
Expand Down Expand Up @@ -13608,7 +13587,7 @@ parse_strings(pm_parser_t *parser) {
}
}

if (result == NULL) {
if (current == NULL) {
// If the node we just parsed is a symbol node, then we can't
// concatenate it with anything else, so we can now return that
// node.
Expand All @@ -13618,7 +13597,7 @@ parse_strings(pm_parser_t *parser) {

// If we don't already have a node, then it's fine and we can just
// set the result to be the node we just parsed.
result = node;
current = node;
} else {
// Otherwise we need to check the type of the node we just parsed.
// If it cannot be concatenated with the previous node, then we'll
Expand All @@ -13627,13 +13606,22 @@ parse_strings(pm_parser_t *parser) {
pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
}

// Either way we will create a concat node to hold the strings
// together.
result = (pm_node_t *) pm_string_concat_node_create(parser, result, node);
// If we haven't already created our container for concatenation,
// we'll do that now.
if (!concating) {
concating = true;
pm_token_t bounds = not_provided(parser);

pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
pm_interpolated_string_node_append(container, current);
current = (pm_node_t *) container;
}

pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
}
}

return result;
return current;
}

/**
Expand Down Expand Up @@ -13894,8 +13882,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
// Characters can be followed by strings in which case they are
// automatically concatenated.
if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
pm_node_t *concat = parse_strings(parser);
return (pm_node_t *) pm_string_concat_node_create(parser, node, concat);
return parse_strings(parser, node);
}

return node;
Expand Down Expand Up @@ -14169,8 +14156,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
}

if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
pm_node_t *concat = parse_strings(parser);
return (pm_node_t *) pm_string_concat_node_create(parser, node, concat);
return parse_strings(parser, node);
}

return node;
Expand Down Expand Up @@ -15773,7 +15759,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
return (pm_node_t *) node;
}
case PM_TOKEN_STRING_BEGIN:
return parse_strings(parser);
return parse_strings(parser, NULL);
case PM_TOKEN_SYMBOL_BEGIN: {
pm_lex_mode_t lex_mode = *parser->lex_modes.current;
parser_lex(parser);
Expand Down
5 changes: 1 addition & 4 deletions test/prism/location_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,7 @@ def test_InterpolatedRegularExpressionNode
# Location checks for InterpolatedStringNode against three sources: a
# double-quoted string with an embedded instance variable, a squiggly heredoc
# containing #{} interpolation, and two adjacent string literals.
# NOTE(review): the 0...4 argument is presumably the expected source range for
# the heredoc case -- confirm against assert_location.
def test_InterpolatedStringNode
assert_location(InterpolatedStringNode, "\"foo \#@bar baz\"")
assert_location(InterpolatedStringNode, "<<~A\nhello \#{1} world\nA", 0...4)
assert_location(InterpolatedStringNode, '"foo" "bar"')
end

def test_InterpolatedSymbolNode
Expand Down Expand Up @@ -789,10 +790,6 @@ def test_StatementsNode
assert_location(StatementsNode, "\"\#{foo}\"", 3...6) { |node| node.parts.first.statements }
end

# Location check for StringConcatNode, using two adjacent string literals as
# the source.
def test_StringConcatNode
assert_location(StringConcatNode, '"foo" "bar"')
end

def test_StringNode
assert_location(StringNode, '"foo"')
assert_location(StringNode, '%q[foo]')
Expand Down
31 changes: 16 additions & 15 deletions test/prism/snapshots/dos_endings.txt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

59 changes: 30 additions & 29 deletions test/prism/snapshots/seattlerb/parse_line_evstr_after_break.txt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 16 additions & 15 deletions test/prism/snapshots/seattlerb/str_lit_concat_bad_encodings.txt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 16 additions & 15 deletions test/prism/snapshots/strings.txt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 16 additions & 15 deletions test/prism/snapshots/unparser/corpus/literal/literal.txt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 1e7ae3a

Please sign in to comment.