diff --git a/bin/parse b/bin/parse index 4b90e0ec7e5..10b9e868f6a 100755 --- a/bin/parse +++ b/bin/parse @@ -9,7 +9,10 @@ $:.unshift(File.expand_path("../lib", __dir__)) require "yarp" if ARGV[0] == "-e" - pp YARP.parse(ARGV[1]) + result = YARP.parse(ARGV[1]) else - pp YARP.parse_file(ARGV[0]) + result = YARP.parse_file(ARGV[0]) end + +result.mark_newlines if ENV['MARK_NEWLINES'] +pp result diff --git a/config.yml b/config.yml index 0997663b7f5..c2504130c0e 100644 --- a/config.yml +++ b/config.yml @@ -485,6 +485,7 @@ nodes: kind: EnsureNode - name: end_keyword_loc type: location? + newline: false comment: | Represents a begin statement. @@ -940,7 +941,6 @@ nodes: kind: StatementsNode - name: end_keyword_loc type: location? - newline: false comment: | Represents an `else` clause in a `case`, `if`, or `unless` statement. @@ -1179,7 +1179,7 @@ nodes: type: node? - name: end_keyword_loc type: location? - newline: false + newline: predicate comment: | Represents the use of the `if` keyword, either in the block form or the modifier form. @@ -1289,7 +1289,7 @@ nodes: type: location - name: flags type: uint32 - newline: false + newline: parts comment: | Represents a regular expression literal that contains interpolation. @@ -1303,7 +1303,7 @@ nodes: type: node[] - name: closing_loc type: location? - newline: false + newline: parts comment: | Represents a string literal that contains interpolation. @@ -1317,7 +1317,7 @@ nodes: type: node[] - name: closing_loc type: location? - newline: false + newline: parts comment: | Represents a symbol literal that contains interpolation. @@ -1331,7 +1331,7 @@ nodes: type: node[] - name: closing_loc type: location - newline: false + newline: parts comment: | Represents an xstring literal that contains interpolation. @@ -1701,7 +1701,6 @@ nodes: - name: statements type: node kind: StatementsNode - newline: false comment: The top level node of any parse tree. - name: RangeNode child_nodes: @@ -1785,6 +1784,7 @@ nodes: type: location - name: rescue_expression type: node + newline: expression comment: | Represents an expression modified with a rescue. @@ -1909,7 +1909,6 @@ nodes: child_nodes: - name: body type: node[] - newline: false comment: | Represents a set of statements contained within some scope. @@ -2019,7 +2018,7 @@ nodes: kind: ElseNode - name: end_keyword_loc type: location? - newline: false + newline: predicate comment: | Represents the use of the `unless` keyword, either in the block form or the modifier form. @@ -2037,6 +2036,7 @@ nodes: - name: statements type: node? kind: StatementsNode + newline: predicate comment: | Represents the use of the `until` keyword, either in the block form or the modifier form. @@ -2068,6 +2068,7 @@ nodes: - name: statements type: node? kind: StatementsNode + newline: predicate comment: | Represents the use of the `while` keyword, either in the block form or the modifier form. diff --git a/java/org/yarp/MarkNewlinesVisitor.java b/java/org/yarp/MarkNewlinesVisitor.java new file mode 100644 index 00000000000..d1d38ea96dd --- /dev/null +++ b/java/org/yarp/MarkNewlinesVisitor.java @@ -0,0 +1,62 @@ +package org.yarp; + +// Keep in sync with Ruby MarkNewlinesVisitor +final class MarkNewlinesVisitor extends AbstractNodeVisitor { + + private final Nodes.Source source; + private boolean[] newlineMarked; + + MarkNewlinesVisitor(Nodes.Source source, boolean[] newlineMarked) { + this.source = source; + this.newlineMarked = newlineMarked; + } + + @Override + public Void visitBlockNode(Nodes.BlockNode node) { + boolean[] oldNewlineMarked = this.newlineMarked; + this.newlineMarked = new boolean[oldNewlineMarked.length]; + try { + return super.visitBlockNode(node); + } finally { + this.newlineMarked = oldNewlineMarked; + } + } + + @Override + public Void visitLambdaNode(Nodes.LambdaNode node) { + boolean[] oldNewlineMarked = this.newlineMarked; + this.newlineMarked = new boolean[oldNewlineMarked.length]; + try { + return super.visitLambdaNode(node); + } finally { + this.newlineMarked = oldNewlineMarked; + } + } + + @Override + public Void visitIfNode(Nodes.IfNode node) { + node.setNewLineFlag(this.source, this.newlineMarked); + return super.visitIfNode(node); + } + + @Override + public Void visitUnlessNode(Nodes.UnlessNode node) { + node.setNewLineFlag(this.source, this.newlineMarked); + return super.visitUnlessNode(node); + } + + @Override + public Void visitStatementsNode(Nodes.StatementsNode node) { + for (Nodes.Node child : node.body) { + child.setNewLineFlag(this.source, this.newlineMarked); + } + return super.visitStatementsNode(node); + } + + @Override + protected Void defaultVisit(Nodes.Node node) { + node.visitChildNodes(this); + return null; + } + +} diff --git a/lib/yarp.rb b/lib/yarp.rb index 8730ef6f0f3..c1c9dd70f8d 100644 --- a/lib/yarp.rb +++ b/lib/yarp.rb @@ -141,6 +141,27 @@ def deconstruct_keys(keys) end end + # A class that knows how to walk down the tree. None of the individual visit + # methods are implemented on this visitor, so it forces the consumer to + # implement each one that they need. For a default implementation that + # continues walking the tree, see the Visitor class. + class BasicVisitor + def visit(node) + node&.accept(self) + end + + def visit_all(nodes) + nodes.map { |node| visit(node) } + end + + def visit_child_nodes(node) + visit_all(node.child_nodes) + end + end + + class Visitor < BasicVisitor + end + # This represents the result of a call to ::parse or ::parse_file. It contains # the AST, any comments that were encounters, and any errors that were # encountered. @@ -166,6 +187,45 @@ def success? def failure? !success? end + + # Keep in sync with Java MarkNewlinesVisitor + class MarkNewlinesVisitor < YARP::Visitor + def initialize(newline_marked) + @newline_marked = newline_marked + end + + def visit_block_node(node) + old_newline_marked = @newline_marked + @newline_marked = Array.new(old_newline_marked.size, false) + begin + super(node) + ensure + @newline_marked = old_newline_marked + end + end + alias_method :visit_lambda_node, :visit_block_node + + def visit_if_node(node) + node.set_newline_flag(@newline_marked) + super(node) + end + alias_method :visit_unless_node, :visit_if_node + + def visit_statements_node(node) + node.body.each do |child| + child.set_newline_flag(@newline_marked) + end + super(node) + end + end + private_constant :MarkNewlinesVisitor + + def mark_newlines + newline_marked = Array.new(1 + @source.offsets.size, false) + visitor = MarkNewlinesVisitor.new(newline_marked) + value.accept(visitor) + value + end end # This represents a token from the Ruby source. @@ -207,10 +267,23 @@ def ==(other) class Node attr_reader :location + def newline? + @newline ? true : false + end + + def set_newline_flag(newline_marked) + line = location.start_line + unless newline_marked[line] + newline_marked[line] = true + @newline = true + end + end + def pretty_print(q) q.group do q.text(self.class.name.split("::").last) location.pretty_print(q) + q.text("[Li:#{location.start_line}]") if newline? q.text("(") q.nest(2) do deconstructed = deconstruct_keys([]) diff --git a/tasks/check_manifest.rake b/tasks/check_manifest.rake index b0d3c19f833..fe3fbe89553 100644 --- a/tasks/check_manifest.rake +++ b/tasks/check_manifest.rake @@ -11,6 +11,7 @@ task :check_manifest => [:templates, "configure"] do bin build fuzz + java pkg tasks templates @@ -34,10 +35,6 @@ task :check_manifest => [:templates, "configure"] do config.status configure.ac include/yarp/config.h - java/org/yarp/AbstractNodeVisitor.java - java/org/yarp/Loader.java - java/org/yarp/Nodes.java - java/org/yarp/Parser.java lib/yarp.{jar,so,bundle} ] diff --git a/templates/java/org/yarp/Loader.java.erb b/templates/java/org/yarp/Loader.java.erb index 66fc9419602..e5270635dc1 100644 --- a/templates/java/org/yarp/Loader.java.erb +++ b/templates/java/org/yarp/Loader.java.erb @@ -43,12 +43,10 @@ public class Loader { private final ByteBuffer buffer; private ConstantPool constantPool; private final Nodes.Source source; - private final boolean[] newlineMarked; private Loader(byte[] serialized, Nodes.Source source) { this.buffer = ByteBuffer.wrap(serialized).order(ByteOrder.nativeOrder()); this.source = source; - this.newlineMarked = new boolean[1 + source.getLineCount()]; } private Nodes.Node load() { @@ -78,7 +76,9 @@ public class Loader { throw new Error("Expected to consume all bytes while deserializing but there were " + left + " bytes left"); } - node.setNewLineFlag(this.source, newlineMarked); + boolean[] newlineMarked = new boolean[1 + source.getLineCount()]; + MarkNewlinesVisitor visitor = new MarkNewlinesVisitor(source, newlineMarked); + node.accept(visitor); return node; } diff --git a/templates/java/org/yarp/Nodes.java.erb b/templates/java/org/yarp/Nodes.java.erb index 0625a1a77ef..37196af09a7 100644 --- a/templates/java/org/yarp/Nodes.java.erb +++ b/templates/java/org/yarp/Nodes.java.erb @@ -73,10 +73,6 @@ public abstract class Nodes { this.length = length; } - public int length() { - return length; - } - public int endOffset() { return startOffset + length; } @@ -123,15 +119,11 @@ public abstract class Nodes { this.length = length; } - public int length() { - return length; - } - - public int endOffset() { + public final int endOffset() { return startOffset + length; } - public boolean hasNewLineFlag() { + public final boolean hasNewLineFlag() { return newLineFlag; } @@ -145,6 +137,8 @@ public abstract class Nodes { public abstract T accept(AbstractNodeVisitor visitor); + public abstract void visitChildNodes(AbstractNodeVisitor visitor); + public abstract Node[] childNodes(); @Override @@ -194,43 +188,59 @@ public abstract class Nodes { <%- end -%> } - <%- unless node.newline -%> + <%- if node.newline == false -%> + @Override + public void setNewLineFlag(Source source, boolean[] newlineMarked) { + // Never mark <%= node.name %> with a newline flag, mark children instead + } + <%- elsif node.newline.is_a?(String) -%> + @Override public void setNewLineFlag(Source source, boolean[] newlineMarked) { + <%- param = node.params.find { |p| p.name == node.newline } or raise node.newline -%> + <%- case param -%> + <%- in SingleNodeParam -%> + this.<%= param.name %>.setNewLineFlag(source, newlineMarked); + <%- in NodeListParam -%> + Node first = this.<%= param.name %>.length > 0 ? this.<%= param.name %>[0] : null; + if (first != null) { + first.setNewLineFlag(source, newlineMarked); + } + <%- end -%> + } + <%- end -%> + + public void visitChildNodes(AbstractNodeVisitor visitor) { <%- node.params.each do |param| -%> <%- case param -%> <%- when NodeListParam -%> - for (Nodes.Node node : this.<%= param.name %>) { - node.setNewLineFlag(source, newlineMarked); + for (Nodes.Node child : this.<%= param.name %>) { + child.accept(visitor); } <%- when NodeParam -%> - this.<%= param.name %>.setNewLineFlag(source, newlineMarked); + this.<%= param.name %>.accept(visitor); <%- when OptionalNodeParam -%> if (this.<%= param.name %> != null) { - this.<%= param.name %>.setNewLineFlag(source, newlineMarked); + this.<%= param.name %>.accept(visitor); } - <%- when -> _ { !param.java_type.include?("Node") } -%> - <%- else -%> - <%- raise "Param type #{param.class} not handled in #{__FILE__ }" -%> <%- end -%> <%- end -%> } - <%- end -%> public Node[] childNodes() { <%- if node.params.none?(NodeListParam) and node.params.none?(SingleNodeParam) -%> return EMPTY_ARRAY; <%- elsif node.params.one?(NodeListParam) and node.params.none?(SingleNodeParam) -%> - return <%= node.params.grep(NodeListParam).first.name %>; + return this.<%= node.params.grep(NodeListParam).first.name %>; <%- elsif node.params.none?(NodeListParam) -%> - return new Node[] { <%= node.params.grep(SingleNodeParam).map(&:name).join(', ') %> }; + return new Node[] { <%= node.params.grep(SingleNodeParam).map { "this.#{_1.name}" }.join(', ') %> }; <%- else -%> ArrayList childNodes = new ArrayList<>(); <%- node.params.each do |param| -%> <%- case param -%> <%- when SingleNodeParam -%> - childNodes.add(<%= param.name %>); + childNodes.add(this.<%= param.name %>); <%- when NodeListParam -%> - childNodes.addAll(Arrays.asList(<%= param.name %>)); + childNodes.addAll(Arrays.asList(this.<%= param.name %>)); <%- end -%> <%- end -%> return childNodes.toArray(EMPTY_ARRAY); diff --git a/templates/lib/yarp/node.rb.erb b/templates/lib/yarp/node.rb.erb index a0d05bba63d..6b8c025be9c 100644 --- a/templates/lib/yarp/node.rb.erb +++ b/templates/lib/yarp/node.rb.erb @@ -19,6 +19,23 @@ module YARP visitor.visit_<%= node.human %>(self) end + <%- if node.newline == false -%> + def set_newline_flag(newline_marked) + # Never mark <%= node.name %> with a newline flag, mark children instead + end + <%- elsif node.newline.is_a?(String) -%> + def set_newline_flag(newline_marked) + <%- param = node.params.find { |p| p.name == node.newline } or raise node.newline -%> + <%- case param -%> + <%- in SingleNodeParam -%> + <%= param.name %>.set_newline_flag(newline_marked) + <%- in NodeListParam -%> + first = <%= param.name %>.first + first.set_newline_flag(newline_marked) if first + <%- end -%> + end + <%- end -%> + # def child_nodes: () -> Array[nil | Node] def child_nodes [<%= node.params.filter_map { |param| @@ -69,24 +86,6 @@ module YARP end <%- end -%> - # A class that knows how to walk down the tree. None of the individual visit - # methods are implemented on this visitor, so it forces the consumer to - # implement each one that they need. For a default implementation that - # continues walking the tree, see the Visitor class. - class BasicVisitor - def visit(node) - node&.accept(self) - end - - def visit_all(nodes) - nodes.map { |node| visit(node) } - end - - def visit_child_nodes(node) - visit_all(node.child_nodes) - end - end - class Visitor < BasicVisitor <%- nodes.each do |node| -%> # Visit a <%= node.name %> node diff --git a/test/parse_test.rb b/test/parse_test.rb index d0cdfe2c267..792cdbd6f68 100644 --- a/test/parse_test.rb +++ b/test/parse_test.rb @@ -2,6 +2,9 @@ require "yarp_test_helper" +# It is useful to have a diff even if the strings to compare are big +Test::Unit::Assertions::AssertionMessage.max_diff_target_string_size = 5000 + class ParseTest < Test::Unit::TestCase # When we pretty-print the trees to compare against the snapshots, we want to # be certain that we print with the same external encoding. This is because @@ -57,6 +60,71 @@ def test_parse_takes_file_path assert_equal filepath, find_source_file_node(parsed_result.value).filepath end + root = File.dirname(__dir__) + # We need valid Ruby files for this test and no "void expression" + # as that would count as a line in YARP but not with RubyVM::InstructionSequence + Dir["{lib,test}/**/*.rb", base: root].each do |relative| + filepath = File.join(root, relative) + + define_method "test_newline_flags_#{relative}" do + # puts relative + + source = File.read(filepath, binmode: true, external_encoding: Encoding::UTF_8) + verbose, $VERBOSE = $VERBOSE, nil + begin + insns = RubyVM::InstructionSequence.compile(source) + ensure + $VERBOSE = verbose + end + + queue = [insns] + cruby_lines = [] + while iseq = queue.shift + iseq.trace_points.each do |line, event| + cruby_lines << line if event == :line + end + iseq.each_child do |insn| + queue << insn unless insn.label.start_with?('ensure in ') + end + end + cruby_lines.sort! + + result = YARP.parse(source, relative) + assert_empty result.errors + + result.mark_newlines + ast = result.value + yarp_lines = [] + visitor = Class.new(YARP::Visitor) do + define_method(:visit) do |node| + if node and node.newline? + yarp_lines << result.source.line(node.location.start_offset) + end + super(node) + end + end + ast.accept(visitor.new) + + if relative == 'lib/yarp/serialize.rb' + # while (b = io.getbyte) >= 128 has 2 newline flags + cruby_lines.delete_at yarp_lines.index(62) + elsif relative == 'lib/yarp/lex_compat.rb' + # extra flag for: dedent_next =\n ((token.event: due to bytecode order + yarp_lines.delete(514) + # different line for: token =\n case event: due to bytecode order + yarp_lines.delete(571) + cruby_lines.delete(572) + # extra flag for: lex_state =\n if RIPPER: due to bytecode order + yarp_lines.delete(604) + # extra flag for: (token[2].start_with?("\#$") || token[2].start_with?("\#@")) + # unclear when ParenthesesNode should allow a second flag on the same line or not + yarp_lines.delete(731) + end + + assert_equal cruby_lines, yarp_lines + end + end + base = File.join(__dir__, "fixtures") Dir["**/*.txt", base: base].each do |relative| next if known_failures.include?(relative)