diff --git a/bin/console b/bin/console deleted file mode 100755 index b95ce8ee36b..00000000000 --- a/bin/console +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -$:.unshift(File.expand_path("../lib", __dir__)) -require "prism" - -require "irb" -IRB.start(__FILE__) diff --git a/bin/dot b/bin/dot deleted file mode 100755 index 548429166e0..00000000000 --- a/bin/dot +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -$:.unshift(File.expand_path("../lib", __dir__)) -require "prism" - -index = 0 -result = - if ARGV[index] == "-e" - Prism.parse(ARGV[index += 1]) - else - Prism.parse_file(ARGV[index] || "test.rb") - end - -node = result.value -path = - if ARGV[index += 1]&.match?(/\A[\w\.]+\z/) - ARGV[index].split(".") - else - %w[statements body first] - end - -path.each do |field| - node = node.public_send(field) -end - -File.write( - "out.svg", - IO.popen("dot -Tsvg", "w+") do |file| - file.write(node.to_dot) - file.close_write - file.read - end -) diff --git a/bin/encodings b/bin/encodings deleted file mode 100755 index 6a99f6d7d17..00000000000 --- a/bin/encodings +++ /dev/null @@ -1,117 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -def table(encoding) - puts "//#{(0...16).map { |value| value.to_s(16).upcase }.join(" ")}" - - (0...256).each_slice(16).with_index do |slice, row_index| - row = - slice.map do |codepoint| - character = codepoint.chr(encoding) - - value = 0 - value |= (1 << 0) if character.match?(/[[:alpha:]]/) - value |= (1 << 1) if character.match?(/[[:alnum:]]/) - value |= (1 << 2) if character.match?(/[[:upper:]]/) - - "%d," % value - end - - puts "#{row.join(" ")} // #{row_index.to_s(16).upcase}x" - end -end - -def lists(name, range, encoding) - range = range.map { begin; _1.chr(encoding); _1; rescue RangeError; nil; end }.compact - - { alpha: /[[:alpha:]]/, alnum: /[[:alnum:]]/, isupper: /[[:upper:]]/ }.map do |kind, regex| - codepoints = range.select { _1.chr(encoding).match?(regex) } - - previous = nil - groups = - codepoints.slice_before do |codepoint| - (!previous.nil? && (codepoint - previous) != 1).tap { previous = codepoint } - end - - matched = - groups.flat_map do |group| - ["0x#{group.first.to_s(16).upcase}", "0x#{group.last.to_s(16).upcase}"] - end - - puts "\n#define #{name.upcase}_#{kind.upcase}_CODEPOINTS_LENGTH #{matched.length}" - puts "#{name}_codepoint_t #{name}_#{kind}_codepoints[#{name.upcase}_#{kind.upcase}_CODEPOINTS_LENGTH] = {" - matched.each_slice(2) { |slice| puts " #{slice.join(", ")}," } - puts "};" - end -end - -case ARGV[0].downcase -when "ascii" then table(Encoding::ASCII_8BIT) -when "ascii-8bit" then table(Encoding::ASCII_8BIT) -when "big5" then lists("big5", 0...0x10000, Encoding::Big5) -when "cp850" then table(Encoding::CP850) -when "cp852" then table(Encoding::CP852) -when "cp855" then table(Encoding::CP855) -when "euc-jp" then lists("euc-jp", 0...0x10000, Encoding::EUC_JP) -when "gb1988" then table(Encoding::GB1988) -when "gbk" then lists("gbk", 0...0x10000, Encoding::GBK) -when "ibm437" then table(Encoding::IBM437) -when "ibm720" then table(Encoding::IBM720) -when "ibm737" then table(Encoding::IBM737) -when "ibm775" then table(Encoding::IBM775) -when "ibm852" then table(Encoding::IBM852) -when "ibm855" then table(Encoding::IBM855) -when "ibm857" then table(Encoding::IBM857) -when "ibm860" then table(Encoding::IBM860) -when "ibm863" then table(Encoding::IBM863) -when "ibm861" then table(Encoding::IBM861) -when "ibm862" then table(Encoding::IBM862) -when "ibm864" then table(Encoding::IBM864) -when "ibm865" then table(Encoding::IBM865) -when "ibm866" then table(Encoding::IBM866) -when "ibm869" then table(Encoding::IBM869) -when "iso-8859-1" then table(Encoding::ISO8859_1) -when "iso-8859-2" then table(Encoding::ISO8859_2) -when "iso-8859-3" then table(Encoding::ISO8859_3) -when "iso-8859-4" then table(Encoding::ISO8859_4) -when "iso-8859-5" then table(Encoding::ISO8859_5) -when "iso-8859-6" then table(Encoding::ISO8859_6) -when "iso-8859-7" then table(Encoding::ISO8859_7) -when "iso-8859-8" then table(Encoding::ISO8859_8) -when "iso-8859-9" then table(Encoding::ISO8859_9) -when "iso-8859-10" then table(Encoding::ISO8859_10) -when "iso-8859-11" then table(Encoding::ISO8859_11) -when "iso-8859-13" then table(Encoding::ISO8859_13) -when "iso-8859-14" then table(Encoding::ISO8859_14) -when "iso-8859-15" then table(Encoding::ISO8859_15) -when "iso-8859-16" then table(Encoding::ISO8859_16) -when "koi8-r" then table(Encoding::KOI8_R) -when "koi8-u" then table(Encoding::KOI8_U) -when "maccenteuro" then table(Encoding::MACCENTEURO) -when "maccroatian" then table(Encoding::MACCROATIAN) -when "maccyrillic" then table(Encoding::MACCYRILLIC) -when "macgreek" then table(Encoding::MACGREEK) -when "maciceland" then table(Encoding::MACICELAND) -when "macjapanese" then lists("mac_japanese", 0...0x10000, Encoding::MACJAPANESE) -when "macroman" then table(Encoding::MACROMAN) -when "macromania" then table(Encoding::MACROMANIA) -when "macthai" then table(Encoding::MACTHAI) -when "macturkish" then table(Encoding::MACTURKISH) -when "macukraine" then table(Encoding::MACUKRAINE) -when "shift_jis" then lists("shift_jis", 0...0x10000, Encoding::Shift_JIS) -when "tis-620" then table(Encoding::TIS_620) -when "utf8" then table(Encoding::UTF_8) and lists("utf8", 0x100...0x110000, Encoding::UTF_8) -when "utf8-mac" then table(Encoding::UTF8_MAC) and lists("utf8-mac", 0x100...0x110000, Encoding::UTF8_MAC) -when "windows-1250" then table(Encoding::Windows_1250) -when "windows-1251" then table(Encoding::Windows_1251) -when "windows-1252" then table(Encoding::Windows_1252) -when "windows-1253" then table(Encoding::Windows_1253) -when "windows-1254" then table(Encoding::Windows_1254) -when "windows-1255" then table(Encoding::Windows_1255) -when "windows-1256" then table(Encoding::Windows_1256) -when "windows-1257" then table(Encoding::Windows_1257) -when "windows-1258" then table(Encoding::Windows_1258) -when "windows-31j" then lists("windows-31j", 0...0x10000, Encoding::Windows_31J) -when "windows-874" then table(Encoding::Windows_874) -else raise "Unknown encoding `#{ARGV[0]}'" -end diff --git a/bin/insns b/bin/insns deleted file mode 100755 index 98b3f3f3905..00000000000 --- a/bin/insns +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/sh -exec ruby --dump=insns "$@" diff --git a/bin/lex b/bin/lex index 97fd998f096..9c5f4b33a6e 100755 --- a/bin/lex +++ b/bin/lex @@ -1,61 +1,3 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true +#!/bin/sh -# Usage: -# bin/lex # defaults to test.rb -# bin/lex -# bin/lex -e "" - -$:.unshift(File.expand_path("../lib", __dir__)) -require "ripper" -require "prism" - -if ARGV[0] == "-e" - source = ARGV[1] -else - filepath = ARGV.first || "test.rb" - source = File.read(filepath) -end - -pattern = "%-70s %-70s %-70s" - -ripper = - begin - Prism.lex_ripper(source) - rescue ArgumentError, SyntaxError - # If Ripper raises a syntax error, we want to continue as if it didn't - # return any tokens at all. prism won't raise a syntax error, so it's nicer - # to still be able to see the tokens that prism generated. - [] - end - -prism = Prism.lex_compat(source, filepath: filepath) -prism_new = Prism.lex(source, filepath: filepath) -if prism.errors.any? - puts "Errors lexing:" - prism.errors.map do |error| - print "- [#{error.location.start_line},#{error.location.start_column}-" - print "#{error.location.end_line},#{error.location.end_column}] " - puts "\e[1;31m#{error.message}\e[0m" - end - puts "\n" -end - -puts pattern % ["Ripper lex", "Prism compat lex", "Prism Lex"] -puts pattern % ["-" * 70, "-" * 70, "-" * 70] - -prism_value = prism.value -prism_new_value = prism_new.value - -[prism_value.length, ripper.length, prism_new_value.length].max.times do |index| - left = ripper[index] - right = prism_value[index] - new = prism_new_value[index] - - color = left == right ? "38;5;102" : "1;31" - - if ENV["VERBOSE"] || (left != right) - new_value = [new[0].type, [new[0].location.start_offset, new[0].location.length]] if new - puts "\033[#{color}m#{pattern}\033[0m" % [left.inspect, right.inspect, new_value.inspect] - end -end +exec bin/prism lex "$@" diff --git a/bin/locals b/bin/locals deleted file mode 100755 index ff85cb31ba8..00000000000 --- a/bin/locals +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -$:.unshift(File.expand_path("../lib", __dir__)) -require "prism" - -source = ARGV[0] == "-e" ? ARGV[1] : File.read(ARGV[0] || "test.rb") - -puts "CRuby:" -p Prism.const_get(:Debug).cruby_locals(source) - -puts "Prism:" -p Prism.const_get(:Debug).prism_locals(source) diff --git a/bin/memsize b/bin/memsize deleted file mode 100755 index 33b389bf907..00000000000 --- a/bin/memsize +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require "yaml" - -filepath = File.expand_path("../config.yml", __dir__) -results = - YAML.load_file(filepath).fetch("nodes").map do |node| - [ - node["name"], - node.fetch("child_nodes", []).sum do |child_node| - case child_node["type"] - when "uint8" - 1 - when "uint32", "constant" - 4 - when "node", "node?" - 8 - when "location", "location?" - 16 - when "node[]", "string", "token", "token?", "constant[]" - 24 - when "flags" - 0 - else - raise "Unknown type: #{child_node["type"]}" - end - end - ] - end - -results.sort_by(&:last).reverse_each do |name, size| - puts "#{name}: #{size}" -end diff --git a/bin/parse b/bin/parse index b37ff9a9839..0dc5accc352 100755 --- a/bin/parse +++ b/bin/parse @@ -1,39 +1,3 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true +#!/bin/sh -# Usage: -# bin/parse # defaults to test.rb -# bin/parse -# bin/parse -e "" - -$:.unshift(File.expand_path("../lib", __dir__)) -require "prism" - -if ARGV[0] == "-e" - result = Prism.parse(ARGV[1]) -else - result = Prism.parse_file(ARGV[0] || "test.rb") -end - -result.mark_newlines! if ENV["MARK_NEWLINES"] - -value = result.value -value = value.accept(Prism::DesugarCompiler.new) if ENV["DESUGAR"] - -parts = {} -parts["Comments"] = result.comments if result.comments.any? -parts["Magic comments"] = result.magic_comments if result.magic_comments.any? -parts["Warnings"] = result.warnings if result.warnings.any? -parts["Errors"] = result.errors if result.errors.any? -parts["DATA"] = result.data_loc if result.data_loc - -if parts.empty? - puts value.inspect -else - parts["AST"] = value - parts.each_with_index do |(key, value), index| - puts if index > 0 - puts "#{key}:" - pp value - end -end +exec bin/prism parse "$@" diff --git a/bin/parser b/bin/parser deleted file mode 100755 index cef8a670971..00000000000 --- a/bin/parser +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require "bundler/setup" -require "prism" -require "parser/current" - -source = (ARGV[0] == "-e") ? ARGV[1] : File.read(ARGV[0] || "test.rb") -prism = Prism::Translation::Parser.parse(source) -parser = Parser::CurrentRuby.parse(source) - -puts "Prism:" -pp prism - -puts "Parser:" -pp parser diff --git a/bin/prism b/bin/prism new file mode 100755 index 00000000000..834dc730312 --- /dev/null +++ b/bin/prism @@ -0,0 +1,306 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +$:.unshift(File.expand_path("../lib", __dir__)) +require "prism" + +module Prism + class CLI + def run(argv) + case argv.shift + when "console" then console + when "dot" then dot(argv) + when "encoding" then encoding(argv) + when "lex" then lex(argv) + when "locals" then locals(argv) + when "memsize" then memsize + when "parse" then parse(argv) + when "parser" then parser(argv) + else + puts <<~TXT + Usage: + bin/prism console + bin/prism dot [source] + bin/prism encoding [encoding] + bin/prism lex [source] + bin/prism locals [source] + bin/prism memsize + bin/prism parse [source] + bin/prism parser [source] + TXT + end + end + + private + + ############################################################################ + # Commands + ############################################################################ + + # bin/prism console + def console + require "irb" + IRB.start(__FILE__) + end + + # bin/prism dot [source] + def dot(argv) + result = parse_source(argv) + + node = result.value + path = + if argv.first&.match?(/\A[\w\.]+\z/) + argv.first.split(".") + else + %w[statements body first] + end + + path.each do |field| + node = node.public_send(field) + end + + File.write( + "out.svg", + IO.popen("dot -Tsvg", "w+") do |file| + file.write(node.to_dot) + file.close_write + file.read + end + ) + end + + # bin/prism encoding [encoding] + def encoding(argv) + found = Encoding.find(argv.shift) + found = Encoding::ASCII_8BIT if found == Encoding::US_ASCII + + if !found.ascii_compatible? + warn("Encoding `#{found.name}' is not ASCII compatible") + exit(1) + end + + lookup_table(found) + unicode_lists(found) if found == Encoding::UTF_8 || found == Encoding::UTF8_MAC + end + + # bin/prism lex [source] + def lex(argv) + source, filepath = read_source(argv) + + ripper_value = + begin + Prism.lex_ripper(source) + rescue ArgumentError, SyntaxError + # If Ripper raises a syntax error, we want to continue as if it didn't + # return any tokens at all. prism won't raise a syntax error, so it's + # nicer to still be able to see the tokens that prism generated. + [] + end + + prism_compat = Prism.lex_compat(source, filepath: filepath) + prism = Prism.lex(source, filepath: filepath) + + if prism_compat.failure? + puts "Errors lexing:" + + prism_compat.errors.map do |error| + print "- [#{error.location.start_line},#{error.location.start_column}-" + print "#{error.location.end_line},#{error.location.end_column}] " + puts "\e[1;31m#{error.message}\e[0m" + end + + puts "\n" + end + + pattern = "%-64s %-64s %-64s" + puts pattern % ["Ripper lex", "Prism compat lex", "Prism lex"] + puts pattern % ["-" * 64, "-" * 64, "-" * 64] + + prism_compat_value = prism_compat.value + prism_value = prism.value + + [ripper_value.length, prism_compat_value.length, prism_value.length].max.times do |index| + parts = [ripper_value[index], prism_compat_value[index], nil] + + unless prism_value[index].nil? + prism_token = prism_value[index][0] + location = prism_token.location + parts[2] = [[location.start_line, location.start_column], prism_token.type, location.slice] + end + + if parts[0] != parts[1] + puts "\033[1;31m#{pattern}\033[0m" % parts.map(&:inspect) + elsif ENV["VERBOSE"] + puts "\033[38;5;102m#{pattern}\033[0m" % parts.map(&:inspect) + end + end + end + + # bin/prism locals [source] + def locals(argv) + source, filepath = read_source(argv) + + puts "CRuby:" + p Debug.cruby_locals(source) + + puts "Prism:" + p Debug.prism_locals(source) + end + + # bin/prism parse [source] + def parse(argv) + result = parse_source(argv) + + parts = {} + parts["Comments"] = result.comments if result.comments.any? + parts["Magic comments"] = result.magic_comments if result.magic_comments.any? + parts["Warnings"] = result.warnings if result.warnings.any? + parts["Errors"] = result.errors if result.errors.any? + parts["DATA"] = result.data_loc if result.data_loc + + if parts.empty? + puts result.value.inspect + else + parts["AST"] = result.value + parts.each_with_index do |(key, value), index| + puts if index > 0 + puts "#{key}:" + pp value + end + end + end + + # bin/prism memsize + def memsize + require "yaml" + + filepath = File.expand_path("../config.yml", __dir__) + results = + YAML.load_file(filepath).fetch("nodes").map do |node| + [ + node["name"], + node.fetch("fields", []).sum do |field| + case field["type"] + when "uint8" then 1 + when "uint32", "constant", "constant?" then 4 + when "node", "node?" then 8 + when "location", "location?" then 16 + when "node[]", "string", "token", "token?", "constant[]" then 24 + when "flags" then 0 + else raise "Unknown type: #{field["type"]}" + end + end + ] + end + + results.sort_by(&:last).reverse_each do |name, size| + puts "#{name}: #{size}" + end + end + + # bin/prism parser [source] + def parser(argv) + require "parser/current" + source, filepath = read_source(argv) + + puts "Parser:" + pp Parser::CurrentRuby.parse(source, filepath) + + puts "Prism:" + pp Translation::Parser.parse(source, filepath) + end + + ############################################################################ + # Helpers + ############################################################################ + + # Generate the list of values that will be used in a lookup table for a + # given encoding. + def lookup_table_values(encoding) + (0...256).each_slice(16).map.with_index do |slice, row_index| + slice.map do |codepoint| + character = codepoint.chr(encoding) + + values = 0 + values |= (1 << 0) if character.match?(/[[:alpha:]]/) + values |= (1 << 1) if character.match?(/[[:alnum:]]/) + values |= (1 << 2) if character.match?(/[[:upper:]]/) + values + rescue RangeError + 0 + end + end + end + + # Generate a lookup table for a given encoding. + def lookup_table(encoding) + encoding_values = lookup_table_values(encoding) + if encoding_values == lookup_table_values(Encoding::US_ASCII) + puts "static const uint8_t pm_encoding_ascii_table[256] = {" + else + puts "static const uint8_t pm_encoding_#{encoding.name.downcase}_table[256] = {" + end + + puts "// #{(0...16).map { |value| value.to_s(16).upcase }.join(" ")}" + encoding_values.each_with_index do |row, row_index| + puts " #{row.join(", ")}, // #{row_index.to_s(16).upcase}x" + end + puts "};" + end + + # Generate lists of unicode codepoints for a given encoding. + def unicode_lists(encoding) + encoding = Encoding::UTF_8 + range = (0x100..0xD7FF).to_a.concat((0xE000..0x10FFFF).to_a) + + { alpha: /[[:alpha:]]/, alnum: /[[:alnum:]]/, isupper: /[[:upper:]]/ }.map do |kind, regex| + codepoints = range.select { |codepoint| codepoint.chr(encoding).match?(regex) } + + previous = nil + groups = + codepoints.slice_before do |codepoint| + (!previous.nil? && (codepoint - previous) != 1).tap { previous = codepoint } + end + + matched = + groups.flat_map do |group| + ["0x#{group.first.to_s(16).upcase}", "0x#{group.last.to_s(16).upcase}"] + end + + puts "\n#define UNICODE_#{kind.upcase}_CODEPOINTS_LENGTH #{matched.length}" + puts "unicode_codepoint_t unicode_#{kind}_codepoints[UNICODE_#{kind.upcase}_CODEPOINTS_LENGTH] = {" + matched.each_slice(2) { |slice| puts " #{slice.join(", ")}," } + puts "};" + end + end + + # Parse the source code indicated by the command-line arguments. + def parse_source(argv) + case argv.first + when "-e" + argv.shift + Prism.parse(argv.shift) + when nil + Prism.parse_file("test.rb") + else + Prism.parse_file(argv.shift) + end + end + + # Get the source code indicated by the command-line arguments. + def read_source(argv) + case argv.first + when "-e" + argv.shift + [argv.shift, "-e"] + when nil + [File.read("test.rb"), "test.rb"] + else + filepath = argv.shift + [File.read(filepath), filepath] + end + end + end +end + +Prism::CLI.new.run(ARGV) diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb index 0cc63cd9448..c11903423d9 100644 --- a/lib/prism/lex_compat.rb +++ b/lib/prism/lex_compat.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require "delegate" +require "ripper" module Prism # This class is responsible for lexing the source using prism and then