diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 00000000000000..062096feee50a1 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,7 @@ +# YJIT sources and tests +yjit* @ruby/yjit +yjit/**/* @ruby/yjit +doc/yjit/* @ruby/yjit +bootstraptest/test_yjit* @ruby/yjit +test/ruby/test_yjit* @ruby/yjit +yjit/src/cruby_bindings.inc.rs diff --git a/.github/actions/launchable/record-test/action.yml b/.github/actions/launchable/record-test/action.yml index e12f1dc9f4cd15..51ad6b086ed9be 100644 --- a/.github/actions/launchable/record-test/action.yml +++ b/.github/actions/launchable/record-test/action.yml @@ -19,6 +19,13 @@ inputs: Test options that determine how tests are run. This value is used in the Launchable flavor. + srcdir: + required: false + default: ${{ github.workspace }} + description: >- + Directory to (re-)checkout source codes. Launchable retrieves the commit information + from the directory. + outputs: {} # nothing? runs: @@ -26,7 +33,7 @@ runs: steps: - name: Launchable - record tests - working-directory: ${{ inputs.builddir }} + working-directory: ${{ inputs.srcdir }} shell: bash run: | test_opts="$(echo ${{ inputs.test-opts }} | sed 's/=/:/g' | sed 's/ //g')" diff --git a/.github/actions/launchable/setup/action.yml b/.github/actions/launchable/setup/action.yml index 6931d23ea440c7..1bc0b55d640208 100644 --- a/.github/actions/launchable/setup/action.yml +++ b/.github/actions/launchable/setup/action.yml @@ -14,6 +14,13 @@ inputs: Launchable token is needed if you want to run Launchable on your forked repository. See https://github.com/ruby/ruby/wiki/CI-Servers#launchable-ci for details. + srcdir: + required: false + default: ${{ github.workspace }} + description: >- + Directory to (re-)checkout source codes. Launchable retrieves the commit information + from the directory. 
+ outputs: enable-launchable: description: "The boolean value indicating whether Launchable is enabled or not" @@ -66,6 +73,7 @@ runs: - name: Set up Launchable shell: bash + working-directory: ${{ inputs.srcdir }} run: | set -x PATH=$PATH:$(python -msite --user-base)/bin diff --git a/.github/actions/setup/directories/action.yml b/.github/actions/setup/directories/action.yml index f3d7a1f3308396..e90567d2d016bc 100644 --- a/.github/actions/setup/directories/action.yml +++ b/.github/actions/setup/directories/action.yml @@ -44,6 +44,11 @@ inputs: description: >- If set to true, creates dummy files in build dir. + fetch-depth: + required: false + default: '1' + description: The depth of commit history fetched from the remote repository + outputs: {} # nothing? runs: @@ -79,6 +84,7 @@ runs: uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 with: path: ${{ inputs.srcdir }} + fetch-depth: ${{ inputs.fetch-depth }} - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 with: diff --git a/.github/auto_request_review.yml b/.github/auto_request_review.yml deleted file mode 100644 index 8726df577d39eb..00000000000000 --- a/.github/auto_request_review.yml +++ /dev/null @@ -1,13 +0,0 @@ -files: - 'yjit*': [team:yjit] - 'yjit/**/*': [team:yjit] - 'yjit/src/cruby_bindings.inc.rs': [] - 'doc/yjit/*': [team:yjit] - 'bootstraptest/test_yjit*': [team:yjit] - 'test/ruby/test_yjit*': [team:yjit] -options: - ignore_draft: true - # This currently doesn't work as intended. We want to skip reviews when only - # cruby_bingings.inc.rs is modified, but this skips reviews even when other - # yjit files are modified as well. To be enabled after fixing the behavior. 
- #last_files_match_only: true diff --git a/.github/workflows/auto_request_review.yml b/.github/workflows/auto_request_review.yml deleted file mode 100644 index ca27244b46547b..00000000000000 --- a/.github/workflows/auto_request_review.yml +++ /dev/null @@ -1,19 +0,0 @@ -name: Auto Request Review -on: - pull_request_target: - types: [opened, ready_for_review, reopened] - -permissions: - contents: read - -jobs: - auto-request-review: - name: Auto Request Review - runs-on: ubuntu-latest - if: ${{ github.repository == 'ruby/ruby' && github.base_ref == 'master' }} - steps: - - name: Request review based on files changes and/or groups the author belongs to - uses: necojackarc/auto-request-review@e89da1a8cd7c8c16d9de9c6e763290b6b0e3d424 # v0.13.0 - with: - # scope: public_repo - token: ${{ secrets.MATZBOT_GITHUB_TOKEN }} diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index f8c5b21e75a0b6..fb2a60975bafa4 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -56,8 +56,6 @@ jobs: with: sparse-checkout-cone-mode: false sparse-checkout: /.github - # Set fetch-depth: 0 so that Launchable can receive commits information. - fetch-depth: 0 - name: Install libraries uses: ./.github/actions/setup/macos @@ -68,6 +66,8 @@ jobs: builddir: build makeup: true dummy-files: ${{ matrix.test_task == 'check' }} + # Set fetch-depth: 10 so that Launchable can receive commits information. + fetch-depth: 10 - name: Run configure run: ../src/configure -C --disable-install-doc @@ -89,6 +89,7 @@ jobs: uses: ./.github/actions/launchable/setup with: launchable-token: ${{ secrets.LAUNCHABLE_TOKEN }} + srcdir: src - name: Set extra test options run: echo "TESTS=$TESTS ${{ matrix.test_opts }}" >> $GITHUB_ENV @@ -118,9 +119,10 @@ jobs: with: # We need to configure the `build` directory because # this composite action is executed in the default working directory. 
- report-path: build/launchable_reports.json + report-path: ../build/launchable_reports.json os: ${{ matrix.os }} test-opts: ${{ matrix.test_opts }} + srcdir: src if: ${{ always() && steps.enable-launchable.outputs.enable-launchable }} - uses: ./.github/actions/slack diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 80af27bc1ac1b8..2e11cca98ee624 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -69,6 +69,8 @@ jobs: builddir: build makeup: true dummy-files: ${{ matrix.test_task == 'check' }} + # Set fetch-depth: 10 so that Launchable can receive commits information. + fetch-depth: 10 - uses: ruby/setup-ruby@d4526a55538b775af234ba4af27118ed6f8f6677 # v1.172.0 with: @@ -100,6 +102,7 @@ jobs: uses: ./.github/actions/launchable/setup with: launchable-token: ${{ secrets.LAUNCHABLE_TOKEN }} + srcdir: src - name: make ${{ matrix.test_task }} run: >- @@ -126,9 +129,10 @@ jobs: with: # We need to configure the `build` directory because # this composite action is executed in the default working directory. 
- report-path: build/launchable_reports.json + report-path: ../build/launchable_reports.json os: ubuntu-20.04 test-opts: ${{ matrix.configure }} + srcdir: src if: ${{ always() && steps.enable-launchable.outputs.enable-launchable }} - uses: ./.github/actions/slack diff --git a/bootstraptest/test_yjit.rb b/bootstraptest/test_yjit.rb index 1d98b92d3a425a..90f1c0b5f16bb5 100644 --- a/bootstraptest/test_yjit.rb +++ b/bootstraptest/test_yjit.rb @@ -4658,3 +4658,19 @@ def test_cfunc_vargs_splat(sub_instance, array_class, empty_kw_hash) test_cfunc_vargs_splat(Foo.new, Array, Hash.ruby2_keywords_hash({})) } + +# Class#new (arity=-1), splat, and ruby2_keywords +assert_equal '[0, {1=>1}]', %q{ + class KwInit + attr_reader :init_args + def initialize(x = 0, **kw) + @init_args = [x, kw] + end + end + + def test(klass, args) + klass.new(*args).init_args + end + + test(KwInit, [Hash.ruby2_keywords_hash({1 => 1})]) +} diff --git a/configure.ac b/configure.ac index b39f6ca136da1a..946b5775ad3aaa 100644 --- a/configure.ac +++ b/configure.ac @@ -1267,7 +1267,7 @@ main() [wasi*],[ LIBS="-lm -lwasi-emulated-mman -lwasi-emulated-signal -lwasi-emulated-getpid -lwasi-emulated-process-clocks $LIBS" RUBY_APPEND_OPTIONS(CFLAGS, -D_WASI_EMULATED_SIGNAL -D_WASI_EMULATED_MMAN -D_WASI_EMULATED_GETPID -D_WASI_EMULATED_PROCESS_CLOCKS) RUBY_APPEND_OPTIONS(CPPFLAGS, -D_WASI_EMULATED_SIGNAL -D_WASI_EMULATED_MMAN -D_WASI_EMULATED_GETPID -D_WASI_EMULATED_PROCESS_CLOCKS) - POSTLINK="\$(WASMOPT) --asyncify \$(wasmoptflags) --pass-arg=asyncify-ignore-imports -o \$@ \$@${POSTLINK:+; $POSTLINK}" + POSTLINK="\$(WASMOPT) --asyncify \$(wasmoptflags) -o \$@ \$@${POSTLINK:+; $POSTLINK}" # wasi-libc's sys/socket.h is not compatible with -std=gnu99, # so re-declare shutdown in include/ruby/missing.h ac_cv_func_shutdown=no diff --git a/gc.c b/gc.c index c8486313a87e03..5c4180c715d474 100644 --- a/gc.c +++ b/gc.c @@ -5293,7 +5293,7 @@ should_be_finalizable(VALUE obj) rb_check_frozen(obj); } -VALUE +static VALUE 
rb_define_finalizer_no_check(VALUE obj, VALUE block) { rb_objspace_t *objspace = &rb_objspace; @@ -5439,10 +5439,10 @@ rb_gc_copy_finalizer(VALUE dest, VALUE obj) WITH_OBJSPACE_OF_VALUE_ENTER(dest, objspace); { st_insert(finalizer_table, dest, table); + FL_SET(dest, FL_FINALIZE); } WITH_OBJSPACE_OF_VALUE_LEAVE(objspace); } - FL_SET(dest, FL_FINALIZE); } static VALUE @@ -5512,6 +5512,7 @@ run_final(rb_objspace_t *objspace, VALUE zombie) st_data_t key = (st_data_t)zombie; if (FL_TEST_RAW(zombie, FL_FINALIZE)) { + FL_UNSET(zombie, FL_FINALIZE); st_data_t table; if (st_delete(finalizer_table, &key, &table)) { run_finalizer(objspace, zombie, (VALUE)table); @@ -5701,11 +5702,12 @@ rb_objspace_call_finalizer(rb_objspace_t *objspace) st_foreach(finalizer_table, force_chain_object, (st_data_t)&list); while (list) { struct force_finalize_list *curr = list; - run_finalizer(objspace, curr->obj, curr->table); - FL_UNSET(curr->obj, FL_FINALIZE); st_data_t obj = (st_data_t)curr->obj; st_delete(finalizer_table, &obj, 0); + FL_UNSET(curr->obj, FL_FINALIZE); + + run_finalizer(objspace, curr->obj, curr->table); list = curr->next; xfree(curr); @@ -6651,7 +6653,7 @@ static VALUE count_objects(int argc, VALUE *argv, VALUE os) { rb_objspace_t *objspace = &rb_objspace; - size_t counts[T_MASK+1]; + size_t counts[T_MASK + 1] = { 0 }; size_t freed = 0; size_t total = 0; size_t i; @@ -6663,10 +6665,6 @@ count_objects(int argc, VALUE *argv, VALUE os) rb_raise(rb_eTypeError, "non-hash given"); } - for (i = 0; i <= T_MASK; i++) { - counts[i] = 0; - } - rb_ractor_t *r = GET_RACTOR(); for (i = 0; i < heap_allocated_pages; i++) { struct heap_page *page = heap_pages_sorted[i]; diff --git a/gems/bundled_gems b/gems/bundled_gems index 44de8dfd844b8d..8a1cf4106e635f 100644 --- a/gems/bundled_gems +++ b/gems/bundled_gems @@ -17,7 +17,7 @@ net-pop 0.1.2 https://github.com/ruby/net-pop net-smtp 0.4.0.1 https://github.com/ruby/net-smtp matrix 0.4.2 https://github.com/ruby/matrix prime 0.1.2 
https://github.com/ruby/prime -rbs 3.4.4 https://github.com/ruby/rbs +rbs 3.4.4 https://github.com/ruby/rbs 56ae86bb5f4864f5057778bd45b280248b012329 typeprof 0.21.11 https://github.com/ruby/typeprof debug 1.9.1 https://github.com/ruby/debug 2d602636d99114d55a32fedd652c9c704446a749 racc 1.7.3 https://github.com/ruby/racc diff --git a/internal/gc.h b/internal/gc.h index 639d3d6c3a1b4a..9b2d568127e522 100644 --- a/internal/gc.h +++ b/internal/gc.h @@ -290,7 +290,6 @@ size_t rb_gc_obj_slot_size(VALUE obj); bool rb_gc_size_allocatable_p(size_t size); int rb_objspace_garbage_object_p(VALUE obj); bool rb_gc_is_ptr_to_obj(const void *ptr); -VALUE rb_define_finalizer_no_check(VALUE obj, VALUE block); int rb_during_local_gc(void); int rb_during_global_gc(void); diff --git a/iseq.c b/iseq.c index dcf1a057d0cd09..273d79fc96fceb 100644 --- a/iseq.c +++ b/iseq.c @@ -1247,7 +1247,7 @@ pm_iseq_compile_with_option(VALUE src, VALUE file, VALUE realpath, VALUE line, V VALUE error; if (RB_TYPE_P(src, T_FILE)) { VALUE filepath = rb_io_path(src); - error = pm_parse_file(&result, filepath); + error = pm_load_parse_file(&result, filepath); RB_GC_GUARD(filepath); } else { @@ -1650,7 +1650,7 @@ iseqw_s_compile_file_prism(int argc, VALUE *argv, VALUE self) pm_parse_result_t result = { 0 }; result.options.line = 1; - VALUE error = pm_parse_file(&result, file); + VALUE error = pm_load_parse_file(&result, file); if (error == Qnil) { make_compile_option(&option, opt); diff --git a/load.c b/load.c index d868c06cf42291..8963a2e751b5e6 100644 --- a/load.c +++ b/load.c @@ -747,7 +747,7 @@ load_iseq_eval(rb_execution_context_t *ec, VALUE fname) pm_parse_result_t result = { 0 }; result.options.line = 1; - VALUE error = pm_parse_file(&result, fname); + VALUE error = pm_load_parse_file(&result, fname); if (error == Qnil) { iseq = pm_iseq_new_top(&result.node, rb_fstring_lit(""), fname, realpath_internal_cached(realpath_map, fname), NULL); diff --git a/prism/config.yml b/prism/config.yml index 
a89052d7699992..4357663d48ece4 100644 --- a/prism/config.yml +++ b/prism/config.yml @@ -1556,6 +1556,9 @@ nodes: ^ - name: elements type: node[] + kind: + - AssocNode + - AssocSplatNode comment: | The elements of the hash. These can be either `AssocNode`s or `AssocSplatNode`s. @@ -1903,6 +1906,10 @@ nodes: type: location - name: parts type: node[] + kind: + - StringNode + - EmbeddedStatementsNode + - EmbeddedVariableNode - name: closing_loc type: location newline: parts @@ -1920,6 +1927,10 @@ nodes: type: location - name: parts type: node[] + kind: + - StringNode + - EmbeddedStatementsNode + - EmbeddedVariableNode - name: closing_loc type: location newline: parts @@ -1934,6 +1945,11 @@ nodes: type: location? - name: parts type: node[] + kind: + - StringNode + - EmbeddedStatementsNode + - EmbeddedVariableNode + - InterpolatedStringNode # `"a" "#{b}"` - name: closing_loc type: location? newline: parts @@ -1948,6 +1964,10 @@ nodes: type: location? - name: parts type: node[] + kind: + - StringNode + - EmbeddedStatementsNode + - EmbeddedVariableNode - name: closing_loc type: location? newline: parts @@ -1962,6 +1982,10 @@ nodes: type: location - name: parts type: node[] + kind: + - StringNode + - EmbeddedStatementsNode + - EmbeddedVariableNode - name: closing_loc type: location newline: parts @@ -1983,6 +2007,9 @@ nodes: kind: KeywordHashNodeFlags - name: elements type: node[] + kind: + - AssocNode + - AssocSplatNode comment: | Represents a hash literal without opening and closing braces. @@ -2190,6 +2217,7 @@ nodes: kind: CallNode - name: targets type: node[] + kind: LocalVariableTargetNode comment: | Represents writing local variables using a regular expression match with named capture groups. 
@@ -2221,10 +2249,35 @@ nodes: fields: - name: lefts type: node[] + kind: + - LocalVariableTargetNode + - InstanceVariableTargetNode + - ClassVariableTargetNode + - GlobalVariableTargetNode + - ConstantTargetNode + - ConstantPathTargetNode + - CallTargetNode + - IndexTargetNode + - MultiTargetNode + - RequiredParameterNode + - BackReferenceReadNode # On parsing error of `$',` + - NumberedReferenceReadNode # On parsing error of `$1,` - name: rest type: node? - name: rights type: node[] + kind: + - LocalVariableTargetNode + - InstanceVariableTargetNode + - ClassVariableTargetNode + - GlobalVariableTargetNode + - ConstantTargetNode + - ConstantPathTargetNode + - CallTargetNode + - IndexTargetNode + - MultiTargetNode + - RequiredParameterNode + - BackReferenceReadNode # On parsing error of `*,$'` - name: lparen_loc type: location? - name: rparen_loc @@ -2238,10 +2291,30 @@ nodes: fields: - name: lefts type: node[] + kind: + - LocalVariableTargetNode + - InstanceVariableTargetNode + - ClassVariableTargetNode + - GlobalVariableTargetNode + - ConstantTargetNode + - ConstantPathTargetNode + - CallTargetNode + - IndexTargetNode + - MultiTargetNode - name: rest type: node? - name: rights type: node[] + kind: + - LocalVariableTargetNode + - InstanceVariableTargetNode + - ClassVariableTargetNode + - GlobalVariableTargetNode + - ConstantTargetNode + - ConstantPathTargetNode + - CallTargetNode + - IndexTargetNode + - MultiTargetNode - name: lparen_loc type: location? 
- name: rparen_loc @@ -2820,6 +2893,9 @@ nodes: fields: - name: names type: node[] + kind: + - SymbolNode + - InterpolatedSymbolNode - name: keyword_loc type: location comment: | diff --git a/prism/static_literals.c b/prism/static_literals.c index 17be22c2e78121..64d6ffeec98877 100644 --- a/prism/static_literals.c +++ b/prism/static_literals.c @@ -61,6 +61,10 @@ node_hash(const pm_parser_t *parser, const pm_node_t *node) { hash ^= murmur_hash((const uint8_t *) value, sizeof(uint32_t)); } + if (integer->negative) { + hash ^= murmur_scramble((uint32_t) 1); + } + return hash; } case PM_SOURCE_LINE_NODE: { diff --git a/prism/templates/lib/prism/node.rb.erb b/prism/templates/lib/prism/node.rb.erb index 32134f8820894f..4762963bf6f584 100644 --- a/prism/templates/lib/prism/node.rb.erb +++ b/prism/templates/lib/prism/node.rb.erb @@ -110,6 +110,9 @@ module Prism @newline = false @location = location <%- node.fields.each do |field| -%> + <%- if Prism::CHECK_FIELD_KIND && field.respond_to?(:check_field_kind) -%> + raise <%= field.name %>.inspect unless <%= field.check_field_kind %> + <%- end -%> @<%= field.name %> = <%= field.name %> <%- end -%> end diff --git a/prism/templates/template.rb b/prism/templates/template.rb index d981b8959265c5..fd55d5228ba828 100755 --- a/prism/templates/template.rb +++ b/prism/templates/template.rb @@ -6,6 +6,7 @@ module Prism SERIALIZE_ONLY_SEMANTICS_FIELDS = ENV.fetch("PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS", false) + CHECK_FIELD_KIND = ENV.fetch("CHECK_FIELD_KIND", false) JAVA_BACKEND = ENV["PRISM_JAVA_BACKEND"] || "truffleruby" JAVA_STRING_TYPE = JAVA_BACKEND == "jruby" ? 
"org.jruby.RubySymbol" : "String" @@ -123,6 +124,14 @@ def rbs_class def rbi_class "Prism::#{ruby_type}" end + + def check_field_kind + if union_kind + "[#{union_kind.join(', ')}].include?(#{name}.class)" + else + "#{name}.is_a?(#{ruby_type})" + end + end end # This represents a field on a node that is itself a node and can be @@ -141,11 +150,19 @@ def rbs_class def rbi_class "T.nilable(Prism::#{ruby_type})" end + + def check_field_kind + if union_kind + "[#{union_kind.join(', ')}, NilClass].include?(#{name}.class)" + else + "#{name}.nil? || #{name}.is_a?(#{ruby_type})" + end + end end # This represents a field on a node that is a list of nodes. We pass them as # references and store them directly on the struct. - class NodeListField < Field + class NodeListField < NodeKindField def rbs_class if specific_kind "Array[#{specific_kind}]" @@ -157,20 +174,19 @@ def rbs_class end def rbi_class - "T::Array[Prism::Node]" + "T::Array[Prism::#{ruby_type}]" end def java_type - "Node[]" + "#{super}[]" end - # TODO: unduplicate with NodeKindField - def specific_kind - options[:kind] unless options[:kind].is_a?(Array) - end - - def union_kind - options[:kind] if options[:kind].is_a?(Array) + def check_field_kind + if union_kind + "#{name}.all? { |n| [#{union_kind.join(', ')}].include?(n.class) }" + else + "#{name}.all? 
{ |n| n.is_a?(#{ruby_type}) }" + end end end diff --git a/prism_compile.c b/prism_compile.c index 06dd2a1f0e119a..0558325851f9e8 100644 --- a/prism_compile.c +++ b/prism_compile.c @@ -219,10 +219,9 @@ parse_imaginary(pm_imaginary_node_t *node) } static inline VALUE -parse_string(pm_string_t *string, const pm_parser_t *parser) +parse_string(const pm_scope_node_t *scope_node, const pm_string_t *string) { - rb_encoding *enc = rb_enc_from_index(rb_enc_find_index(parser->encoding->name)); - return rb_enc_str_new((const char *) pm_string_source(string), pm_string_length(string), enc); + return rb_enc_str_new((const char *) pm_string_source(string), pm_string_length(string), scope_node->encoding); } /** @@ -231,7 +230,7 @@ parse_string(pm_string_t *string, const pm_parser_t *parser) * creating those strings based on the flags set on the owning node. */ static inline VALUE -parse_string_encoded(const pm_node_t *node, const pm_string_t *string, const pm_parser_t *parser) +parse_string_encoded(const pm_scope_node_t *scope_node, const pm_node_t *node, const pm_string_t *string) { rb_encoding *encoding; @@ -240,40 +239,30 @@ parse_string_encoded(const pm_node_t *node, const pm_string_t *string, const pm_ } else if (node->flags & PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING) { encoding = rb_utf8_encoding(); } else { - encoding = rb_enc_from_index(rb_enc_find_index(parser->encoding->name)); + encoding = scope_node->encoding; } return rb_enc_str_new((const char *) pm_string_source(string), pm_string_length(string), encoding); } static inline ID -parse_symbol(const uint8_t *start, const uint8_t *end, const char *encoding) +parse_string_symbol(const pm_scope_node_t *scope_node, const pm_symbol_node_t *symbol) { - rb_encoding *enc = rb_enc_from_index(rb_enc_find_index(encoding)); - return rb_intern3((const char *) start, end - start, enc); -} - -static inline ID -parse_string_symbol(const pm_symbol_node_t *symbol, const pm_parser_t *parser) -{ - const char *encoding = 
parser->encoding->name; + rb_encoding *encoding; if (symbol->base.flags & PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING) { - encoding = "UTF-8"; + encoding = rb_utf8_encoding(); } else if (symbol->base.flags & PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING) { - encoding = "ASCII-8BIT"; + encoding = rb_ascii8bit_encoding(); } else if (symbol->base.flags & PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING) { - encoding = "US-ASCII"; + encoding = rb_usascii_encoding(); + } + else { + encoding = scope_node->encoding; } - const uint8_t *start = pm_string_source(&symbol->unescaped); - return parse_symbol(start, start + pm_string_length(&symbol->unescaped), encoding); -} -static inline ID -parse_location_symbol(const pm_location_t *location, const pm_parser_t *parser) -{ - return parse_symbol(location->start, location->end, parser->encoding->name); + return rb_intern3((const char *) pm_string_source(&symbol->unescaped), pm_string_length(&symbol->unescaped), encoding); } static int @@ -331,7 +320,7 @@ pm_reg_flags(const pm_node_t *node) { } static rb_encoding * -pm_reg_enc(const pm_regular_expression_node_t *node, const pm_parser_t *parser) +pm_reg_enc(const pm_scope_node_t *scope_node, const pm_regular_expression_node_t *node) { if (node->base.flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) { return rb_ascii8bit_encoding(); @@ -349,7 +338,7 @@ pm_reg_enc(const pm_regular_expression_node_t *node, const pm_parser_t *parser) return rb_utf8_encoding(); } - return rb_enc_from_index(rb_enc_find_index(parser->encoding->name)); + return scope_node->encoding; } /** @@ -364,12 +353,12 @@ pm_static_literal_p(const pm_node_t *node) } static VALUE -pm_new_regex(pm_regular_expression_node_t * cast, const pm_parser_t * parser) +pm_new_regex(const pm_scope_node_t *scope_node, const pm_regular_expression_node_t *node) { - VALUE regex_str = parse_string(&cast->unescaped, parser); - rb_encoding * enc = pm_reg_enc(cast, parser); + VALUE regex_str = parse_string(scope_node, &node->unescaped); + rb_encoding *enc = 
pm_reg_enc(scope_node, node); - VALUE regex = rb_enc_reg_new(RSTRING_PTR(regex_str), RSTRING_LEN(regex_str), enc, pm_reg_flags((const pm_node_t *)cast)); + VALUE regex = rb_enc_reg_new(RSTRING_PTR(regex_str), RSTRING_LEN(regex_str), enc, pm_reg_flags((const pm_node_t *) node)); RB_GC_GUARD(regex_str); rb_obj_freeze(regex); @@ -433,27 +422,20 @@ pm_static_literal_value(const pm_node_t *node, const pm_scope_node_t *scope_node return Qnil; case PM_RATIONAL_NODE: return parse_rational((const pm_rational_node_t *) node); - case PM_REGULAR_EXPRESSION_NODE: { - pm_regular_expression_node_t *cast = (pm_regular_expression_node_t *) node; - - return pm_new_regex(cast, scope_node->parser); - } - case PM_SOURCE_ENCODING_NODE: { - const char *name = scope_node->parser->encoding->name; - rb_encoding *encoding = rb_find_encoding(rb_str_new_cstr(name)); - if (!encoding) rb_bug("Encoding not found %s!", name); - return rb_enc_from_encoding(encoding); - } + case PM_REGULAR_EXPRESSION_NODE: + return pm_new_regex(scope_node, (const pm_regular_expression_node_t *) node); + case PM_SOURCE_ENCODING_NODE: + return rb_enc_from_encoding(scope_node->encoding); case PM_SOURCE_FILE_NODE: { pm_source_file_node_t *cast = (pm_source_file_node_t *)node; - return cast->filepath.length ? parse_string(&cast->filepath, scope_node->parser) : rb_fstring_lit(""); + return cast->filepath.length ? 
parse_string(scope_node, &cast->filepath) : rb_fstring_lit(""); } case PM_SOURCE_LINE_NODE: return INT2FIX(pm_node_line_number(scope_node->parser, node)); case PM_STRING_NODE: - return parse_string_encoded(node, &((pm_string_node_t *)node)->unescaped, scope_node->parser); + return parse_string_encoded(scope_node, node, &((pm_string_node_t *)node)->unescaped); case PM_SYMBOL_NODE: - return ID2SYM(parse_string_symbol((pm_symbol_node_t *)node, scope_node->parser)); + return ID2SYM(parse_string_symbol(scope_node, (const pm_symbol_node_t *) node)); case PM_TRUE_NODE: return Qtrue; default: @@ -544,7 +526,23 @@ pm_compile_logical(rb_iseq_t *iseq, LINK_ANCHOR *const ret, pm_node_t *cond, LAB static void pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node); static void -pm_compile_flip_flop(pm_flip_flop_node_t *flip_flop_node, LABEL *else_label, LABEL *then_label, rb_iseq_t *iseq, const int lineno, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) +pm_compile_flip_flop_bound(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) +{ + NODE dummy_line_node = generate_dummy_line_node(ISEQ_BODY(iseq)->location.first_lineno, -1); + + if (PM_NODE_TYPE_P(node, PM_INTEGER_NODE)) { + PM_COMPILE_NOT_POPPED(node); + ADD_INSN1(ret, &dummy_line_node, getglobal, ID2SYM(rb_intern("$."))); + ADD_SEND(ret, &dummy_line_node, idEq, INT2FIX(1)); + PM_POP_IF_POPPED; + } + else { + PM_COMPILE(node); + } +} + +static void +pm_compile_flip_flop(const pm_flip_flop_node_t *flip_flop_node, LABEL *else_label, LABEL *then_label, rb_iseq_t *iseq, const int lineno, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) { NODE dummy_line_node = generate_dummy_line_node(ISEQ_BODY(iseq)->location.first_lineno, -1); LABEL *lend = NEW_LABEL(lineno); @@ -558,7 +556,7 @@ pm_compile_flip_flop(pm_flip_flop_node_t *flip_flop_node, LABEL *else_label, LAB ADD_INSNL(ret, 
&dummy_line_node, branchif, lend); if (flip_flop_node->left) { - PM_COMPILE(flip_flop_node->left); + pm_compile_flip_flop_bound(iseq, flip_flop_node->left, ret, popped, scope_node); } else { PM_PUTNIL; @@ -573,7 +571,7 @@ pm_compile_flip_flop(pm_flip_flop_node_t *flip_flop_node, LABEL *else_label, LAB ADD_LABEL(ret, lend); if (flip_flop_node->right) { - PM_COMPILE(flip_flop_node->right); + pm_compile_flip_flop_bound(iseq, flip_flop_node->right, ret, popped, scope_node); } else { PM_PUTNIL; @@ -624,9 +622,8 @@ pm_compile_branch_condition(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const pm_no ADD_INSNL(ret, &dummy_line_node, jump, then_label); return; case PM_FLIP_FLOP_NODE: - pm_compile_flip_flop((pm_flip_flop_node_t *)cond, else_label, then_label, iseq, lineno, ret, popped, scope_node); + pm_compile_flip_flop((const pm_flip_flop_node_t *) cond, else_label, then_label, iseq, lineno, ret, popped, scope_node); return; - // TODO: Several more nodes in this case statement case PM_DEFINED_NODE: { pm_defined_node_t *defined_node = (pm_defined_node_t *)cond; pm_compile_defined_expr(iseq, defined_node->value, ret, popped, scope_node, dummy_line_node, lineno, true); @@ -780,7 +777,7 @@ pm_interpolated_node_compile(pm_node_list_t *parts, rb_iseq_t *iseq, NODE dummy_ if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) { const pm_string_node_t *string_node = (const pm_string_node_t *)part; - VALUE string_value = parse_string_encoded((pm_node_t *)string_node, &string_node->unescaped, scope_node->parser); + VALUE string_value = parse_string_encoded(scope_node, (pm_node_t *)string_node, &string_node->unescaped); if (RTEST(current_string)) { current_string = rb_str_concat(current_string, string_value); @@ -794,7 +791,7 @@ pm_interpolated_node_compile(pm_node_list_t *parts, rb_iseq_t *iseq, NODE dummy_ ((const pm_embedded_statements_node_t *) part)->statements->body.size == 1 && PM_NODE_TYPE_P(((const pm_embedded_statements_node_t *) part)->statements->body.nodes[0], PM_STRING_NODE)) { const 
pm_string_node_t *string_node = (const pm_string_node_t *) ((const pm_embedded_statements_node_t *) part)->statements->body.nodes[0]; - VALUE string_value = parse_string_encoded((pm_node_t *)string_node, &string_node->unescaped, scope_node->parser); + VALUE string_value = parse_string_encoded(scope_node, (pm_node_t *)string_node, &string_node->unescaped); if (RTEST(current_string)) { current_string = rb_str_concat(current_string, string_value); @@ -805,8 +802,7 @@ pm_interpolated_node_compile(pm_node_list_t *parts, rb_iseq_t *iseq, NODE dummy_ } else { if (!RTEST(current_string)) { - rb_encoding *enc = rb_enc_from_index(rb_enc_find_index(scope_node->parser->encoding->name)); - current_string = rb_enc_str_new(NULL, 0, enc); + current_string = rb_enc_str_new(NULL, 0, scope_node->encoding); } if (ISEQ_COMPILE_DATA(iseq)->option->frozen_string_literal) { @@ -2139,7 +2135,7 @@ pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t const pm_node_t *key = ((const pm_assoc_node_t *) element)->key; RUBY_ASSERT(PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)); - VALUE symbol = ID2SYM(parse_string_symbol((const pm_symbol_node_t *)key, scope_node->parser)); + VALUE symbol = ID2SYM(parse_string_symbol(scope_node, (const pm_symbol_node_t *) key)); rb_ary_push(keys, symbol); } } @@ -2184,7 +2180,7 @@ pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t const pm_node_t *key = assoc->key; RUBY_ASSERT(PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)); - VALUE symbol = ID2SYM(parse_string_symbol((const pm_symbol_node_t *)key, scope_node->parser)); + VALUE symbol = ID2SYM(parse_string_symbol(scope_node, (const pm_symbol_node_t *) key)); ADD_INSN(ret, &line.node, dup); ADD_INSN1(ret, &line.node, putobject, symbol); ADD_SEND(ret, &line.node, rb_intern("key?"), INT2FIX(1)); @@ -2491,7 +2487,7 @@ pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t // Generate a scope node from the given node. 
void -pm_scope_node_init(const pm_node_t *node, pm_scope_node_t *scope, pm_scope_node_t *previous, const pm_parser_t *parser) +pm_scope_node_init(const pm_node_t *node, pm_scope_node_t *scope, pm_scope_node_t *previous) { // This is very important, otherwise the scope node could be seen as having // certain flags set that _should not_ be set. @@ -2502,10 +2498,11 @@ pm_scope_node_init(const pm_node_t *node, pm_scope_node_t *scope, pm_scope_node_ scope->base.location.end = node->location.end; scope->previous = previous; - scope->parser = parser; scope->ast_node = (pm_node_t *)node; if (previous) { + scope->parser = previous->parser; + scope->encoding = previous->encoding; scope->constants = previous->constants; } @@ -2989,7 +2986,7 @@ pm_compile_call(rb_iseq_t *iseq, const pm_call_node_t *call_node, LINK_ANCHOR *c if (call_node->block != NULL && PM_NODE_TYPE_P(call_node->block, PM_BLOCK_NODE)) { // Scope associated with the block pm_scope_node_t next_scope_node; - pm_scope_node_init(call_node->block, &next_scope_node, scope_node, scope_node->parser); + pm_scope_node_init(call_node->block, &next_scope_node, scope_node); block_iseq = NEW_CHILD_ISEQ(&next_scope_node, make_name_for_block(iseq), ISEQ_TYPE_BLOCK, pm_node_line_number(scope_node->parser, call_node->block)); pm_scope_node_destroy(&next_scope_node); @@ -3823,7 +3820,7 @@ pm_compile_rescue(rb_iseq_t *iseq, pm_begin_node_t *begin_node, LINK_ANCHOR *con LABEL *lcont = NEW_LABEL(lineno); pm_scope_node_t rescue_scope_node; - pm_scope_node_init((pm_node_t *) begin_node->rescue_clause, &rescue_scope_node, scope_node, parser); + pm_scope_node_init((pm_node_t *) begin_node->rescue_clause, &rescue_scope_node, scope_node); rb_iseq_t *rescue_iseq = NEW_CHILD_ISEQ( &rescue_scope_node, @@ -3898,7 +3895,7 @@ pm_compile_ensure(rb_iseq_t *iseq, pm_begin_node_t *begin_node, LINK_ANCHOR *con ADD_LABEL(ret, econt); pm_scope_node_t next_scope_node; - pm_scope_node_init((pm_node_t *)begin_node->ensure_clause, &next_scope_node, 
scope_node, parser); + pm_scope_node_init((pm_node_t *)begin_node->ensure_clause, &next_scope_node, scope_node); rb_iseq_t *child_iseq = NEW_CHILD_ISEQ( &next_scope_node, @@ -4119,7 +4116,7 @@ pm_compile_case_node_dispatch(VALUE dispatch, const pm_node_t *node, LABEL *labe break; case PM_STRING_NODE: { const pm_string_node_t *cast = (const pm_string_node_t *) node; - key = rb_fstring(parse_string_encoded(node, &cast->unescaped, scope_node->parser)); + key = rb_fstring(parse_string_encoded(scope_node, node, &cast->unescaped)); break; } default: @@ -4167,10 +4164,13 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // alias $foo $bar // ^^^^^^^^^^^^^^^ const pm_alias_global_variable_node_t *cast = (const pm_alias_global_variable_node_t *) node; - PUSH_INSN1(ret, location, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_VMCORE)); - PUSH_INSN1(ret, location, putobject, ID2SYM(parse_location_symbol(&cast->new_name->location, parser))); - PUSH_INSN1(ret, location, putobject, ID2SYM(parse_location_symbol(&cast->old_name->location, parser))); + + const pm_location_t *new_name_loc = &cast->new_name->location; + PUSH_INSN1(ret, location, putobject, ID2SYM(rb_intern3((const char *) new_name_loc->start, new_name_loc->end - new_name_loc->start, scope_node->encoding))); + + const pm_location_t *old_name_loc = &cast->old_name->location; + PUSH_INSN1(ret, location, putobject, ID2SYM(rb_intern3((const char *) old_name_loc->start, old_name_loc->end - old_name_loc->start, scope_node->encoding))); PUSH_SEND(ret, location, id_core_set_variable_alias, INT2FIX(2)); if (popped) PUSH_INSN(ret, location, pop); @@ -4486,7 +4486,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, switch (method_id) { case idUMinus: { if (pm_opt_str_freeze_p(iseq, call_node)) { - VALUE value = rb_fstring(parse_string_encoded(call_node->receiver, &((const pm_string_node_t * )call_node->receiver)->unescaped, parser)); + VALUE value = 
rb_fstring(parse_string_encoded(scope_node, call_node->receiver, &((const pm_string_node_t * )call_node->receiver)->unescaped)); ADD_INSN2(ret, &dummy_line_node, opt_str_uminus, value, new_callinfo(iseq, idUMinus, 0, 0, NULL, FALSE)); return; } @@ -4494,7 +4494,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, } case idFreeze: { if (pm_opt_str_freeze_p(iseq, call_node)) { - VALUE value = rb_fstring(parse_string_encoded(call_node->receiver, &((const pm_string_node_t * )call_node->receiver)->unescaped, parser)); + VALUE value = rb_fstring(parse_string_encoded(scope_node, call_node->receiver, &((const pm_string_node_t * )call_node->receiver)->unescaped)); ADD_INSN2(ret, &dummy_line_node, opt_str_freeze, value, new_callinfo(iseq, idFreeze, 0, 0, NULL, FALSE)); return; } @@ -4503,7 +4503,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, case idAREF: { if (pm_opt_aref_with_p(iseq, call_node)) { const pm_string_node_t *string = (const pm_string_node_t *) ((const pm_arguments_node_t *) call_node->arguments)->arguments.nodes[0]; - VALUE value = rb_fstring(parse_string_encoded((const pm_node_t *) string, &string->unescaped, parser)); + VALUE value = rb_fstring(parse_string_encoded(scope_node, (const pm_node_t *) string, &string->unescaped)); PM_COMPILE_NOT_POPPED(call_node->receiver); ADD_INSN2(ret, &dummy_line_node, opt_aref_with, value, new_callinfo(iseq, idAREF, 1, 0, NULL, FALSE)); @@ -4518,7 +4518,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, case idASET: { if (pm_opt_aset_with_p(iseq, call_node)) { const pm_string_node_t *string = (const pm_string_node_t *) ((const pm_arguments_node_t *) call_node->arguments)->arguments.nodes[0]; - VALUE value = rb_fstring(parse_string_encoded((const pm_node_t *) string, &string->unescaped, parser)); + VALUE value = rb_fstring(parse_string_encoded(scope_node, (const pm_node_t *) string, &string->unescaped)); 
PM_COMPILE_NOT_POPPED(call_node->receiver); PM_COMPILE_NOT_POPPED(((const pm_arguments_node_t *) call_node->arguments)->arguments.nodes[1]); @@ -4736,7 +4736,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, else { if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) { const pm_string_node_t *string = (const pm_string_node_t *) condition; - VALUE value = rb_fstring(parse_string_encoded((const pm_node_t *) string, &string->unescaped, parser)); + VALUE value = rb_fstring(parse_string_encoded(scope_node, (const pm_node_t *) string, &string->unescaped)); ADD_INSN1(cond_seq, &dummy_line_node, putobject, value); } else { @@ -4965,7 +4965,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, VALUE class_name = rb_str_freeze(rb_sprintf("", rb_id2str(class_id))); pm_scope_node_t next_scope_node; - pm_scope_node_init((pm_node_t *)class_node, &next_scope_node, scope_node, parser); + pm_scope_node_init((pm_node_t *)class_node, &next_scope_node, scope_node); const rb_iseq_t *class_iseq = NEW_CHILD_ISEQ(&next_scope_node, class_name, ISEQ_TYPE_CLASS, lineno); pm_scope_node_destroy(&next_scope_node); @@ -5379,7 +5379,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, ID method_name = pm_constant_id_lookup(scope_node, def_node->name); pm_scope_node_t next_scope_node; - pm_scope_node_init((pm_node_t *)def_node, &next_scope_node, scope_node, parser); + pm_scope_node_init((pm_node_t *)def_node, &next_scope_node, scope_node); rb_iseq_t *method_iseq = NEW_ISEQ(&next_scope_node, rb_id2str(method_name), ISEQ_TYPE_METHOD, lineno); pm_scope_node_destroy(&next_scope_node); @@ -5487,7 +5487,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // Next, create the new scope that is going to contain the block that // will be passed to the each method. 
pm_scope_node_t next_scope_node; - pm_scope_node_init((pm_node_t *) cast, &next_scope_node, scope_node, parser); + pm_scope_node_init((pm_node_t *) cast, &next_scope_node, scope_node); const rb_iseq_t *child_iseq = NEW_CHILD_ISEQ(&next_scope_node, make_name_for_block(iseq), ISEQ_TYPE_BLOCK, lineno); pm_scope_node_destroy(&next_scope_node); @@ -5543,7 +5543,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, if (forwarding_super_node->block) { pm_scope_node_t next_scope_node; - pm_scope_node_init((pm_node_t *)forwarding_super_node->block, &next_scope_node, scope_node, parser); + pm_scope_node_init((pm_node_t *)forwarding_super_node->block, &next_scope_node, scope_node); block = NEW_CHILD_ISEQ(&next_scope_node, make_name_for_block(iseq), ISEQ_TYPE_BLOCK, lineno); pm_scope_node_destroy(&next_scope_node); @@ -5941,7 +5941,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, int ic_index = ISEQ_BODY(iseq)->ise_size++; pm_scope_node_t next_scope_node; - pm_scope_node_init((pm_node_t*)node, &next_scope_node, scope_node, parser); + pm_scope_node_init((pm_node_t*)node, &next_scope_node, scope_node); block_iseq = NEW_CHILD_ISEQ(&next_scope_node, make_name_for_block(iseq), ISEQ_TYPE_BLOCK, lineno); pm_scope_node_destroy(&next_scope_node); @@ -6019,7 +6019,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, const pm_lambda_node_t *cast = (const pm_lambda_node_t *) node; pm_scope_node_t next_scope_node; - pm_scope_node_init(node, &next_scope_node, scope_node, parser); + pm_scope_node_init(node, &next_scope_node, scope_node); int opening_lineno = pm_location_line_number(parser, &cast->opening_loc); const rb_iseq_t *block = NEW_CHILD_ISEQ(&next_scope_node, make_name_for_block(iseq), ISEQ_TYPE_BLOCK, opening_lineno); @@ -6133,7 +6133,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, if (!popped) { pm_match_last_line_node_t *cast = 
(pm_match_last_line_node_t *) node; - VALUE regex_str = parse_string(&cast->unescaped, parser); + VALUE regex_str = parse_string(scope_node, &cast->unescaped); VALUE regex = rb_reg_new(RSTRING_PTR(regex_str), RSTRING_LEN(regex_str), pm_reg_flags(node)); RB_GC_GUARD(regex_str); @@ -6328,7 +6328,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, VALUE module_name = rb_str_freeze(rb_sprintf("", rb_id2str(module_id))); pm_scope_node_t next_scope_node; - pm_scope_node_init((pm_node_t *)module_node, &next_scope_node, scope_node, parser); + pm_scope_node_init((pm_node_t *)module_node, &next_scope_node, scope_node); const rb_iseq_t *module_iseq = NEW_CHILD_ISEQ(&next_scope_node, module_name, ISEQ_TYPE_CLASS, lineno); pm_scope_node_destroy(&next_scope_node); @@ -6561,7 +6561,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, const rb_iseq_t *prevblock = ISEQ_COMPILE_DATA(iseq)->current_block; pm_scope_node_t next_scope_node; - pm_scope_node_init(node, &next_scope_node, scope_node, parser); + pm_scope_node_init(node, &next_scope_node, scope_node); child_iseq = NEW_CHILD_ISEQ(&next_scope_node, make_name_for_block(iseq), ISEQ_TYPE_BLOCK, lineno); pm_scope_node_destroy(&next_scope_node); @@ -6795,7 +6795,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, const pm_rescue_modifier_node_t *cast = (const pm_rescue_modifier_node_t *) node; pm_scope_node_t rescue_scope_node; - pm_scope_node_init((const pm_node_t *) cast, &rescue_scope_node, scope_node, parser); + pm_scope_node_init((const pm_node_t *) cast, &rescue_scope_node, scope_node); rb_iseq_t *rescue_iseq = NEW_CHILD_ISEQ( &rescue_scope_node, @@ -7686,7 +7686,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // We create another ScopeNode from the statements within the PostExecutionNode pm_scope_node_t next_scope_node; - pm_scope_node_init((pm_node_t *)post_execution_node->statements, 
&next_scope_node, scope_node, parser); + pm_scope_node_init((pm_node_t *)post_execution_node->statements, &next_scope_node, scope_node); const rb_iseq_t *block = NEW_CHILD_ISEQ(&next_scope_node, make_name_for_block(body->parent_iseq), ISEQ_TYPE_BLOCK, lineno); pm_scope_node_destroy(&next_scope_node); @@ -7793,7 +7793,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, const pm_singleton_class_node_t *cast = (const pm_singleton_class_node_t *) node; pm_scope_node_t next_scope_node; - pm_scope_node_init((const pm_node_t *) cast, &next_scope_node, scope_node, parser); + pm_scope_node_init((const pm_node_t *) cast, &next_scope_node, scope_node); const rb_iseq_t *child_iseq = NEW_ISEQ(&next_scope_node, rb_fstring_lit("singleton class"), ISEQ_TYPE_CLASS, location.line); pm_scope_node_destroy(&next_scope_node); @@ -7870,7 +7870,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // ^^^^^ if (!popped) { const pm_string_node_t *cast = (const pm_string_node_t *) node; - VALUE value = rb_fstring(parse_string_encoded(node, &cast->unescaped, parser)); + VALUE value = rb_fstring(parse_string_encoded(scope_node, node, &cast->unescaped)); if (PM_NODE_FLAG_P(node, PM_STRING_FLAGS_FROZEN)) { PUSH_INSN1(ret, location, putobject, value); @@ -7900,7 +7900,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, const rb_iseq_t *parent_block = ISEQ_COMPILE_DATA(iseq)->current_block; if (cast->block && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE)) { pm_scope_node_t next_scope_node; - pm_scope_node_init(cast->block, &next_scope_node, scope_node, parser); + pm_scope_node_init(cast->block, &next_scope_node, scope_node); parent_block = NEW_CHILD_ISEQ(&next_scope_node, make_name_for_block(iseq), ISEQ_TYPE_BLOCK, lineno); pm_scope_node_destroy(&next_scope_node); } @@ -7991,7 +7991,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // `foo` // ^^^^^ const 
pm_x_string_node_t *cast = (const pm_x_string_node_t *) node; - VALUE value = parse_string_encoded(node, &cast->unescaped, parser); + VALUE value = parse_string_encoded(scope_node, node, &cast->unescaped); PUSH_INSN(ret, location, putself); PUSH_INSN1(ret, location, putobject, value); @@ -8163,8 +8163,9 @@ pm_parse_input(pm_parse_result_t *result, VALUE filepath) pm_options_filepath_set(&result->options, RSTRING_PTR(filepath)); RB_GC_GUARD(filepath); - pm_parser_init(&result->parser, pm_string_source(&result->input), pm_string_length(&result->input), &result->options); - const pm_node_t *node = pm_parse(&result->parser); + pm_parser_t *parser = &result->parser; + pm_parser_init(parser, pm_string_source(&result->input), pm_string_length(&result->input), &result->options); + const pm_node_t *node = pm_parse(parser); // If there are errors, raise an appropriate error and free the result. if (result->parser.error_list.size > 0) { @@ -8177,10 +8178,10 @@ pm_parse_input(pm_parse_result_t *result, VALUE filepath) // Emit all of the various warnings from the parse. 
const pm_diagnostic_t *warning; - const char *warning_filepath = (const char *) pm_string_source(&result->parser.filepath); + const char *warning_filepath = (const char *) pm_string_source(&parser->filepath); - for (warning = (pm_diagnostic_t *) result->parser.warning_list.head; warning != NULL; warning = (pm_diagnostic_t *) warning->node.next) { - int line = pm_location_line_number(&result->parser, &warning->location); + for (warning = (pm_diagnostic_t *) parser->warning_list.head; warning != NULL; warning = (pm_diagnostic_t *) warning->node.next) { + int line = pm_location_line_number(parser, &warning->location); if (warning->level == PM_WARNING_LEVEL_VERBOSE) { rb_compile_warning(warning_filepath, line, "%s", warning->message); @@ -8192,20 +8193,24 @@ pm_parse_input(pm_parse_result_t *result, VALUE filepath) // Now set up the constant pool and intern all of the various constants into // their corresponding IDs. - pm_scope_node_init(node, &result->node, NULL, &result->parser); + pm_scope_node_t *scope_node = &result->node; + pm_scope_node_init(node, scope_node, NULL); + + scope_node->encoding = rb_enc_find(parser->encoding->name); + if (!scope_node->encoding) rb_bug("Encoding not found %s!", parser->encoding->name); - result->node.constants = calloc(result->parser.constant_pool.size, sizeof(ID)); - rb_encoding *encoding = rb_enc_find(result->parser.encoding->name); + scope_node->parser = parser; + scope_node->constants = calloc(parser->constant_pool.size, sizeof(ID)); - for (uint32_t index = 0; index < result->parser.constant_pool.size; index++) { - pm_constant_t *constant = &result->parser.constant_pool.constants[index]; - result->node.constants[index] = rb_intern3((const char *) constant->start, constant->length, encoding); + for (uint32_t index = 0; index < parser->constant_pool.size; index++) { + pm_constant_t *constant = &parser->constant_pool.constants[index]; + scope_node->constants[index] = rb_intern3((const char *) constant->start, constant->length, 
scope_node->encoding); } - result->node.index_lookup_table = st_init_numtable(); - pm_constant_id_list_t *locals = &result->node.locals; + scope_node->index_lookup_table = st_init_numtable(); + pm_constant_id_list_t *locals = &scope_node->locals; for (size_t index = 0; index < locals->size; index++) { - st_insert(result->node.index_lookup_table, locals->ids[index], index); + st_insert(scope_node->index_lookup_table, locals->ids[index], index); } // If we got here, this is a success and we can return Qnil to indicate that @@ -8219,14 +8224,12 @@ pm_parse_input(pm_parse_result_t *result, VALUE filepath) * source file that the given parser parsed. */ static inline VALUE -pm_parse_file_script_lines(const pm_parser_t *parser) +pm_parse_file_script_lines(const pm_scope_node_t *scope_node, const pm_parser_t *parser) { + const pm_newline_list_t *newline_list = &parser->newline_list; const char *start = (const char *) parser->start; const char *end = (const char *) parser->end; - rb_encoding *encoding = rb_enc_find(parser->encoding->name); - const pm_newline_list_t *newline_list = &parser->newline_list; - // If we end exactly on a newline, then there's no need to push on a final // segment. If we don't, then we need to push on the last offset up to the // end of the string. @@ -8240,27 +8243,23 @@ pm_parse_file_script_lines(const pm_parser_t *parser) size_t offset = newline_list->offsets[index]; size_t length = newline_list->offsets[index + 1] - offset; - rb_ary_push(lines, rb_enc_str_new(start + offset, length, encoding)); + rb_ary_push(lines, rb_enc_str_new(start + offset, length, scope_node->encoding)); } // Push on the last line if we need to. 
if (last_push) { - rb_ary_push(lines, rb_enc_str_new(start + last_offset, end - (start + last_offset), encoding)); + rb_ary_push(lines, rb_enc_str_new(start + last_offset, end - (start + last_offset), scope_node->encoding)); } return lines; } /** - * Parse the given filepath and store the resulting scope node in the given - * parse result struct. It returns a Ruby error if the file cannot be read or - * if it cannot be parsed properly. It is assumed that the parse result object - * is zeroed out. - * - * TODO: This should raise a better error when the file cannot be read. + * Attempt to load the file into memory. Return a Ruby error if the file cannot + * be read. */ VALUE -pm_parse_file(pm_parse_result_t *result, VALUE filepath) +pm_load_file(pm_parse_result_t *result, VALUE filepath) { if (!pm_string_mapped_init(&result->input, RSTRING_PTR(filepath))) { #ifdef _WIN32 @@ -8274,6 +8273,18 @@ pm_parse_file(pm_parse_result_t *result, VALUE filepath) return err; } + return Qnil; +} + +/** + * Parse the given filepath and store the resulting scope node in the given + * parse result struct. It returns a Ruby error if the file cannot be read or + * if it cannot be parsed properly. It is assumed that the parse result object + * is zeroed out. + */ +VALUE +pm_parse_file(pm_parse_result_t *result, VALUE filepath) +{ VALUE error = pm_parse_input(result, filepath); // If we're parsing a filepath, then we need to potentially support the @@ -8285,13 +8296,28 @@ pm_parse_file(pm_parse_result_t *result, VALUE filepath) VALUE script_lines = rb_const_get_at(rb_cObject, id_script_lines); if (RB_TYPE_P(script_lines, T_HASH)) { - rb_hash_aset(script_lines, filepath, pm_parse_file_script_lines(&result->parser)); + rb_hash_aset(script_lines, filepath, pm_parse_file_script_lines(&result->node, &result->parser)); } } return error; } +/** + * Load and then parse the given filepath. It returns a Ruby error if the file + * cannot be read or if it cannot be parsed properly. 
+ */ +VALUE +pm_load_parse_file(pm_parse_result_t *result, VALUE filepath) +{ + VALUE error = pm_load_file(result, filepath); + if (NIL_P(error)) { + error = pm_parse_file(result, filepath); + } + + return error; +} + /** * Parse the given source that corresponds to the given filepath and store the * resulting scope node in the given parse result struct. This function could diff --git a/prism_compile.h b/prism_compile.h index 2080db77393031..d170e1b7291981 100644 --- a/prism_compile.h +++ b/prism_compile.h @@ -1,4 +1,5 @@ #include "prism/prism.h" +#include "ruby/encoding.h" /** * the getlocal and setlocal instructions require two parameters. level is how @@ -21,7 +22,9 @@ typedef struct pm_scope_node { pm_node_t *parameters; pm_node_t *body; pm_constant_id_list_t locals; + const pm_parser_t *parser; + rb_encoding *encoding; // The size of the local table // on the iseq which includes @@ -32,7 +35,7 @@ typedef struct pm_scope_node { st_table *index_lookup_table; } pm_scope_node_t; -void pm_scope_node_init(const pm_node_t *node, pm_scope_node_t *scope, pm_scope_node_t *previous, const pm_parser_t *parser); +void pm_scope_node_init(const pm_node_t *node, pm_scope_node_t *scope, pm_scope_node_t *previous); void pm_scope_node_destroy(pm_scope_node_t *scope_node); bool *rb_ruby_prism_ptr(void); @@ -44,7 +47,9 @@ typedef struct { bool parsed; } pm_parse_result_t; +VALUE pm_load_file(pm_parse_result_t *result, VALUE filepath); VALUE pm_parse_file(pm_parse_result_t *result, VALUE filepath); +VALUE pm_load_parse_file(pm_parse_result_t *result, VALUE filepath); VALUE pm_parse_string(pm_parse_result_t *result, VALUE source, VALUE filepath); void pm_parse_result_free(pm_parse_result_t *result); diff --git a/ruby.c b/ruby.c index fde179ef7ced19..a67ad82ead5659 100644 --- a/ruby.c +++ b/ruby.c @@ -2080,11 +2080,15 @@ process_script(ruby_cmdline_options_t *opt) return ast; } +/** + * Call ruby_opt_init to set up the global state based on the command line + * options, and then warn 
if prism is enabled and the experimental warning + * category is enabled. + */ static void -prism_script(ruby_cmdline_options_t *opt, pm_parse_result_t *result) +prism_opt_init(ruby_cmdline_options_t *opt) { ruby_opt_init(opt); - memset(result, 0, sizeof(pm_parse_result_t)); if (rb_warning_category_enabled_p(RB_WARN_CATEGORY_EXPERIMENTAL)) { rb_category_warn( @@ -2095,8 +2099,18 @@ prism_script(ruby_cmdline_options_t *opt, pm_parse_result_t *result) "issue tracker." ); } +} +/** + * Process the command line options and parse the script into the given result. + * Raise an error if the script cannot be parsed. + */ +static void +prism_script(ruby_cmdline_options_t *opt, pm_parse_result_t *result) +{ + memset(result, 0, sizeof(pm_parse_result_t)); pm_options_t *options = &result->options; + pm_options_line_set(options, 1); pm_options_command_line_p_set(options, opt->do_print); pm_options_command_line_n_set(options, opt->do_loop); @@ -2108,21 +2122,32 @@ prism_script(ruby_cmdline_options_t *opt, pm_parse_result_t *result) rb_raise(rb_eRuntimeError, "Prism support for streaming code from stdin is not currently supported"); } else if (opt->e_script) { + prism_opt_init(opt); error = pm_parse_string(result, opt->e_script, rb_str_new2("-e")); } else { - error = pm_parse_file(result, opt->script_name); + error = pm_load_file(result, opt->script_name); - // If we found an __END__ marker, then we're going to define a - // global DATA constant that is a file object that can be read - // to read the contents after the marker. + // If reading the file did not error, at that point we load the command + // line options. We do it in this order so that if the main script fails + // to load, it doesn't require files required by -r. 
+ if (NIL_P(error)) { + prism_opt_init(opt); + error = pm_parse_file(result, opt->script_name); + } + + // If we found an __END__ marker, then we're going to define a global + // DATA constant that is a file object that can be read to read the + // contents after the marker. if (NIL_P(error) && result->parser.data_loc.start != NULL) { int xflag = opt->xflag; VALUE file = open_load_file(opt->script_name, &xflag); - size_t offset = result->parser.data_loc.start - result->parser.start + 7; - if ((result->parser.start + offset < result->parser.end) && result->parser.start[offset] == '\r') offset++; - if ((result->parser.start + offset < result->parser.end) && result->parser.start[offset] == '\n') offset++; + const pm_parser_t *parser = &result->parser; + size_t offset = parser->data_loc.start - parser->start + 7; + + if ((parser->start + offset < parser->end) && parser->start[offset] == '\r') offset++; + if ((parser->start + offset < parser->end) && parser->start[offset] == '\n') offset++; rb_funcall(file, rb_intern_const("seek"), 2, SIZET2NUM(offset), INT2FIX(SEEK_SET)); rb_define_global_const("DATA", file); diff --git a/tool/lib/test/unit.rb b/tool/lib/test/unit.rb index a068ae6b9d44e4..01c53e929cfe31 100644 --- a/tool/lib/test/unit.rb +++ b/tool/lib/test/unit.rb @@ -716,7 +716,15 @@ def _run_parallel suites, type, result del_status_line or puts error, suites = suites.partition {|r| r[:error]} unless suites.empty? - puts "\n""Retrying..." + puts "\n" + @failed_output.puts "Failed tests:" + suites.each {|r| + r[:report].each {|c, m, e| + @failed_output.puts "#{c}##{m}: #{e&.class}: #{e&.message&.slice(/\A.*/)}" + } + } + @failed_output.puts "\n" + puts "Retrying..." @verbose = options[:verbose] suites.map! 
{|r| ::Object.const_get(r[:testcase])} _run_suites(suites, type) diff --git a/yjit.c b/yjit.c index 8632361699dde8..13707900c09751 100644 --- a/yjit.c +++ b/yjit.c @@ -903,13 +903,22 @@ rb_yjit_splat_varg_checks(VALUE *sp, VALUE splat_array, rb_control_frame_t *cfp) // Would we overflow if we put the contents of the array onto the stack? if (sp + len > (VALUE *)(cfp - 2)) return Qfalse; + // Reject keywords hash since that requires duping it sometimes + if (len > 0) { + VALUE last_hash = RARRAY_AREF(splat_array, len - 1); + if (RB_TYPE_P(last_hash, T_HASH) && + FL_TEST_RAW(last_hash, RHASH_PASS_AS_KEYWORDS)) { + return Qfalse; + } + } + return Qtrue; } // Push array elements to the stack for a C method that has a variable number // of parameters. Returns the number of arguments the splat array contributes. int -rb_yjit_splat_varg_cfunc(VALUE *stack_splat_array, bool sole_splat) +rb_yjit_splat_varg_cfunc(VALUE *stack_splat_array) { VALUE splat_array = *stack_splat_array; int len; @@ -918,17 +927,6 @@ rb_yjit_splat_varg_cfunc(VALUE *stack_splat_array, bool sole_splat) RUBY_ASSERT(RB_TYPE_P(splat_array, T_ARRAY)); len = (int)RARRAY_LEN(splat_array); - // If this is a splat call without any keyword arguments, exclude the - // ruby2_keywords hash if it's empty - if (sole_splat && len > 0) { - VALUE last_hash = RARRAY_AREF(splat_array, len - 1); - if (RB_TYPE_P(last_hash, T_HASH) && - FL_TEST_RAW(last_hash, RHASH_PASS_AS_KEYWORDS) && - RHASH_EMPTY_P(last_hash)) { - len--; - } - } - // Push the contents of the array onto the stack MEMCPY(stack_splat_array, RARRAY_CONST_PTR(splat_array), VALUE, len); diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 2e946441f15100..1e8f04640325ca 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -6324,9 +6324,8 @@ fn gen_send_cfunc( // Push a dynamic number of items from the splat array to the stack when calling a vargs method let dynamic_splat_size = if variable_splat { asm_comment!(asm, "variable length splat"); - 
let just_splat = usize::from(!kw_splat && kw_arg.is_null()).into(); let stack_splat_array = asm.lea(asm.stack_opnd(0)); - Some(asm.ccall(rb_yjit_splat_varg_cfunc as _, vec![stack_splat_array, just_splat])) + Some(asm.ccall(rb_yjit_splat_varg_cfunc as _, vec![stack_splat_array])) } else { None }; @@ -8007,6 +8006,12 @@ fn gen_send_dynamic Opnd>( // Save PC and SP to prepare for dynamic dispatch jit_prepare_non_leaf_call(jit, asm); + // Squash stack canary that might be left over from elsewhere + assert_eq!(false, asm.get_leaf_ccall()); + if cfg!(debug_assertions) { + asm.store(asm.ctx.sp_opnd(0), 0.into()); + } + // Dispatch a method let ret = vm_sendish(asm); diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index b131b62bfd1b1c..e1cfeec22bead0 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -1199,10 +1199,7 @@ extern "C" { splat_array: VALUE, cfp: *mut rb_control_frame_t, ) -> VALUE; - pub fn rb_yjit_splat_varg_cfunc( - stack_splat_array: *mut VALUE, - sole_splat: bool, - ) -> ::std::os::raw::c_int; + pub fn rb_yjit_splat_varg_cfunc(stack_splat_array: *mut VALUE) -> ::std::os::raw::c_int; pub fn rb_yjit_dump_iseq_loc(iseq: *const rb_iseq_t, insn_idx: u32); pub fn rb_yjit_iseq_inspect(iseq: *const rb_iseq_t) -> *mut ::std::os::raw::c_char; pub fn rb_FL_TEST(obj: VALUE, flags: VALUE) -> VALUE;