From 7a96f2cb94af4805a3ac45ab6d5dfc090d06347b Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Thu, 12 Jan 2023 13:43:27 +0100 Subject: [PATCH] Fix String#casecmp? for empty strings of different encodings * Fixes https://github.com/oracle/truffleruby/issues/2826 --- CHANGELOG.md | 1 + spec/ruby/core/string/casecmp_spec.rb | 10 ++++++++++ spec/ruby/core/symbol/casecmp_spec.rb | 8 ++++++++ .../org/truffleruby/core/encoding/EncodingNodes.java | 8 +++----- src/main/ruby/truffleruby/core/string.rb | 2 +- 5 files changed, 23 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 22d4cc9be4a4..f13f41132882 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -58,6 +58,7 @@ Compatibility: * Fix `Array#unshift` to not depend on `Array#[]=` and allow overriding `#[]=` in a subclass (#2772, @andrykonchin). * Fix syntactic check for `void value expression` (#2821, @eregon). * Fix `Range#step` with no block and non-`Numeric` values (#2824, @eregon). +* Fix `String#casecmp?` for empty strings of different encodings (#2826, @eregon). Performance: diff --git a/spec/ruby/core/string/casecmp_spec.rb b/spec/ruby/core/string/casecmp_spec.rb index 986fbc87181d..81ebea557c74 100644 --- a/spec/ruby/core/string/casecmp_spec.rb +++ b/spec/ruby/core/string/casecmp_spec.rb @@ -117,6 +117,11 @@ "B".casecmp(a).should == 1 end end + + it "returns 0 for empty strings in different encodings" do + ''.b.casecmp('').should == 0 + ''.b.casecmp(''.encode("UTF-32LE")).should == 0 + end end describe 'String#casecmp? independent of case' do @@ -191,4 +196,9 @@ it "returns nil if other can't be converted to a string" do "abc".casecmp?(mock('abc')).should be_nil end + + it "returns true for empty strings in different encodings" do + ''.b.should.casecmp?('') + ''.b.should.casecmp?(''.encode("UTF-32LE")) + end end diff --git a/spec/ruby/core/symbol/casecmp_spec.rb b/spec/ruby/core/symbol/casecmp_spec.rb index 80ea51e9109f..662a29a2847a 100644 --- a/spec/ruby/core/symbol/casecmp_spec.rb +++ b/spec/ruby/core/symbol/casecmp_spec.rb @@ -56,6 +56,10 @@ lower_a_tilde.casecmp(upper_a_tilde).should == 1 lower_a_umlaut.casecmp(upper_a_umlaut).should == 1 end + + it "returns 0 for empty strings in different encodings" do + ''.to_sym.casecmp(''.encode("UTF-32LE").to_sym).should == 0 + end end describe "Symbol#casecmp" do @@ -141,4 +145,8 @@ upper_a_tilde.casecmp?(lower_a_tilde).should == nil lower_a_tilde.casecmp?(upper_a_tilde).should == nil end + + it "returns true for empty symbols in different encodings" do + ''.to_sym.should.casecmp?(''.encode("UTF-32LE").to_sym) + end end diff --git a/src/main/java/org/truffleruby/core/encoding/EncodingNodes.java b/src/main/java/org/truffleruby/core/encoding/EncodingNodes.java index a81b15c64d9e..22d7040d87d3 100644 --- a/src/main/java/org/truffleruby/core/encoding/EncodingNodes.java +++ b/src/main/java/org/truffleruby/core/encoding/EncodingNodes.java @@ -417,6 +417,7 @@ protected Object isCompatible(Object first, Object second, } // encoding_compatible? but only accepting Strings for better footprint + // Like Primitive.encoding_ensure_compatible_str but returns nil if incompatible @Primitive(name = "strings_compatible?") public abstract static class AreStringsCompatibleNode extends PrimitiveArrayArgumentsNode { public static AreStringsCompatibleNode create() { @@ -748,6 +749,7 @@ protected int getIndex(RubyEncoding encoding) { } // MRI: rb_enc_check_str / rb_encoding_check (with Ruby String arguments) + // Like strings_compatible? but raises if incompatible @Primitive(name = "encoding_ensure_compatible_str") public abstract static class CheckStringEncodingPrimitiveNode extends PrimitiveArrayArgumentsNode { @Specialization(guards = { "libFirst.isRubyString(first)", "libSecond.isRubyString(second)", }, limit = "1") @@ -760,11 +762,7 @@ protected RubyEncoding checkEncodingStringString(Object first, Object second, final RubyEncoding secondEncoding = libSecond.getEncoding(second); final RubyEncoding negotiatedEncoding = negotiateCompatibleStringEncodingNode - .execute( - libFirst.getTString(first), - firstEncoding, - libSecond.getTString(second), - secondEncoding); + .execute(libFirst.getTString(first), firstEncoding, libSecond.getTString(second), secondEncoding); if (negotiatedEncoding == null) { errorProfile.enter(); diff --git a/src/main/ruby/truffleruby/core/string.rb b/src/main/ruby/truffleruby/core/string.rb index 3658e90569b1..4e54f6167d49 100644 --- a/src/main/ruby/truffleruby/core/string.rb +++ b/src/main/ruby/truffleruby/core/string.rb @@ -1206,7 +1206,7 @@ def casecmp?(other) other = Truffle::Type.rb_check_convert_type(other , String, :to_str) return nil if Primitive.nil? other - enc = Primitive.encoding_compatible?(encoding, other.encoding) + enc = Primitive.strings_compatible?(self, other) if Primitive.nil? enc return nil end