diff --git a/CHANGELOG.md b/CHANGELOG.md index edfe275cce..245f7f225d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA --- +## 1.14.1 / unreleased + +### Fixed + +* Serializing documents now works again with pseudo-IO objects that don't support IO's encoding API (like rubyzip's `Zip::OutputStream`). This was a regression in v1.14.0 due to the fix for [#752](https://github.com/sparklemotion/nokogiri/issues/752) in [#2434](https://github.com/sparklemotion/nokogiri/issues/2434), and was not completely fixed by [#2753](https://github.com/sparklemotion/nokogiri/issues/2753). [[#2773](https://github.com/sparklemotion/nokogiri/issues/2773)] + + ## 1.14.0 / 2023-01-12 ### Notable Changes diff --git a/Gemfile b/Gemfile index 52c0c43c32..0780fcab12 100644 --- a/Gemfile +++ b/Gemfile @@ -24,6 +24,7 @@ group :development do gem "minitest-reporters", "= 1.5.0" gem "ruby_memcheck", "1.2.0" unless RUBY_PLATFORM == "java" gem "simplecov", "= 0.21.2" + gem "rubyzip", "~> 2.3.2" # rubocop if Gem::Requirement.new("~> 3.0").satisfied_by?(Gem::Version.new(RUBY_VERSION)) diff --git a/ext/nokogiri/nokogiri.c b/ext/nokogiri/nokogiri.c index 3844c856a0..ca7240f831 100644 --- a/ext/nokogiri/nokogiri.c +++ b/ext/nokogiri/nokogiri.c @@ -112,8 +112,13 @@ noko_io_write(void *io, char *c_buffer, int c_buffer_len) { VALUE rb_args[2], rb_n_bytes_written; VALUE rb_io = (VALUE)io; - VALUE rb_enc = rb_funcall(rb_io, id_external_encoding, 0); - rb_encoding *io_encoding = RB_NIL_P(rb_enc) ? rb_ascii8bit_encoding() : rb_to_encoding(rb_enc); + VALUE rb_enc = Qnil; + rb_encoding *io_encoding; + + if (rb_respond_to(rb_io, id_external_encoding)) { + rb_enc = rb_funcall(rb_io, id_external_encoding, 0); + } + io_encoding = RB_NIL_P(rb_enc) ?
rb_ascii8bit_encoding() : rb_to_encoding(rb_enc); rb_args[0] = rb_io; rb_args[1] = rb_enc_str_new(c_buffer, (long)c_buffer_len, io_encoding); diff --git a/test/xml/test_document_encoding.rb b/test/xml/test_document_encoding.rb index e74592b4fa..cd8b1b4da2 100644 --- a/test/xml/test_document_encoding.rb +++ b/test/xml/test_document_encoding.rb @@ -87,6 +87,42 @@ class TestDocumentEncoding < Nokogiri::TestCase assert_equal(Encoding::UTF_16, output.encoding) assert_equal(utf16_document.bytesize, output.bytesize) end + + describe "pseudo-IO" do + it "serializes correctly with Zip::OutputStream objects" do + # https://github.com/sparklemotion/nokogiri/issues/2773 + require "zip" + + # the fixture must serialize back byte-for-byte: the assertions below compare output.bytesize against xml.bytesize + xml = <<~XML + <?xml version="1.0" encoding="UTF-8"?> + <root> + <bar>A</bar> + </root> + XML + + Dir.mktmpdir do |tmpdir| + zipfile_path = File.join(tmpdir, "test.zip") + + Zip::OutputStream.open(zipfile_path) do |io| + io.put_next_entry("test-utf8.xml") + Nokogiri::XML(xml).write_to(io, encoding: "UTF-8") + end + + Zip::InputStream.open(zipfile_path) do |io| + entry = io.get_next_entry + assert_equal("test-utf8.xml", entry.name) + output = io.read + + # no final newline on jruby. descriptive, not prescriptive. + expected_length = Nokogiri.jruby? ? xml.bytesize - 1 : xml.bytesize + + assert_equal(Encoding::UTF_8, output.encoding) + assert_equal(expected_length, output.bytesize) + end + end + end + end end end end