From 97cde04bb5ddf150b30087d55a8f1d5fd11e2e92 Mon Sep 17 00:00:00 2001
From: Mike Dalessio <mike.dalessio@gmail.com>
Date: Sat, 21 Jan 2023 16:44:26 -0500
Subject: [PATCH 1/3] refactor: test using Encoding instead of names

---
 test/xml/test_document_encoding.rb | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/test/xml/test_document_encoding.rb b/test/xml/test_document_encoding.rb
index 67b5cf05ae..33796e26bd 100644
--- a/test/xml/test_document_encoding.rb
+++ b/test/xml/test_document_encoding.rb
@@ -31,17 +31,19 @@ class TestDocumentEncoding < Nokogiri::TestCase
         end
 
         it "encodes the URL as UTF-8" do
-          assert_equal("UTF-8", shift_jis_document.url.encoding.name)
+          assert_equal(Encoding::UTF_8, shift_jis_document.url.encoding)
         end
 
         it "encodes the encoding name as UTF-8" do
-          assert_equal("UTF-8", shift_jis_document.encoding.encoding.name)
+          assert_equal(Encoding::UTF_8, shift_jis_document.encoding.encoding)
         end
 
         it "encodes the library versions as UTF-8" do
           skip_unless_libxml2
-          assert_equal("UTF-8", Nokogiri::LIBXML_COMPILED_VERSION.encoding.name)
-          assert_equal("UTF-8", Nokogiri::LIBXSLT_COMPILED_VERSION.encoding.name)
+
+          assert_equal(Encoding::UTF_8, Nokogiri::LIBXML_COMPILED_VERSION.encoding)
+          assert_equal(Encoding::UTF_8, Nokogiri::LIBXSLT_COMPILED_VERSION.encoding)
+        end
         end
 
         it "serializes UTF-16 correctly across libxml2 buffer flushes" do

From 9e137bba6557f6f5291b4047b9141f7d1d1d7f14 Mon Sep 17 00:00:00 2001
From: Mike Dalessio <mike.dalessio@gmail.com>
Date: Sat, 21 Jan 2023 16:45:43 -0500
Subject: [PATCH 2/3] test: expand UTF-16 testing to JRuby

and make sure we're using the BOM correctly
---
 test/xml/test_document_encoding.rb | 47 ++++++++++++++++++++++--------
 1 file changed, 35 insertions(+), 12 deletions(-)

diff --git a/test/xml/test_document_encoding.rb b/test/xml/test_document_encoding.rb
index 33796e26bd..e74592b4fa 100644
--- a/test/xml/test_document_encoding.rb
+++ b/test/xml/test_document_encoding.rb
@@ -8,6 +8,17 @@ class TestDocumentEncoding < Nokogiri::TestCase
       describe "Nokogiri::XML::Document encoding" do
         let(:shift_jis_document) { Nokogiri::XML(File.read(SHIFT_JIS_XML), SHIFT_JIS_XML) }
         let(:ascii_document) { Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE) }
+        let(:utf16_document) do
+          # the document needs to be large enough to trigger a libxml2 buffer flush. the buffer size
+          # is determined by MINLEN in xmlIO.c, which is hardcoded to 4000 code points.
+          size = 8000
+          <<~XML.encode(Encoding::UTF_16)
+            <?xml version="1.0" encoding="UTF-16"?>
+            <root>
+              <bar>#{"A" * size}</bar>
+            </root>
+          XML
+        end
 
         describe "#encoding" do
           it "describes the document's encoding correctly" do
@@ -44,25 +55,37 @@ class TestDocumentEncoding < Nokogiri::TestCase
           assert_equal(Encoding::UTF_8, Nokogiri::LIBXML_COMPILED_VERSION.encoding)
           assert_equal(Encoding::UTF_8, Nokogiri::LIBXSLT_COMPILED_VERSION.encoding)
         end
+
+        it "parses and serializes UTF-16 correctly" do
+          xml = <<~XML.encode(Encoding::UTF_16)
+            <?xml version="1.0" encoding="UTF-16"?>
+            <root><bar>A</bar></root>
+          XML
+          output = Nokogiri::XML(xml).to_xml
+          output_doc = Nokogiri::XML(output)
+
+          # these are descriptive, not prescriptive. the difference is whitespace. this may change
+          # as implementations change. the intention is to verify that they're _roughly_ the right
+          # length, they're not zero or half-width or double-width.
+          expected_bytesize = Nokogiri.jruby? ? 132 : 142
+
+          assert_equal(Encoding::UTF_16, output.encoding)
+          assert_equal("UTF-16", output_doc.encoding)
+          assert_equal(expected_bytesize, output.bytesize)
+          output_doc.at_xpath("/root/bar/text()").tap do |node|
+            assert(node, "unexpected DOM structure in #{output.inspect}")
+            assert_equal("A", node.content)
+          end
         end
 
         it "serializes UTF-16 correctly across libxml2 buffer flushes" do
           # https://github.com/sparklemotion/nokogiri/issues/752
           skip_unless_libxml2
 
-          # the document needs to be large enough to trigger a libxml2 buffer flush. the buffer size
-          # is determined by MINLEN in xmlIO.c, which is hardcoded to 4000 code points.
-          size = 8000
-          input = String.new(<<~XML, encoding: "UTF-16")
-            <?xml version="1.0" encoding="UTF-16"?>
-            <root>
-              <bar>#{"A" * size}</bar>
-            </root>
-          XML
-          expected_length = (input.bytesize * 2) + 2 # double character width, add BOM bytes 0xFEFF
+          output = Nokogiri::XML(utf16_document).to_xml
 
-          output = Nokogiri::XML(input).to_xml
-          assert_equal(expected_length, output.bytesize)
+          assert_equal(Encoding::UTF_16, output.encoding)
+          assert_equal(utf16_document.bytesize, output.bytesize)
         end
       end
     end

From 952ff446ad11e07f8b275352d589cb87b0de0779 Mon Sep 17 00:00:00 2001
From: Mike Dalessio <mike.dalessio@gmail.com>
Date: Sat, 21 Jan 2023 16:58:23 -0500
Subject: [PATCH 3/3] fix: ensure serialization still works with pseudo-IO
 classes

Fixes #2773
---
 CHANGELOG.md                       |  9 ++++++++
 Gemfile                            |  1 +
 ext/nokogiri/nokogiri.c            |  9 ++++++--
 test/xml/test_document_encoding.rb | 35 ++++++++++++++++++++++++++++++
 4 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index edfe275cce..245f7f225d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,15 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA
 
 ---
 
+## 1.14.1 / unreleased
+
+### Fixed
+
+* Serializing documents now works again with pseudo-IO objects that don't support IO's encoding API (like rubyzip's `Zip::OutputStream`). This was a regression in v1.14.0 due to the fix for [#752](https://github.com/sparklemotion/nokogiri/issues/752) in [#2434](https://github.com/sparklemotion/nokogiri/issues/2434), and was not completely fixed by [#2753](https://github.com/sparklemotion/nokogiri/issues/2753). [[#2773](https://github.com/sparklemotion/nokogiri/issues/2773)]
+
+2e260f53e6b84b8f9c1b115b0ded85eebc8155d7
+
+
 ## 1.14.0 / 2023-01-12
 
 ### Notable Changes
diff --git a/Gemfile b/Gemfile
index 52c0c43c32..0780fcab12 100644
--- a/Gemfile
+++ b/Gemfile
@@ -24,6 +24,7 @@ group :development do
   gem "minitest-reporters", "= 1.5.0"
   gem "ruby_memcheck", "1.2.0" unless RUBY_PLATFORM == "java"
   gem "simplecov", "= 0.21.2"
+  gem "rubyzip", "~> 2.3.2"
 
   # rubocop
   if Gem::Requirement.new("~> 3.0").satisfied_by?(Gem::Version.new(RUBY_VERSION))
diff --git a/ext/nokogiri/nokogiri.c b/ext/nokogiri/nokogiri.c
index 3844c856a0..ca7240f831 100644
--- a/ext/nokogiri/nokogiri.c
+++ b/ext/nokogiri/nokogiri.c
@@ -112,8 +112,13 @@ noko_io_write(void *io, char *c_buffer, int c_buffer_len)
 {
   VALUE rb_args[2], rb_n_bytes_written;
   VALUE rb_io = (VALUE)io;
-  VALUE rb_enc = rb_funcall(rb_io, id_external_encoding, 0);
-  rb_encoding *io_encoding = RB_NIL_P(rb_enc) ? rb_ascii8bit_encoding() : rb_to_encoding(rb_enc);
+  VALUE rb_enc = Qnil;
+  rb_encoding *io_encoding;
+
+  if (rb_respond_to(rb_io, id_external_encoding)) {
+    rb_enc = rb_funcall(rb_io, id_external_encoding, 0);
+  }
+  io_encoding = RB_NIL_P(rb_enc) ? rb_ascii8bit_encoding() : rb_to_encoding(rb_enc);
 
   rb_args[0] = rb_io;
   rb_args[1] = rb_enc_str_new(c_buffer, (long)c_buffer_len, io_encoding);
diff --git a/test/xml/test_document_encoding.rb b/test/xml/test_document_encoding.rb
index e74592b4fa..cd8b1b4da2 100644
--- a/test/xml/test_document_encoding.rb
+++ b/test/xml/test_document_encoding.rb
@@ -87,6 +87,41 @@ class TestDocumentEncoding < Nokogiri::TestCase
           assert_equal(Encoding::UTF_16, output.encoding)
           assert_equal(utf16_document.bytesize, output.bytesize)
         end
+
+        describe "pseudo-IO" do
+          it "serializes correctly with Zip::OutputStream objects" do
+            # https://github.com/sparklemotion/nokogiri/issues/2773
+            require "zip"
+
+            xml = <<~XML
+              <?xml version="1.0" encoding="UTF-8"?>
+              <root>
+                <bar>A</bar>
+              </root>
+            XML
+
+            Dir.mktmpdir do |tmpdir|
+              zipfile_path = File.join(tmpdir, "test.zip")
+
+              Zip::OutputStream.open(zipfile_path) do |io|
+                io.put_next_entry("test-utf8.xml")
+                Nokogiri::XML(xml).write_to(io, encoding: "UTF-8")
+              end
+
+              Zip::InputStream.open(zipfile_path) do |io|
+                entry = io.get_next_entry
+                assert_equal("test-utf8.xml", entry.name)
+                output = io.read
+
+                # no final newline on jruby. descriptive, not prescriptive.
+                expected_length = Nokogiri.jruby? ? xml.bytesize - 1 : xml.bytesize
+
+                assert_equal(Encoding::UTF_8, output.encoding)
+                assert_equal(expected_length, output.bytesize)
+              end
+            end
+          end
+        end
       end
     end
   end