From 41e13e9c9fb5435d0c1d4230484a1a0f335fa40d Mon Sep 17 00:00:00 2001 From: TSUYUSATO Kitsune Date: Wed, 5 Jun 2024 16:49:23 +0900 Subject: [PATCH 1/5] Improve `Node#each_recursive` performance Add `benchmark/each_recursive.yaml` Rewrite `Node#each_recursive` implementation for performance Add a test for `Node#each_recursive` --- benchmark/each_recursive.yaml | 40 +++++++++++++++++++++++++++++++++++ lib/rexml/node.rb | 12 +++++++---- test/test_document.rb | 28 ++++++++++++++++++++++++ 3 files changed, 76 insertions(+), 4 deletions(-) create mode 100644 benchmark/each_recursive.yaml diff --git a/benchmark/each_recursive.yaml b/benchmark/each_recursive.yaml new file mode 100644 index 00000000..c745f8ce --- /dev/null +++ b/benchmark/each_recursive.yaml @@ -0,0 +1,40 @@ +loop_count: 100 +contexts: + - gems: + rexml: 3.2.6 + require: false + prelude: require 'rexml' + - name: master + prelude: | + $LOAD_PATH.unshift(File.expand_path("lib")) + require 'rexml' + - name: 3.2.6(YJIT) + gems: + rexml: 3.2.6 + require: false + prelude: | + require 'rexml' + RubyVM::YJIT.enable + - name: master(YJIT) + prelude: | + $LOAD_PATH.unshift(File.expand_path("lib")) + require 'rexml' + RubyVM::YJIT.enable + +prelude: | + require 'rexml/document' + + xml_source = +"" + 100.times do + x_node_source = "" + 100.times do + x_node_source = "#{x_node_source}" + end + xml_source << x_node_source + end + xml_source << "" + + document = REXML::Document.new(xml_source) + +benchmark: + each_recursive: document.each_recursive { |_| } diff --git a/lib/rexml/node.rb b/lib/rexml/node.rb index 081caba6..c771db70 100644 --- a/lib/rexml/node.rb +++ b/lib/rexml/node.rb @@ -52,10 +52,14 @@ def parent? # Visit all subnodes of +self+ recursively def each_recursive(&block) # :yields: node - self.elements.each {|node| - block.call(node) - node.each_recursive(&block) - } + stack = [] + each { |child| stack.unshift child if child.node_type == :element } + until stack.empty? + child = stack.pop + yield child + n = stack.size + child.each { |grandchild| stack.insert n, grandchild if grandchild.node_type == :element } + end end # Find (and return) first subnode (recursively) for which the block diff --git a/test/test_document.rb b/test/test_document.rb index f96bfd5d..3c72971d 100644 --- a/test/test_document.rb +++ b/test/test_document.rb @@ -417,6 +417,34 @@ def test_utf_16 assert_equal(expected_xml, actual_xml) end end + + class EachRecursiveTest < Test::Unit::TestCase + def test_each_recursive + xml_source = +'' + xml_source << '' + + 100.times do + x_node_source = '' + 100.times do + x_node_source = "#{x_node_source}" + end + xml_source << x_node_source + end + + xml_source << '' + xml_source << '' + + xml_source << '' + + document = REXML::Document.new(xml_source) + + count = 0 + document.each_recursive { count += 1 } + # Node#each_recursive iterates elements only. + # This does not iterate XML declerations, comments, attributes, CDATA sections, etc. + assert_equal(100 * 100 + 1, count) + end + end end end end From 5d93a49f7fed048e4e3efcc2ae960146da49d152 Mon Sep 17 00:00:00 2001 From: TSUYUSATO Kitsune Date: Thu, 6 Jun 2024 13:16:41 +0900 Subject: [PATCH 2/5] Update `each_recursive` test Use the small iteration numbers for test. Check names instead of counting elements. https://github.com/ruby/rexml/pull/139#discussion_r1628599819 https://github.com/ruby/rexml/pull/139#discussion_r1628600815 --- test/test_document.rb | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/test/test_document.rb b/test/test_document.rb index 3c72971d..e5a42a6c 100644 --- a/test/test_document.rb +++ b/test/test_document.rb @@ -421,13 +421,19 @@ def test_utf_16 class EachRecursiveTest < Test::Unit::TestCase def test_each_recursive xml_source = +'' - xml_source << '' + xml_source << '' - 100.times do + expected_names = ["root"] + + 2.times do |x| x_node_source = '' - 100.times do - x_node_source = "#{x_node_source}" + inner_names = [] + 3.times do |y| + name = "#{x}_#{y}" + x_node_source = "#{x_node_source}" + inner_names << name end + expected_names.concat(inner_names.reverse) xml_source << x_node_source end @@ -438,11 +444,13 @@ def test_each_recursive document = REXML::Document.new(xml_source) - count = 0 - document.each_recursive { count += 1 } # Node#each_recursive iterates elements only. # This does not iterate XML declerations, comments, attributes, CDATA sections, etc. - assert_equal(100 * 100 + 1, count) + actual_names = [] + document.each_recursive do |element| + actual_names << element.attributes["name"] + end + assert_equal(expected_names, actual_names) end end end From 3640dccc2bd3d3de11b75fd21f230232e4ccf2a5 Mon Sep 17 00:00:00 2001 From: Hiroya Fujinami Date: Thu, 6 Jun 2024 16:58:21 +0900 Subject: [PATCH 3/5] Update test/test_document.rb Co-authored-by: Sutou Kouhei --- test/test_document.rb | 44 ++++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/test/test_document.rb b/test/test_document.rb index e5a42a6c..ded26c54 100644 --- a/test/test_document.rb +++ b/test/test_document.rb @@ -420,27 +420,29 @@ def test_utf_16 class EachRecursiveTest < Test::Unit::TestCase def test_each_recursive - xml_source = +'' - xml_source << '' - - expected_names = ["root"] - - 2.times do |x| - x_node_source = '' - inner_names = [] - 3.times do |y| - name = "#{x}_#{y}" - x_node_source = "#{x_node_source}" - inner_names << name - end - expected_names.concat(inner_names.reverse) - xml_source << x_node_source - end - - xml_source << '' - xml_source << '' - - xml_source << '' + xml_source = <<-XML + + + + + + + + + + + + + + + + XML + + expected_names = [ + "root", + "0_2", "0_1", "0_0", + "1_2", "1_1", "1_0", + ] document = REXML::Document.new(xml_source) From 81f72277db0ff8e93e6fedad0d3d6991b22eee52 Mon Sep 17 00:00:00 2001 From: TSUYUSATO Kitsune Date: Thu, 6 Jun 2024 17:24:24 +0900 Subject: [PATCH 4/5] Fix some code stylings --- test/test_document.rb | 46 +++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/test/test_document.rb b/test/test_document.rb index ded26c54..2699b4e0 100644 --- a/test/test_document.rb +++ b/test/test_document.rb @@ -420,29 +420,29 @@ def test_utf_16 class EachRecursiveTest < Test::Unit::TestCase def test_each_recursive - xml_source = <<-XML - - - - - - - - - - - - - - - - XML - - expected_names = [ - "root", - "0_2", "0_1", "0_0", - "1_2", "1_1", "1_0", - ] + xml_source = <<~XML + + + + + + + + + + + + + + + + XML + + expected_names = %w[ + root + 1_1 1_2 1_3 + 2_1 2_2 2_3 + ] document = REXML::Document.new(xml_source) From f6efe5f7936686cbd4183aae2d8298f3b69eba27 Mon Sep 17 00:00:00 2001 From: Hiroya Fujinami Date: Fri, 7 Jun 2024 11:12:15 +0900 Subject: [PATCH 5/5] Move `test_each_recursive` Co-authored-by: Sutou Kouhei --- test/test_document.rb | 74 +++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 38 deletions(-) diff --git a/test/test_document.rb b/test/test_document.rb index 2699b4e0..23e04ad0 100644 --- a/test/test_document.rb +++ b/test/test_document.rb @@ -209,6 +209,42 @@ def test_gt_linear_performance end end + def test_each_recursive + xml_source = <<~XML + + + + + + + + + + + + + + + + XML + + expected_names = %w[ + root + 1_1 1_2 1_3 + 2_1 2_2 2_3 + ] + + document = REXML::Document.new(xml_source) + + # Node#each_recursive iterates elements only. + # This does not iterate XML declerations, comments, attributes, CDATA sections, etc. + actual_names = [] + document.each_recursive do |element| + actual_names << element.attributes["name"] + end + assert_equal(expected_names, actual_names) + end + class WriteTest < Test::Unit::TestCase def setup @document = REXML::Document.new(<<-EOX) @@ -417,44 +453,6 @@ def test_utf_16 assert_equal(expected_xml, actual_xml) end end - - class EachRecursiveTest < Test::Unit::TestCase - def test_each_recursive - xml_source = <<~XML - - - - - - - - - - - - - - - - XML - - expected_names = %w[ - root - 1_1 1_2 1_3 - 2_1 2_2 2_3 - ] - - document = REXML::Document.new(xml_source) - - # Node#each_recursive iterates elements only. - # This does not iterate XML declerations, comments, attributes, CDATA sections, etc. - actual_names = [] - document.each_recursive do |element| - actual_names << element.attributes["name"] - end - assert_equal(expected_names, actual_names) - end - end end end end