diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index 5791ab1d..a003ac29 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -204,6 +204,8 @@ def peek depth=0
 
       # Returns the next event. This is a +PullEvent+ object.
       def pull
+        @source.drop_parsed_content
+
         pull_event.tap do |event|
           @listeners.each do |listener|
             listener.receive event
diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb
index 67154832..216e6157 100644
--- a/lib/rexml/source.rb
+++ b/lib/rexml/source.rb
@@ -55,6 +55,7 @@ class Source
     attr_reader :encoding
 
     module Private
+      SCANNER_RESET_SIZE = 100000
       PRE_DEFINED_TERM_PATTERNS = {}
       pre_defined_terms = ["'", '"', "<"]
       pre_defined_terms.each do |term|
@@ -84,6 +85,12 @@ def buffer
       @scanner.rest
     end
 
+    def drop_parsed_content
+      if @scanner.pos > SCANNER_RESET_SIZE
+        @scanner.string = @scanner.rest
+      end
+    end
+
     def buffer_encoding=(encoding)
       @scanner.string.force_encoding(encoding)
     end
diff --git a/test/test_pullparser.rb b/test/test_pullparser.rb
index 53a985ba..e982776f 100644
--- a/test/test_pullparser.rb
+++ b/test/test_pullparser.rb
@@ -98,5 +98,28 @@ def test_peek
       end
       assert_equal( 0, names.length )
     end
+
+    N_ELEMENTS = 50000
+    N_STRING = 'a' * 50000
+    def build_xml(n_elements)
+      xml = ''
+
+      n_elements.times do |i|
+        xml << ''
+        xml << N_STRING
+        xml << ''
+      end
+      xml << ''
+    end
+
+    # NOTE: this test is too slow.
+    def test_parse_large_xml
+      xml = build_xml(N_ELEMENTS)
+
+      parser = REXML::Parsers::PullParser.new(xml)
+      while parser.has_next?
+        parser.pull
+      end
+    end
   end
 end