Skip to content

Commit

Permalink
Reject unclosed DOCTYPE on parsing (#153)
Browse files Browse the repository at this point in the history
Fix #152

---------

Co-authored-by: Sutou Kouhei <kou@clear-code.com>
  • Loading branch information
makenowjust and kou committed Jun 19, 2024
1 parent d906ae2 commit f704011
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 15 deletions.
10 changes: 9 additions & 1 deletion lib/rexml/parsers/baseparser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,12 @@ def pull_event
x, @closed = @closed, nil
return [ :end_element, x ]
end
return [ :end_document ] if empty?
if empty?
if @document_status == :in_doctype
raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
end
return [ :end_document ]
end
return @stack.shift if @stack.size > 0
#STDERR.puts @source.encoding
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
Expand Down Expand Up @@ -373,6 +378,9 @@ def pull_event
@document_status = :after_doctype
return [ :end_doctype ]
end
if @document_status == :in_doctype
raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
end
end
if @document_status == :after_doctype
@source.match(/\s*/um, true)
Expand Down
23 changes: 9 additions & 14 deletions lib/rexml/parsers/treeparser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ def add_listener( listener )

def parse
tag_stack = []
in_doctype = false
entities = nil
begin
while true
Expand All @@ -39,17 +38,15 @@ def parse
tag_stack.pop
@build_context = @build_context.parent
when :text
if not in_doctype
if @build_context[-1].instance_of? Text
@build_context[-1] << event[1]
else
@build_context.add(
Text.new(event[1], @build_context.whitespace, nil, true)
) unless (
@build_context.ignore_whitespace_nodes and
event[1].strip.size==0
)
end
if @build_context[-1].instance_of? Text
@build_context[-1] << event[1]
else
@build_context.add(
Text.new(event[1], @build_context.whitespace, nil, true)
) unless (
@build_context.ignore_whitespace_nodes and
event[1].strip.size==0
)
end
when :comment
c = Comment.new( event[1] )
Expand All @@ -60,14 +57,12 @@ def parse
when :processing_instruction
@build_context.add( Instruction.new( event[1], event[2] ) )
when :end_doctype
in_doctype = false
entities.each { |k,v| entities[k] = @build_context.entities[k].value }
@build_context = @build_context.parent
when :start_doctype
doctype = DocType.new( event[1..-1], @build_context )
@build_context = doctype
entities = {}
in_doctype = true
when :attlistdecl
n = AttlistDecl.new( event[1..-1] )
@build_context.add( n )
Expand Down
45 changes: 45 additions & 0 deletions test/parse/test_document_type_declaration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,51 @@ def test_no_name
end
end

# Test cases checking that REXML::Document.new raises REXML::ParseException
# for a DOCTYPE whose "[" section is never closed. Each test compares the
# complete exception text (message, line, position and unconsumed input)
# against a heredoc; `.chomp` drops the heredoc's trailing newline before
# the comparison.
class TestUnclosed < self
# Input stops immediately after the opening "[": the expected message is
# "Malformed DOCTYPE: unclosed".
def test_no_extra_node
exception = assert_raise(REXML::ParseException) do
REXML::Document.new("<!DOCTYPE foo [")
end
assert_equal(<<~DETAIL.chomp, exception.to_s)
Malformed DOCTYPE: unclosed
Line: 1
Position: 15
Last 80 unconsumed characters:
DETAIL
end

# A start tag ("<r>") follows the "[": the expected message is
# "Malformed DOCTYPE: invalid declaration".
def test_start_element
exception = assert_raise(REXML::ParseException) do
REXML::Document.new(<<~DOCTYPE)
<!DOCTYPE foo [ <r>
DOCTYPE
end
# The #{' '} in the expected text interpolates a single space after "<r>".
# NOTE(review): presumably written this way so the trailing space is not
# lost to whitespace trimming -- confirm.
assert_equal(<<~DETAIL.chomp, exception.to_s)
Malformed DOCTYPE: invalid declaration
Line: 1
Position: 20
Last 80 unconsumed characters:
<r>#{' '}
DETAIL
end

# Plain character data ("text") follows the "[": the expected message is
# "Malformed DOCTYPE: invalid declaration".
def test_text
exception = assert_raise(REXML::ParseException) do
REXML::Document.new(<<~DOCTYPE)
<!DOCTYPE foo [ text
DOCTYPE
end
# As above, #{' '} interpolates a single space after "text".
assert_equal(<<~DETAIL.chomp, exception.to_s)
Malformed DOCTYPE: invalid declaration
Line: 1
Position: 21
Last 80 unconsumed characters:
text#{' '}
DETAIL
end
end

class TestExternalID < self
class TestSystem < self
def test_left_bracket_in_system_literal
Expand Down

0 comments on commit f704011

Please sign in to comment.