Skip to content

Commit

Permalink
Speed improvement: cap number of cloned active formatting elements
Browse files Browse the repository at this point in the history
Also saves on memory allocations for empty attribute lists.

Fixes #1613
  • Loading branch information
jhy committed Aug 15, 2021
1 parent 0dcb53a commit d2c455c
Show file tree
Hide file tree
Showing 8 changed files with 47 additions and 6 deletions.
5 changes: 5 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@ jsoup changelog
* Bugfix [Fuzz]: Fix an IOOB when the HTML root was cleared from the stack and then attributes were merged onto it.
<https://github.com/jhy/jsoup/issues/1611>

* Bugfix [Fuzz]: Improved the speed of parsing when crafted HTML contains hundreds of active formatting elements
that were copied for all new elements (similar to an amplification attack). The number of considered active
formatting elements that will be cloned when mis-nested is now capped to 12.
<https://github.com/jhy/jsoup/issues/1613>

*** Release 1.14.1 [2021-Jul-10]
* Change: updated the minimum supported Java version from Java 7 to Java 8.

Expand Down
1 change: 0 additions & 1 deletion src/main/java/org/jsoup/nodes/Attributes.java
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,6 @@ public void addAll(Attributes incoming) {
// todo - should this be case insensitive?
put(attr);
}

}

public Iterator<Attribute> iterator() {
Expand Down
12 changes: 11 additions & 1 deletion src/main/java/org/jsoup/nodes/Node.java
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,16 @@ else if (attributeKey.startsWith("abs:"))
*/
public abstract Attributes attributes();

/**
Get the number of attributes that this Node has.
@return the number of attributes
@since 1.14.2
*/
public int attributesSize() {
// added so that we can test how many attributes exist without implicitly creating the Attributes object
return hasAttributes() ? attributes().size() : 0;
}

/**
* Set an attribute (key=value). If the attribute already exists, it is replaced. The attribute key comparison is
* <b>case insensitive</b>. The key will be set with case sensitivity as set in the parser settings.
Expand All @@ -100,7 +110,7 @@ public Node attr(String attributeKey, String attributeValue) {
}

/**
* Test if this element has an attribute. <b>Case insensitive</b>
* Test if this Node has an attribute. <b>Case insensitive</b>.
* @param attributeKey The attribute key to check.
* @return true if the attribute exists, false if not.
*/
Expand Down
7 changes: 5 additions & 2 deletions src/main/java/org/jsoup/parser/HtmlTreeBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -690,10 +690,11 @@ void reconstructFormattingElements() {

Element entry = last;
int size = formattingElements.size();
int ceil = size - maxUsedFormattingElements; if (ceil <0) ceil = 0;
int pos = size - 1;
boolean skip = false;
while (true) {
if (pos == 0) { // step 4. if none before, skip to 8
if (pos == ceil) { // step 4. if none before, skip to 8
skip = true;
break;
}
Expand All @@ -710,7 +711,8 @@ void reconstructFormattingElements() {
skip = false; // can only skip increment from 4.
Element newEl = insertStartTag(entry.normalName()); // todo: avoid fostering here?
// newEl.namespace(entry.namespace()); // todo: namespaces
newEl.attributes().addAll(entry.attributes());
if (entry.attributesSize() > 0)
newEl.attributes().addAll(entry.attributes());

// 10. replace entry with new entry
formattingElements.set(pos, newEl);
Expand All @@ -720,6 +722,7 @@ void reconstructFormattingElements() {
break;
}
}
private static final int maxUsedFormattingElements = 12; // limit how many elements get recreated

void clearFormattingElementsToLastMarker() {
while (!formattingElements.isEmpty()) {
Expand Down
3 changes: 1 addition & 2 deletions src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java
Original file line number Diff line number Diff line change
Expand Up @@ -815,8 +815,7 @@ else if (!tb.onStack(formatEl)) {
Element furthestBlock = null;
Element commonAncestor = null;
boolean seenFormattingElement = false;
// the spec doesn't limit to < 64, but in degenerate cases (9000+ stack depth) this prevents
// run-aways
// the spec doesn't limit to < 64, but in degenerate cases (9000+ stack depth) this prevents run-aways
final int stackSize = stack.size();
int bookmark = -1;
for (int si = 1; si < stackSize && si < 64; si++) {
Expand Down
9 changes: 9 additions & 0 deletions src/test/java/org/jsoup/integration/FuzzFixesTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -220,4 +220,13 @@ public void unconsume() throws IOException {
Document docXml = Jsoup.parse(new FileInputStream(in), "UTF-8", "https://example.com", Parser.xmlParser());
assertNotNull(docXml);
}

@Test
public void test36916() throws IOException {
// https://github.com/jhy/jsoup/issues/1613
File in = ParseTest.getFile("/fuzztests/1613.html.gz");

Document doc = Jsoup.parse(in, "UTF-8");
assertNotNull(doc);
}
}
16 changes: 16 additions & 0 deletions src/test/java/org/jsoup/nodes/ElementTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2039,4 +2039,20 @@ public void childNodesAccessorDoesNotVivify() {
els.add(new Element("a"));
assertEquals(1, els.size());
}

@Test public void attributeSizeDoesNotAutoVivify() {
Document doc = Jsoup.parse("<p></p>");
Element p = doc.selectFirst("p");
assertNotNull(p);
assertFalse(p.hasAttributes());
assertEquals(0, p.attributesSize());
assertFalse(p.hasAttributes());

p.attr("foo", "bar");
assertEquals(1, p.attributesSize());
assertTrue(p.hasAttributes());

p.removeAttr("foo");
assertEquals(0, p.attributesSize());
}
}
Binary file added src/test/resources/fuzztests/1613.html.gz
Binary file not shown.

0 comments on commit d2c455c

Please sign in to comment.