Skip to content

Commit d2c455c

Browse files
committed
Speed improvement: cap number of cloned active formatting elements
Also saves on memory allocations for empty attribute lists. Fixes #1613
1 parent 0dcb53a commit d2c455c

File tree

8 files changed

+47
-6
lines changed

8 files changed

+47
-6
lines changed

CHANGES

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,11 @@ jsoup changelog
7777
* Bugfix [Fuzz]: Fix an IOOB when the HTML root was cleared from the stack and then attributes were merged onto it.
7878
<https://github.com/jhy/jsoup/issues/1611>
7979

80+
* Bugfix [Fuzz]: Improved the speed of parsing when crafted HTML contains hundreds of active formatting elements
81+
that were copied for all new elements (similar to an amplification attack). The number of considered active
82+
formatting elements that will be cloned when mis-nested is now capped at 12.
83+
<https://github.com/jhy/jsoup/issues/1613>
84+
8085
*** Release 1.14.1 [2021-Jul-10]
8186
* Change: updated the minimum supported Java version from Java 7 to Java 8.
8287

src/main/java/org/jsoup/nodes/Attributes.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,6 @@ public void addAll(Attributes incoming) {
275275
// todo - should this be case insensitive?
276276
put(attr);
277277
}
278-
279278
}
280279

281280
public Iterator<Attribute> iterator() {

src/main/java/org/jsoup/nodes/Node.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,16 @@ else if (attributeKey.startsWith("abs:"))
8686
*/
8787
public abstract Attributes attributes();
8888

89+
/**
90+
Get the number of attributes that this Node has, without creating the Attributes object if the Node has none.
91+
@return the number of attributes
92+
@since 1.14.2
93+
*/
94+
public int attributesSize() {
95+
// added so that we can test how many attributes exist without implicitly creating the Attributes object
96+
return hasAttributes() ? attributes().size() : 0;
97+
}
98+
8999
/**
90100
* Set an attribute (key=value). If the attribute already exists, it is replaced. The attribute key comparison is
91101
* <b>case insensitive</b>. The key will be set with case sensitivity as set in the parser settings.
@@ -100,7 +110,7 @@ public Node attr(String attributeKey, String attributeValue) {
100110
}
101111

102112
/**
103-
* Test if this element has an attribute. <b>Case insensitive</b>
113+
* Test if this Node has an attribute. <b>Case insensitive</b>.
104114
* @param attributeKey The attribute key to check.
105115
* @return true if the attribute exists, false if not.
106116
*/

src/main/java/org/jsoup/parser/HtmlTreeBuilder.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -690,10 +690,11 @@ void reconstructFormattingElements() {
690690

691691
Element entry = last;
692692
int size = formattingElements.size();
693+
int ceil = size - maxUsedFormattingElements; if (ceil <0) ceil = 0;
693694
int pos = size - 1;
694695
boolean skip = false;
695696
while (true) {
696-
if (pos == 0) { // step 4. if none before, skip to 8
697+
if (pos == ceil) { // step 4. if none before, skip to 8
697698
skip = true;
698699
break;
699700
}
@@ -710,7 +711,8 @@ void reconstructFormattingElements() {
710711
skip = false; // can only skip increment from 4.
711712
Element newEl = insertStartTag(entry.normalName()); // todo: avoid fostering here?
712713
// newEl.namespace(entry.namespace()); // todo: namespaces
713-
newEl.attributes().addAll(entry.attributes());
714+
if (entry.attributesSize() > 0)
715+
newEl.attributes().addAll(entry.attributes());
714716

715717
// 10. replace entry with new entry
716718
formattingElements.set(pos, newEl);
@@ -720,6 +722,7 @@ void reconstructFormattingElements() {
720722
break;
721723
}
722724
}
725+
private static final int maxUsedFormattingElements = 12; // limit how many elements get recreated
723726

724727
void clearFormattingElementsToLastMarker() {
725728
while (!formattingElements.isEmpty()) {

src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -815,8 +815,7 @@ else if (!tb.onStack(formatEl)) {
815815
Element furthestBlock = null;
816816
Element commonAncestor = null;
817817
boolean seenFormattingElement = false;
818-
// the spec doesn't limit to < 64, but in degenerate cases (9000+ stack depth) this prevents
819-
// run-aways
818+
// the spec doesn't limit to < 64, but in degenerate cases (9000+ stack depth) this prevents run-aways
820819
final int stackSize = stack.size();
821820
int bookmark = -1;
822821
for (int si = 1; si < stackSize && si < 64; si++) {

src/test/java/org/jsoup/integration/FuzzFixesTest.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,4 +220,13 @@ public void unconsume() throws IOException {
220220
Document docXml = Jsoup.parse(new FileInputStream(in), "UTF-8", "https://example.com", Parser.xmlParser());
221221
assertNotNull(docXml);
222222
}
223+
224+
@Test
225+
public void test36916() throws IOException {
226+
// https://github.com/jhy/jsoup/issues/1613
227+
File in = ParseTest.getFile("/fuzztests/1613.html.gz");
228+
229+
Document doc = Jsoup.parse(in, "UTF-8");
230+
assertNotNull(doc);
231+
}
223232
}

src/test/java/org/jsoup/nodes/ElementTest.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2039,4 +2039,20 @@ public void childNodesAccessorDoesNotVivify() {
20392039
els.add(new Element("a"));
20402040
assertEquals(1, els.size());
20412041
}
2042+
2043+
@Test public void attributeSizeDoesNotAutoVivify() {
2044+
Document doc = Jsoup.parse("<p></p>");
2045+
Element p = doc.selectFirst("p");
2046+
assertNotNull(p);
2047+
assertFalse(p.hasAttributes());
2048+
assertEquals(0, p.attributesSize());
2049+
assertFalse(p.hasAttributes());
2050+
2051+
p.attr("foo", "bar");
2052+
assertEquals(1, p.attributesSize());
2053+
assertTrue(p.hasAttributes());
2054+
2055+
p.removeAttr("foo");
2056+
assertEquals(0, p.attributesSize());
2057+
}
20422058
}
6.92 KB
Binary file not shown.

0 commit comments

Comments
 (0)