Skip to content

Commit c58112a

Browse files
committed
Set the read size correctly when capped
The read size requested from the input stream should be the remaining desired maximum (if one is set), but never larger than the defined buffer size. Fixes #1807. See #1774, #1671.
1 parent fa13c80 commit c58112a

File tree

3 files changed

+56
-3
lines changed

3 files changed

+56
-3
lines changed

CHANGES

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@ Release 1.15.3 [PENDING]
44
* Improvement: the Cleaner will preserve the source position of cleaned elements, if source tracking is enabled in the
55
original parse.
66

7+
* Bugfix: the DataUtil would incorrectly read from InputStreams that returned fewer bytes than the requested size. This
8+
led to incorrect results when parsing from chunked server responses, for example.
9+
<https://github.com/jhy/jsoup/issues/1807>
10+
711
* Build Improvement: added implementation version and related fields to the jar manifest.
812
<https://github.com/jhy/jsoup/issues/1809>
913

src/main/java/org/jsoup/internal/ConstrainableInputStream.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,14 +81,16 @@ public ByteBuffer readToByteBuffer(int max) throws IOException {
8181
final ByteArrayOutputStream outStream = new ByteArrayOutputStream(bufferSize);
8282

8383
int read;
84+
int remaining = max;
8485
while (true) {
85-
read = read(readBuffer, 0, bufferSize);
86+
read = read(readBuffer, 0, localCapped ? Math.min(remaining, bufferSize) : bufferSize);
8687
if (read == -1) break;
8788
if (localCapped) { // this local byteBuffer cap may be smaller than the overall maxSize (like when reading first bytes)
88-
if (read >= max) {
89-
outStream.write(readBuffer, 0, max);
89+
if (read >= remaining) {
90+
outStream.write(readBuffer, 0, remaining);
9091
break;
9192
}
93+
remaining -= read;
9294
}
9395
outStream.write(readBuffer, 0, read);
9496
}

src/test/java/org/jsoup/helper/DataUtilTest.java

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
package org.jsoup.helper;
22

33
import org.jsoup.Jsoup;
4+
import org.jsoup.integration.ParseTest;
45
import org.jsoup.nodes.Document;
56
import org.jsoup.parser.Parser;
67
import org.junit.jupiter.api.Test;
78

89
import java.io.*;
10+
import java.nio.ByteBuffer;
911
import java.nio.charset.Charset;
1012
import java.nio.charset.StandardCharsets;
1113
import java.nio.file.Files;
@@ -228,4 +230,49 @@ public void handlesFakeGzipFile() throws IOException {
228230
assertEquals("This is not gzipped", doc.title());
229231
assertEquals("And should still be readable.", doc.selectFirst("p").text());
230232
}
233+
234+
// an input stream to give a range of output sizes, that changes on each read
235+
/**
 * Test helper that wraps another stream and limits how many bytes each bulk read may request.
 * The limit ({@code stride}) is incremented before every bulk read, so the first bulk read asks
 * the wrapped stream for at most 1 byte, the second for at most 2, and so on. Callers therefore
 * see reads that are shorter than the size they asked for.
 * <p>
 * {@code close()} is not overridden here, so closing this wrapper does not close the wrapped stream.
 */
static class VaryingReadInputStream extends InputStream {
236+
// the wrapped stream that all reads are delegated to
final InputStream in;
237+
// upper bound on the next bulk read's requested length; incremented on every bulk read call
int stride = 0;
238+
239+
VaryingReadInputStream(InputStream in) {
240+
this.in = in;
241+
}
242+
243+
// Single-byte read: delegated unchanged, and does not advance the stride.
public int read() throws IOException {
244+
return in.read();
245+
}
246+
247+
// Bulk read into the whole array, requesting at most the (pre-incremented) stride bytes.
public int read(byte[] b) throws IOException {
248+
return in.read(b, 0, Math.min(b.length, ++stride));
249+
}
250+
251+
// Bulk read into a slice, requesting at most the (pre-incremented) stride bytes, never more than len.
public int read(byte[] b, int off, int len) throws IOException {
252+
return in.read(b, off, Math.min(len, ++stride));
253+
}
254+
}
255+
256+
// Parses the same HTML twice, once from the in-memory string and once through a
// VaryingReadInputStream that limits each bulk read request to a growing stride, and checks
// that both parses produce equivalent documents.
@Test
257+
void handlesChunkedInputStream() throws IOException {
258+
File inputFile = ParseTest.getFile("/htmltests/large.html");
259+
String input = ParseTest.getFileAsString(inputFile);
260+
VaryingReadInputStream stream = new VaryingReadInputStream(ParseTest.inputStreamFrom(input));
261+
262+
// baseline: parse straight from the string
Document expected = Jsoup.parse(input, "https://example.com");
263+
// null charset name: presumably lets jsoup detect the charset itself (TODO confirm against Jsoup.parse docs)
Document doc = Jsoup.parse(stream, null, "https://example.com");
264+
assertTrue(doc.hasSameValue(expected));
265+
}
266+
267+
// Reads the whole input through a VaryingReadInputStream (which limits each bulk read request
// to a growing stride) with a max size of 0, and checks that the bytes read decode back to
// exactly the original string. A max of 0 presumably means "no cap" (TODO confirm against
// DataUtil.readToByteBuffer).
@Test
268+
void handlesUnlimitedRead() throws IOException {
269+
File inputFile = ParseTest.getFile("/htmltests/large.html");
270+
String input = ParseTest.getFileAsString(inputFile);
271+
VaryingReadInputStream stream = new VaryingReadInputStream(ParseTest.inputStreamFrom(input));
272+
273+
ByteBuffer byteBuffer = DataUtil.readToByteBuffer(stream, 0);
274+
// Decode with an explicit charset rather than the platform default, so the comparison does not
// depend on the JVM's file.encoding. Assumes ParseTest.inputStreamFrom encodes the string as
// UTF-8 (TODO confirm).
String read = new String(byteBuffer.array(), StandardCharsets.UTF_8);
275+
276+
assertEquals(input, read);
277+
}
231278
}

0 commit comments

Comments
 (0)