Commit 7ccc94d

[PR #11139/3dafd4c7 backport][3.12] Fix IOBasePayload reading entire files into memory instead of chunking (#11141)
Co-authored-by: J. Nick Koston <[email protected]>
Fixes #11138
1 parent 47bc2a4 commit 7ccc94d

3 files changed: +165 -4 lines changed


CHANGES/11138.bugfix.rst

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+Fixed ``IOBasePayload`` and ``TextIOPayload`` reading entire files into memory when streaming large files -- by :user:`bdraco`.
+
+When using file-like objects with the aiohttp client, the entire file would be read into memory if the file size was provided in the ``Content-Length`` header. This could cause out-of-memory errors when uploading large files. The payload classes now correctly read data in chunks of ``READ_SIZE`` (64KB) regardless of the total content length.
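
The core of the fix is a single rule: no individual read may exceed READ_SIZE, even when both the file size and the remaining Content-Length are far larger. A minimal sketch of that rule, not part of the diff (the helper name next_read_size is made up, and READ_SIZE here stands in for aiohttp.payload.READ_SIZE):

    READ_SIZE = 2**16  # 64 KiB, mirroring aiohttp.payload.READ_SIZE

    def next_read_size(size, remaining_content_len):
        # Old behaviour: min(size or READ_SIZE, remaining_content_len or READ_SIZE),
        # so a 10 MB file with a known Content-Length was requested in a single read.
        # Fixed behaviour: every read is capped at READ_SIZE.
        return min(READ_SIZE, size or READ_SIZE, remaining_content_len or READ_SIZE)

    assert next_read_size(10 * 1024 * 1024, 10 * 1024 * 1024) == READ_SIZE
    assert next_read_size(None, None) == READ_SIZE  # unknown size still reads one chunk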

aiohttp/payload.py

Lines changed: 11 additions & 3 deletions
@@ -514,7 +514,7 @@ def _read_and_available_len(
         self._set_or_restore_start_position()
         size = self.size  # Call size only once since it does I/O
         return size, self._value.read(
-            min(size or READ_SIZE, remaining_content_len or READ_SIZE)
+            min(READ_SIZE, size or READ_SIZE, remaining_content_len or READ_SIZE)
         )
 
     def _read(self, remaining_content_len: Optional[int]) -> bytes:
@@ -617,7 +617,15 @@ async def write_with_length(
                 return
 
             # Read next chunk
-            chunk = await loop.run_in_executor(None, self._read, remaining_content_len)
+            chunk = await loop.run_in_executor(
+                None,
+                self._read,
+                (
+                    min(READ_SIZE, remaining_content_len)
+                    if remaining_content_len is not None
+                    else READ_SIZE
+                ),
+            )
 
     def _should_stop_writing(
         self,
@@ -760,7 +768,7 @@ def _read_and_available_len(
         self._set_or_restore_start_position()
         size = self.size
         chunk = self._value.read(
-            min(size or READ_SIZE, remaining_content_len or READ_SIZE)
+            min(READ_SIZE, size or READ_SIZE, remaining_content_len or READ_SIZE)
        )
         return size, chunk.encode(self._encoding) if self._encoding else chunk.encode()
 
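
For context, a sketch of the client-side usage that exercises this code path; the endpoint URL and file name are made up for illustration. Passing an open binary file as data streams it through an IOBasePayload subclass, and with the change above each executor read asks for at most READ_SIZE bytes even though the full Content-Length is known up front:

    import asyncio

    import aiohttp

    async def upload() -> None:
        async with aiohttp.ClientSession() as session:
            # The open file becomes the request body; the fix ensures it is
            # read in READ_SIZE chunks rather than all at once.
            with open("large-file.bin", "rb") as f:
                async with session.post("http://localhost:8080/upload", data=f) as resp:
                    print(resp.status)

    if __name__ == "__main__":
        asyncio.run(upload())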

tests/test_payload.py

Lines changed: 151 additions & 1 deletion
@@ -6,13 +6,14 @@
 from collections.abc import AsyncIterator
 from io import StringIO
 from pathlib import Path
-from typing import Optional, TextIO, Union
+from typing import List, Optional, TextIO, Union
 
 import pytest
 from multidict import CIMultiDict
 
 from aiohttp import payload
 from aiohttp.abc import AbstractStreamWriter
+from aiohttp.payload import READ_SIZE
 
 
 class BufferWriter(AbstractStreamWriter):
@@ -365,6 +366,155 @@ async def test_iobase_payload_exact_chunk_size_limit() -> None:
     assert written == data[:chunk_size]
 
 
+async def test_iobase_payload_reads_in_chunks() -> None:
+    """Test IOBasePayload reads data in chunks of READ_SIZE, not all at once."""
+    # Create a large file that's multiple times larger than READ_SIZE
+    large_data = b"x" * (READ_SIZE * 3 + 1000)  # ~192KB + 1000 bytes
+
+    # Mock the file-like object to track read calls
+    mock_file = unittest.mock.Mock(spec=io.BytesIO)
+    mock_file.tell.return_value = 0
+    mock_file.fileno.side_effect = AttributeError  # Make size return None
+
+    # Track the sizes of read() calls
+    read_sizes = []
+
+    def mock_read(size: int) -> bytes:
+        read_sizes.append(size)
+        # Return data based on how many times read was called
+        call_count = len(read_sizes)
+        if call_count == 1:
+            return large_data[:size]
+        elif call_count == 2:
+            return large_data[READ_SIZE : READ_SIZE + size]
+        elif call_count == 3:
+            return large_data[READ_SIZE * 2 : READ_SIZE * 2 + size]
+        else:
+            return large_data[READ_SIZE * 3 :]
+
+    mock_file.read.side_effect = mock_read
+
+    payload_obj = payload.IOBasePayload(mock_file)
+    writer = MockStreamWriter()
+
+    # Write with a large content_length
+    await payload_obj.write_with_length(writer, len(large_data))
+
+    # Verify that reads were limited to READ_SIZE
+    assert len(read_sizes) > 1  # Should have multiple reads
+    for read_size in read_sizes:
+        assert (
+            read_size <= READ_SIZE
+        ), f"Read size {read_size} exceeds READ_SIZE {READ_SIZE}"
+
+
+async def test_iobase_payload_large_content_length() -> None:
+    """Test IOBasePayload with very large content_length doesn't read all at once."""
+    data = b"x" * (READ_SIZE + 1000)
+
+    # Create a custom file-like object that tracks read sizes
+    class TrackingBytesIO(io.BytesIO):
+        def __init__(self, data: bytes) -> None:
+            super().__init__(data)
+            self.read_sizes: List[int] = []
+
+        def read(self, size: Optional[int] = -1) -> bytes:
+            self.read_sizes.append(size if size is not None else -1)
+            return super().read(size)
+
+    tracking_file = TrackingBytesIO(data)
+    payload_obj = payload.IOBasePayload(tracking_file)
+    writer = MockStreamWriter()
+
+    # Write with a very large content_length (simulating the bug scenario)
+    large_content_length = 10 * 1024 * 1024  # 10MB
+    await payload_obj.write_with_length(writer, large_content_length)
+
+    # Verify no single read exceeded READ_SIZE
+    for read_size in tracking_file.read_sizes:
+        assert (
+            read_size <= READ_SIZE
+        ), f"Read size {read_size} exceeds READ_SIZE {READ_SIZE}"
+
+    # Verify the correct amount of data was written
+    assert writer.get_written_bytes() == data
+
+
+async def test_textio_payload_reads_in_chunks() -> None:
+    """Test TextIOPayload reads data in chunks of READ_SIZE, not all at once."""
+    # Create a large text file that's multiple times larger than READ_SIZE
+    large_text = "x" * (READ_SIZE * 3 + 1000)  # ~192KB + 1000 chars
+
+    # Mock the file-like object to track read calls
+    mock_file = unittest.mock.Mock(spec=io.StringIO)
+    mock_file.tell.return_value = 0
+    mock_file.fileno.side_effect = AttributeError  # Make size return None
+    mock_file.encoding = "utf-8"
+
+    # Track the sizes of read() calls
+    read_sizes = []
+
+    def mock_read(size: int) -> str:
+        read_sizes.append(size)
+        # Return data based on how many times read was called
+        call_count = len(read_sizes)
+        if call_count == 1:
+            return large_text[:size]
+        elif call_count == 2:
+            return large_text[READ_SIZE : READ_SIZE + size]
+        elif call_count == 3:
+            return large_text[READ_SIZE * 2 : READ_SIZE * 2 + size]
+        else:
+            return large_text[READ_SIZE * 3 :]
+
+    mock_file.read.side_effect = mock_read
+
+    payload_obj = payload.TextIOPayload(mock_file)
+    writer = MockStreamWriter()
+
+    # Write with a large content_length
+    await payload_obj.write_with_length(writer, len(large_text.encode("utf-8")))
+
+    # Verify that reads were limited to READ_SIZE
+    assert len(read_sizes) > 1  # Should have multiple reads
+    for read_size in read_sizes:
+        assert (
+            read_size <= READ_SIZE
+        ), f"Read size {read_size} exceeds READ_SIZE {READ_SIZE}"
+
+
+async def test_textio_payload_large_content_length() -> None:
+    """Test TextIOPayload with very large content_length doesn't read all at once."""
+    text_data = "x" * (READ_SIZE + 1000)
+
+    # Create a custom file-like object that tracks read sizes
+    class TrackingStringIO(io.StringIO):
+        def __init__(self, data: str) -> None:
+            super().__init__(data)
+            self.read_sizes: List[int] = []
+
+        def read(self, size: Optional[int] = -1) -> str:
+            self.read_sizes.append(size if size is not None else -1)
+            return super().read(size)
+
+    tracking_file = TrackingStringIO(text_data)
+    payload_obj = payload.TextIOPayload(tracking_file)
+    writer = MockStreamWriter()
+
+    # Write with a very large content_length (simulating the bug scenario)
+    large_content_length = 10 * 1024 * 1024  # 10MB
+    await payload_obj.write_with_length(writer, large_content_length)
+
+    # Verify no single read exceeded READ_SIZE
+    for read_size in tracking_file.read_sizes:
+        assert (
+            read_size <= READ_SIZE
+        ), f"Read size {read_size} exceeds READ_SIZE {READ_SIZE}"
+
+    # Verify the correct amount of data was written
+    assert writer.get_written_bytes() == text_data.encode("utf-8")
+
+
 async def test_async_iterable_payload_write_with_length_no_limit() -> None:
     """Test AsyncIterablePayload writing with no content length limit."""
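
These tests rely on a MockStreamWriter helper defined earlier in tests/test_payload.py and therefore not visible in this diff. For the write_with_length() calls exercised above, a stand-in only needs to buffer written chunks and expose them for assertions; a hypothetical minimal version might look like this:

    class CollectingWriter:
        # Hypothetical minimal writer for illustration: the payload code above
        # only awaits writer.write(chunk), and the tests read the result back
        # through get_written_bytes().
        def __init__(self) -> None:
            self._buffer = bytearray()

        async def write(self, chunk: bytes) -> None:
            self._buffer.extend(chunk)

        def get_written_bytes(self) -> bytes:
            return bytes(self._buffer)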
