@@ -256,13 +256,22 @@ class BodyPartReader:
256
256
chunk_size = 8192
257
257
258
258
def __init__ (
259
- self , boundary : bytes , headers : "CIMultiDictProxy[str]" , content : StreamReader
259
+ self ,
260
+ boundary : bytes ,
261
+ headers : "CIMultiDictProxy[str]" ,
262
+ content : StreamReader ,
263
+ * ,
264
+ subtype : str = "mixed" ,
265
+ default_charset : Optional [str ] = None ,
260
266
) -> None :
261
267
self .headers = headers
262
268
self ._boundary = boundary
263
269
self ._content = content
270
+ self ._default_charset = default_charset
264
271
self ._at_eof = False
265
- length = self .headers .get (CONTENT_LENGTH , None )
272
+ self ._is_form_data = subtype == "form-data"
273
+ # https://datatracker.ietf.org/doc/html/rfc7578#section-4.8
274
+ length = None if self ._is_form_data else self .headers .get (CONTENT_LENGTH , None )
266
275
self ._length = int (length ) if length is not None else None
267
276
self ._read_bytes = 0
268
277
self ._unread : Deque [bytes ] = deque ()
@@ -329,6 +338,8 @@ async def _read_chunk_from_length(self, size: int) -> bytes:
329
338
assert self ._length is not None , "Content-Length required for chunked read"
330
339
chunk_size = min (size , self ._length - self ._read_bytes )
331
340
chunk = await self ._content .read (chunk_size )
341
+ if self ._content .at_eof ():
342
+ self ._at_eof = True
332
343
return chunk
333
344
334
345
async def _read_chunk_from_stream (self , size : int ) -> bytes :
@@ -449,7 +460,8 @@ def decode(self, data: bytes) -> bytes:
449
460
"""
450
461
if CONTENT_TRANSFER_ENCODING in self .headers :
451
462
data = self ._decode_content_transfer (data )
452
- if CONTENT_ENCODING in self .headers :
463
+ # https://datatracker.ietf.org/doc/html/rfc7578#section-4.8
464
+ if not self ._is_form_data and CONTENT_ENCODING in self .headers :
453
465
return self ._decode_content (data )
454
466
return data
455
467
def get_charset(self, default: str) -> str:
    """Return the charset to use when decoding this body part.

    The ``charset`` parameter of the part's Content-Type header wins.
    When the header carries no such parameter, the default charset
    stored on the reader is used if it is truthy, otherwise *default*.
    """
    content_type = self.headers.get(CONTENT_TYPE, "")
    parameters = parse_mimetype(content_type).parameters
    fallback = self._default_charset or default
    return parameters.get("charset", fallback)
487
499
488
500
@reify
489
501
def name (self ) -> Optional [str ]:
@@ -538,9 +550,17 @@ class MultipartReader:
538
550
part_reader_cls = BodyPartReader
539
551
540
552
def __init__ (self , headers : Mapping [str , str ], content : StreamReader ) -> None :
553
+ self ._mimetype = parse_mimetype (headers [CONTENT_TYPE ])
554
+ assert self ._mimetype .type == "multipart" , "multipart/* content type expected"
555
+ if "boundary" not in self ._mimetype .parameters :
556
+ raise ValueError (
557
+ "boundary missed for Content-Type: %s" % headers [CONTENT_TYPE ]
558
+ )
559
+
541
560
self .headers = headers
542
561
self ._boundary = ("--" + self ._get_boundary ()).encode ()
543
562
self ._content = content
563
+ self ._default_charset : Optional [str ] = None
544
564
self ._last_part : Optional [Union ["MultipartReader" , BodyPartReader ]] = None
545
565
self ._at_eof = False
546
566
self ._at_bof = True
@@ -592,7 +612,24 @@ async def next(
592
612
await self ._read_boundary ()
593
613
if self ._at_eof : # we just read the last boundary, nothing to do there
594
614
return None
595
- self ._last_part = await self .fetch_next_part ()
615
+
616
+ part = await self .fetch_next_part ()
617
+ # https://datatracker.ietf.org/doc/html/rfc7578#section-4.6
618
+ if (
619
+ self ._last_part is None
620
+ and self ._mimetype .subtype == "form-data"
621
+ and isinstance (part , BodyPartReader )
622
+ ):
623
+ _ , params = parse_content_disposition (part .headers .get (CONTENT_DISPOSITION ))
624
+ if params .get ("name" ) == "_charset_" :
625
+ # Longest encoding in https://encoding.spec.whatwg.org/encodings.json
626
+ # is 19 characters, so 32 should be more than enough for any valid encoding.
627
+ charset = await part .read_chunk (32 )
628
+ if len (charset ) > 31 :
629
+ raise RuntimeError ("Invalid default charset" )
630
+ self ._default_charset = charset .strip ().decode ()
631
+ part = await self .fetch_next_part ()
632
+ self ._last_part = part
596
633
return self ._last_part
597
634
598
635
async def release (self ) -> None :
@@ -628,19 +665,16 @@ def _get_part_reader(
628
665
return type (self )(headers , self ._content )
629
666
return self .multipart_reader_cls (headers , self ._content )
630
667
else :
631
- return self .part_reader_cls (self ._boundary , headers , self ._content )
632
-
633
- def _get_boundary (self ) -> str :
634
- mimetype = parse_mimetype (self .headers [CONTENT_TYPE ])
635
-
636
- assert mimetype .type == "multipart" , "multipart/* content type expected"
637
-
638
- if "boundary" not in mimetype .parameters :
639
- raise ValueError (
640
- "boundary missed for Content-Type: %s" % self .headers [CONTENT_TYPE ]
668
+ return self .part_reader_cls (
669
+ self ._boundary ,
670
+ headers ,
671
+ self ._content ,
672
+ subtype = self ._mimetype .subtype ,
673
+ default_charset = self ._default_charset ,
641
674
)
642
675
643
- boundary = mimetype .parameters ["boundary" ]
676
+ def _get_boundary (self ) -> str :
677
+ boundary = self ._mimetype .parameters ["boundary" ]
644
678
if len (boundary ) > 70 :
645
679
raise ValueError ("boundary %r is too long (70 chars max)" % boundary )
646
680
@@ -731,6 +765,7 @@ def __init__(self, subtype: str = "mixed", boundary: Optional[str] = None) -> No
731
765
super ().__init__ (None , content_type = ctype )
732
766
733
767
self ._parts : List [_Part ] = []
768
+ self ._is_form_data = subtype == "form-data"
734
769
735
770
def __enter__ (self ) -> "MultipartWriter" :
736
771
return self
@@ -808,32 +843,36 @@ def append(self, obj: Any, headers: Optional[Mapping[str, str]] = None) -> Paylo
808
843
809
844
def append_payload(self, payload: Payload) -> Payload:
    """Add *payload* as a new body part of this writer and return it.

    For a ``form-data`` writer the part headers are checked against
    RFC 7578: a Content-Disposition header with a ``name`` parameter
    must be present, and Content-Encoding, Content-Length and
    Content-Transfer-Encoding must be absent.  These checks are
    ``assert`` statements, so they are skipped when Python runs with
    ``-O``.

    For every other subtype the Content-Encoding and
    Content-Transfer-Encoding headers are validated, and Content-Length
    is set from ``payload.size`` when the size is known and no encoding
    will be applied.

    Raises:
        AssertionError: a form-data part violates the rules above.
        RuntimeError: an unknown content encoding or content transfer
            encoding is requested for a non form-data part.
    """
    encoding: Optional[str] = None
    te_encoding: Optional[str] = None
    if self._is_form_data:
        # https://datatracker.ietf.org/doc/html/rfc7578#section-4.7
        # https://datatracker.ietf.org/doc/html/rfc7578#section-4.8
        assert (
            CONTENT_DISPOSITION in payload.headers
        ), "form-data part requires a Content-Disposition header"
        disposition = payload.headers[CONTENT_DISPOSITION]
        # Compare parameter names instead of searching for the substring
        # "name=", which is also contained in "filename=" and therefore
        # accepted parts that have no ``name`` parameter at all.
        # Splitting on ";" is a heuristic: it does not understand quoted
        # strings, but it never misses a real ``name`` parameter.
        has_name = False
        for item in disposition.split(";"):
            key, sep, _ = item.partition("=")
            if sep and key.strip().lower() in ("name", "name*"):
                has_name = True
                break
        assert has_name, "form-data part requires a 'name' parameter"
        forbidden = {CONTENT_ENCODING, CONTENT_LENGTH, CONTENT_TRANSFER_ENCODING}
        assert (
            not forbidden & payload.headers.keys()
        ), "form-data part must not carry encoding or length headers"
    else:
        # compression
        encoding = payload.headers.get(CONTENT_ENCODING, "").lower()
        if encoding and encoding not in ("deflate", "gzip", "identity"):
            raise RuntimeError(f"unknown content encoding: {encoding}")
        if encoding == "identity":
            encoding = None

        # te encoding
        te_encoding = payload.headers.get(CONTENT_TRANSFER_ENCODING, "").lower()
        if te_encoding not in ("", "base64", "quoted-printable", "binary"):
            raise RuntimeError(f"unknown content transfer encoding: {te_encoding}")
        if te_encoding == "binary":
            te_encoding = None

        # size: only advertised when the body is written unmodified
        size = payload.size
        if size is not None and not (encoding or te_encoding):
            payload.headers[CONTENT_LENGTH] = str(size)

    self._parts.append((payload, encoding, te_encoding))  # type: ignore[arg-type]
    return payload
0 commit comments