Index: aiohttp-3.8.5/aiohttp/formdata.py =================================================================== --- aiohttp-3.8.5.orig/aiohttp/formdata.py +++ aiohttp-3.8.5/aiohttp/formdata.py @@ -1,4 +1,5 @@ import io +import warnings from typing import Any, Iterable, List, Optional from urllib.parse import urlencode @@ -53,7 +54,12 @@ class FormData: if isinstance(value, io.IOBase): self._is_multipart = True elif isinstance(value, (bytes, bytearray, memoryview)): + msg = ( + "In v4, passing bytes will no longer create a file field. " + "Please explicitly use the filename parameter or pass a BytesIO object." + ) if filename is None and content_transfer_encoding is None: + warnings.warn(msg, DeprecationWarning) filename = name type_options: MultiDict[str] = MultiDict({"name": name}) @@ -81,7 +87,11 @@ class FormData: "content_transfer_encoding must be an instance" " of str. Got: %s" % content_transfer_encoding ) - headers[hdrs.CONTENT_TRANSFER_ENCODING] = content_transfer_encoding + msg = ( + "content_transfer_encoding is deprecated. " + "To maintain compatibility with v4 please pass a BytesPayload." + ) + warnings.warn(msg, DeprecationWarning) self._is_multipart = True self._fields.append((type_options, headers, value)) Index: aiohttp-3.8.5/aiohttp/multipart.py =================================================================== --- aiohttp-3.8.5.orig/aiohttp/multipart.py +++ aiohttp-3.8.5/aiohttp/multipart.py @@ -255,13 +255,22 @@ class BodyPartReader: chunk_size = 8192 def __init__( - self, boundary: bytes, headers: "CIMultiDictProxy[str]", content: StreamReader + self, + boundary: bytes, + headers: "CIMultiDictProxy[str]", + content: StreamReader, + *, + subtype: str = "mixed", + default_charset: Optional[str] = None, ) -> None: self.headers = headers self._boundary = boundary self._content = content + self._default_charset = default_charset self._at_eof = False - length = self.headers.get(CONTENT_LENGTH, None) + self._is_form_data = subtype == "form-data" + # https://datatracker.ietf.org/doc/html/rfc7578#section-4.8 + length = None if self._is_form_data else self.headers.get(CONTENT_LENGTH, None) self._length = int(length) if length is not None else None self._read_bytes = 0 # TODO: typeing.Deque is not supported by Python 3.5 @@ -329,6 +338,8 @@ class BodyPartReader: assert self._length is not None, "Content-Length required for chunked read" chunk_size = min(size, self._length - self._read_bytes) chunk = await self._content.read(chunk_size) + if self._content.at_eof(): + self._at_eof = True return chunk async def _read_chunk_from_stream(self, size: int) -> bytes: @@ -444,7 +455,8 @@ class BodyPartReader: """ if CONTENT_TRANSFER_ENCODING in self.headers: data = self._decode_content_transfer(data) - if CONTENT_ENCODING in self.headers: + # https://datatracker.ietf.org/doc/html/rfc7578#section-4.8 + if not self._is_form_data and CONTENT_ENCODING in self.headers: return self._decode_content(data) return data @@ -478,7 +490,7 @@ class BodyPartReader: """Returns charset parameter from Content-Type header or default.""" ctype = self.headers.get(CONTENT_TYPE, "") mimetype = parse_mimetype(ctype) - return mimetype.parameters.get("charset", default) + return mimetype.parameters.get("charset", self._default_charset or default) @reify def name(self) -> Optional[str]: @@ -533,9 +545,17 @@ class MultipartReader: part_reader_cls = BodyPartReader def __init__(self, headers: Mapping[str, str], content: StreamReader) -> None: + self._mimetype = parse_mimetype(headers[CONTENT_TYPE]) + assert self._mimetype.type == "multipart", "multipart/* content type expected" + if "boundary" not in self._mimetype.parameters: + raise ValueError( + "boundary missed for Content-Type: %s" % headers[CONTENT_TYPE] + ) + self.headers = headers self._boundary = ("--" + self._get_boundary()).encode() self._content = content + self._default_charset: Optional[str] = None self._last_part: Optional[Union["MultipartReader", BodyPartReader]] = None self._at_eof = False self._at_bof = True @@ -587,7 +607,24 @@ class MultipartReader: await self._read_boundary() if self._at_eof: # we just read the last boundary, nothing to do there return None - self._last_part = await self.fetch_next_part() + + part = await self.fetch_next_part() + # https://datatracker.ietf.org/doc/html/rfc7578#section-4.6 + if ( + self._last_part is None + and self._mimetype.subtype == "form-data" + and isinstance(part, BodyPartReader) + ): + _, params = parse_content_disposition(part.headers.get(CONTENT_DISPOSITION)) + if params.get("name") == "_charset_": + # Longest encoding in https://encoding.spec.whatwg.org/encodings.json + # is 19 characters, so 32 should be more than enough for any valid encoding. + charset = await part.read_chunk(32) + if len(charset) > 31: + raise RuntimeError("Invalid default charset") + self._default_charset = charset.strip().decode() + part = await self.fetch_next_part() + self._last_part = part return self._last_part async def release(self) -> None: @@ -623,19 +660,16 @@ class MultipartReader: return type(self)(headers, self._content) return self.multipart_reader_cls(headers, self._content) else: - return self.part_reader_cls(self._boundary, headers, self._content) - - def _get_boundary(self) -> str: - mimetype = parse_mimetype(self.headers[CONTENT_TYPE]) - - assert mimetype.type == "multipart", "multipart/* content type expected" - - if "boundary" not in mimetype.parameters: - raise ValueError( - "boundary missed for Content-Type: %s" % self.headers[CONTENT_TYPE] + return self.part_reader_cls( + self._boundary, + headers, + self._content, + subtype=self._mimetype.subtype, + default_charset=self._default_charset, ) - boundary = mimetype.parameters["boundary"] + def _get_boundary(self) -> str: + boundary = self._mimetype.parameters["boundary"] if len(boundary) > 70: raise ValueError("boundary %r is too long (70 chars max)" % boundary) @@ -726,6 +760,7 @@ class MultipartWriter(Payload): super().__init__(None, content_type=ctype) self._parts: List[_Part] = [] + self._is_form_data = subtype == "form-data" def __enter__(self) -> "MultipartWriter": return self @@ -803,32 +838,38 @@ class MultipartWriter(Payload): def append_payload(self, payload: Payload) -> Payload: """Adds a new body part to multipart writer.""" - # compression - encoding: Optional[str] = payload.headers.get( - CONTENT_ENCODING, - "", - ).lower() - if encoding and encoding not in ("deflate", "gzip", "identity"): - raise RuntimeError(f"unknown content encoding: {encoding}") - if encoding == "identity": - encoding = None - - # te encoding - te_encoding: Optional[str] = payload.headers.get( - CONTENT_TRANSFER_ENCODING, - "", - ).lower() - if te_encoding not in ("", "base64", "quoted-printable", "binary"): - raise RuntimeError( - "unknown content transfer encoding: {}" "".format(te_encoding) + encoding: Optional[str] = None + te_encoding: Optional[str] = None + if self._is_form_data: + # https://datatracker.ietf.org/doc/html/rfc7578#section-4.7 + # https://datatracker.ietf.org/doc/html/rfc7578#section-4.8 + assert ( + not {CONTENT_ENCODING, CONTENT_LENGTH, CONTENT_TRANSFER_ENCODING} + & payload.headers.keys() ) - if te_encoding == "binary": - te_encoding = None - - # size - size = payload.size - if size is not None and not (encoding or te_encoding): - payload.headers[CONTENT_LENGTH] = str(size) + # Set default Content-Disposition in case user doesn't create one + if CONTENT_DISPOSITION not in payload.headers: + name = f"section-{len(self._parts)}" + payload.set_content_disposition("form-data", name=name) + else: + # compression + encoding = payload.headers.get(CONTENT_ENCODING, "").lower() + if encoding and encoding not in ("deflate", "gzip", "identity"): + raise RuntimeError(f"unknown content encoding: {encoding}") + if encoding == "identity": + encoding = None + + # te encoding + te_encoding = payload.headers.get(CONTENT_TRANSFER_ENCODING, "").lower() + if te_encoding not in ("", "base64", "quoted-printable", "binary"): + raise RuntimeError(f"unknown content transfer encoding: {te_encoding}") + if te_encoding == "binary": + te_encoding = None + + # size + size = payload.size + if size is not None and not (encoding or te_encoding): + payload.headers[CONTENT_LENGTH] = str(size) self._parts.append((payload, encoding, te_encoding)) # type: ignore[arg-type] return payload @@ -886,6 +927,11 @@ class MultipartWriter(Payload): async def write(self, writer: Any, close_boundary: bool = True) -> None: """Write body.""" for part, encoding, te_encoding in self._parts: + if self._is_form_data: + # https://datatracker.ietf.org/doc/html/rfc7578#section-4.2 + assert CONTENT_DISPOSITION in part.headers + assert "name=" in part.headers[CONTENT_DISPOSITION] + await writer.write(b"--" + self._boundary + b"\r\n") await writer.write(part._binary_headers) Index: aiohttp-3.8.5/tests/test_client_functional.py =================================================================== --- aiohttp-3.8.5.orig/tests/test_client_functional.py +++ aiohttp-3.8.5/tests/test_client_functional.py @@ -1158,48 +1158,6 @@ async def test_POST_DATA_with_charset_po resp.close() -async def test_POST_DATA_with_context_transfer_encoding(aiohttp_client) -> None: - async def handler(request): - data = await request.post() - assert data["name"] == "text" - return web.Response(text=data["name"]) - - app = web.Application() - app.router.add_post("/", handler) - client = await aiohttp_client(app) - - form = aiohttp.FormData() - form.add_field("name", "text", content_transfer_encoding="base64") - - resp = await client.post("/", data=form) - assert 200 == resp.status - content = await resp.text() - assert content == "text" - resp.close() - - -async def test_POST_DATA_with_content_type_context_transfer_encoding(aiohttp_client): - async def handler(request): - data = await request.post() - assert data["name"] == "text" - return web.Response(body=data["name"]) - - app = web.Application() - app.router.add_post("/", handler) - client = await aiohttp_client(app) - - form = aiohttp.FormData() - form.add_field( - "name", "text", content_type="text/plain", content_transfer_encoding="base64" - ) - - resp = await client.post("/", data=form) - assert 200 == resp.status - content = await resp.text() - assert content == "text" - resp.close() - - async def test_POST_MultiDict(aiohttp_client) -> None: async def handler(request): data = await request.post() @@ -1249,7 +1207,7 @@ async def test_POST_FILES(aiohttp_client client = await aiohttp_client(app) with fname.open("rb") as f: - resp = await client.post("/", data={"some": f, "test": b"data"}, chunked=True) + resp = await client.post("/", data={"some": f, "test": io.BytesIO(b"data")}, chunked=True) assert 200 == resp.status resp.close() Index: aiohttp-3.8.5/tests/test_multipart.py =================================================================== --- aiohttp-3.8.5.orig/tests/test_multipart.py +++ aiohttp-3.8.5/tests/test_multipart.py @@ -942,6 +942,58 @@ class TestMultipartReader: assert first.at_eof() assert not second.at_eof() + async def test_read_form_default_encoding(self) -> None: + with Stream( + b"--:\r\n" + b'Content-Disposition: form-data; name="_charset_"\r\n\r\n' + b"ascii" + b"\r\n" + b"--:\r\n" + b'Content-Disposition: form-data; name="field1"\r\n\r\n' + b"foo" + b"\r\n" + b"--:\r\n" + b"Content-Type: text/plain;charset=UTF-8\r\n" + b'Content-Disposition: form-data; name="field2"\r\n\r\n' + b"foo" + b"\r\n" + b"--:\r\n" + b'Content-Disposition: form-data; name="field3"\r\n\r\n' + b"foo" + b"\r\n" + ) as stream: + reader = aiohttp.MultipartReader( + {CONTENT_TYPE: 'multipart/form-data;boundary=":"'}, + stream, + ) + field1 = await reader.next() + assert field1.name == "field1" + assert field1.get_charset("default") == "ascii" + field2 = await reader.next() + assert field2.name == "field2" + assert field2.get_charset("default") == "UTF-8" + field3 = await reader.next() + assert field3.name == "field3" + assert field3.get_charset("default") == "ascii" + + async def test_read_form_invalid_default_encoding(self) -> None: + with Stream( + b"--:\r\n" + b'Content-Disposition: form-data; name="_charset_"\r\n\r\n' + b"this-value-is-too-long-to-be-a-charset" + b"\r\n" + b"--:\r\n" + b'Content-Disposition: form-data; name="field1"\r\n\r\n' + b"foo" + b"\r\n" + ) as stream: + reader = aiohttp.MultipartReader( + {CONTENT_TYPE: 'multipart/form-data;boundary=":"'}, + stream, + ) + with pytest.raises(RuntimeError, match="Invalid default charset"): + await reader.next() + async def test_writer(writer) -> None: assert writer.size == 7 @@ -1228,6 +1280,25 @@ class TestMultipartWriter: part = writer._parts[0][0] assert part.headers[CONTENT_TYPE] == "test/passed" + def test_set_content_disposition_after_append(self): + writer = aiohttp.MultipartWriter("form-data") + part = writer.append("some-data") + part.set_content_disposition("form-data", name="method") + assert 'name="method"' in part.headers[CONTENT_DISPOSITION] + + def test_automatic_content_disposition(self): + writer = aiohttp.MultipartWriter("form-data") + writer.append_json(()) + part = payload.StringPayload("foo") + part.set_content_disposition("form-data", name="second") + writer.append_payload(part) + writer.append("foo") + + disps = tuple(p[0].headers[CONTENT_DISPOSITION] for p in writer._parts) + assert 'name="section-0"' in disps[0] + assert 'name="second"' in disps[1] + assert 'name="section-2"' in disps[2] + def test_with(self) -> None: with aiohttp.MultipartWriter(boundary=":") as writer: writer.append("foo") @@ -1278,7 +1349,6 @@ class TestMultipartWriter: CONTENT_TYPE: "text/python", }, ) - content_length = part.size await writer.write(stream) assert part.headers[CONTENT_TYPE] == "text/python" @@ -1289,9 +1359,7 @@ class TestMultipartWriter: assert headers == ( b"--:\r\n" b"Content-Type: text/python\r\n" - b'Content-Disposition: attachments; filename="bug.py"\r\n' - b"Content-Length: %s" - b"" % (str(content_length).encode(),) + b'Content-Disposition: attachments; filename="bug.py"' ) async def test_set_content_disposition_override(self, buf, stream): @@ -1305,7 +1373,6 @@ class TestMultipartWriter: CONTENT_TYPE: "text/python", }, ) - content_length = part.size await writer.write(stream) assert part.headers[CONTENT_TYPE] == "text/python" @@ -1316,9 +1383,7 @@ class TestMultipartWriter: assert headers == ( b"--:\r\n" b"Content-Type: text/python\r\n" - b'Content-Disposition: attachments; filename="bug.py"\r\n' - b"Content-Length: %s" - b"" % (str(content_length).encode(),) + b'Content-Disposition: attachments; filename="bug.py"' ) async def test_reset_content_disposition_header(self, buf, stream): @@ -1330,8 +1395,6 @@ class TestMultipartWriter: headers={CONTENT_TYPE: "text/plain"}, ) - content_length = part.size - assert CONTENT_DISPOSITION in part.headers part.set_content_disposition("attachments", filename="bug.py") @@ -1344,9 +1407,7 @@ class TestMultipartWriter: b"--:\r\n" b"Content-Type: text/plain\r\n" b"Content-Disposition:" - b' attachments; filename="bug.py"\r\n' - b"Content-Length: %s" - b"" % (str(content_length).encode(),) + b' attachments; filename="bug.py"' ) Index: aiohttp-3.8.5/tests/test_web_functional.py =================================================================== --- aiohttp-3.8.5.orig/tests/test_web_functional.py +++ aiohttp-3.8.5/tests/test_web_functional.py @@ -34,7 +34,8 @@ def fname(here): def new_dummy_form(): form = FormData() - form.add_field("name", b"123", content_transfer_encoding="base64") + with pytest.warns(DeprecationWarning, match="BytesPayload"): + form.add_field("name", b"123", content_transfer_encoding="base64") return form @@ -429,25 +430,6 @@ async def test_release_post_data(aiohttp await resp.release() -async def test_POST_DATA_with_content_transfer_encoding(aiohttp_client) -> None: - async def handler(request): - data = await request.post() - assert b"123" == data["name"] - return web.Response() - - app = web.Application() - app.router.add_post("/", handler) - client = await aiohttp_client(app) - - form = FormData() - form.add_field("name", b"123", content_transfer_encoding="base64") - - resp = await client.post("/", data=form) - assert 200 == resp.status - - await resp.release() - - async def test_post_form_with_duplicate_keys(aiohttp_client) -> None: async def handler(request): data = await request.post() @@ -505,7 +487,8 @@ async def test_100_continue(aiohttp_clie return web.Response() form = FormData() - form.add_field("name", b"123", content_transfer_encoding="base64") + with pytest.warns(DeprecationWarning, match="BytesPayload"): + form.add_field("name", b"123", content_transfer_encoding="base64") app = web.Application() app.router.add_post("/", handler) @@ -683,7 +666,7 @@ async def test_upload_file(aiohttp_clien app.router.add_post("/", handler) client = await aiohttp_client(app) - resp = await client.post("/", data={"file": data}) + resp = await client.post("/", data={"file": io.BytesIO(data)}) assert 200 == resp.status await resp.release()