Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix handling of multipart/form-data #8280

Merged
merged 39 commits into from
Apr 7, 2024
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
cf9a42c
Fix handling of multipart/form-data
Dreamsorcerer Apr 1, 2024
cbea0c6
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 1, 2024
d9b039b
Create 8280.bugfix.rst
Dreamsorcerer Apr 1, 2024
970fda1
Update multipart.py
Dreamsorcerer Apr 1, 2024
1cec2cf
Update multipart.py
Dreamsorcerer Apr 1, 2024
56c0d6e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 1, 2024
af851dd
Update multipart.py
Dreamsorcerer Apr 1, 2024
91af09f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 1, 2024
1c6fcd7
Update multipart.py
Dreamsorcerer Apr 1, 2024
e874e3a
Update multipart.py
Dreamsorcerer Apr 1, 2024
d992d10
Update multipart.py
Dreamsorcerer Apr 1, 2024
502adac
Update test_multipart.py
Dreamsorcerer Apr 1, 2024
37a9414
Update test_web_functional.py
Dreamsorcerer Apr 1, 2024
57c71e1
Update formdata.py
Dreamsorcerer Apr 1, 2024
1adcd0a
Update test_client_functional.py
Dreamsorcerer Apr 1, 2024
3751264
Create 8280.breaking.rst
Dreamsorcerer Apr 1, 2024
4d11802
Update multipart.py
Dreamsorcerer Apr 1, 2024
cfa1d54
Update multipart.py
Dreamsorcerer Apr 3, 2024
91c4f65
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 3, 2024
00e76ab
Update multipart.py
Dreamsorcerer Apr 3, 2024
438fcf3
Update test_multipart.py
Dreamsorcerer Apr 3, 2024
db85964
Update CHANGES/8280.bugfix.rst
Dreamsorcerer Apr 4, 2024
e85bafb
Update CHANGES/8280.breaking.rst
Dreamsorcerer Apr 4, 2024
d24602a
Update multipart.py
Dreamsorcerer Apr 4, 2024
d9edc4d
Replace with warnings
Dreamsorcerer Apr 5, 2024
368ce2a
Readd deprecated parameter to tests
Dreamsorcerer Apr 5, 2024
a26d045
Update and rename 8280.breaking.rst to 8280.deprecation.rst
Dreamsorcerer Apr 5, 2024
e46f47f
Import
Dreamsorcerer Apr 5, 2024
967ba6b
Update test_web_functional.py
Dreamsorcerer Apr 5, 2024
f665a18
Update formdata.py
Dreamsorcerer Apr 5, 2024
2231e8c
Update test_client_functional.py
Dreamsorcerer Apr 5, 2024
846f02c
Update formdata.py
Dreamsorcerer Apr 5, 2024
391e725
Update test_web_functional.py
Dreamsorcerer Apr 5, 2024
60204e3
Update test_web_functional.py
Dreamsorcerer Apr 5, 2024
39a2d0a
Add tests for _charset_
Dreamsorcerer Apr 5, 2024
0ef28a7
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 5, 2024
f6c816d
Update test_multipart.py
Dreamsorcerer Apr 5, 2024
be5b98c
Merge branch 'master' into Dreamsorcerer-patch-6
Dreamsorcerer Apr 6, 2024
c02cab6
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES/8280.breaking.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Removed deprecated `content_transfer_encoding` parameter from `FormData.add_field()` -- by :user:`Dreamsorcerer`.
Dreamsorcerer marked this conversation as resolved.
Show resolved Hide resolved
1 change: 1 addition & 0 deletions CHANGES/8280.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixed `multipart/form-data` compliance with RFC 7578 -- by :user:`Dreamsorcerer`.
Dreamsorcerer marked this conversation as resolved.
Show resolved Hide resolved
11 changes: 1 addition & 10 deletions aiohttp/formdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,11 @@ def add_field(
*,
content_type: Optional[str] = None,
filename: Optional[str] = None,
content_transfer_encoding: Optional[str] = None,
) -> None:
if isinstance(value, io.IOBase):
self._is_multipart = True
elif isinstance(value, (bytes, bytearray, memoryview)):
if filename is None and content_transfer_encoding is None:
if filename is None:
filename = name

type_options: MultiDict[str] = MultiDict({"name": name})
Expand All @@ -76,14 +75,6 @@ def add_field(
)
headers[hdrs.CONTENT_TYPE] = content_type
self._is_multipart = True
if content_transfer_encoding is not None:
if not isinstance(content_transfer_encoding, str):
raise TypeError(
"content_transfer_encoding must be an instance"
" of str. Got: %s" % content_transfer_encoding
)
headers[hdrs.CONTENT_TRANSFER_ENCODING] = content_transfer_encoding
self._is_multipart = True

self._fields.append((type_options, headers, value))

Expand Down
120 changes: 81 additions & 39 deletions aiohttp/multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,13 +262,18 @@ def __init__(
content: StreamReader,
*,
_newline: bytes = b"\r\n",
subtype: str = "mixed",
default_charset: Optional[str] = None,
) -> None:
self.headers = headers
self._boundary = boundary
self._newline = _newline
self._content = content
self._default_charset = default_charset
self._at_eof = False
length = self.headers.get(CONTENT_LENGTH, None)
self._is_form_data = subtype == "form-data"
# https://datatracker.ietf.org/doc/html/rfc7578#section-4.8
length = None if self._is_form_data else self.headers.get(CONTENT_LENGTH, None)
self._length = int(length) if length is not None else None
self._read_bytes = 0
self._unread: Deque[bytes] = deque()
Expand Down Expand Up @@ -360,6 +365,8 @@ async def _read_chunk_from_length(self, size: int) -> bytes:
assert self._length is not None, "Content-Length required for chunked read"
chunk_size = min(size, self._length - self._read_bytes)
chunk = await self._content.read(chunk_size)
if self._content.at_eof():
self._at_eof = True
return chunk

async def _read_chunk_from_stream(self, size: int) -> bytes:
Expand Down Expand Up @@ -486,7 +493,8 @@ def decode(self, data: bytes) -> bytes:
"""
if CONTENT_TRANSFER_ENCODING in self.headers:
data = self._decode_content_transfer(data)
if CONTENT_ENCODING in self.headers:
# https://datatracker.ietf.org/doc/html/rfc7578#section-4.8
if not self._is_form_data and CONTENT_ENCODING in self.headers:
return self._decode_content(data)
return data

Expand Down Expand Up @@ -520,7 +528,7 @@ def get_charset(self, default: str) -> str:
"""Returns charset parameter from Content-Type header or default."""
ctype = self.headers.get(CONTENT_TYPE, "")
mimetype = parse_mimetype(ctype)
return mimetype.parameters.get("charset", default)
return mimetype.parameters.get("charset", self._default_charset or default)

@reify
def name(self) -> Optional[str]:
Expand Down Expand Up @@ -581,10 +589,18 @@ def __init__(
*,
_newline: bytes = b"\r\n",
) -> None:
self._mimetype = parse_mimetype(headers[CONTENT_TYPE])
assert self._mimetype.type == "multipart", "multipart/* content type expected"
if "boundary" not in self._mimetype.parameters:
raise ValueError(
"boundary missed for Content-Type: %s" % headers[CONTENT_TYPE]
)

self.headers = headers
self._boundary = ("--" + self._get_boundary()).encode()
self._newline = _newline
self._content = content
self._default_charset: Optional[str] = None
self._last_part: Optional[Union["MultipartReader", BodyPartReader]] = None
self._at_eof = False
self._at_bof = True
Expand Down Expand Up @@ -636,7 +652,24 @@ async def next(
await self._read_boundary()
if self._at_eof: # we just read the last boundary, nothing to do there
return None
self._last_part = await self.fetch_next_part()

part = await self.fetch_next_part()
# https://datatracker.ietf.org/doc/html/rfc7578#section-4.6
if (
self._last_part is None
and self._mimetype.subtype == "form-data"
and isinstance(part, BodyPartReader)
):
_, params = parse_content_disposition(part.headers.get(CONTENT_DISPOSITION))
if params.get("name") == "_charset_":
# Longest encoding in https://encoding.spec.whatwg.org/encodings.json
# is 19 characters, so 32 should be more than enough for any valid encoding.
charset = await part.read_chunk(32)
if len(charset) >= 32:
Dreamsorcerer marked this conversation as resolved.
Show resolved Hide resolved
raise RuntimeError("Invalid default charset")
Dreamsorcerer marked this conversation as resolved.
Show resolved Hide resolved
self._default_charset = charset.strip().decode()
part = await self.fetch_next_part()
self._last_part = part
return self._last_part

async def release(self) -> None:
Expand Down Expand Up @@ -675,20 +708,16 @@ def _get_part_reader(
)
else:
return self.part_reader_cls(
self._boundary, headers, self._content, _newline=self._newline
self._boundary,
headers,
self._content,
_newline=self._newline,
subtype=self._mimetype.subtype,
default_charset=self._default_charset,
)

def _get_boundary(self) -> str:
mimetype = parse_mimetype(self.headers[CONTENT_TYPE])

assert mimetype.type == "multipart", "multipart/* content type expected"

if "boundary" not in mimetype.parameters:
raise ValueError(
"boundary missed for Content-Type: %s" % self.headers[CONTENT_TYPE]
)

boundary = mimetype.parameters["boundary"]
boundary = self._mimetype.parameters["boundary"]
if len(boundary) > 70:
raise ValueError("boundary %r is too long (70 chars max)" % boundary)

Expand Down Expand Up @@ -793,6 +822,7 @@ def __init__(self, subtype: str = "mixed", boundary: Optional[str] = None) -> No
super().__init__(None, content_type=ctype)

self._parts: List[_Part] = []
self._is_form_data = subtype == "form-data"

def __enter__(self) -> "MultipartWriter":
return self
Expand Down Expand Up @@ -870,32 +900,44 @@ def append(self, obj: Any, headers: Optional[Mapping[str, str]] = None) -> Paylo

def append_payload(self, payload: Payload) -> Payload:
"""Adds a new body part to multipart writer."""
# compression
encoding: Optional[str] = payload.headers.get(
CONTENT_ENCODING,
"",
).lower()
if encoding and encoding not in ("deflate", "gzip", "identity"):
raise RuntimeError(f"unknown content encoding: {encoding}")
if encoding == "identity":
encoding = None

# te encoding
te_encoding: Optional[str] = payload.headers.get(
CONTENT_TRANSFER_ENCODING,
"",
).lower()
if te_encoding not in ("", "base64", "quoted-printable", "binary"):
raise RuntimeError(
"unknown content transfer encoding: {}" "".format(te_encoding)
encoding: Optional[str] = None
te_encoding: Optional[str] = None
if self._is_form_data:
# https://datatracker.ietf.org/doc/html/rfc7578#section-4.7
# https://datatracker.ietf.org/doc/html/rfc7578#section-4.8
assert CONTENT_DISPOSITION in payload.headers
assert "name=" in payload.headers[CONTENT_DISPOSITION]
assert (
not {CONTENT_ENCODING, CONTENT_LENGTH, CONTENT_TRANSFER_ENCODING}
& payload.headers.keys()
)
if te_encoding == "binary":
te_encoding = None
else:
# compression
encoding = payload.headers.get(
CONTENT_ENCODING,
"",
).lower()
if encoding and encoding not in ("deflate", "gzip", "identity"):
raise RuntimeError(f"unknown content encoding: {encoding}")
if encoding == "identity":
encoding = None

# te encoding
te_encoding = payload.headers.get(
CONTENT_TRANSFER_ENCODING,
"",
).lower()
if te_encoding not in ("", "base64", "quoted-printable", "binary"):
raise RuntimeError(
"unknown content transfer encoding: {}" "".format(te_encoding)
Dreamsorcerer marked this conversation as resolved.
Show resolved Hide resolved
)
if te_encoding == "binary":
te_encoding = None

# size
size = payload.size
if size is not None and not (encoding or te_encoding):
payload.headers[CONTENT_LENGTH] = str(size)
# size
size = payload.size
if size is not None and not (encoding or te_encoding):
payload.headers[CONTENT_LENGTH] = str(size)

self._parts.append((payload, encoding, te_encoding)) # type: ignore[arg-type]
return payload
Expand Down
44 changes: 0 additions & 44 deletions tests/test_client_functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -1388,50 +1388,6 @@ async def handler(request):
resp.close()


async def test_POST_DATA_with_context_transfer_encoding(aiohttp_client: Any) -> None:
async def handler(request):
data = await request.post()
assert data["name"] == "text"
return web.Response(text=data["name"])

app = web.Application()
app.router.add_post("/", handler)
client = await aiohttp_client(app)

form = aiohttp.FormData()
form.add_field("name", "text", content_transfer_encoding="base64")

resp = await client.post("/", data=form)
assert 200 == resp.status
content = await resp.text()
assert content == "text"
resp.close()


async def test_POST_DATA_with_content_type_context_transfer_encoding(
aiohttp_client: Any,
):
async def handler(request):
data = await request.post()
assert data["name"] == "text"
return web.Response(body=data["name"])

app = web.Application()
app.router.add_post("/", handler)
client = await aiohttp_client(app)

form = aiohttp.FormData()
form.add_field(
"name", "text", content_type="text/plain", content_transfer_encoding="base64"
)

resp = await client.post("/", data=form)
assert 200 == resp.status
content = await resp.text()
assert content == "text"
resp.close()


async def test_POST_MultiDict(aiohttp_client: Any) -> None:
async def handler(request):
data = await request.post()
Expand Down
12 changes: 3 additions & 9 deletions tests/test_multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -1410,9 +1410,7 @@ async def test_preserve_content_disposition_header(
assert headers == (
b"--:\r\n"
b"Content-Type: text/python\r\n"
b'Content-Disposition: attachments; filename="bug.py"\r\n'
b"Content-Length: %s"
b"" % (str(content_length).encode(),)
b'Content-Disposition: attachments; filename="bug.py"'
)

async def test_set_content_disposition_override(
Expand All @@ -1439,9 +1437,7 @@ async def test_set_content_disposition_override(
assert headers == (
b"--:\r\n"
b"Content-Type: text/python\r\n"
b'Content-Disposition: attachments; filename="bug.py"\r\n'
b"Content-Length: %s"
b"" % (str(content_length).encode(),)
b'Content-Disposition: attachments; filename="bug.py"'
)

async def test_reset_content_disposition_header(
Expand Down Expand Up @@ -1469,9 +1465,7 @@ async def test_reset_content_disposition_header(
b"--:\r\n"
b"Content-Type: text/plain\r\n"
b"Content-Disposition:"
b' attachments; filename="bug.py"\r\n'
b"Content-Length: %s"
b"" % (str(content_length).encode(),)
b' attachments; filename="bug.py"'
)


Expand Down
23 changes: 2 additions & 21 deletions tests/test_web_functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def fname(here: Any):

def new_dummy_form():
form = FormData()
form.add_field("name", b"123", content_transfer_encoding="base64")
form.add_field("name", b"123")
return form


Expand Down Expand Up @@ -447,25 +447,6 @@ async def handler(request):
await resp.release()


async def test_POST_DATA_with_content_transfer_encoding(aiohttp_client: Any) -> None:
async def handler(request):
data = await request.post()
assert b"123" == data["name"]
return web.Response()

app = web.Application()
app.router.add_post("/", handler)
client = await aiohttp_client(app)

form = FormData()
form.add_field("name", b"123", content_transfer_encoding="base64")

resp = await client.post("/", data=form)
assert 200 == resp.status

await resp.release()


async def test_post_form_with_duplicate_keys(aiohttp_client: Any) -> None:
async def handler(request):
data = await request.post()
Expand Down Expand Up @@ -523,7 +504,7 @@ async def handler(request):
return web.Response()

form = FormData()
form.add_field("name", b"123", content_transfer_encoding="base64")
form.add_field("name", b"123")
Dreamsorcerer marked this conversation as resolved.
Show resolved Hide resolved

app = web.Application()
app.router.add_post("/", handler)
Expand Down
Loading