Revert #3492 (#8300)

#3492 was never backported, and nobody seems to have reported the missing feature after 5 years, which suggests there is little demand for it. Furthermore, there is a risk that the change may introduce HTTP request smuggling vulnerabilities. Therefore, we should revert it for now. If there is a desire to re-add the feature, the specs will need to be referenced and evaluated to ensure it doesn't present a security issue.
aio-libs · Apr 6, 2024 · cbc0c86 · cbc0c86
1 parent 4d72dca
commit cbc0c86
Show file tree

Hide file tree

Showing 3 changed files with 214 additions and 475 deletions.
diff --git a/CHANGES/2302.feature b/CHANGES/2302.feature
diff --git a/aiohttp/multipart.py b/aiohttp/multipart.py
@@ -260,12 +260,9 @@ def __init__(
         boundary: bytes,
         headers: "CIMultiDictProxy[str]",
         content: StreamReader,
-        *,
-        _newline: bytes = b"\r\n",
     ) -> None:
         self.headers = headers
         self._boundary = boundary
-        self._newline = _newline
         self._content = content
         self._at_eof = False
         length = self.headers.get(CONTENT_LENGTH, None)
@@ -348,9 +345,9 @@ async def read_chunk(self, size: int = chunk_size) -> bytes:
         if self._read_bytes == self._length:
             self._at_eof = True
         if self._at_eof:
-            newline = await self._content.readline()
+            clrf = await self._content.readline()
             assert (
-                newline == self._newline
+                b"\r\n" == clrf
             ), "reader did not read all the data or it is malformed"
         return chunk
 
@@ -377,15 +374,11 @@ async def _read_chunk_from_stream(self, size: int) -> bytes:
         assert self._content_eof < 3, "Reading after EOF"
         assert self._prev_chunk is not None
         window = self._prev_chunk + chunk
-
-        intermeditate_boundary = self._newline + self._boundary
-
+        sub = b"\r\n" + self._boundary
         if first_chunk:
-            pos = 0
+            idx = window.find(sub)
         else:
-            pos = max(0, len(self._prev_chunk) - len(intermeditate_boundary))
-
-        idx = window.find(intermeditate_boundary, pos)
+            idx = window.find(sub, max(0, len(self._prev_chunk) - len(sub)))
         if idx >= 0:
             # pushing boundary back to content
             with warnings.catch_warnings():
@@ -396,7 +389,6 @@ async def _read_chunk_from_stream(self, size: int) -> bytes:
             chunk = window[len(self._prev_chunk) : idx]
             if not chunk:
                 self._at_eof = True
-
         result = self._prev_chunk
         self._prev_chunk = chunk
         return result
@@ -425,8 +417,7 @@ async def readline(self) -> bytes:
         else:
             next_line = await self._content.readline()
             if next_line.startswith(self._boundary):
-                # strip newline but only once
-                line = line[: -len(self._newline)]
+                line = line[:-2]  # strip CRLF but only once
             self._unread.append(next_line)
 
         return line
@@ -578,12 +569,9 @@ def __init__(
         self,
         headers: Mapping[str, str],
         content: StreamReader,
-        *,
-        _newline: bytes = b"\r\n",
     ) -> None:
         self.headers = headers
         self._boundary = ("--" + self._get_boundary()).encode()
-        self._newline = _newline
         self._content = content
         self._last_part: Optional[Union["MultipartReader", BodyPartReader]] = None
         self._at_eof = False
@@ -670,13 +658,9 @@ def _get_part_reader(
         if mimetype.type == "multipart":
             if self.multipart_reader_cls is None:
                 return type(self)(headers, self._content)
-            return self.multipart_reader_cls(
-                headers, self._content, _newline=self._newline
-            )
+            return self.multipart_reader_cls(headers, self._content)
         else:
-            return self.part_reader_cls(
-                self._boundary, headers, self._content, _newline=self._newline
-            )
+            return self.part_reader_cls(self._boundary, headers, self._content)
 
     def _get_boundary(self) -> str:
         mimetype = parse_mimetype(self.headers[CONTENT_TYPE])
@@ -703,23 +687,11 @@ async def _read_until_first_boundary(self) -> None:
         while True:
             chunk = await self._readline()
             if chunk == b"":
-                raise ValueError(
-                    "Could not find starting boundary %r" % (self._boundary)
-                )
-            newline = None
-            end_boundary = self._boundary + b"--"
-            if chunk.startswith(end_boundary):
-                _, newline = chunk.split(end_boundary, 1)
-            elif chunk.startswith(self._boundary):
-                _, newline = chunk.split(self._boundary, 1)
-            if newline is not None:
-                assert newline in (b"\r\n", b"\n"), (newline, chunk, self._boundary)
-                self._newline = newline
-
+                raise ValueError(f"Could not find starting boundary {self._boundary!r}")
             chunk = chunk.rstrip()
             if chunk == self._boundary:
                 return
-            elif chunk == end_boundary:
+            elif chunk == self._boundary + b"--":
                 self._at_eof = True
                 return