123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234 |
- From cc61050e8f26697463142d99864b562e8470b41d Mon Sep 17 00:00:00 2001
- From: Ben Darnell <ben@bendarnell.com>
- Date: Thu, 8 May 2025 13:29:43 -0400
- Subject: [PATCH] httputil: Raise errors instead of logging in
- multipart/form-data parsing
- We used to continue after logging an error, which allowed repeated
- errors to spam the logs. The error raised here will still be logged,
- but only once per request, consistent with other error handling in
- Tornado.
- Upstream: https://github.com/tornadoweb/tornado/commit/b39b892bf78fe8fea01dd45199aa88307e7162f3
- CVE: CVE-2025-47287
- Signed-off-by: Titouan Christophe <titouan.christophe@mind.be>
- ---
- tornado/httputil.py | 30 +++++++++++-------------------
- tornado/test/httpserver_test.py | 4 ++--
- tornado/test/httputil_test.py | 13 ++++++++-----
- tornado/web.py | 17 +++++++++++++----
- 4 files changed, 34 insertions(+), 30 deletions(-)
- diff --git a/tornado/httputil.py b/tornado/httputil.py
- index 7044aca02..ef460985e 100644
- --- a/tornado/httputil.py
- +++ b/tornado/httputil.py
- @@ -34,7 +34,6 @@
- from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
-
- from tornado.escape import native_str, parse_qs_bytes, utf8
- -from tornado.log import gen_log
- from tornado.util import ObjectDict, unicode_type
-
-
- @@ -884,25 +883,22 @@ def parse_body_arguments(
- """
- if content_type.startswith("application/x-www-form-urlencoded"):
- if headers and "Content-Encoding" in headers:
- - gen_log.warning(
- - "Unsupported Content-Encoding: %s", headers["Content-Encoding"]
- + raise HTTPInputError(
- + "Unsupported Content-Encoding: %s" % headers["Content-Encoding"]
- )
- - return
- try:
- # real charset decoding will happen in RequestHandler.decode_argument()
- uri_arguments = parse_qs_bytes(body, keep_blank_values=True)
- except Exception as e:
- - gen_log.warning("Invalid x-www-form-urlencoded body: %s", e)
- - uri_arguments = {}
- + raise HTTPInputError("Invalid x-www-form-urlencoded body: %s" % e) from e
- for name, values in uri_arguments.items():
- if values:
- arguments.setdefault(name, []).extend(values)
- elif content_type.startswith("multipart/form-data"):
- if headers and "Content-Encoding" in headers:
- - gen_log.warning(
- - "Unsupported Content-Encoding: %s", headers["Content-Encoding"]
- + raise HTTPInputError(
- + "Unsupported Content-Encoding: %s" % headers["Content-Encoding"]
- )
- - return
- try:
- fields = content_type.split(";")
- for field in fields:
- @@ -911,9 +907,9 @@ def parse_body_arguments(
- parse_multipart_form_data(utf8(v), body, arguments, files)
- break
- else:
- - raise ValueError("multipart boundary not found")
- + raise HTTPInputError("multipart boundary not found")
- except Exception as e:
- - gen_log.warning("Invalid multipart/form-data: %s", e)
- + raise HTTPInputError("Invalid multipart/form-data: %s" % e) from e
-
-
- def parse_multipart_form_data(
- @@ -942,26 +938,22 @@ def parse_multipart_form_data(
- boundary = boundary[1:-1]
- final_boundary_index = data.rfind(b"--" + boundary + b"--")
- if final_boundary_index == -1:
- - gen_log.warning("Invalid multipart/form-data: no final boundary")
- - return
- + raise HTTPInputError("Invalid multipart/form-data: no final boundary found")
- parts = data[:final_boundary_index].split(b"--" + boundary + b"\r\n")
- for part in parts:
- if not part:
- continue
- eoh = part.find(b"\r\n\r\n")
- if eoh == -1:
- - gen_log.warning("multipart/form-data missing headers")
- - continue
- + raise HTTPInputError("multipart/form-data missing headers")
- headers = HTTPHeaders.parse(part[:eoh].decode("utf-8"))
- disp_header = headers.get("Content-Disposition", "")
- disposition, disp_params = _parse_header(disp_header)
- if disposition != "form-data" or not part.endswith(b"\r\n"):
- - gen_log.warning("Invalid multipart/form-data")
- - continue
- + raise HTTPInputError("Invalid multipart/form-data")
- value = part[eoh + 4 : -2]
- if not disp_params.get("name"):
- - gen_log.warning("multipart/form-data value missing name")
- - continue
- + raise HTTPInputError("multipart/form-data missing name")
- name = disp_params["name"]
- if disp_params.get("filename"):
- ctype = headers.get("Content-Type", "application/unknown")
- diff --git a/tornado/test/httpserver_test.py b/tornado/test/httpserver_test.py
- index 570cb64ca..f197cfef8 100644
- --- a/tornado/test/httpserver_test.py
- +++ b/tornado/test/httpserver_test.py
- @@ -1148,9 +1148,9 @@ def test_gzip_unsupported(self):
- # Gzip support is opt-in; without it the server fails to parse
- # the body (but parsing form bodies is currently just a log message,
- # not a fatal error).
- - with ExpectLog(gen_log, "Unsupported Content-Encoding"):
- + with ExpectLog(gen_log, ".*Unsupported Content-Encoding"):
- response = self.post_gzip("foo=bar")
- - self.assertEqual(json_decode(response.body), {})
- + self.assertEqual(response.code, 400)
-
-
- class StreamingChunkSizeTest(AsyncHTTPTestCase):
- diff --git a/tornado/test/httputil_test.py b/tornado/test/httputil_test.py
- index 30fbec4d7..7a09beaa3 100644
- --- a/tornado/test/httputil_test.py
- +++ b/tornado/test/httputil_test.py
- @@ -12,7 +12,6 @@
- )
- from tornado.escape import utf8, native_str
- from tornado.log import gen_log
- -from tornado.testing import ExpectLog
- from tornado.test.util import ignore_deprecation
-
- import copy
- @@ -195,7 +194,9 @@ def test_missing_headers(self):
- b"\n", b"\r\n"
- )
- args, files = form_data_args()
- - with ExpectLog(gen_log, "multipart/form-data missing headers"):
- + with self.assertRaises(
- + HTTPInputError, msg="multipart/form-data missing headers"
- + ):
- parse_multipart_form_data(b"1234", data, args, files)
- self.assertEqual(files, {})
-
- @@ -209,7 +210,7 @@ def test_invalid_content_disposition(self):
- b"\n", b"\r\n"
- )
- args, files = form_data_args()
- - with ExpectLog(gen_log, "Invalid multipart/form-data"):
- + with self.assertRaises(HTTPInputError, msg="Invalid multipart/form-data"):
- parse_multipart_form_data(b"1234", data, args, files)
- self.assertEqual(files, {})
-
- @@ -222,7 +223,7 @@ def test_line_does_not_end_with_correct_line_break(self):
- b"\n", b"\r\n"
- )
- args, files = form_data_args()
- - with ExpectLog(gen_log, "Invalid multipart/form-data"):
- + with self.assertRaises(HTTPInputError, msg="Invalid multipart/form-data"):
- parse_multipart_form_data(b"1234", data, args, files)
- self.assertEqual(files, {})
-
- @@ -236,7 +237,9 @@ def test_content_disposition_header_without_name_parameter(self):
- b"\n", b"\r\n"
- )
- args, files = form_data_args()
- - with ExpectLog(gen_log, "multipart/form-data value missing name"):
- + with self.assertRaises(
- + HTTPInputError, msg="multipart/form-data value missing name"
- + ):
- parse_multipart_form_data(b"1234", data, args, files)
- self.assertEqual(files, {})
-
- diff --git a/tornado/web.py b/tornado/web.py
- index 0303f547e..2f702d648 100644
- --- a/tornado/web.py
- +++ b/tornado/web.py
- @@ -1801,6 +1801,14 @@ async def _execute(
- try:
- if self.request.method not in self.SUPPORTED_METHODS:
- raise HTTPError(405)
- +
- + # If we're not in stream_request_body mode, this is the place where we parse the body.
- + if not _has_stream_request_body(self.__class__):
- + try:
- + self.request._parse_body()
- + except httputil.HTTPInputError as e:
- + raise HTTPError(400, "Invalid body: %s" % e) from e
- +
- self.path_args = [self.decode_argument(arg) for arg in args]
- self.path_kwargs = dict(
- (k, self.decode_argument(v, name=k)) for (k, v) in kwargs.items()
- @@ -1992,7 +2000,7 @@ def _has_stream_request_body(cls: Type[RequestHandler]) -> bool:
-
-
- def removeslash(
- - method: Callable[..., Optional[Awaitable[None]]]
- + method: Callable[..., Optional[Awaitable[None]]],
- ) -> Callable[..., Optional[Awaitable[None]]]:
- """Use this decorator to remove trailing slashes from the request path.
-
- @@ -2021,7 +2029,7 @@ def wrapper( # type: ignore
-
-
- def addslash(
- - method: Callable[..., Optional[Awaitable[None]]]
- + method: Callable[..., Optional[Awaitable[None]]],
- ) -> Callable[..., Optional[Awaitable[None]]]:
- """Use this decorator to add a missing trailing slash to the request path.
-
- @@ -2445,8 +2453,9 @@ def finish(self) -> None:
- if self.stream_request_body:
- future_set_result_unless_cancelled(self.request._body_future, None)
- else:
- + # Note that the body gets parsed in RequestHandler._execute so it can be in
- + # the right exception handler scope.
- self.request.body = b"".join(self.chunks)
- - self.request._parse_body()
- self.execute()
-
- def on_connection_close(self) -> None:
- @@ -3332,7 +3341,7 @@ def transform_chunk(self, chunk: bytes, finishing: bool) -> bytes:
-
-
- def authenticated(
- - method: Callable[..., Optional[Awaitable[None]]]
- + method: Callable[..., Optional[Awaitable[None]]],
- ) -> Callable[..., Optional[Awaitable[None]]]:
- """Decorate methods with this to require that the user be logged in.
-
|