0001-httputil-raise-errors-instead-of-logging-in.patch 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. From cc61050e8f26697463142d99864b562e8470b41d Mon Sep 17 00:00:00 2001
  2. From: Ben Darnell <ben@bendarnell.com>
  3. Date: Thu, 8 May 2025 13:29:43 -0400
  4. Subject: [PATCH] httputil: Raise errors instead of logging in
  5. multipart/form-data parsing
  6. We used to continue after logging an error, which allowed repeated
  7. errors to spam the logs. The error raised here will still be logged,
  8. but only once per request, consistent with other error handling in
  9. Tornado.
  10. Upstream: https://github.com/tornadoweb/tornado/commit/b39b892bf78fe8fea01dd45199aa88307e7162f3
  11. CVE: CVE-2025-47287
  12. Signed-off-by: Titouan Christophe <titouan.christophe@mind.be>
  13. ---
  14. tornado/httputil.py | 30 +++++++++++-------------------
  15. tornado/test/httpserver_test.py | 4 ++--
  16. tornado/test/httputil_test.py | 13 ++++++++-----
  17. tornado/web.py | 17 +++++++++++++----
  18. 4 files changed, 34 insertions(+), 30 deletions(-)
  19. diff --git a/tornado/httputil.py b/tornado/httputil.py
  20. index 7044aca02..ef460985e 100644
  21. --- a/tornado/httputil.py
  22. +++ b/tornado/httputil.py
  23. @@ -34,7 +34,6 @@
  24. from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
  25. from tornado.escape import native_str, parse_qs_bytes, utf8
  26. -from tornado.log import gen_log
  27. from tornado.util import ObjectDict, unicode_type
  28. @@ -884,25 +883,22 @@ def parse_body_arguments(
  29. """
  30. if content_type.startswith("application/x-www-form-urlencoded"):
  31. if headers and "Content-Encoding" in headers:
  32. - gen_log.warning(
  33. - "Unsupported Content-Encoding: %s", headers["Content-Encoding"]
  34. + raise HTTPInputError(
  35. + "Unsupported Content-Encoding: %s" % headers["Content-Encoding"]
  36. )
  37. - return
  38. try:
  39. # real charset decoding will happen in RequestHandler.decode_argument()
  40. uri_arguments = parse_qs_bytes(body, keep_blank_values=True)
  41. except Exception as e:
  42. - gen_log.warning("Invalid x-www-form-urlencoded body: %s", e)
  43. - uri_arguments = {}
  44. + raise HTTPInputError("Invalid x-www-form-urlencoded body: %s" % e) from e
  45. for name, values in uri_arguments.items():
  46. if values:
  47. arguments.setdefault(name, []).extend(values)
  48. elif content_type.startswith("multipart/form-data"):
  49. if headers and "Content-Encoding" in headers:
  50. - gen_log.warning(
  51. - "Unsupported Content-Encoding: %s", headers["Content-Encoding"]
  52. + raise HTTPInputError(
  53. + "Unsupported Content-Encoding: %s" % headers["Content-Encoding"]
  54. )
  55. - return
  56. try:
  57. fields = content_type.split(";")
  58. for field in fields:
  59. @@ -911,9 +907,9 @@ def parse_body_arguments(
  60. parse_multipart_form_data(utf8(v), body, arguments, files)
  61. break
  62. else:
  63. - raise ValueError("multipart boundary not found")
  64. + raise HTTPInputError("multipart boundary not found")
  65. except Exception as e:
  66. - gen_log.warning("Invalid multipart/form-data: %s", e)
  67. + raise HTTPInputError("Invalid multipart/form-data: %s" % e) from e
  68. def parse_multipart_form_data(
  69. @@ -942,26 +938,22 @@ def parse_multipart_form_data(
  70. boundary = boundary[1:-1]
  71. final_boundary_index = data.rfind(b"--" + boundary + b"--")
  72. if final_boundary_index == -1:
  73. - gen_log.warning("Invalid multipart/form-data: no final boundary")
  74. - return
  75. + raise HTTPInputError("Invalid multipart/form-data: no final boundary found")
  76. parts = data[:final_boundary_index].split(b"--" + boundary + b"\r\n")
  77. for part in parts:
  78. if not part:
  79. continue
  80. eoh = part.find(b"\r\n\r\n")
  81. if eoh == -1:
  82. - gen_log.warning("multipart/form-data missing headers")
  83. - continue
  84. + raise HTTPInputError("multipart/form-data missing headers")
  85. headers = HTTPHeaders.parse(part[:eoh].decode("utf-8"))
  86. disp_header = headers.get("Content-Disposition", "")
  87. disposition, disp_params = _parse_header(disp_header)
  88. if disposition != "form-data" or not part.endswith(b"\r\n"):
  89. - gen_log.warning("Invalid multipart/form-data")
  90. - continue
  91. + raise HTTPInputError("Invalid multipart/form-data")
  92. value = part[eoh + 4 : -2]
  93. if not disp_params.get("name"):
  94. - gen_log.warning("multipart/form-data value missing name")
  95. - continue
  96. + raise HTTPInputError("multipart/form-data missing name")
  97. name = disp_params["name"]
  98. if disp_params.get("filename"):
  99. ctype = headers.get("Content-Type", "application/unknown")
  100. diff --git a/tornado/test/httpserver_test.py b/tornado/test/httpserver_test.py
  101. index 570cb64ca..f197cfef8 100644
  102. --- a/tornado/test/httpserver_test.py
  103. +++ b/tornado/test/httpserver_test.py
  104. @@ -1148,9 +1148,9 @@ def test_gzip_unsupported(self):
  105. # Gzip support is opt-in; without it the server fails to parse
  106. # the body (but parsing form bodies is currently just a log message,
  107. # not a fatal error).
  108. - with ExpectLog(gen_log, "Unsupported Content-Encoding"):
  109. + with ExpectLog(gen_log, ".*Unsupported Content-Encoding"):
  110. response = self.post_gzip("foo=bar")
  111. - self.assertEqual(json_decode(response.body), {})
  112. + self.assertEqual(response.code, 400)
  113. class StreamingChunkSizeTest(AsyncHTTPTestCase):
  114. diff --git a/tornado/test/httputil_test.py b/tornado/test/httputil_test.py
  115. index 30fbec4d7..7a09beaa3 100644
  116. --- a/tornado/test/httputil_test.py
  117. +++ b/tornado/test/httputil_test.py
  118. @@ -12,7 +12,6 @@
  119. )
  120. from tornado.escape import utf8, native_str
  121. from tornado.log import gen_log
  122. -from tornado.testing import ExpectLog
  123. from tornado.test.util import ignore_deprecation
  124. import copy
  125. @@ -195,7 +194,9 @@ def test_missing_headers(self):
  126. b"\n", b"\r\n"
  127. )
  128. args, files = form_data_args()
  129. - with ExpectLog(gen_log, "multipart/form-data missing headers"):
  130. + with self.assertRaises(
  131. + HTTPInputError, msg="multipart/form-data missing headers"
  132. + ):
  133. parse_multipart_form_data(b"1234", data, args, files)
  134. self.assertEqual(files, {})
  135. @@ -209,7 +210,7 @@ def test_invalid_content_disposition(self):
  136. b"\n", b"\r\n"
  137. )
  138. args, files = form_data_args()
  139. - with ExpectLog(gen_log, "Invalid multipart/form-data"):
  140. + with self.assertRaises(HTTPInputError, msg="Invalid multipart/form-data"):
  141. parse_multipart_form_data(b"1234", data, args, files)
  142. self.assertEqual(files, {})
  143. @@ -222,7 +223,7 @@ def test_line_does_not_end_with_correct_line_break(self):
  144. b"\n", b"\r\n"
  145. )
  146. args, files = form_data_args()
  147. - with ExpectLog(gen_log, "Invalid multipart/form-data"):
  148. + with self.assertRaises(HTTPInputError, msg="Invalid multipart/form-data"):
  149. parse_multipart_form_data(b"1234", data, args, files)
  150. self.assertEqual(files, {})
  151. @@ -236,7 +237,9 @@ def test_content_disposition_header_without_name_parameter(self):
  152. b"\n", b"\r\n"
  153. )
  154. args, files = form_data_args()
  155. - with ExpectLog(gen_log, "multipart/form-data value missing name"):
  156. + with self.assertRaises(
  157. + HTTPInputError, msg="multipart/form-data value missing name"
  158. + ):
  159. parse_multipart_form_data(b"1234", data, args, files)
  160. self.assertEqual(files, {})
  161. diff --git a/tornado/web.py b/tornado/web.py
  162. index 0303f547e..2f702d648 100644
  163. --- a/tornado/web.py
  164. +++ b/tornado/web.py
  165. @@ -1801,6 +1801,14 @@ async def _execute(
  166. try:
  167. if self.request.method not in self.SUPPORTED_METHODS:
  168. raise HTTPError(405)
  169. +
  170. + # If we're not in stream_request_body mode, this is the place where we parse the body.
  171. + if not _has_stream_request_body(self.__class__):
  172. + try:
  173. + self.request._parse_body()
  174. + except httputil.HTTPInputError as e:
  175. + raise HTTPError(400, "Invalid body: %s" % e) from e
  176. +
  177. self.path_args = [self.decode_argument(arg) for arg in args]
  178. self.path_kwargs = dict(
  179. (k, self.decode_argument(v, name=k)) for (k, v) in kwargs.items()
  180. @@ -1992,7 +2000,7 @@ def _has_stream_request_body(cls: Type[RequestHandler]) -> bool:
  181. def removeslash(
  182. - method: Callable[..., Optional[Awaitable[None]]]
  183. + method: Callable[..., Optional[Awaitable[None]]],
  184. ) -> Callable[..., Optional[Awaitable[None]]]:
  185. """Use this decorator to remove trailing slashes from the request path.
  186. @@ -2021,7 +2029,7 @@ def wrapper( # type: ignore
  187. def addslash(
  188. - method: Callable[..., Optional[Awaitable[None]]]
  189. + method: Callable[..., Optional[Awaitable[None]]],
  190. ) -> Callable[..., Optional[Awaitable[None]]]:
  191. """Use this decorator to add a missing trailing slash to the request path.
  192. @@ -2445,8 +2453,9 @@ def finish(self) -> None:
  193. if self.stream_request_body:
  194. future_set_result_unless_cancelled(self.request._body_future, None)
  195. else:
  196. + # Note that the body gets parsed in RequestHandler._execute so it can be in
  197. + # the right exception handler scope.
  198. self.request.body = b"".join(self.chunks)
  199. - self.request._parse_body()
  200. self.execute()
  201. def on_connection_close(self) -> None:
  202. @@ -3332,7 +3341,7 @@ def transform_chunk(self, chunk: bytes, finishing: bool) -> bytes:
  203. def authenticated(
  204. - method: Callable[..., Optional[Awaitable[None]]]
  205. + method: Callable[..., Optional[Awaitable[None]]],
  206. ) -> Callable[..., Optional[Awaitable[None]]]:
  207. """Decorate methods with this to require that the user be logged in.