0040-2.7-bpo-36742-Fix-urlparse.urlsplit-error-message-fo.patch 2.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. From 2b578479b96aa3deeeb8bac313a02b5cf3cb1aff Mon Sep 17 00:00:00 2001
  2. From: Victor Stinner <vstinner@redhat.com>
  3. Date: Tue, 11 Jun 2019 12:45:35 +0200
  4. Subject: [PATCH] [2.7] bpo-36742: Fix urlparse.urlsplit() error message for
  5. Unicode URL (GH-13937)
  6. If urlparse.urlsplit() detects an invalid netloc according to NFKC
  7. normalization, the error message type is now str rather than unicode,
  8. and repr() is used to format the URL, to prevent <exception str() failed>
  9. when displaying the error message.
  10. Signed-off-by: Peter Korsgaard <peter@korsgaard.com>
  11. ---
  12. Lib/test/test_urlparse.py | 9 +++++++++
  13. Lib/urlparse.py | 5 +++--
  14. .../NEWS.d/next/Library/2019-06-10-12-02-45.bpo-36742.UEdHXJ.rst | 3 +++
  15. 3 files changed, 15 insertions(+), 2 deletions(-)
  16. create mode 100644 Misc/NEWS.d/next/Library/2019-06-10-12-02-45.bpo-36742.UEdHXJ.rst
  17. diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
  18. index 857ed96d92..86c4a0595c 100644
  19. --- a/Lib/test/test_urlparse.py
  20. +++ b/Lib/test/test_urlparse.py
  21. @@ -656,6 +656,15 @@ class UrlParseTestCase(unittest.TestCase):
  22. with self.assertRaises(ValueError):
  23. urlparse.urlsplit(url)
  24. + # check error message: invalid netloc must be formatted with repr()
  25. + # to get an ASCII error message
  26. + with self.assertRaises(ValueError) as cm:
  27. + urlparse.urlsplit(u'http://example.com\uFF03@bing.com')
  28. + self.assertEqual(str(cm.exception),
  29. + "netloc u'example.com\\uff03@bing.com' contains invalid characters "
  30. + "under NFKC normalization")
  31. + self.assertIsInstance(cm.exception.args[0], str)
  32. +
  33. def test_main():
  34. test_support.run_unittest(UrlParseTestCase)
  35. diff --git a/Lib/urlparse.py b/Lib/urlparse.py
  36. index 6834f3c179..798b467b60 100644
  37. --- a/Lib/urlparse.py
  38. +++ b/Lib/urlparse.py
  39. @@ -180,8 +180,9 @@ def _checknetloc(netloc):
  40. return
  41. for c in '/?#@:':
  42. if c in netloc2:
  43. - raise ValueError(u"netloc '" + netloc + u"' contains invalid " +
  44. - u"characters under NFKC normalization")
  45. + raise ValueError("netloc %r contains invalid characters "
  46. + "under NFKC normalization"
  47. + % netloc)
  48. def urlsplit(url, scheme='', allow_fragments=True):
  49. """Parse a URL into 5 components:
  50. diff --git a/Misc/NEWS.d/next/Library/2019-06-10-12-02-45.bpo-36742.UEdHXJ.rst b/Misc/NEWS.d/next/Library/2019-06-10-12-02-45.bpo-36742.UEdHXJ.rst
  51. new file mode 100644
  52. index 0000000000..3ba774056f
  53. --- /dev/null
  54. +++ b/Misc/NEWS.d/next/Library/2019-06-10-12-02-45.bpo-36742.UEdHXJ.rst
  55. @@ -0,0 +1,3 @@
  56. +:func:`urlparse.urlsplit` error message for invalid ``netloc`` according to
  57. +NFKC normalization is now a :class:`str` string, rather than a
  58. +:class:`unicode` string, to prevent an error when displaying the message.
  59. --
  60. 2.11.0