# HG changeset patch # User Mads Kiilerich # Date 2021-05-18 00:58:06 # Node ID f0fbb0fe4462e2980c1de2263f77e3acc4b8e237 # Parent 070b8c39736fb41a6fb38ffa11a2c5bcbdeb757e git: update check for invalid URL characters to work with Python versions that include an attempt at fixing the very same problem With changes like https://github.com/python/cpython/commit/76cd81d60310d65d01f9d7b48a8985d8ab89c8b4 making it to Python 3.10 and being backported to previous Python versions, the approach in a8a51a3bdb61 no longer works when combined with urllib.parse.urlparse in d2f59de17bef: path will never contain the invalid characters. To catch this case anyway, add a new check to verify that the parsed URL can roundtrip back to the original representation with urllib.parse.urlunparse. The actual exception might vary, but one of them should always fire. There is a risk that the new check will reject some URLs that somehow aren't normalized. No such cases have been found yet. diff --git a/kallithea/lib/vcs/backends/git/repository.py b/kallithea/lib/vcs/backends/git/repository.py --- a/kallithea/lib/vcs/backends/git/repository.py +++ b/kallithea/lib/vcs/backends/git/repository.py @@ -192,7 +192,11 @@ class GitRepository(BaseRepository): >>> GitRepository._check_url('git://example.com/\t') Traceback (most recent call last): ... + urllib.error.URLError: + + The failure above will be one of, depending on the level of WhatWG support: urllib.error.URLError: + urllib.error.URLError: """ try: parsed_url = urllib.parse.urlparse(url) @@ -204,6 +208,10 @@ class GitRepository(BaseRepository): if os.path.isabs(url) and os.path.isdir(url): return + unparsed_url = urllib.parse.urlunparse(parsed_url) + if unparsed_url != url: + raise urllib.error.URLError("Invalid url: '%s' normalizes to '%s'" % (url, unparsed_url)) + if parsed_url.scheme == 'git': # Mitigate problems elsewhere with incorrect handling of encoded paths. 
# Don't trust urllib.parse.unquote but be prepared for more flexible implementations elsewhere.