From dcb320a0c85713c5dfe89a83d6eb295ad1511be8 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Tue, 27 Aug 2024 17:10:30 -0400 Subject: [PATCH] [3.8] [3.9] [3.11] gh-123270: Replaced SanitizedNames with a more surgical fix. (GH-123354) Applies changes from zipp 3.20.1 and jaraco/zippGH-124 (cherry picked from commit 2231286d78d328c2f575e0b05b16fe447d1656d6) (cherry picked from commit 17b77bb41409259bad1cd6c74761c18b6ab1e860) (cherry picked from commit 66d3383) Co-authored-by: Jason R. Coombs --- Lib/test/test_zipfile.py | 75 ++++++++++ Lib/zipfile.py | 9 - Misc/NEWS.d/next/Library/2024-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst | 3 3 files changed, 85 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -3054,6 +3054,81 @@ class TestPath(unittest.TestCase): data = ['/'.join(string.ascii_lowercase + str(n)) for n in range(10000)] zipfile.CompleteDirs._implied_dirs(data) + def test_malformed_paths(self): + """ + Path should handle malformed paths gracefully. + + Paths with leading slashes are not visible. + + Paths with dots are treated like regular files. + """ + data = io.BytesIO() + zf = zipfile.ZipFile(data, "w") + zf.writestr("../parent.txt", b"content") + zf.filename = '' + root = zipfile.Path(zf) + assert list(map(str, root.iterdir())) == ['../'] + assert root.joinpath('..').joinpath('parent.txt').read_bytes() == b'content' + + def test_unsupported_names(self): + """ + Path segments with special characters are readable. + + On some platforms or file systems, characters like + ``:`` and ``?`` are not allowed, but they are valid + in the zip file. + """ + data = io.BytesIO() + zf = zipfile.ZipFile(data, "w") + zf.writestr("path?", b"content") + zf.writestr("V: NMS.flac", b"fLaC...") + zf.filename = '' + root = zipfile.Path(zf) + contents = root.iterdir() + assert next(contents).name == 'path?' + assert next(contents).name == 'V: NMS.flac' + assert root.joinpath('V: NMS.flac').read_bytes() == b"fLaC..." + + def test_backslash_not_separator(self): + """ + In a zip file, backslashes are not separators. + """ + data = io.BytesIO() + zf = zipfile.ZipFile(data, "w") + zf.writestr(DirtyZipInfo.for_name("foo\\bar", zf), b"content") + zf.filename = '' + root = zipfile.Path(zf) + (first,) = root.iterdir() + assert not first.is_dir() + assert first.name == 'foo\\bar' + + +class DirtyZipInfo(zipfile.ZipInfo): + """ + Bypass name sanitization. + """ + + def __init__(self, filename, *args, **kwargs): + super().__init__(filename, *args, **kwargs) + self.filename = filename + + @classmethod + def for_name(cls, name, archive): + """ + Construct the same way that ZipFile.writestr does. + + TODO: extract this functionality and re-use + """ + self = cls(filename=name, date_time=time.localtime(time.time())[:6]) + self.compress_type = archive.compression + self.compress_level = archive.compresslevel + if self.filename.endswith('/'): # pragma: no cover + self.external_attr = 0o40775 << 16 # drwxrwxr-x + self.external_attr |= 0x10 # MS-DOS directory flag + else: + self.external_attr = 0o600 << 16 # ?rw------- + return self + if __name__ == "__main__": unittest.main() --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -2146,7 +2146,7 @@ def _parents(path): def _ancestry(path): """ Given a path with elements separated by - posixpath.sep, generate all elements of that path + posixpath.sep, generate all elements of that path. >>> list(_ancestry('b/d')) ['b/d', 'b'] @@ -2158,9 +2158,14 @@ def _ancestry(path): ['b'] >>> list(_ancestry('')) [] + + Multiple separators are treated like a single. + + >>> list(_ancestry('//b//d///f//')) + ['//b//d///f', '//b//d', '//b'] """ path = path.rstrip(posixpath.sep) - while path and path != posixpath.sep: + while path.rstrip(posixpath.sep): yield path path, tail = posixpath.split(path) --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst @@ -0,0 +1,3 @@ +Applied a more surgical fix for malformed payloads in :class:`zipfile.Path` +causing infinite loops (gh-122905) without breaking contents using +legitimate characters.