From ff2aadd3f575bac381ca3ef73127057303604256b1c850055820061d8be1f5a9 Mon Sep 17 00:00:00 2001 From: Matej Cepl Date: Mon, 27 Mar 2023 15:00:17 +0000 Subject: [PATCH 1/8] - Switch off obsoleting previous interpreters. OBS-URL: https://build.opensuse.org/package/show/devel:languages:python:Factory/python310?expand=0&rev=82 --- python310.changes | 5 +++++ python310.spec | 10 +++++----- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/python310.changes b/python310.changes index 21c207e..b043638 100644 --- a/python310.changes +++ b/python310.changes @@ -1,3 +1,8 @@ +------------------------------------------------------------------- +Mon Mar 27 14:59:57 UTC 2023 - Matej Cepl + +- Switch off obsoleting previous interpreters. + ------------------------------------------------------------------- Mon Mar 13 08:39:53 UTC 2023 - Matej Cepl diff --git a/python310.spec b/python310.spec index 6b565a6..68f0650 100644 --- a/python310.spec +++ b/python310.spec @@ -43,14 +43,14 @@ %define primary_interpreter 0 %endif -%if 0%{?sle_version} && 0%{?suse_version} < 1550 +# %%if 0%%{?sle_version} && 0%%{?suse_version} < 1550 # Obsoleting previous "latest" Python versions # Next versions will get more lines like for older versions -%define obsolete_python_versioned() \ -Obsoletes: python39%{?1:-%{1}} -%else +# %%define obsolete_python_versioned() \ +# Obsoletes: python39%%{?1:-%%{1}} +# %%else %define obsolete_python_versioned() %{nil} -%endif +# %%endif # Setting up variables %define _version %(c=%{version}; echo ${c/[a-z]*/}) From f5edaf893f28124194e22e71d5f915cf493c431aadab9ca694e2104362641894 Mon Sep 17 00:00:00 2001 From: Matej Cepl Date: Mon, 27 Mar 2023 15:08:59 +0000 Subject: [PATCH 2/8] Revert OBS-URL: https://build.opensuse.org/package/show/devel:languages:python:Factory/python310?expand=0&rev=83 --- python310.changes | 5 ----- python310.spec | 10 +++++----- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/python310.changes b/python310.changes index 
b043638..21c207e 100644 --- a/python310.changes +++ b/python310.changes @@ -1,8 +1,3 @@ -------------------------------------------------------------------- -Mon Mar 27 14:59:57 UTC 2023 - Matej Cepl - -- Switch off obsoleting previous interpreters. - ------------------------------------------------------------------- Mon Mar 13 08:39:53 UTC 2023 - Matej Cepl diff --git a/python310.spec b/python310.spec index 68f0650..6b565a6 100644 --- a/python310.spec +++ b/python310.spec @@ -43,14 +43,14 @@ %define primary_interpreter 0 %endif -# %%if 0%%{?sle_version} && 0%%{?suse_version} < 1550 +%if 0%{?sle_version} && 0%{?suse_version} < 1550 # Obsoleting previous "latest" Python versions # Next versions will get more lines like for older versions -# %%define obsolete_python_versioned() \ -# Obsoletes: python39%%{?1:-%%{1}} -# %%else +%define obsolete_python_versioned() \ +Obsoletes: python39%{?1:-%{1}} +%else %define obsolete_python_versioned() %{nil} -# %%endif +%endif # Setting up variables %define _version %(c=%{version}; echo ${c/[a-z]*/}) From 0a6bd2edcbde0dc70fe9a29f5e364ff3b8135ffb60594023121911a58808bd18 Mon Sep 17 00:00:00 2001 From: Matej Cepl Date: Thu, 27 Apr 2023 21:21:50 +0000 Subject: [PATCH 3/8] - Add CVE-2007-4559-filter-tarfile_extractall.patch to fix CVE-2007-4559 (bsc#1203750) by adding the filter for tarfile.extractall (PEP 706). 
OBS-URL: https://build.opensuse.org/package/show/devel:languages:python:Factory/python310?expand=0&rev=84 --- CVE-2007-4559-filter-tarfile_extractall.patch | 2589 +++++++++++++++++ python310.changes | 7 + python310.spec | 4 + 3 files changed, 2600 insertions(+) create mode 100644 CVE-2007-4559-filter-tarfile_extractall.patch diff --git a/CVE-2007-4559-filter-tarfile_extractall.patch b/CVE-2007-4559-filter-tarfile_extractall.patch new file mode 100644 index 0000000..6cbf07f --- /dev/null +++ b/CVE-2007-4559-filter-tarfile_extractall.patch @@ -0,0 +1,2589 @@ +From cde089c808a2c21dd311905ba7f1b7e1004c0ada Mon Sep 17 00:00:00 2001 +From: Petr Viktorin +Date: Tue, 31 Jan 2023 14:40:52 +0100 +Subject: [PATCH 01/15] =?UTF-8?q?Implement=20PEP=20706=20=E2=80=93=20Filte?= + =?UTF-8?q?r=20for=20tarfile.extractall?= +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +--- + Doc/library/shutil.rst | 24 + Doc/library/tarfile.rst | 457 ++++ + Lib/shutil.py | 17 + Lib/tarfile.py | 361 +++ + Lib/test/test_shutil.py | 41 + Lib/test/test_tarfile.py | 964 +++++++++- + Misc/NEWS.d/next/Library/2023-03-23-15-24-38.gh-issue-102953.YR4KaK.rst | 4 + 7 files changed, 1770 insertions(+), 98 deletions(-) + +--- a/Doc/library/shutil.rst ++++ b/Doc/library/shutil.rst +@@ -620,7 +620,7 @@ provided. They rely on the :mod:`zipfil + Remove the archive format *name* from the list of supported formats. + + +-.. function:: unpack_archive(filename[, extract_dir[, format]]) ++.. function:: unpack_archive(filename[, extract_dir[, format[, filter]]]) + + Unpack an archive. *filename* is the full path of the archive. + +@@ -634,6 +634,14 @@ provided. They rely on the :mod:`zipfil + registered for that extension. In case none is found, + a :exc:`ValueError` is raised. + ++ The keyword-only *filter* argument is passed to the underlying unpacking ++ function. For zip files, *filter* is not accepted. 
++ For tar files, it is recommended to set it to ``'data'``, ++ unless using features specific to tar and UNIX-like filesystems. ++ (See :ref:`tarfile-extraction-filter` for details.) ++ The ``'data'`` filter will become the default for tar files ++ in Python 3.14. ++ + .. audit-event:: shutil.unpack_archive filename,extract_dir,format shutil.unpack_archive + + .. warning:: +@@ -646,6 +654,9 @@ provided. They rely on the :mod:`zipfil + .. versionchanged:: 3.7 + Accepts a :term:`path-like object` for *filename* and *extract_dir*. + ++ .. versionchanged:: 3.12 ++ Added the *filter* argument. ++ + .. function:: register_unpack_format(name, extensions, function[, extra_args[, description]]) + + Registers an unpack format. *name* is the name of the format and +@@ -653,11 +664,14 @@ provided. They rely on the :mod:`zipfil + ``.zip`` for Zip files. + + *function* is the callable that will be used to unpack archives. The +- callable will receive the path of the archive, followed by the directory +- the archive must be extracted to. ++ callable will receive: + +- When provided, *extra_args* is a sequence of ``(name, value)`` tuples that +- will be passed as keywords arguments to the callable. ++ - the path of the archive, as a positional argument; ++ - the directory the archive must be extracted to, as a positional argument; ++ - possibly a *filter* keyword argument, if it was given to ++ :func:`unpack_archive`; ++ - additional keyword arguments, specified by *extra_args* as a sequence ++ of ``(name, value)`` tuples. + + *description* can be provided to describe the format, and will be returned + by the :func:`get_unpack_formats` function. +--- a/Doc/library/tarfile.rst ++++ b/Doc/library/tarfile.rst +@@ -36,6 +36,13 @@ Some facts and figures: + .. versionchanged:: 3.3 + Added support for :mod:`lzma` compression. + ++.. 
versionchanged:: 3.12 ++ Archives are extracted using a :ref:`filter <tarfile-extraction-filter>`, ++ which makes it possible to either limit surprising/dangerous features, ++ or to acknowledge that they are expected and the archive is fully trusted. ++ By default, archives are fully trusted, but this default is deprecated ++ and slated to change in Python 3.14. ++ + + .. function:: open(name=None, mode='r', fileobj=None, bufsize=10240, **kwargs) + +@@ -206,6 +213,38 @@ The :mod:`tarfile` module defines the fo + Is raised by :meth:`TarInfo.frombuf` if the buffer it gets is invalid. + + ++.. exception:: FilterError ++ ++ Base class for members :ref:`refused <tarfile-extraction-refuse>` by ++ filters. ++ ++ .. attribute:: tarinfo ++ ++ Information about the member that the filter refused to extract, ++ as :ref:`TarInfo <tarinfo-objects>`. ++ ++.. exception:: AbsolutePathError ++ ++ Raised to refuse extracting a member with an absolute path. ++ ++.. exception:: OutsideDestinationError ++ ++ Raised to refuse extracting a member outside the destination directory. ++ ++.. exception:: SpecialFileError ++ ++ Raised to refuse extracting a special file (e.g. a device or pipe). ++ ++.. exception:: AbsoluteLinkError ++ ++ Raised to refuse extracting a symbolic link with an absolute path. ++ ++.. exception:: LinkOutsideDestinationError ++ ++ Raised to refuse extracting a symbolic link pointing outside the destination ++ directory. ++ ++ + The following constants are available at the module level: + + .. data:: ENCODING +@@ -316,11 +355,8 @@ be finalized; only the internally used f + *debug* can be set from ``0`` (no debug messages) up to ``3`` (all debug + messages). The messages are written to ``sys.stderr``. + +- If *errorlevel* is ``0``, all errors are ignored when using :meth:`TarFile.extract`. +- Nevertheless, they appear as error messages in the debug output, when debugging +- is enabled. If ``1``, all *fatal* errors are raised as :exc:`OSError` +- exceptions.
If ``2``, all *non-fatal* errors are raised as :exc:`TarError` +- exceptions as well. ++ *errorlevel* controls how extraction errors are handled, ++ see :attr:`the corresponding attribute <~TarFile.errorlevel>`. + + The *encoding* and *errors* arguments define the character encoding to be + used for reading or writing the archive and how conversion errors are going +@@ -387,7 +423,7 @@ be finalized; only the internally used f + available. + + +-.. method:: TarFile.extractall(path=".", members=None, *, numeric_owner=False) ++.. method:: TarFile.extractall(path=".", members=None, *, numeric_owner=False, filter=None) + + Extract all members from the archive to the current working directory or + directory *path*. If optional *members* is given, it must be a subset of the +@@ -401,6 +437,12 @@ be finalized; only the internally used f + are used to set the owner/group for the extracted files. Otherwise, the named + values from the tarfile are used. + ++ The *filter* argument specifies how ``members`` are modified or rejected ++ before extraction. ++ See :ref:`tarfile-extraction-filter` for details. ++ It is recommended to set this explicitly depending on which *tar* features ++ you need to support. ++ + .. warning:: + + Never extract archives from untrusted sources without prior inspection. +@@ -408,14 +450,20 @@ be finalized; only the internally used f + that have absolute filenames starting with ``"/"`` or filenames with two + dots ``".."``. + ++ Set ``filter='data'`` to prevent the most dangerous security issues, ++ and read the :ref:`tarfile-extraction-filter` section for details. ++ + .. versionchanged:: 3.5 + Added the *numeric_owner* parameter. + + .. versionchanged:: 3.6 + The *path* parameter accepts a :term:`path-like object`. + ++ .. versionchanged:: 3.12 ++ Added the *filter* parameter. ++ + +-.. method:: TarFile.extract(member, path="", set_attrs=True, *, numeric_owner=False) ++.. 
method:: TarFile.extract(member, path="", set_attrs=True, *, numeric_owner=False, filter=None) + + Extract a member from the archive to the current working directory, using its + full name. Its file information is extracted as accurately as possible. *member* +@@ -423,9 +471,8 @@ be finalized; only the internally used f + directory using *path*. *path* may be a :term:`path-like object`. + File attributes (owner, mtime, mode) are set unless *set_attrs* is false. + +- If *numeric_owner* is :const:`True`, the uid and gid numbers from the tarfile +- are used to set the owner/group for the extracted files. Otherwise, the named +- values from the tarfile are used. ++ The *numeric_owner* and *filter* arguments are the same as ++ for :meth:`extractall`. + + .. note:: + +@@ -436,6 +483,9 @@ be finalized; only the internally used f + + See the warning for :meth:`extractall`. + ++ Set ``filter='data'`` to prevent the most dangerous security issues, ++ and read the :ref:`tarfile-extraction-filter` section for details. ++ + .. versionchanged:: 3.2 + Added the *set_attrs* parameter. + +@@ -445,6 +495,9 @@ be finalized; only the internally used f + .. versionchanged:: 3.6 + The *path* parameter accepts a :term:`path-like object`. + ++ .. versionchanged:: 3.12 ++ Added the *filter* parameter. ++ + + .. method:: TarFile.extractfile(member) + +@@ -457,6 +510,55 @@ be finalized; only the internally used f + .. versionchanged:: 3.3 + Return an :class:`io.BufferedReader` object. + ++.. attribute:: TarFile.errorlevel ++ :type: int ++ ++ If *errorlevel* is ``0``, errors are ignored when using :meth:`TarFile.extract` ++ and :meth:`TarFile.extractall`. ++ Nevertheless, they appear as error messages in the debug output when ++ *debug* is greater than 0. ++ If ``1`` (the default), all *fatal* errors are raised as :exc:`OSError` or ++ :exc:`FilterError` exceptions. If ``2``, all *non-fatal* errors are raised ++ as :exc:`TarError` exceptions as well. ++ ++ Some exceptions, e.g. 
ones caused by wrong argument types or data ++ corruption, are always raised. ++ ++ Custom :ref:`extraction filters <tarfile-extraction-filter>` ++ should raise :exc:`FilterError` for *fatal* errors ++ and :exc:`ExtractError` for *non-fatal* ones. ++ ++ Note that when an exception is raised, the archive may be partially ++ extracted. It is the user’s responsibility to clean up. ++ ++.. attribute:: TarFile.extraction_filter ++ ++ .. versionadded:: 3.12 ++ ++ The :ref:`extraction filter <tarfile-extraction-filter>` used ++ as a default for the *filter* argument of :meth:`~TarFile.extract` ++ and :meth:`~TarFile.extractall`. ++ ++ The attribute may be ``None`` or a callable. ++ String names are not allowed for this attribute, unlike the *filter* ++ argument to :meth:`~TarFile.extract`. ++ ++ If ``extraction_filter`` is ``None`` (the default), ++ calling an extraction method without a *filter* argument will raise a ++ ``DeprecationWarning``, ++ and fall back to the :func:`fully_trusted <fully_trusted_filter>` filter, ++ whose dangerous behavior matches previous versions of Python. ++ ++ In Python 3.14+, leaving ``extraction_filter=None`` will cause ++ extraction methods to use the :func:`data <data_filter>` filter by default. ++ ++ The attribute may be set on instances or overridden in subclasses. ++ It also is possible to set it on the ``TarFile`` class itself to set a ++ global default, although, since it affects all uses of *tarfile*, ++ it is best practice to only do so in top-level applications or ++ :mod:`site configuration <site>`. ++ To set a global default this way, a filter function needs to be wrapped in ++ :func:`staticmethod()` to prevent injection of a ``self`` argument. + + .. method:: TarFile.add(name, arcname=None, recursive=True, *, filter=None) + +@@ -532,8 +634,23 @@ permissions, owner etc.), it provides so + It does *not* contain the file's data itself. + + :class:`TarInfo` objects are returned by :class:`TarFile`'s methods +-:meth:`getmember`, :meth:`getmembers` and :meth:`gettarinfo`.
++:meth:`~TarFile.getmember`, :meth:`~TarFile.getmembers` and ++:meth:`~TarFile.gettarinfo`. + ++Modifying the objects returned by :meth:`~!TarFile.getmember` or ++:meth:`~!TarFile.getmembers` will affect all subsequent ++operations on the archive. ++For cases where this is unwanted, you can use :mod:`copy.copy() <copy>` or ++call the :meth:`~TarInfo.replace` method to create a modified copy in one step. ++ ++Several attributes can be set to ``None`` to indicate that a piece of metadata ++is unused or unknown. ++Different :class:`TarInfo` methods handle ``None`` differently: ++ ++- The :meth:`~TarFile.extract` or :meth:`~TarFile.extractall` methods will ++ ignore the corresponding metadata, leaving it set to a default. ++- :meth:`~TarFile.addfile` will fail. ++- :meth:`~TarFile.list` will print a placeholder string. + + .. class:: TarInfo(name="") + +@@ -566,24 +683,39 @@ A ``TarInfo`` object has the following p + + + .. attribute:: TarInfo.name ++ :type: str + + Name of the archive member. + + + .. attribute:: TarInfo.size ++ :type: int + + Size in bytes. + + + .. attribute:: TarInfo.mtime ++ :type: int | float ++ ++ Time of last modification in seconds since the :ref:`epoch <epoch>`, ++ as in :attr:`os.stat_result.st_mtime`. + +- Time of last modification. ++ .. versionchanged:: 3.12 + ++ Can be set to ``None`` for :meth:`~TarFile.extract` and ++ :meth:`~TarFile.extractall`, causing extraction to skip applying this ++ attribute. + + .. attribute:: TarInfo.mode ++ :type: int + +- Permission bits. ++ Permission bits, as for :func:`os.chmod`. + ++ .. versionchanged:: 3.12 ++ ++ Can be set to ``None`` for :meth:`~TarFile.extract` and ++ :meth:`~TarFile.extractall`, causing extraction to skip applying this ++ attribute. + + .. attribute:: TarInfo.type + +@@ -595,35 +727,76 @@ A ``TarInfo`` object has the following p + + + ..
attribute:: TarInfo.linkname ++ :type: str + + Name of the target file name, which is only present in :class:`TarInfo` objects + of type :const:`LNKTYPE` and :const:`SYMTYPE`. + + + .. attribute:: TarInfo.uid ++ :type: int + + User ID of the user who originally stored this member. + ++ .. versionchanged:: 3.12 ++ ++ Can be set to ``None`` for :meth:`~TarFile.extract` and ++ :meth:`~TarFile.extractall`, causing extraction to skip applying this ++ attribute. + + .. attribute:: TarInfo.gid ++ :type: int + + Group ID of the user who originally stored this member. + ++ .. versionchanged:: 3.12 ++ ++ Can be set to ``None`` for :meth:`~TarFile.extract` and ++ :meth:`~TarFile.extractall`, causing extraction to skip applying this ++ attribute. + + .. attribute:: TarInfo.uname ++ :type: str + + User name. + ++ .. versionchanged:: 3.12 ++ ++ Can be set to ``None`` for :meth:`~TarFile.extract` and ++ :meth:`~TarFile.extractall`, causing extraction to skip applying this ++ attribute. + + .. attribute:: TarInfo.gname ++ :type: str + + Group name. + ++ .. versionchanged:: 3.12 ++ ++ Can be set to ``None`` for :meth:`~TarFile.extract` and ++ :meth:`~TarFile.extractall`, causing extraction to skip applying this ++ attribute. + + .. attribute:: TarInfo.pax_headers ++ :type: dict + + A dictionary containing key-value pairs of an associated pax extended header. + ++.. method:: TarInfo.replace(name=..., mtime=..., mode=..., linkname=..., ++ uid=..., gid=..., uname=..., gname=..., ++ deep=True) ++ ++ .. versionadded:: 3.12 ++ ++ Return a *new* copy of the :class:`!TarInfo` object with the given attributes ++ changed. For example, to return a ``TarInfo`` with the group name set to ++ ``'staff'``, use:: ++ ++ new_tarinfo = old_tarinfo.replace(gname='staff') ++ ++ By default, a deep copy is made. ++ If *deep* is false, the copy is shallow, i.e. ``pax_headers`` ++ and any custom attributes are shared with the original ``TarInfo`` object. 
+ + A :class:`TarInfo` object also provides some convenient query methods: + +@@ -673,9 +846,258 @@ A :class:`TarInfo` object also provides + Return :const:`True` if it is one of character device, block device or FIFO. + + ++.. _tarfile-extraction-filter: ++ ++Extraction filters ++------------------ ++ ++.. versionadded:: 3.12 ++ ++The *tar* format is designed to capture all details of a UNIX-like filesystem, ++which makes it very powerful. ++Unfortunately, the features make it easy to create tar files that have ++unintended -- and possibly malicious -- effects when extracted. ++For example, extracting a tar file can overwrite arbitrary files in various ++ways (e.g. by using absolute paths, ``..`` path components, or symlinks that ++affect later members). ++ ++In most cases, the full functionality is not needed. ++Therefore, *tarfile* supports extraction filters: a mechanism to limit ++functionality, and thus mitigate some of the security issues. ++ ++.. seealso:: ++ ++ :pep:`706` ++ Contains further motivation and rationale behind the design. ++ ++The *filter* argument to :meth:`TarFile.extract` or :meth:`~TarFile.extractall` ++can be: ++ ++* the string ``'fully_trusted'``: Honor all metadata as specified in the ++ archive. ++ Should be used if the user trusts the archive completely, or implements ++ their own complex verification. ++ ++* the string ``'tar'``: Honor most *tar*-specific features (i.e. features of ++ UNIX-like filesystems), but block features that are very likely to be ++ surprising or malicious. See :func:`tar_filter` for details. ++ ++* the string ``'data'``: Ignore or block most features specific to UNIX-like ++ filesystems. Intended for extracting cross-platform data archives. ++ See :func:`data_filter` for details. ++ ++* ``None`` (default): Use :attr:`TarFile.extraction_filter`. 
++ ++ If that is also ``None`` (the default), raise a ``DeprecationWarning``, ++ and fall back to the ``'fully_trusted'`` filter, whose dangerous behavior ++ matches previous versions of Python. ++ ++ In Python 3.14, the ``'data'`` filter will become the default instead. ++ It's possible to switch earlier; see :attr:`TarFile.extraction_filter`. ++ ++* A callable which will be called for each extracted member with a ++ :ref:`TarInfo <tarinfo-objects>` describing the member and the destination ++ path to where the archive is extracted (i.e. the same path is used for all ++ members):: ++ ++ filter(/, member: TarInfo, path: str) -> TarInfo | None ++ ++ The callable is called just before each member is extracted, so it can ++ take the current state of the disk into account. ++ It can: ++ ++ - return a :class:`TarInfo` object which will be used instead of the metadata ++ in the archive, or ++ - return ``None``, in which case the member will be skipped, or ++ - raise an exception to abort the operation or skip the member, ++ depending on :attr:`~TarFile.errorlevel`. ++ Note that when extraction is aborted, :meth:`~TarFile.extractall` may leave ++ the archive partially extracted. It does not attempt to clean up. ++ ++Default named filters ++~~~~~~~~~~~~~~~~~~~~~ ++ ++The pre-defined, named filters are available as functions, so they can be ++reused in custom filters: ++ ++.. function:: fully_trusted_filter(/, member, path) ++ ++ Return *member* unchanged. ++ ++ This implements the ``'fully_trusted'`` filter. ++ ++.. function:: tar_filter(/, member, path) ++ ++ Implements the ``'tar'`` filter. ++ ++ - Strip leading slashes (``/`` and :attr:`os.sep`) from filenames. ++ - :ref:`Refuse <tarfile-extraction-refuse>` to extract files with absolute ++ paths (in case the name is absolute ++ even after stripping slashes, e.g. ``C:/foo`` on Windows). ++ This raises :class:`~tarfile.AbsolutePathError`. ++ - :ref:`Refuse <tarfile-extraction-refuse>` to extract files whose absolute ++ path (after following symlinks) would end up outside the destination.
++ This raises :class:`~tarfile.OutsideDestinationError`. ++ - Clear high mode bits (setuid, setgid, sticky) and group/other write bits ++ (:attr:`~stat.S_IWGRP`|:attr:`~stat.S_IWOTH`). ++ ++ Return the modified ``TarInfo`` member. ++ ++.. function:: data_filter(/, member, path) ++ ++ Implements the ``'data'`` filter. ++ In addition to what ``tar_filter`` does: ++ ++ - :ref:`Refuse <tarfile-extraction-refuse>` to extract links (hard or soft) ++ that link to absolute paths, or ones that link outside the destination. ++ ++ This raises :class:`~tarfile.AbsoluteLinkError` or ++ :class:`~tarfile.LinkOutsideDestinationError`. ++ ++ Note that such files are refused even on platforms that do not support ++ symbolic links. ++ ++ - :ref:`Refuse <tarfile-extraction-refuse>` to extract device files ++ (including pipes). ++ This raises :class:`~tarfile.SpecialFileError`. ++ ++ - For regular files, including hard links: ++ ++ - Set the owner read and write permissions ++ (:attr:`~stat.S_IRUSR`|:attr:`~stat.S_IWUSR`). ++ - Remove the group & other executable permission ++ (:attr:`~stat.S_IXGRP`|:attr:`~stat.S_IXOTH`) ++ if the owner doesn’t have it (:attr:`~stat.S_IXUSR`). ++ ++ - For other files (directories), set ``mode`` to ``None``, so ++ that extraction methods skip applying permission bits. ++ - Set user and group info (``uid``, ``gid``, ``uname``, ``gname``) ++ to ``None``, so that extraction methods skip setting it. ++ ++ Return the modified ``TarInfo`` member. ++ ++ ++.. _tarfile-extraction-refuse: ++ ++Filter errors ++~~~~~~~~~~~~~ ++ ++When a filter refuses to extract a file, it will raise an appropriate exception, ++a subclass of :class:`~tarfile.FilterError`. ++This will abort the extraction if :attr:`TarFile.errorlevel` is 1 or more. ++With ``errorlevel=0`` the error will be logged and the member will be skipped, ++but extraction will continue.
++ ++ ++Hints for further verification ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++Even with ``filter='data'``, *tarfile* is not suited for extracting untrusted ++files without prior inspection. ++Among other issues, the pre-defined filters do not prevent denial-of-service ++attacks. Users should do additional checks. ++ ++Here is an incomplete list of things to consider: ++ ++* Extract to a :func:`new temporary directory <tempfile.mkdtemp>` ++ to prevent e.g. exploiting pre-existing links, and to make it easier to ++ clean up after a failed extraction. ++* When working with untrusted data, use external (e.g. OS-level) limits on ++ disk, memory and CPU usage. ++* Check filenames against an allow-list of characters ++ (to filter out control characters, confusables, foreign path separators, ++ etc.). ++* Check that filenames have expected extensions (discouraging files that ++ execute when you “click on them”, or extension-less files like Windows special device names). ++* Limit the number of extracted files, total size of extracted data, ++ filename length (including symlink length), and size of individual files. ++* Check for files that would be shadowed on case-insensitive filesystems. ++ ++Also note that: ++ ++* Tar files may contain multiple versions of the same file. ++ Later ones are expected to overwrite any earlier ones. ++ This feature is crucial to allow updating tape archives, but can be abused ++ maliciously. ++* *tarfile* does not protect against issues with “live” data, ++ e.g. an attacker tinkering with the destination (or source) directory while ++ extraction (or archiving) is in progress. ++ ++ ++Supporting older Python versions ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++Extraction filters were added to Python 3.12, but may be backported to older ++versions as security updates. ++To check whether the feature is available, use e.g. ++``hasattr(tarfile, 'data_filter')`` rather than checking the Python version.
++ ++The following examples show how to support Python versions with and without ++the feature. ++Note that setting ``extraction_filter`` will affect any subsequent operations. ++ ++* Fully trusted archive:: ++ ++ my_tarfile.extraction_filter = (lambda member, path: member) ++ my_tarfile.extractall() ++ ++* Use the ``'data'`` filter if available, but revert to Python 3.11 behavior ++ (``'fully_trusted'``) if this feature is not available:: ++ ++ my_tarfile.extraction_filter = getattr(tarfile, 'data_filter', ++ (lambda member, path: member)) ++ my_tarfile.extractall() ++ ++* Use the ``'data'`` filter; *fail* if it is not available:: ++ ++ my_tarfile.extractall(filter=tarfile.data_filter) ++ ++ or:: ++ ++ my_tarfile.extraction_filter = tarfile.data_filter ++ my_tarfile.extractall() ++ ++* Use the ``'data'`` filter; *warn* if it is not available:: ++ ++ if hasattr(tarfile, 'data_filter'): ++ my_tarfile.extractall(filter='data') ++ else: ++ # remove this when no longer needed ++ warn_the_user('Extracting may be unsafe; consider updating Python') ++ my_tarfile.extractall() ++ ++ ++Stateful extraction filter example ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++While *tarfile*'s extraction methods take a simple *filter* callable, ++custom filters may be more complex objects with an internal state. ++It may be useful to write these as context managers, to be used like this:: ++ ++ with StatefulFilter() as filter_func: ++ tar.extractall(path, filter=filter_func) ++ ++Such a filter can be written as, for example:: ++ ++ class StatefulFilter: ++ def __init__(self): ++ self.file_count = 0 ++ ++ def __enter__(self): ++ return self ++ ++ def __call__(self, member, path): ++ self.file_count += 1 ++ return member ++ ++ def __exit__(self, *exc_info): ++ print(f'{self.file_count} files extracted') ++ ++ + .. _tarfile-commandline: + .. program:: tarfile + ++ + Command-Line Interface + ---------------------- + +@@ -745,6 +1167,13 @@ Command-line options + + Verbose output. + ++.. 
cmdoption:: --filter ++ ++ Specifies the *filter* for ``--extract``. ++ See :ref:`tarfile-extraction-filter` for details. ++ Only string names are accepted (that is, ``fully_trusted``, ``tar``, ++ and ``data``). ++ + .. _tar-examples: + + Examples +@@ -754,7 +1183,7 @@ How to extract an entire tar archive to + + import tarfile + tar = tarfile.open("sample.tar.gz") +- tar.extractall() ++ tar.extractall(filter='data') + tar.close() + + How to extract a subset of a tar archive with :meth:`TarFile.extractall` using +--- a/Lib/shutil.py ++++ b/Lib/shutil.py +@@ -1222,7 +1222,7 @@ def _unpack_zipfile(filename, extract_di + finally: + zip.close() + +-def _unpack_tarfile(filename, extract_dir): ++def _unpack_tarfile(filename, extract_dir, *, filter=None): + """Unpack tar/tar.gz/tar.bz2/tar.xz `filename` to `extract_dir` + """ + import tarfile # late import for breaking circular dependency +@@ -1232,7 +1232,7 @@ def _unpack_tarfile(filename, extract_di + raise ReadError( + "%s is not a compressed or uncompressed tar file" % filename) + try: +- tarobj.extractall(extract_dir) ++ tarobj.extractall(extract_dir, filter=filter) + finally: + tarobj.close() + +@@ -1265,7 +1265,7 @@ def _find_unpack_format(filename): + return name + return None + +-def unpack_archive(filename, extract_dir=None, format=None): ++def unpack_archive(filename, extract_dir=None, format=None, *, filter=None): + """Unpack an archive. + + `filename` is the name of the archive. +@@ -1279,6 +1279,9 @@ def unpack_archive(filename, extract_dir + was registered for that extension. + + In case none is found, a ValueError is raised. ++ ++ If `filter` is given, it is passed to the underlying ++ extraction function. 
+ """ + sys.audit("shutil.unpack_archive", filename, extract_dir, format) + +@@ -1288,6 +1291,10 @@ def unpack_archive(filename, extract_dir + extract_dir = os.fspath(extract_dir) + filename = os.fspath(filename) + ++ if filter is None: ++ filter_kwargs = {} ++ else: ++ filter_kwargs = {'filter': filter} + if format is not None: + try: + format_info = _UNPACK_FORMATS[format] +@@ -1295,7 +1302,7 @@ def unpack_archive(filename, extract_dir + raise ValueError("Unknown unpack format '{0}'".format(format)) from None + + func = format_info[1] +- func(filename, extract_dir, **dict(format_info[2])) ++ func(filename, extract_dir, **dict(format_info[2]), **filter_kwargs) + else: + # we need to look at the registered unpackers supported extensions + format = _find_unpack_format(filename) +@@ -1303,7 +1310,7 @@ def unpack_archive(filename, extract_dir + raise ReadError("Unknown archive format '{0}'".format(filename)) + + func = _UNPACK_FORMATS[format][1] +- kwargs = dict(_UNPACK_FORMATS[format][2]) ++ kwargs = dict(_UNPACK_FORMATS[format][2]) | filter_kwargs + func(filename, extract_dir, **kwargs) + + +--- a/Lib/tarfile.py ++++ b/Lib/tarfile.py +@@ -46,6 +46,7 @@ import time + import struct + import copy + import re ++import warnings + + try: + import pwd +@@ -69,7 +70,11 @@ except NameError: + __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError", + "CompressionError", "StreamError", "ExtractError", "HeaderError", + "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT", +- "DEFAULT_FORMAT", "open"] ++ "DEFAULT_FORMAT", "open","fully_trusted_filter", "data_filter", ++ "tar_filter", "FilterError", "AbsoluteLinkError", ++ "OutsideDestinationError", "SpecialFileError", "AbsolutePathError", ++ "LinkOutsideDestinationError"] ++ + + #--------------------------------------------------------- + # tar constants +@@ -158,6 +163,8 @@ else: + def stn(s, length, encoding, errors): + """Convert a string to a null-terminated bytes object. 
+ """ ++ if s is None: ++ raise ValueError("metadata cannot contain None") + s = s.encode(encoding, errors) + return s[:length] + (length - len(s)) * NUL + +@@ -709,9 +716,127 @@ class ExFileObject(io.BufferedReader): + super().__init__(fileobj) + #class ExFileObject + ++ ++#----------------------------- ++# extraction filters (PEP 706) ++#----------------------------- ++ ++class FilterError(TarError): ++ pass ++ ++class AbsolutePathError(FilterError): ++ def __init__(self, tarinfo): ++ self.tarinfo = tarinfo ++ super().__init__(f'member {tarinfo.name!r} has an absolute path') ++ ++class OutsideDestinationError(FilterError): ++ def __init__(self, tarinfo, path): ++ self.tarinfo = tarinfo ++ self._path = path ++ super().__init__(f'{tarinfo.name!r} would be extracted to {path!r}, ' ++ + 'which is outside the destination') ++ ++class SpecialFileError(FilterError): ++ def __init__(self, tarinfo): ++ self.tarinfo = tarinfo ++ super().__init__(f'{tarinfo.name!r} is a special file') ++ ++class AbsoluteLinkError(FilterError): ++ def __init__(self, tarinfo): ++ self.tarinfo = tarinfo ++ super().__init__(f'{tarinfo.name!r} is a symlink to an absolute path') ++ ++class LinkOutsideDestinationError(FilterError): ++ def __init__(self, tarinfo, path): ++ self.tarinfo = tarinfo ++ self._path = path ++ super().__init__(f'{tarinfo.name!r} would link to {path!r}, ' ++ + 'which is outside the destination') ++ ++def _get_filtered_attrs(member, dest_path, for_data=True): ++ new_attrs = {} ++ name = member.name ++ dest_path = os.path.realpath(dest_path) ++ # Strip leading / (tar's directory separator) from filenames. ++ # Include os.sep (target OS directory separator) as well. ++ if name.startswith(('/', os.sep)): ++ name = new_attrs['name'] = member.path.lstrip('/' + os.sep) ++ if os.path.isabs(name): ++ # Path is absolute even after stripping. ++ # For example, 'C:/foo' on Windows. 
++ raise AbsolutePathError(member) ++ # Ensure we stay in the destination ++ target_path = os.path.realpath(os.path.join(dest_path, name)) ++ if os.path.commonpath([target_path, dest_path]) != dest_path: ++ raise OutsideDestinationError(member, target_path) ++ # Limit permissions (no high bits, and go-w) ++ mode = member.mode ++ if mode is not None: ++ # Strip high bits & group/other write bits ++ mode = mode & 0o755 ++ if for_data: ++ # For data, handle permissions & file types ++ if member.isreg() or member.islnk(): ++ if not mode & 0o100: ++ # Clear executable bits if not executable by user ++ mode &= ~0o111 ++ # Ensure owner can read & write ++ mode |= 0o600 ++ elif member.isdir() or member.issym(): ++ # Ignore mode for directories & symlinks ++ mode = None ++ else: ++ # Reject special files ++ raise SpecialFileError(member) ++ if mode != member.mode: ++ new_attrs['mode'] = mode ++ if for_data: ++ # Ignore ownership for 'data' ++ if member.uid is not None: ++ new_attrs['uid'] = None ++ if member.gid is not None: ++ new_attrs['gid'] = None ++ if member.uname is not None: ++ new_attrs['uname'] = None ++ if member.gname is not None: ++ new_attrs['gname'] = None ++ # Check link destination for 'data' ++ if member.islnk() or member.issym(): ++ if os.path.isabs(member.linkname): ++ raise AbsoluteLinkError(member) ++ target_path = os.path.realpath(os.path.join(dest_path, member.linkname)) ++ if os.path.commonpath([target_path, dest_path]) != dest_path: ++ raise LinkOutsideDestinationError(member, target_path) ++ return new_attrs ++ ++def fully_trusted_filter(member, dest_path): ++ return member ++ ++def tar_filter(member, dest_path): ++ new_attrs = _get_filtered_attrs(member, dest_path, False) ++ if new_attrs: ++ return member.replace(**new_attrs, deep=False) ++ return member ++ ++def data_filter(member, dest_path): ++ new_attrs = _get_filtered_attrs(member, dest_path, True) ++ if new_attrs: ++ return member.replace(**new_attrs, deep=False) ++ return member ++ 
++_NAMED_FILTERS = { ++ "fully_trusted": fully_trusted_filter, ++ "tar": tar_filter, ++ "data": data_filter, ++} ++ + #------------------ + # Exported Classes + #------------------ ++ ++# Sentinel for replace() defaults, meaning "don't change the attribute" ++_KEEP = object() ++ + class TarInfo(object): + """Informational class which holds the details about an + archive member given by a tar header block. +@@ -792,12 +917,44 @@ class TarInfo(object): + def __repr__(self): + return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self)) + ++ def replace(self, *, ++ name=_KEEP, mtime=_KEEP, mode=_KEEP, linkname=_KEEP, ++ uid=_KEEP, gid=_KEEP, uname=_KEEP, gname=_KEEP, ++ deep=True, _KEEP=_KEEP): ++ """Return a deep copy of self with the given attributes replaced. ++ """ ++ if deep: ++ result = copy.deepcopy(self) ++ else: ++ result = copy.copy(self) ++ if name is not _KEEP: ++ result.name = name ++ if mtime is not _KEEP: ++ result.mtime = mtime ++ if mode is not _KEEP: ++ result.mode = mode ++ if linkname is not _KEEP: ++ result.linkname = linkname ++ if uid is not _KEEP: ++ result.uid = uid ++ if gid is not _KEEP: ++ result.gid = gid ++ if uname is not _KEEP: ++ result.uname = uname ++ if gname is not _KEEP: ++ result.gname = gname ++ return result ++ + def get_info(self): + """Return the TarInfo's attributes as a dictionary. + """ ++ if self.mode is None: ++ mode = None ++ else: ++ mode = self.mode & 0o7777 + info = { + "name": self.name, +- "mode": self.mode & 0o7777, ++ "mode": mode, + "uid": self.uid, + "gid": self.gid, + "size": self.size, +@@ -820,6 +977,9 @@ class TarInfo(object): + """Return a tar header as a string of 512 byte blocks. 
+ """ + info = self.get_info() ++ for name, value in info.items(): ++ if value is None: ++ raise ValueError("%s may not be None" % name) + + if format == USTAR_FORMAT: + return self.create_ustar_header(info, encoding, errors) +@@ -950,6 +1110,12 @@ class TarInfo(object): + devmajor = stn("", 8, encoding, errors) + devminor = stn("", 8, encoding, errors) + ++ # None values in metadata should cause ValueError. ++ # itn()/stn() do this for all fields except type. ++ filetype = info.get("type", REGTYPE) ++ if filetype is None: ++ raise ValueError("TarInfo.type must not be None") ++ + parts = [ + stn(info.get("name", ""), 100, encoding, errors), + itn(info.get("mode", 0) & 0o7777, 8, format), +@@ -958,7 +1124,7 @@ class TarInfo(object): + itn(info.get("size", 0), 12, format), + itn(info.get("mtime", 0), 12, format), + b" ", # checksum field +- info.get("type", REGTYPE), ++ filetype, + stn(info.get("linkname", ""), 100, encoding, errors), + info.get("magic", POSIX_MAGIC), + stn(info.get("uname", ""), 32, encoding, errors), +@@ -1468,6 +1634,8 @@ class TarFile(object): + + fileobject = ExFileObject # The file-object for extractfile(). + ++ extraction_filter = None # The default filter for extraction. 
++ + def __init__(self, name=None, mode="r", fileobj=None, format=None, + tarinfo=None, dereference=None, ignore_zeros=None, encoding=None, + errors="surrogateescape", pax_headers=None, debug=None, +@@ -1940,7 +2108,10 @@ class TarFile(object): + members = self + for tarinfo in members: + if verbose: +- _safe_print(stat.filemode(tarinfo.mode)) ++ if tarinfo.mode is None: ++ _safe_print("??????????") ++ else: ++ _safe_print(stat.filemode(tarinfo.mode)) + _safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid, + tarinfo.gname or tarinfo.gid)) + if tarinfo.ischr() or tarinfo.isblk(): +@@ -1948,8 +2119,11 @@ class TarFile(object): + ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor))) + else: + _safe_print("%10d" % tarinfo.size) +- _safe_print("%d-%02d-%02d %02d:%02d:%02d" \ +- % time.localtime(tarinfo.mtime)[:6]) ++ if tarinfo.mtime is None: ++ _safe_print("????-??-?? ??:??:??") ++ else: ++ _safe_print("%d-%02d-%02d %02d:%02d:%02d" \ ++ % time.localtime(tarinfo.mtime)[:6]) + + _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else "")) + +@@ -2036,32 +2210,63 @@ class TarFile(object): + + self.members.append(tarinfo) + +- def extractall(self, path=".", members=None, *, numeric_owner=False): ++ def _get_filter_function(self, filter): ++ if filter is None: ++ filter = self.extraction_filter ++ if filter is None: ++ warnings.warn( ++ 'Python 3.14 will, by default, filter extracted tar ' ++ + 'archives and reject files or modify their metadata. ' ++ + 'Use the filter argument to control this behavior.', ++ DeprecationWarning) ++ return fully_trusted_filter ++ if isinstance(filter, str): ++ raise TypeError( ++ 'String names are not supported for ' ++ + 'TarFile.extraction_filter. 
Use a function such as ' ++ + 'tarfile.data_filter directly.') ++ return filter ++ if callable(filter): ++ return filter ++ try: ++ return _NAMED_FILTERS[filter] ++ except KeyError: ++ raise ValueError(f"filter {filter!r} not found") from None ++ ++ def extractall(self, path=".", members=None, *, numeric_owner=False, ++ filter=None): + """Extract all members from the archive to the current working + directory and set owner, modification time and permissions on + directories afterwards. `path' specifies a different directory + to extract to. `members' is optional and must be a subset of the + list returned by getmembers(). If `numeric_owner` is True, only + the numbers for user/group names are used and not the names. ++ ++ The `filter` function will be called on each member just ++ before extraction. ++ It can return a changed TarInfo or None to skip the member. ++ String names of common filters are accepted. + """ + directories = [] + ++ filter_function = self._get_filter_function(filter) + if members is None: + members = self + +- for tarinfo in members: ++ for member in members: ++ tarinfo = self._get_extract_tarinfo(member, filter_function, path) ++ if tarinfo is None: ++ continue + if tarinfo.isdir(): +- # Extract directories with a safe mode. ++ # For directories, delay setting attributes until later, ++ # since permissions can interfere with extraction and ++ # extracting contents can reset mtime. + directories.append(tarinfo) +- tarinfo = copy.copy(tarinfo) +- tarinfo.mode = 0o700 +- # Do not set_attrs directories, as we will do that further down +- self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(), +- numeric_owner=numeric_owner) ++ self._extract_one(tarinfo, path, set_attrs=not tarinfo.isdir(), ++ numeric_owner=numeric_owner) + + # Reverse sort directories. +- directories.sort(key=lambda a: a.name) +- directories.reverse() ++ directories.sort(key=lambda a: a.name, reverse=True) + + # Set correct owner, mtime and filemode on directories. 
+ for tarinfo in directories: +@@ -2071,12 +2276,10 @@ class TarFile(object): + self.utime(tarinfo, dirpath) + self.chmod(tarinfo, dirpath) + except ExtractError as e: +- if self.errorlevel > 1: +- raise +- else: +- self._dbg(1, "tarfile: %s" % e) ++ self._handle_nonfatal_error(e) + +- def extract(self, member, path="", set_attrs=True, *, numeric_owner=False): ++ def extract(self, member, path="", set_attrs=True, *, numeric_owner=False, ++ filter=None): + """Extract a member from the archive to the current working directory, + using its full name. Its file information is extracted as accurately + as possible. `member' may be a filename or a TarInfo object. You can +@@ -2084,35 +2287,70 @@ class TarFile(object): + mtime, mode) are set unless `set_attrs' is False. If `numeric_owner` + is True, only the numbers for user/group names are used and not + the names. ++ ++ The `filter` function will be called before extraction. ++ It can return a changed TarInfo or None to skip the member. ++ String names of common filters are accepted. + """ +- self._check("r") ++ filter_function = self._get_filter_function(filter) ++ tarinfo = self._get_extract_tarinfo(member, filter_function, path) ++ if tarinfo is not None: ++ self._extract_one(tarinfo, path, set_attrs, numeric_owner) + ++ def _get_extract_tarinfo(self, member, filter_function, path): ++ """Get filtered TarInfo (or None) from member, which might be a str""" + if isinstance(member, str): + tarinfo = self.getmember(member) + else: + tarinfo = member + ++ unfiltered = tarinfo ++ try: ++ tarinfo = filter_function(tarinfo, path) ++ except (OSError, FilterError) as e: ++ self._handle_fatal_error(e) ++ except ExtractError as e: ++ self._handle_nonfatal_error(e) ++ if tarinfo is None: ++ self._dbg(2, "tarfile: Excluded %r" % unfiltered.name) ++ return None + # Prepare the link target for makelink(). 
+ if tarinfo.islnk(): ++ tarinfo = copy.copy(tarinfo) + tarinfo._link_target = os.path.join(path, tarinfo.linkname) ++ return tarinfo ++ ++ def _extract_one(self, tarinfo, path, set_attrs, numeric_owner): ++ """Extract from filtered tarinfo to disk""" ++ self._check("r") + + try: + self._extract_member(tarinfo, os.path.join(path, tarinfo.name), + set_attrs=set_attrs, + numeric_owner=numeric_owner) + except OSError as e: +- if self.errorlevel > 0: +- raise +- else: +- if e.filename is None: +- self._dbg(1, "tarfile: %s" % e.strerror) +- else: +- self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename)) ++ self._handle_fatal_error(e) + except ExtractError as e: +- if self.errorlevel > 1: +- raise ++ self._handle_nonfatal_error(e) ++ ++ def _handle_nonfatal_error(self, e): ++ """Handle non-fatal error (ExtractError) according to errorlevel""" ++ if self.errorlevel > 1: ++ raise ++ else: ++ self._dbg(1, "tarfile: %s" % e) ++ ++ def _handle_fatal_error(self, e): ++ """Handle "fatal" error according to self.errorlevel""" ++ if self.errorlevel > 0: ++ raise ++ elif isinstance(e, OSError): ++ if e.filename is None: ++ self._dbg(1, "tarfile: %s" % e.strerror) + else: +- self._dbg(1, "tarfile: %s" % e) ++ self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename)) ++ else: ++ self._dbg(1, "tarfile: %s %s" % (type(e).__name__, e)) + + def extractfile(self, member): + """Extract a member from the archive as a file object. `member' may be +@@ -2199,9 +2437,13 @@ class TarFile(object): + """Make a directory called targetpath. + """ + try: +- # Use a safe mode for the directory, the real mode is set +- # later in _extract_member(). +- os.mkdir(targetpath, 0o700) ++ if tarinfo.mode is None: ++ # Use the system's default mode ++ os.mkdir(targetpath) ++ else: ++ # Use a safe mode for the directory, the real mode is set ++ # later in _extract_member(). 
++ os.mkdir(targetpath, 0o700) + except FileExistsError: + pass + +@@ -2244,6 +2486,9 @@ class TarFile(object): + raise ExtractError("special devices not supported by system") + + mode = tarinfo.mode ++ if mode is None: ++ # Use mknod's default ++ mode = 0o600 + if tarinfo.isblk(): + mode |= stat.S_IFBLK + else: +@@ -2265,7 +2510,6 @@ class TarFile(object): + os.unlink(targetpath) + os.symlink(tarinfo.linkname, targetpath) + else: +- # See extract(). + if os.path.exists(tarinfo._link_target): + os.link(tarinfo._link_target, targetpath) + else: +@@ -2290,15 +2534,19 @@ class TarFile(object): + u = tarinfo.uid + if not numeric_owner: + try: +- if grp: ++ if grp and tarinfo.gname: + g = grp.getgrnam(tarinfo.gname)[2] + except KeyError: + pass + try: +- if pwd: ++ if pwd and tarinfo.uname: + u = pwd.getpwnam(tarinfo.uname)[2] + except KeyError: + pass ++ if g is None: ++ g = -1 ++ if u is None: ++ u = -1 + try: + if tarinfo.issym() and hasattr(os, "lchown"): + os.lchown(targetpath, u, g) +@@ -2310,6 +2558,8 @@ class TarFile(object): + def chmod(self, tarinfo, targetpath): + """Set file permissions of targetpath according to tarinfo. + """ ++ if tarinfo.mode is None: ++ return + try: + os.chmod(targetpath, tarinfo.mode) + except OSError as e: +@@ -2318,10 +2568,13 @@ class TarFile(object): + def utime(self, tarinfo, targetpath): + """Set modification time of targetpath according to tarinfo. + """ ++ mtime = tarinfo.mtime ++ if mtime is None: ++ return + if not hasattr(os, 'utime'): + return + try: +- os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime)) ++ os.utime(targetpath, (mtime, mtime)) + except OSError as e: + raise ExtractError("could not change modification time") from e + +@@ -2397,13 +2650,26 @@ class TarFile(object): + members = self.getmembers() + + # Limit the member search list up to tarinfo. 
++ skipping = False + if tarinfo is not None: +- members = members[:members.index(tarinfo)] ++ try: ++ index = members.index(tarinfo) ++ except ValueError: ++ # The given starting point might be a (modified) copy. ++ # We'll later skip members until we find an equivalent. ++ skipping = True ++ else: ++ # Happy fast path ++ members = members[:index] + + if normalize: + name = os.path.normpath(name) + + for member in reversed(members): ++ if skipping: ++ if tarinfo.offset == member.offset: ++ skipping = False ++ continue + if normalize: + member_name = os.path.normpath(member.name) + else: +@@ -2412,6 +2678,10 @@ class TarFile(object): + if name == member_name: + return member + ++ if skipping: ++ # Starting point was not found ++ raise ValueError(tarinfo) ++ + def _load(self): + """Read through the entire archive file and look for readable + members. +@@ -2504,6 +2774,7 @@ class TarFile(object): + #-------------------- + # exported functions + #-------------------- ++ + def is_tarfile(name): + """Return True if name points to a tar archive that we + are able to handle, else return False. 
+@@ -2530,6 +2801,10 @@ def main(): + parser = argparse.ArgumentParser(description=description) + parser.add_argument('-v', '--verbose', action='store_true', default=False, + help='Verbose output') ++ parser.add_argument('--filter', metavar='', ++ choices=_NAMED_FILTERS, ++ help='Filter for extraction') ++ + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument('-l', '--list', metavar='', + help='Show listing of a tarfile') +@@ -2541,8 +2816,12 @@ def main(): + help='Create tarfile from sources') + group.add_argument('-t', '--test', metavar='', + help='Test if a tarfile is valid') ++ + args = parser.parse_args() + ++ if args.filter and args.extract is None: ++ parser.exit(1, '--filter is only valid for extraction\n') ++ + if args.test is not None: + src = args.test + if is_tarfile(src): +@@ -2573,7 +2852,7 @@ def main(): + + if is_tarfile(src): + with TarFile.open(src, 'r:*') as tf: +- tf.extractall(path=curdir) ++ tf.extractall(path=curdir, filter=args.filter) + if args.verbose: + if curdir == '.': + msg = '{!r} file is extracted.'.format(src) +--- a/Lib/test/test_shutil.py ++++ b/Lib/test/test_shutil.py +@@ -32,6 +32,7 @@ except ImportError: + from test import support + from test.support import os_helper + from test.support.os_helper import TESTFN, FakePath ++from test.support import warnings_helper + + TESTFN2 = TESTFN + "2" + TESTFN_SRC = TESTFN + "_SRC" +@@ -1610,12 +1611,14 @@ class TestArchives(BaseTest, unittest.Te + + ### shutil.unpack_archive + +- def check_unpack_archive(self, format): +- self.check_unpack_archive_with_converter(format, lambda path: path) +- self.check_unpack_archive_with_converter(format, pathlib.Path) +- self.check_unpack_archive_with_converter(format, FakePath) ++ def check_unpack_archive(self, format, **kwargs): ++ self.check_unpack_archive_with_converter( ++ format, lambda path: path, **kwargs) ++ self.check_unpack_archive_with_converter( ++ format, pathlib.Path, **kwargs) ++ 
self.check_unpack_archive_with_converter(format, FakePath, **kwargs) + +- def check_unpack_archive_with_converter(self, format, converter): ++ def check_unpack_archive_with_converter(self, format, converter, **kwargs): + root_dir, base_dir = self._create_files() + expected = rlistdir(root_dir) + expected.remove('outer') +@@ -1625,36 +1628,48 @@ class TestArchives(BaseTest, unittest.Te + + # let's try to unpack it now + tmpdir2 = self.mkdtemp() +- unpack_archive(converter(filename), converter(tmpdir2)) ++ unpack_archive(converter(filename), converter(tmpdir2), **kwargs) + self.assertEqual(rlistdir(tmpdir2), expected) + + # and again, this time with the format specified + tmpdir3 = self.mkdtemp() +- unpack_archive(converter(filename), converter(tmpdir3), format=format) ++ unpack_archive(converter(filename), converter(tmpdir3), format=format, ++ **kwargs) + self.assertEqual(rlistdir(tmpdir3), expected) + +- self.assertRaises(shutil.ReadError, unpack_archive, converter(TESTFN)) +- self.assertRaises(ValueError, unpack_archive, converter(TESTFN), format='xxx') ++ with self.assertRaises(shutil.ReadError): ++ unpack_archive(converter(TESTFN), **kwargs) ++ with self.assertRaises(ValueError): ++ unpack_archive(converter(TESTFN), format='xxx', **kwargs) ++ ++ def check_unpack_tarball(self, format): ++ self.check_unpack_archive(format, filter='fully_trusted') ++ self.check_unpack_archive(format, filter='data') ++ with warnings_helper.check_warnings( ++ ('Python 3.14', DeprecationWarning)): ++ self.check_unpack_archive(format) + + def test_unpack_archive_tar(self): +- self.check_unpack_archive('tar') ++ self.check_unpack_tarball('tar') + + @support.requires_zlib() + def test_unpack_archive_gztar(self): +- self.check_unpack_archive('gztar') ++ self.check_unpack_tarball('gztar') + + @support.requires_bz2() + def test_unpack_archive_bztar(self): +- self.check_unpack_archive('bztar') ++ self.check_unpack_tarball('bztar') + + @support.requires_lzma() + @unittest.skipIf(AIX and not 
_maxdataOK(), "AIX MAXDATA must be 0x20000000 or larger") + def test_unpack_archive_xztar(self): +- self.check_unpack_archive('xztar') ++ self.check_unpack_tarball('xztar') + + @support.requires_zlib() + def test_unpack_archive_zip(self): + self.check_unpack_archive('zip') ++ with self.assertRaises(TypeError): ++ self.check_unpack_archive('zip', filter='data') + + def test_unpack_registry(self): + +--- a/Lib/test/test_tarfile.py ++++ b/Lib/test/test_tarfile.py +@@ -2,9 +2,13 @@ import sys + import os + import io + from hashlib import sha256 +-from contextlib import contextmanager ++from contextlib import contextmanager, ExitStack + from random import Random + import pathlib ++import shutil ++import re ++import warnings ++import stat + + import unittest + import unittest.mock +@@ -13,6 +17,7 @@ import tarfile + from test import support + from test.support import os_helper + from test.support import script_helper ++from test.support import warnings_helper + + # Check for our compression modules. 
+ try: +@@ -108,7 +113,7 @@ class UstarReadTest(ReadTest, unittest.T + "regular file extraction failed") + + def test_fileobj_readlines(self): +- self.tar.extract("ustar/regtype", TEMPDIR) ++ self.tar.extract("ustar/regtype", TEMPDIR, filter='data') + tarinfo = self.tar.getmember("ustar/regtype") + with open(os.path.join(TEMPDIR, "ustar/regtype"), "r") as fobj1: + lines1 = fobj1.readlines() +@@ -126,7 +131,7 @@ class UstarReadTest(ReadTest, unittest.T + "fileobj.readlines() failed") + + def test_fileobj_iter(self): +- self.tar.extract("ustar/regtype", TEMPDIR) ++ self.tar.extract("ustar/regtype", TEMPDIR, filter='data') + tarinfo = self.tar.getmember("ustar/regtype") + with open(os.path.join(TEMPDIR, "ustar/regtype"), "r") as fobj1: + lines1 = fobj1.readlines() +@@ -136,7 +141,8 @@ class UstarReadTest(ReadTest, unittest.T + "fileobj.__iter__() failed") + + def test_fileobj_seek(self): +- self.tar.extract("ustar/regtype", TEMPDIR) ++ self.tar.extract("ustar/regtype", TEMPDIR, ++ filter='data') + with open(os.path.join(TEMPDIR, "ustar/regtype"), "rb") as fobj: + data = fobj.read() + +@@ -454,7 +460,7 @@ class CommonReadTest(ReadTest): + t = tar.next() + + with self.assertRaisesRegex(tarfile.ReadError, "unexpected end of data"): +- tar.extract(t, TEMPDIR) ++ tar.extract(t, TEMPDIR, filter='data') + + with self.assertRaisesRegex(tarfile.ReadError, "unexpected end of data"): + tar.extractfile(t).read() +@@ -609,16 +615,16 @@ class MiscReadTestBase(CommonReadTest): + def test_extract_hardlink(self): + # Test hardlink extraction (e.g. bug #857297). 
+ with tarfile.open(tarname, errorlevel=1, encoding="iso8859-1") as tar: +- tar.extract("ustar/regtype", TEMPDIR) ++ tar.extract("ustar/regtype", TEMPDIR, filter='data') + self.addCleanup(os_helper.unlink, os.path.join(TEMPDIR, "ustar/regtype")) + +- tar.extract("ustar/lnktype", TEMPDIR) ++ tar.extract("ustar/lnktype", TEMPDIR, filter='data') + self.addCleanup(os_helper.unlink, os.path.join(TEMPDIR, "ustar/lnktype")) + with open(os.path.join(TEMPDIR, "ustar/lnktype"), "rb") as f: + data = f.read() + self.assertEqual(sha256sum(data), sha256_regtype) + +- tar.extract("ustar/symtype", TEMPDIR) ++ tar.extract("ustar/symtype", TEMPDIR, filter='data') + self.addCleanup(os_helper.unlink, os.path.join(TEMPDIR, "ustar/symtype")) + with open(os.path.join(TEMPDIR, "ustar/symtype"), "rb") as f: + data = f.read() +@@ -632,13 +638,14 @@ class MiscReadTestBase(CommonReadTest): + os.mkdir(DIR) + try: + directories = [t for t in tar if t.isdir()] +- tar.extractall(DIR, directories) ++ tar.extractall(DIR, directories, filter='fully_trusted') + for tarinfo in directories: + path = os.path.join(DIR, tarinfo.name) + if sys.platform != "win32": + # Win32 has no support for fine grained permissions. 
+ self.assertEqual(tarinfo.mode & 0o777, +- os.stat(path).st_mode & 0o777) ++ os.stat(path).st_mode & 0o777, ++ tarinfo.name) + def format_mtime(mtime): + if isinstance(mtime, float): + return "{} ({})".format(mtime, mtime.hex()) +@@ -661,7 +668,7 @@ class MiscReadTestBase(CommonReadTest): + try: + with tarfile.open(tarname, encoding="iso8859-1") as tar: + tarinfo = tar.getmember(dirtype) +- tar.extract(tarinfo, path=DIR) ++ tar.extract(tarinfo, path=DIR, filter='fully_trusted') + extracted = os.path.join(DIR, dirtype) + self.assertEqual(os.path.getmtime(extracted), tarinfo.mtime) + if sys.platform != "win32": +@@ -674,7 +681,7 @@ class MiscReadTestBase(CommonReadTest): + with os_helper.temp_dir(DIR), \ + tarfile.open(tarname, encoding="iso8859-1") as tar: + directories = [t for t in tar if t.isdir()] +- tar.extractall(DIR, directories) ++ tar.extractall(DIR, directories, filter='fully_trusted') + for tarinfo in directories: + path = DIR / tarinfo.name + self.assertEqual(os.path.getmtime(path), tarinfo.mtime) +@@ -685,7 +692,7 @@ class MiscReadTestBase(CommonReadTest): + with os_helper.temp_dir(DIR), \ + tarfile.open(tarname, encoding="iso8859-1") as tar: + tarinfo = tar.getmember(dirtype) +- tar.extract(tarinfo, path=DIR) ++ tar.extract(tarinfo, path=DIR, filter='fully_trusted') + extracted = DIR / dirtype + self.assertEqual(os.path.getmtime(extracted), tarinfo.mtime) + +@@ -1041,7 +1048,7 @@ class GNUReadTest(LongnameTest, ReadTest + # an all platforms, and after that a test that will work only on + # platforms/filesystems that prove to support sparse files. + def _test_sparse_file(self, name): +- self.tar.extract(name, TEMPDIR) ++ self.tar.extract(name, TEMPDIR, filter='data') + filename = os.path.join(TEMPDIR, name) + with open(filename, "rb") as fobj: + data = fobj.read() +@@ -1408,7 +1415,8 @@ class WriteTest(WriteTestBase, unittest. 
+ with tarfile.open(temparchive, errorlevel=2) as tar: + # this should not raise OSError: [Errno 17] File exists + try: +- tar.extractall(path=tempdir) ++ tar.extractall(path=tempdir, ++ filter='fully_trusted') + except OSError: + self.fail("extractall failed with symlinked files") + finally: +@@ -2440,6 +2448,15 @@ class CommandLineTest(unittest.TestCase) + for tardata in files: + tf.add(tardata, arcname=os.path.basename(tardata)) + ++ def make_evil_tarfile(self, tar_name): ++ files = [support.findfile('tokenize_tests.txt')] ++ self.addCleanup(os_helper.unlink, tar_name) ++ with tarfile.open(tar_name, 'w') as tf: ++ benign = tarfile.TarInfo('benign') ++ tf.addfile(benign, fileobj=io.BytesIO(b'')) ++ evil = tarfile.TarInfo('../evil') ++ tf.addfile(evil, fileobj=io.BytesIO(b'')) ++ + def test_bad_use(self): + rc, out, err = self.tarfilecmd_failure() + self.assertEqual(out, b'') +@@ -2596,6 +2613,25 @@ class CommandLineTest(unittest.TestCase) + finally: + os_helper.rmtree(tarextdir) + ++ def test_extract_command_filter(self): ++ self.make_evil_tarfile(tmpname) ++ # Make an inner directory, so the member named '../evil' ++ # is still extracted into `tarextdir` ++ destdir = os.path.join(tarextdir, 'dest') ++ os.mkdir(tarextdir) ++ try: ++ with os_helper.temp_cwd(destdir): ++ self.tarfilecmd_failure('-e', tmpname, ++ '-v', ++ '--filter', 'data') ++ out = self.tarfilecmd('-e', tmpname, ++ '-v', ++ '--filter', 'fully_trusted', ++ PYTHONIOENCODING='utf-8') ++ self.assertIn(b' file is extracted.', out) ++ finally: ++ os_helper.rmtree(tarextdir) ++ + def test_extract_command_different_directory(self): + self.make_simple_tarfile(tmpname) + try: +@@ -2679,7 +2715,7 @@ class LinkEmulationTest(ReadTest, unitte + # symbolic or hard links tarfile tries to extract these types of members + # as the regular files they point to. 
+ def _test_link_extraction(self, name): +- self.tar.extract(name, TEMPDIR) ++ self.tar.extract(name, TEMPDIR, filter='fully_trusted') + with open(os.path.join(TEMPDIR, name), "rb") as f: + data = f.read() + self.assertEqual(sha256sum(data), sha256_regtype) +@@ -2811,8 +2847,10 @@ class NumericOwnerTest(unittest.TestCase + mock_chown): + with self._setup_test(mock_geteuid) as (tarfl, filename_1, _, + filename_2): +- tarfl.extract(filename_1, TEMPDIR, numeric_owner=True) +- tarfl.extract(filename_2 , TEMPDIR, numeric_owner=True) ++ tarfl.extract(filename_1, TEMPDIR, numeric_owner=True, ++ filter='fully_trusted') ++ tarfl.extract(filename_2 , TEMPDIR, numeric_owner=True, ++ filter='fully_trusted') + + # convert to filesystem paths + f_filename_1 = os.path.join(TEMPDIR, filename_1) +@@ -2830,7 +2868,8 @@ class NumericOwnerTest(unittest.TestCase + mock_chown): + with self._setup_test(mock_geteuid) as (tarfl, filename_1, dirname_1, + filename_2): +- tarfl.extractall(TEMPDIR, numeric_owner=True) ++ tarfl.extractall(TEMPDIR, numeric_owner=True, ++ filter='fully_trusted') + + # convert to filesystem paths + f_filename_1 = os.path.join(TEMPDIR, filename_1) +@@ -2855,7 +2894,8 @@ class NumericOwnerTest(unittest.TestCase + def test_extract_without_numeric_owner(self, mock_geteuid, mock_chmod, + mock_chown): + with self._setup_test(mock_geteuid) as (tarfl, filename_1, _, _): +- tarfl.extract(filename_1, TEMPDIR, numeric_owner=False) ++ tarfl.extract(filename_1, TEMPDIR, numeric_owner=False, ++ filter='fully_trusted') + + # convert to filesystem paths + f_filename_1 = os.path.join(TEMPDIR, filename_1) +@@ -2869,6 +2909,890 @@ class NumericOwnerTest(unittest.TestCase + tarfl.extract, filename_1, TEMPDIR, False, True) + + ++class ReplaceTests(ReadTest, unittest.TestCase): ++ def test_replace_name(self): ++ member = self.tar.getmember('ustar/regtype') ++ replaced = member.replace(name='misc/other') ++ self.assertEqual(replaced.name, 'misc/other') ++ self.assertEqual(member.name, 
'ustar/regtype') ++ self.assertEqual(self.tar.getmember('ustar/regtype').name, ++ 'ustar/regtype') ++ ++ def test_replace_deep(self): ++ member = self.tar.getmember('pax/regtype1') ++ replaced = member.replace() ++ replaced.pax_headers['gname'] = 'not-bar' ++ self.assertEqual(member.pax_headers['gname'], 'bar') ++ self.assertEqual( ++ self.tar.getmember('pax/regtype1').pax_headers['gname'], 'bar') ++ ++ def test_replace_shallow(self): ++ member = self.tar.getmember('pax/regtype1') ++ replaced = member.replace(deep=False) ++ replaced.pax_headers['gname'] = 'not-bar' ++ self.assertEqual(member.pax_headers['gname'], 'not-bar') ++ self.assertEqual( ++ self.tar.getmember('pax/regtype1').pax_headers['gname'], 'not-bar') ++ ++ def test_replace_all(self): ++ member = self.tar.getmember('ustar/regtype') ++ for attr_name in ('name', 'mtime', 'mode', 'linkname', ++ 'uid', 'gid', 'uname', 'gname'): ++ with self.subTest(attr_name=attr_name): ++ replaced = member.replace(**{attr_name: None}) ++ self.assertEqual(getattr(replaced, attr_name), None) ++ self.assertNotEqual(getattr(member, attr_name), None) ++ ++ def test_replace_internal(self): ++ member = self.tar.getmember('ustar/regtype') ++ with self.assertRaises(TypeError): ++ member.replace(offset=123456789) ++ ++ ++class NoneInfoExtractTests(ReadTest): ++ # These mainly check that all kinds of members are extracted successfully ++ # if some metadata is None. ++ # Some of the methods do additional spot checks. ++ ++ # We also test that the default filters can deal with None. 
++ ++ extraction_filter = None ++ ++ @classmethod ++ def setUpClass(cls): ++ tar = tarfile.open(tarname, mode='r', encoding="iso8859-1") ++ cls.control_dir = pathlib.Path(TEMPDIR) / "extractall_ctrl" ++ tar.errorlevel = 0 ++ with ExitStack() as cm: ++ if cls.extraction_filter is None: ++ cm.enter_context(warnings.catch_warnings( ++ action="ignore", category=DeprecationWarning)) ++ tar.extractall(cls.control_dir, filter=cls.extraction_filter) ++ tar.close() ++ cls.control_paths = set( ++ p.relative_to(cls.control_dir) ++ for p in pathlib.Path(cls.control_dir).glob('**/*')) ++ ++ @classmethod ++ def tearDownClass(cls): ++ shutil.rmtree(cls.control_dir) ++ ++ def check_files_present(self, directory): ++ got_paths = set( ++ p.relative_to(directory) ++ for p in pathlib.Path(directory).glob('**/*')) ++ self.assertEqual(self.control_paths, got_paths) ++ ++ @contextmanager ++ def extract_with_none(self, *attr_names): ++ DIR = pathlib.Path(TEMPDIR) / "extractall_none" ++ self.tar.errorlevel = 0 ++ for member in self.tar.getmembers(): ++ for attr_name in attr_names: ++ setattr(member, attr_name, None) ++ with os_helper.temp_dir(DIR): ++ self.tar.extractall(DIR, filter='fully_trusted') ++ self.check_files_present(DIR) ++ yield DIR ++ ++ def test_extractall_none_mtime(self): ++ # mtimes of extracted files should be later than 'now' -- the mtime ++ # of a previously created directory. 
++ now = pathlib.Path(TEMPDIR).stat().st_mtime ++ with self.extract_with_none('mtime') as DIR: ++ for path in pathlib.Path(DIR).glob('**/*'): ++ with self.subTest(path=path): ++ try: ++ mtime = path.stat().st_mtime ++ except OSError: ++ # Some systems can't stat symlinks, ignore those ++ if not path.is_symlink(): ++ raise ++ else: ++ self.assertGreaterEqual(path.stat().st_mtime, now) ++ ++ def test_extractall_none_mode(self): ++ # modes of directories and regular files should match the mode ++ # of a "normally" created directory or regular file ++ dir_mode = pathlib.Path(TEMPDIR).stat().st_mode ++ regular_file = pathlib.Path(TEMPDIR) / 'regular_file' ++ regular_file.write_text('') ++ regular_file_mode = regular_file.stat().st_mode ++ with self.extract_with_none('mode') as DIR: ++ for path in pathlib.Path(DIR).glob('**/*'): ++ with self.subTest(path=path): ++ if path.is_dir(): ++ self.assertEqual(path.stat().st_mode, dir_mode) ++ elif path.is_file(): ++ self.assertEqual(path.stat().st_mode, ++ regular_file_mode) ++ ++ def test_extractall_none_uid(self): ++ with self.extract_with_none('uid'): ++ pass ++ ++ def test_extractall_none_gid(self): ++ with self.extract_with_none('gid'): ++ pass ++ ++ def test_extractall_none_uname(self): ++ with self.extract_with_none('uname'): ++ pass ++ ++ def test_extractall_none_gname(self): ++ with self.extract_with_none('gname'): ++ pass ++ ++ def test_extractall_none_ownership(self): ++ with self.extract_with_none('uid', 'gid', 'uname', 'gname'): ++ pass ++ ++class NoneInfoExtractTests_Data(NoneInfoExtractTests, unittest.TestCase): ++ extraction_filter = 'data' ++ ++class NoneInfoExtractTests_FullyTrusted(NoneInfoExtractTests, ++ unittest.TestCase): ++ extraction_filter = 'fully_trusted' ++ ++class NoneInfoExtractTests_Tar(NoneInfoExtractTests, unittest.TestCase): ++ extraction_filter = 'tar' ++ ++class NoneInfoExtractTests_Default(NoneInfoExtractTests, ++ unittest.TestCase): ++ extraction_filter = None ++ ++class 
NoneInfoTests_Misc(unittest.TestCase): ++ def test_add(self): ++ # When addfile() encounters None metadata, it raises a ValueError ++ bio = io.BytesIO() ++ for tarformat in (tarfile.USTAR_FORMAT, tarfile.GNU_FORMAT, ++ tarfile.PAX_FORMAT): ++ with self.subTest(tarformat=tarformat): ++ tar = tarfile.open(fileobj=bio, mode='w', format=tarformat) ++ tarinfo = tar.gettarinfo(tarname) ++ try: ++ tar.addfile(tarinfo) ++ except Exception: ++ if tarformat == tarfile.USTAR_FORMAT: ++ # In the old, limited format, adding might fail for ++ # reasons like the UID being too large ++ pass ++ else: ++ raise ++ else: ++ for attr_name in ('mtime', 'mode', 'uid', 'gid', ++ 'uname', 'gname'): ++ with self.subTest(attr_name=attr_name): ++ replaced = tarinfo.replace(**{attr_name: None}) ++ with self.assertRaisesRegex(ValueError, ++ f"{attr_name}"): ++ tar.addfile(replaced) ++ ++ def test_list(self): ++ # Change some metadata to None, then compare list() output ++ # word-for-word. We want list() to not raise, and to only change ++ # printout for the affected piece of metadata. ++ # (n.b.: some contents of the test archive are hardcoded.) 
++ for attr_names in ({'mtime'}, {'mode'}, {'uid'}, {'gid'}, ++ {'uname'}, {'gname'}, ++ {'uid', 'uname'}, {'gid', 'gname'}): ++ with (self.subTest(attr_names=attr_names), ++ tarfile.open(tarname, encoding="iso8859-1") as tar): ++ tio_prev = io.TextIOWrapper(io.BytesIO(), 'ascii', newline='\n') ++ with support.swap_attr(sys, 'stdout', tio_prev): ++ tar.list() ++ for member in tar.getmembers(): ++ for attr_name in attr_names: ++ setattr(member, attr_name, None) ++ tio_new = io.TextIOWrapper(io.BytesIO(), 'ascii', newline='\n') ++ with support.swap_attr(sys, 'stdout', tio_new): ++ tar.list() ++ for expected, got in zip(tio_prev.detach().getvalue().split(), ++ tio_new.detach().getvalue().split()): ++ if attr_names == {'mtime'} and re.match(rb'2003-01-\d\d', expected): ++ self.assertEqual(got, b'????-??-??') ++ elif attr_names == {'mtime'} and re.match(rb'\d\d:\d\d:\d\d', expected): ++ self.assertEqual(got, b'??:??:??') ++ elif attr_names == {'mode'} and re.match( ++ rb'.([r-][w-][x-]){3}', expected): ++ self.assertEqual(got, b'??????????') ++ elif attr_names == {'uname'} and expected.startswith( ++ (b'tarfile/', b'lars/', b'foo/')): ++ exp_user, exp_group = expected.split(b'/') ++ got_user, got_group = got.split(b'/') ++ self.assertEqual(got_group, exp_group) ++ self.assertRegex(got_user, b'[0-9]+') ++ elif attr_names == {'gname'} and expected.endswith( ++ (b'/tarfile', b'/users', b'/bar')): ++ exp_user, exp_group = expected.split(b'/') ++ got_user, got_group = got.split(b'/') ++ self.assertEqual(got_user, exp_user) ++ self.assertRegex(got_group, b'[0-9]+') ++ elif attr_names == {'uid'} and expected.startswith( ++ (b'1000/')): ++ exp_user, exp_group = expected.split(b'/') ++ got_user, got_group = got.split(b'/') ++ self.assertEqual(got_group, exp_group) ++ self.assertEqual(got_user, b'None') ++ elif attr_names == {'gid'} and expected.endswith((b'/100')): ++ exp_user, exp_group = expected.split(b'/') ++ got_user, got_group = got.split(b'/') ++ 
self.assertEqual(got_user, exp_user) ++ self.assertEqual(got_group, b'None') ++ elif attr_names == {'uid', 'uname'} and expected.startswith( ++ (b'tarfile/', b'lars/', b'foo/', b'1000/')): ++ exp_user, exp_group = expected.split(b'/') ++ got_user, got_group = got.split(b'/') ++ self.assertEqual(got_group, exp_group) ++ self.assertEqual(got_user, b'None') ++ elif attr_names == {'gname', 'gid'} and expected.endswith( ++ (b'/tarfile', b'/users', b'/bar', b'/100')): ++ exp_user, exp_group = expected.split(b'/') ++ got_user, got_group = got.split(b'/') ++ self.assertEqual(got_user, exp_user) ++ self.assertEqual(got_group, b'None') ++ else: ++ # In other cases the output should be the same ++ self.assertEqual(expected, got) ++ ++def _filemode_to_int(mode): ++ """Inverse of `stat.filemode` (for permission bits) ++ ++ Using mode strings rather than numbers makes the later tests more readable. ++ """ ++ str_mode = mode[1:] ++ result = ( ++ {'r': stat.S_IRUSR, '-': 0}[str_mode[0]] ++ | {'w': stat.S_IWUSR, '-': 0}[str_mode[1]] ++ | {'x': stat.S_IXUSR, '-': 0, ++ 's': stat.S_IXUSR | stat.S_ISUID, ++ 'S': stat.S_ISUID}[str_mode[2]] ++ | {'r': stat.S_IRGRP, '-': 0}[str_mode[3]] ++ | {'w': stat.S_IWGRP, '-': 0}[str_mode[4]] ++ | {'x': stat.S_IXGRP, '-': 0, ++ 's': stat.S_IXGRP | stat.S_ISGID, ++ 'S': stat.S_ISGID}[str_mode[5]] ++ | {'r': stat.S_IROTH, '-': 0}[str_mode[6]] ++ | {'w': stat.S_IWOTH, '-': 0}[str_mode[7]] ++ | {'x': stat.S_IXOTH, '-': 0, ++ 't': stat.S_IXOTH | stat.S_ISVTX, ++ 'T': stat.S_ISVTX}[str_mode[8]] ++ ) ++ # check we did this right ++ assert stat.filemode(result)[1:] == mode[1:] ++ ++ return result ++ ++class ArchiveMaker: ++ """Helper to create a tar file with specific contents ++ ++ Usage: ++ ++ with ArchiveMaker() as t: ++ t.add('filename', ...) ++ ++ with t.open() as tar: ++ ... # `tar` is now a TarFile with 'filename' in it! 
++ """ ++ def __init__(self): ++ self.bio = io.BytesIO() ++ ++ def __enter__(self): ++ self.tar_w = tarfile.TarFile(mode='w', fileobj=self.bio) ++ return self ++ ++ def __exit__(self, *exc): ++ self.tar_w.close() ++ self.contents = self.bio.getvalue() ++ self.bio = None ++ ++ def add(self, name, *, type=None, symlink_to=None, hardlink_to=None, ++ mode=None, **kwargs): ++ """Add a member to the test archive. Call within `with`.""" ++ name = str(name) ++ tarinfo = tarfile.TarInfo(name).replace(**kwargs) ++ if mode: ++ tarinfo.mode = _filemode_to_int(mode) ++ if symlink_to is not None: ++ type = tarfile.SYMTYPE ++ tarinfo.linkname = str(symlink_to) ++ if hardlink_to is not None: ++ type = tarfile.LNKTYPE ++ tarinfo.linkname = str(hardlink_to) ++ if name.endswith('/') and type is None: ++ type = tarfile.DIRTYPE ++ if type is not None: ++ tarinfo.type = type ++ if tarinfo.isreg(): ++ fileobj = io.BytesIO(bytes(tarinfo.size)) ++ else: ++ fileobj = None ++ self.tar_w.addfile(tarinfo, fileobj) ++ ++ def open(self, **kwargs): ++ """Open the resulting archive as TarFile. Call after `with`.""" ++ bio = io.BytesIO(self.contents) ++ return tarfile.open(fileobj=bio, **kwargs) ++ ++# Under WASI, `os_helper.can_symlink` is False to make ++# `skip_unless_symlink` skip symlink tests. " ++# But in the following tests we use can_symlink to *determine* which ++# behavior is expected. ++# Like other symlink tests, skip these on WASI for now. ++if support.is_wasi: ++ def symlink_test(f): ++ return unittest.skip("WASI: Skip symlink test for now")(f) ++else: ++ def symlink_test(f): ++ return f ++ ++ ++class TestExtractionFilters(unittest.TestCase): ++ ++ # A temporary directory for the extraction results. ++ # All files that "escape" the destination path should still end ++ # up in this directory. 
++ outerdir = pathlib.Path(TEMPDIR) / 'outerdir' ++ ++ # The destination for the extraction, within `outerdir` ++ destdir = outerdir / 'dest' ++ ++ @contextmanager ++ def check_context(self, tar, filter): ++ """Extracts `tar` to `self.destdir` and allows checking the result ++ ++ If an error occurs, it must be checked using `expect_exception` ++ ++ Otherwise, all resulting files must be checked using `expect_file`, ++ except the destination directory itself and parent directories of ++ other files. ++ When checking directories, do so before their contents. ++ """ ++ with os_helper.temp_dir(self.outerdir): ++ try: ++ tar.extractall(self.destdir, filter=filter) ++ except Exception as exc: ++ self.raised_exception = exc ++ self.expected_paths = set() ++ else: ++ self.raised_exception = None ++ self.expected_paths = set(self.outerdir.glob('**/*')) ++ self.expected_paths.discard(self.destdir) ++ try: ++ yield ++ finally: ++ tar.close() ++ if self.raised_exception: ++ raise self.raised_exception ++ self.assertEqual(self.expected_paths, set()) ++ ++ def expect_file(self, name, type=None, symlink_to=None, mode=None): ++ """Check a single file. See check_context.""" ++ if self.raised_exception: ++ raise self.raised_exception ++ # use normpath() rather than resolve() so we don't follow symlinks ++ path = pathlib.Path(os.path.normpath(self.destdir / name)) ++ self.assertIn(path, self.expected_paths) ++ self.expected_paths.remove(path) ++ if mode is not None and os_helper.can_chmod(): ++ got = stat.filemode(stat.S_IMODE(path.stat().st_mode)) ++ self.assertEqual(got, mode) ++ if type is None and isinstance(name, str) and name.endswith('/'): ++ type = tarfile.DIRTYPE ++ if symlink_to is not None: ++ got = (self.destdir / name).readlink() ++ expected = pathlib.Path(symlink_to) ++ # The symlink might be the same (textually) as what we expect, ++ # but some systems change the link to an equivalent path, so ++ # we fall back to samefile(). 
++ if expected != got: ++ self.assertTrue(got.samefile(expected)) ++ elif type == tarfile.REGTYPE or type is None: ++ self.assertTrue(path.is_file()) ++ elif type == tarfile.DIRTYPE: ++ self.assertTrue(path.is_dir()) ++ elif type == tarfile.FIFOTYPE: ++ self.assertTrue(path.is_fifo()) ++ else: ++ raise NotImplementedError(type) ++ for parent in path.parents: ++ self.expected_paths.discard(parent) ++ ++ def expect_exception(self, exc_type, message_re='.'): ++ with self.assertRaisesRegex(exc_type, message_re): ++ if self.raised_exception is not None: ++ raise self.raised_exception ++ self.raised_exception = None ++ ++ def test_benign_file(self): ++ with ArchiveMaker() as arc: ++ arc.add('benign.txt') ++ for filter in 'fully_trusted', 'tar', 'data': ++ with self.check_context(arc.open(), filter): ++ self.expect_file('benign.txt') ++ ++ def test_absolute(self): ++ # Test handling a member with an absolute path ++ # Inspired by 'absolute1' in https://github.com/jwilk/traversal-archives ++ with ArchiveMaker() as arc: ++ arc.add(self.outerdir / 'escaped.evil') ++ ++ with self.check_context(arc.open(), 'fully_trusted'): ++ self.expect_file('../escaped.evil') ++ ++ for filter in 'tar', 'data': ++ with self.check_context(arc.open(), filter): ++ if str(self.outerdir).startswith('/'): ++ # We strip leading slashes, as e.g. GNU tar does ++ # (without --absolute-filenames). ++ outerdir_stripped = str(self.outerdir).lstrip('/') ++ self.expect_file(f'{outerdir_stripped}/escaped.evil') ++ else: ++ # On this system, absolute paths don't have leading ++ # slashes. ++ # So, there's nothing to strip. We refuse to unpack ++ # to an absolute path, nonetheless. 
++ self.expect_exception( ++ tarfile.AbsolutePathError, ++ """['"].*escaped.evil['"] has an absolute path""") ++ ++ @symlink_test ++ def test_parent_symlink(self): ++ # Test interplaying symlinks ++ # Inspired by 'dirsymlink2a' in jwilk/traversal-archives ++ with ArchiveMaker() as arc: ++ arc.add('current', symlink_to='.') ++ arc.add('parent', symlink_to='current/..') ++ arc.add('parent/evil') ++ ++ if os_helper.can_symlink(): ++ with self.check_context(arc.open(), 'fully_trusted'): ++ if self.raised_exception is not None: ++ # Windows will refuse to create a file that's a symlink to itself ++ # (and tarfile doesn't swallow that exception) ++ self.expect_exception(FileExistsError) ++ # The other cases will fail with this error too. ++ # Skip the rest of this test. ++ return ++ else: ++ self.expect_file('current', symlink_to='.') ++ self.expect_file('parent', symlink_to='current/..') ++ self.expect_file('../evil') ++ ++ with self.check_context(arc.open(), 'tar'): ++ self.expect_exception( ++ tarfile.OutsideDestinationError, ++ """'parent/evil' would be extracted to ['"].*evil['"], """ ++ + "which is outside the destination") ++ ++ with self.check_context(arc.open(), 'data'): ++ self.expect_exception( ++ tarfile.LinkOutsideDestinationError, ++ """'parent' would link to ['"].*outerdir['"], """ ++ + "which is outside the destination") ++ ++ else: ++ # No symlink support. The symlinks are ignored. 
++ with self.check_context(arc.open(), 'fully_trusted'): ++ self.expect_file('parent/evil') ++ with self.check_context(arc.open(), 'tar'): ++ self.expect_file('parent/evil') ++ with self.check_context(arc.open(), 'data'): ++ self.expect_file('parent/evil') ++ ++ @symlink_test ++ def test_parent_symlink2(self): ++ # Test interplaying symlinks ++ # Inspired by 'dirsymlink2b' in jwilk/traversal-archives ++ with ArchiveMaker() as arc: ++ arc.add('current', symlink_to='.') ++ arc.add('current/parent', symlink_to='..') ++ arc.add('parent/evil') ++ ++ with self.check_context(arc.open(), 'fully_trusted'): ++ if os_helper.can_symlink(): ++ self.expect_file('current', symlink_to='.') ++ self.expect_file('parent', symlink_to='..') ++ self.expect_file('../evil') ++ else: ++ self.expect_file('current/') ++ self.expect_file('parent/evil') ++ ++ with self.check_context(arc.open(), 'tar'): ++ if os_helper.can_symlink(): ++ self.expect_exception( ++ tarfile.OutsideDestinationError, ++ "'parent/evil' would be extracted to " ++ + """['"].*evil['"], which is outside """ ++ + "the destination") ++ else: ++ self.expect_file('current/') ++ self.expect_file('parent/evil') ++ ++ with self.check_context(arc.open(), 'data'): ++ self.expect_exception( ++ tarfile.LinkOutsideDestinationError, ++ """'current/parent' would link to ['"].*['"], """ ++ + "which is outside the destination") ++ ++ @symlink_test ++ def test_absolute_symlink(self): ++ # Test symlink to an absolute path ++ # Inspired by 'dirsymlink' in jwilk/traversal-archives ++ with ArchiveMaker() as arc: ++ arc.add('parent', symlink_to=self.outerdir) ++ arc.add('parent/evil') ++ ++ with self.check_context(arc.open(), 'fully_trusted'): ++ if os_helper.can_symlink(): ++ self.expect_file('parent', symlink_to=self.outerdir) ++ self.expect_file('../evil') ++ else: ++ self.expect_file('parent/evil') ++ ++ with self.check_context(arc.open(), 'tar'): ++ if os_helper.can_symlink(): ++ self.expect_exception( ++ tarfile.OutsideDestinationError, 
++ "'parent/evil' would be extracted to " ++ + """['"].*evil['"], which is outside """ ++ + "the destination") ++ else: ++ self.expect_file('parent/evil') ++ ++ with self.check_context(arc.open(), 'data'): ++ self.expect_exception( ++ tarfile.AbsoluteLinkError, ++ "'parent' is a symlink to an absolute path") ++ ++ @symlink_test ++ def test_sly_relative0(self): ++ # Inspired by 'relative0' in jwilk/traversal-archives ++ with ArchiveMaker() as arc: ++ arc.add('../moo', symlink_to='..//tmp/moo') ++ ++ try: ++ with self.check_context(arc.open(), filter='fully_trusted'): ++ if os_helper.can_symlink(): ++ if isinstance(self.raised_exception, FileExistsError): ++ # XXX TarFile happens to fail creating a parent ++ # directory. ++ # This might be a bug, but fixing it would hurt ++ # security. ++ # Note that e.g. GNU `tar` rejects '..' components, ++ # so you could argue this is an invalid archive and we ++ # just raise an bad type of exception. ++ self.expect_exception(FileExistsError) ++ else: ++ self.expect_file('../moo', symlink_to='..//tmp/moo') ++ else: ++ # The symlink can't be extracted and is ignored ++ pass ++ except FileExistsError: ++ pass ++ ++ for filter in 'tar', 'data': ++ with self.check_context(arc.open(), filter): ++ self.expect_exception( ++ tarfile.OutsideDestinationError, ++ "'../moo' would be extracted to " ++ + "'.*moo', which is outside " ++ + "the destination") ++ ++ @symlink_test ++ def test_sly_relative2(self): ++ # Inspired by 'relative2' in jwilk/traversal-archives ++ with ArchiveMaker() as arc: ++ arc.add('tmp/') ++ arc.add('tmp/../../moo', symlink_to='tmp/../..//tmp/moo') ++ ++ with self.check_context(arc.open(), 'fully_trusted'): ++ self.expect_file('tmp', type=tarfile.DIRTYPE) ++ if os_helper.can_symlink(): ++ self.expect_file('../moo', symlink_to='tmp/../../tmp/moo') ++ ++ for filter in 'tar', 'data': ++ with self.check_context(arc.open(), filter): ++ self.expect_exception( ++ tarfile.OutsideDestinationError, ++ "'tmp/../../moo' would be 
extracted to " ++ + """['"].*moo['"], which is outside the """ ++ + "destination") ++ ++ def test_modes(self): ++ # Test how file modes are extracted ++ # (Note that the modes are ignored on platforms without working chmod) ++ with ArchiveMaker() as arc: ++ arc.add('all_bits', mode='?rwsrwsrwt') ++ arc.add('perm_bits', mode='?rwxrwxrwx') ++ arc.add('exec_group_other', mode='?rw-rwxrwx') ++ arc.add('read_group_only', mode='?---r-----') ++ arc.add('no_bits', mode='?---------') ++ arc.add('dir/', mode='?---rwsrwt', type=tarfile.DIRTYPE) ++ ++ with self.check_context(arc.open(), 'fully_trusted'): ++ self.expect_file('all_bits', mode='?rwsrwsrwt') ++ self.expect_file('perm_bits', mode='?rwxrwxrwx') ++ self.expect_file('exec_group_other', mode='?rw-rwxrwx') ++ self.expect_file('read_group_only', mode='?---r-----') ++ self.expect_file('no_bits', mode='?---------') ++ self.expect_file('dir', type=tarfile.DIRTYPE, mode='?---rwsrwt') ++ ++ with self.check_context(arc.open(), 'tar'): ++ self.expect_file('all_bits', mode='?rwxr-xr-x') ++ self.expect_file('perm_bits', mode='?rwxr-xr-x') ++ self.expect_file('exec_group_other', mode='?rw-r-xr-x') ++ self.expect_file('read_group_only', mode='?---r-----') ++ self.expect_file('no_bits', mode='?---------') ++ self.expect_file('dir/', type=tarfile.DIRTYPE, mode='?---r-xr-x') ++ ++ with self.check_context(arc.open(), 'data'): ++ normal_dir_mode = stat.filemode(stat.S_IMODE( ++ self.outerdir.stat().st_mode)) ++ self.expect_file('all_bits', mode='?rwxr-xr-x') ++ self.expect_file('perm_bits', mode='?rwxr-xr-x') ++ self.expect_file('exec_group_other', mode='?rw-r--r--') ++ self.expect_file('read_group_only', mode='?rw-r-----') ++ self.expect_file('no_bits', mode='?rw-------') ++ self.expect_file('dir/', type=tarfile.DIRTYPE, mode=normal_dir_mode) ++ ++ def test_pipe(self): ++ # Test handling of a special file ++ with ArchiveMaker() as arc: ++ arc.add('foo', type=tarfile.FIFOTYPE) ++ ++ for filter in 'fully_trusted', 'tar': ++ with 
self.check_context(arc.open(), filter): ++ if hasattr(os, 'mkfifo'): ++ self.expect_file('foo', type=tarfile.FIFOTYPE) ++ else: ++ # The pipe can't be extracted and is skipped. ++ pass ++ ++ with self.check_context(arc.open(), 'data'): ++ self.expect_exception( ++ tarfile.SpecialFileError, ++ "'foo' is a special file") ++ ++ def test_special_files(self): ++ # Creating device files is tricky. Instead of attempting that let's ++ # only check the filter result. ++ for special_type in tarfile.FIFOTYPE, tarfile.CHRTYPE, tarfile.BLKTYPE: ++ tarinfo = tarfile.TarInfo('foo') ++ tarinfo.type = special_type ++ trusted = tarfile.fully_trusted_filter(tarinfo, '') ++ self.assertIs(trusted, tarinfo) ++ tar = tarfile.tar_filter(tarinfo, '') ++ self.assertEqual(tar.type, special_type) ++ with self.assertRaises(tarfile.SpecialFileError) as cm: ++ tarfile.data_filter(tarinfo, '') ++ self.assertIsInstance(cm.exception.tarinfo, tarfile.TarInfo) ++ self.assertEqual(cm.exception.tarinfo.name, 'foo') ++ ++ def test_fully_trusted_filter(self): ++ # The 'fully_trusted' filter returns the original TarInfo objects. ++ with tarfile.TarFile.open(tarname) as tar: ++ for tarinfo in tar.getmembers(): ++ filtered = tarfile.fully_trusted_filter(tarinfo, '') ++ self.assertIs(filtered, tarinfo) ++ ++ def test_tar_filter(self): ++ # The 'tar' filter returns TarInfo objects with the same name/type. ++ # (It can also fail for particularly "evil" input, but we don't have ++ # that in the test archive.) ++ with tarfile.TarFile.open(tarname) as tar: ++ for tarinfo in tar.getmembers(): ++ filtered = tarfile.tar_filter(tarinfo, '') ++ self.assertIs(filtered.name, tarinfo.name) ++ self.assertIs(filtered.type, tarinfo.type) ++ ++ def test_data_filter(self): ++ # The 'data' filter either raises, or returns TarInfo with the same ++ # name/type. 
++ with tarfile.TarFile.open(tarname) as tar: ++ for tarinfo in tar.getmembers(): ++ try: ++ filtered = tarfile.data_filter(tarinfo, '') ++ except tarfile.FilterError: ++ continue ++ self.assertIs(filtered.name, tarinfo.name) ++ self.assertIs(filtered.type, tarinfo.type) ++ ++ def test_default_filter_warns(self): ++ """Ensure the default filter warns""" ++ with ArchiveMaker() as arc: ++ arc.add('foo') ++ with warnings_helper.check_warnings( ++ ('Python 3.14', DeprecationWarning)): ++ with self.check_context(arc.open(), None): ++ self.expect_file('foo') ++ ++ def test_change_default_filter_on_instance(self): ++ tar = tarfile.TarFile(tarname, 'r') ++ def strict_filter(tarinfo, path): ++ if tarinfo.name == 'ustar/regtype': ++ return tarinfo ++ else: ++ return None ++ tar.extraction_filter = strict_filter ++ with self.check_context(tar, None): ++ self.expect_file('ustar/regtype') ++ ++ def test_change_default_filter_on_class(self): ++ def strict_filter(tarinfo, path): ++ if tarinfo.name == 'ustar/regtype': ++ return tarinfo ++ else: ++ return None ++ tar = tarfile.TarFile(tarname, 'r') ++ with support.swap_attr(tarfile.TarFile, 'extraction_filter', ++ staticmethod(strict_filter)): ++ with self.check_context(tar, None): ++ self.expect_file('ustar/regtype') ++ ++ def test_change_default_filter_on_subclass(self): ++ class TarSubclass(tarfile.TarFile): ++ def extraction_filter(self, tarinfo, path): ++ if tarinfo.name == 'ustar/regtype': ++ return tarinfo ++ else: ++ return None ++ ++ tar = TarSubclass(tarname, 'r') ++ with self.check_context(tar, None): ++ self.expect_file('ustar/regtype') ++ ++ def test_change_default_filter_to_string(self): ++ tar = tarfile.TarFile(tarname, 'r') ++ tar.extraction_filter = 'data' ++ with self.check_context(tar, None): ++ self.expect_exception(TypeError) ++ ++ def test_custom_filter(self): ++ def custom_filter(tarinfo, path): ++ self.assertIs(path, self.destdir) ++ if tarinfo.name == 'move_this': ++ return tarinfo.replace(name='moved') ++ 
if tarinfo.name == 'ignore_this': ++ return None ++ return tarinfo ++ ++ with ArchiveMaker() as arc: ++ arc.add('move_this') ++ arc.add('ignore_this') ++ arc.add('keep') ++ with self.check_context(arc.open(), custom_filter): ++ self.expect_file('moved') ++ self.expect_file('keep') ++ ++ def test_bad_filter_name(self): ++ with ArchiveMaker() as arc: ++ arc.add('foo') ++ with self.check_context(arc.open(), 'bad filter name'): ++ self.expect_exception(ValueError) ++ ++ def test_stateful_filter(self): ++ # Stateful filters should be possible. ++ # (This doesn't really test tarfile. Rather, it demonstrates ++ # that third parties can implement a stateful filter.) ++ class StatefulFilter: ++ def __enter__(self): ++ self.num_files_processed = 0 ++ return self ++ ++ def __call__(self, tarinfo, path): ++ try: ++ tarinfo = tarfile.data_filter(tarinfo, path) ++ except tarfile.FilterError: ++ return None ++ self.num_files_processed += 1 ++ return tarinfo ++ ++ def __exit__(self, *exc_info): ++ self.done = True ++ ++ with ArchiveMaker() as arc: ++ arc.add('good') ++ arc.add('bad', symlink_to='/') ++ arc.add('good') ++ with StatefulFilter() as custom_filter: ++ with self.check_context(arc.open(), custom_filter): ++ self.expect_file('good') ++ self.assertEqual(custom_filter.num_files_processed, 2) ++ self.assertEqual(custom_filter.done, True) ++ ++ def test_errorlevel(self): ++ def extracterror_filter(tarinfo, path): ++ raise tarfile.ExtractError('failed with ExtractError') ++ def filtererror_filter(tarinfo, path): ++ raise tarfile.FilterError('failed with FilterError') ++ def oserror_filter(tarinfo, path): ++ raise OSError('failed with OSError') ++ def tarerror_filter(tarinfo, path): ++ raise tarfile.TarError('failed with base TarError') ++ def valueerror_filter(tarinfo, path): ++ raise ValueError('failed with ValueError') ++ ++ with ArchiveMaker() as arc: ++ arc.add('file') ++ ++ # If errorlevel is 0, errors affected by errorlevel are ignored ++ ++ with 
self.check_context(arc.open(errorlevel=0), extracterror_filter): ++ self.expect_file('file') ++ ++ with self.check_context(arc.open(errorlevel=0), filtererror_filter): ++ self.expect_file('file') ++ ++ with self.check_context(arc.open(errorlevel=0), oserror_filter): ++ self.expect_file('file') ++ ++ with self.check_context(arc.open(errorlevel=0), tarerror_filter): ++ self.expect_exception(tarfile.TarError) ++ ++ with self.check_context(arc.open(errorlevel=0), valueerror_filter): ++ self.expect_exception(ValueError) ++ ++ # If 1, all fatal errors are raised ++ ++ with self.check_context(arc.open(errorlevel=1), extracterror_filter): ++ self.expect_file('file') ++ ++ with self.check_context(arc.open(errorlevel=1), filtererror_filter): ++ self.expect_exception(tarfile.FilterError) ++ ++ with self.check_context(arc.open(errorlevel=1), oserror_filter): ++ self.expect_exception(OSError) ++ ++ with self.check_context(arc.open(errorlevel=1), tarerror_filter): ++ self.expect_exception(tarfile.TarError) ++ ++ with self.check_context(arc.open(errorlevel=1), valueerror_filter): ++ self.expect_exception(ValueError) ++ ++ # If 2, all non-fatal errors are raised as well. ++ ++ with self.check_context(arc.open(errorlevel=2), extracterror_filter): ++ self.expect_exception(tarfile.ExtractError) ++ ++ with self.check_context(arc.open(errorlevel=2), filtererror_filter): ++ self.expect_exception(tarfile.FilterError) ++ ++ with self.check_context(arc.open(errorlevel=2), oserror_filter): ++ self.expect_exception(OSError) ++ ++ with self.check_context(arc.open(errorlevel=2), tarerror_filter): ++ self.expect_exception(tarfile.TarError) ++ ++ with self.check_context(arc.open(errorlevel=2), valueerror_filter): ++ self.expect_exception(ValueError) ++ ++ # We only handle ExtractionError, FilterError & OSError specially. 
++ ++ with self.check_context(arc.open(errorlevel='boo!'), filtererror_filter): ++ self.expect_exception(TypeError) # errorlevel is not int ++ ++ + def setUpModule(): + os_helper.unlink(TEMPDIR) + os.makedirs(TEMPDIR) +--- /dev/null ++++ b/Misc/NEWS.d/next/Library/2023-03-23-15-24-38.gh-issue-102953.YR4KaK.rst +@@ -0,0 +1,4 @@ ++The extraction methods in :mod:`tarfile`, and :func:`shutil.unpack_archive`, ++have a new a *filter* argument that allows limiting tar features than may be ++surprising or dangerous, such as creating files outside the destination ++directory. See :ref:`tarfile-extraction-filter` for details. diff --git a/python310.changes b/python310.changes index 21c207e..bae45db 100644 --- a/python310.changes +++ b/python310.changes @@ -1,3 +1,10 @@ +------------------------------------------------------------------- +Thu Apr 27 21:19:52 UTC 2023 - Matej Cepl + +- Add CVE-2007-4559-filter-tarfile_extractall.patch to fix + CVE-2007-4559 (bsc#1203750) by adding the filter for + tarfile.extractall (PEP 706). + ------------------------------------------------------------------- Mon Mar 13 08:39:53 UTC 2023 - Matej Cepl diff --git a/python310.spec b/python310.spec index 6b565a6..045fccd 100644 --- a/python310.spec +++ b/python310.spec @@ -173,6 +173,9 @@ Patch37: CVE-2023-24329-blank-URL-bypass.patch # PATCH-FIX-UPSTREAM invalid-json.patch gh#python/cpython#102582 mcepl@suse.com # We require valid JSON in documentation Patch38: invalid-json.patch +# PATCH-FIX-UPSTREAM CVE-2007-4559-filter-tarfile_extractall.patch bsc#1203750 mcepl@suse.com +# PEP 706 – Filter for tarfile.extractall +Patch39: CVE-2007-4559-filter-tarfile_extractall.patch BuildRequires: autoconf-archive BuildRequires: automake BuildRequires: fdupes @@ -447,6 +450,7 @@ other applications. 
%patch36 -p1 %patch37 -p1 %patch38 -p1 +%patch39 -p1 # drop Autoconf version requirement sed -i 's/^AC_PREREQ/dnl AC_PREREQ/' configure.ac From e8a35797e65a8509ac6e3ec9541361fe2cc39b685bdd0baca855218d46509688 Mon Sep 17 00:00:00 2001 From: Matej Cepl Date: Thu, 27 Apr 2023 21:53:08 +0000 Subject: [PATCH 4/8] =?UTF-8?q?-=20Update=20to=203.10.11:=20=20=20-=20Core?= =?UTF-8?q?=20and=20Builtins=20=20=20=20=20-=20gh-102416:=20Do=20not=20mem?= =?UTF-8?q?oize=20incorrectly=20automatically=20=20=20=20=20=20=20generate?= =?UTF-8?q?d=20loop=20rules=20in=20the=20parser.=20Patch=20by=20Pablo=20Ga?= =?UTF-8?q?lindo.=20=20=20=20=20-=20gh-102356:=20Fix=20a=20bug=20that=20ca?= =?UTF-8?q?used=20a=20crash=20when=20deallocating=20=20=20=20=20=20=20deep?= =?UTF-8?q?ly=20nested=20filter=20objects.=20Patch=20by=20Marta=20G=C3=B3m?= =?UTF-8?q?ez=20Mac=C3=ADas.=20=20=20=20=20-=20gh-102397:=20Fix=20segfault?= =?UTF-8?q?=20from=20race=20condition=20in=20signal=20=20=20=20=20=20=20ha?= =?UTF-8?q?ndling=20during=20garbage=20collection.=20Patch=20by=20Kumar=20?= =?UTF-8?q?Aditya.=20=20=20=20=20-=20gh-102126:=20Fix=20deadlock=20at=20sh?= =?UTF-8?q?utdown=20when=20clearing=20thread=20=20=20=20=20=20=20states=20?= =?UTF-8?q?if=20any=20finalizer=20tries=20to=20acquire=20the=20runtime=20h?= =?UTF-8?q?ead=20=20=20=20=20=20=20lock.=20Patch=20by=20Kumar=20Aditya.=20?= =?UTF-8?q?=20=20=20=20-=20gh-102027:=20Fix=20SSE2=20and=20SSE3=20detectio?= =?UTF-8?q?n=20in=20=5Fblake2=20internal=20=20=20=20=20=20=20module.=20Pat?= =?UTF-8?q?ch=20by=20Max=20Bachmann.=20=20=20=20=20-=20gh-101967:=20Fix=20?= =?UTF-8?q?possible=20segfault=20in=20=20=20=20=20=20=20positional=5Fonly?= =?UTF-8?q?=5Fpassed=5Fas=5Fkeyword=20function,=20when=20new=20list=20=20?= =?UTF-8?q?=20=20=20=20=20created.=20=20=20=20=20-=20gh-101765:=20Fix=20Sy?= =?UTF-8?q?stemError=20/=20segmentation=20fault=20in=20iter=20=20=20=20=20?= =?UTF-8?q?=20=20=5F=5Freduce=5F=5F=20when=20internal=20access=20of=20buil?= 
=?UTF-8?q?tins.=5F=5Fdict=5F=5F=20keys=20=20=20=20=20=20=20mutates=20the?= =?UTF-8?q?=20iter=20object.=20=20=20-=20Library=20=20=20=20=20-=20gh-1029?= =?UTF-8?q?47:=20Improve=20traceback=20when=20dataclasses.fields()=20is=20?= =?UTF-8?q?=20=20=20=20=20=20called=20on=20a=20non-dataclass.=20Patch=20by?= =?UTF-8?q?=20Alex=20Waygood=20=20=20=20=20-=20gh-101979:=20Fix=20a=20bug?= =?UTF-8?q?=20where=20parentheses=20in=20the=20metavar=20=20=20=20=20=20?= =?UTF-8?q?=20argument=20to=20argparse.ArgumentParser.add=5Fargument()=20w?= =?UTF-8?q?ere=20=20=20=20=20=20=20dropped.=20Patch=20by=20Yeojin=20Kim.?= =?UTF-8?q?=20=20=20=20=20-=20gh-102179:=20Fix=20os.dup2()=20error=20messa?= =?UTF-8?q?ge=20for=20negative=20fds.=20=20=20=20=20-=20gh-101961:=20For?= =?UTF-8?q?=20the=20binary=20mode,=20fileinput.hookcompressed()=20=20=20?= =?UTF-8?q?=20=20=20=20doesn=E2=80=99t=20set=20the=20encoding=20value=20ev?= =?UTF-8?q?en=20if=20the=20value=20is=20=20=20=20=20=20=20None.=20Patch=20?= =?UTF-8?q?by=20Gihwan=20Kim.=20=20=20=20=20-=20gh-101936:=20The=20default?= =?UTF-8?q?=20value=20of=20fp=20becomes=20io.BytesIO?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OBS-URL: https://build.opensuse.org/package/show/devel:languages:python:Factory/python310?expand=0&rev=85 --- CVE-2007-4559-filter-tarfile_extractall.patch | 30 +++---- Python-3.10.10.tar.xz | 3 - Python-3.10.10.tar.xz.asc | 16 ---- Python-3.10.11.tar.xz | 3 + Python-3.10.11.tar.xz.asc | 16 ++++ invalid-json.patch | 44 ---------- python310.changes | 85 ++++++++++++++++++- python310.spec | 8 +- 8 files changed, 120 insertions(+), 85 deletions(-) delete mode 100644 Python-3.10.10.tar.xz delete mode 100644 Python-3.10.10.tar.xz.asc create mode 100644 Python-3.10.11.tar.xz create mode 100644 Python-3.10.11.tar.xz.asc delete mode 100644 invalid-json.patch diff --git a/CVE-2007-4559-filter-tarfile_extractall.patch b/CVE-2007-4559-filter-tarfile_extractall.patch index 6cbf07f..11e1e3e 100644 --- 
a/CVE-2007-4559-filter-tarfile_extractall.patch +++ b/CVE-2007-4559-filter-tarfile_extractall.patch @@ -1513,7 +1513,7 @@ Content-Transfer-Encoding: 8bit with open(os.path.join(TEMPDIR, "ustar/regtype"), "rb") as fobj: data = fobj.read() -@@ -454,7 +460,7 @@ class CommonReadTest(ReadTest): +@@ -455,7 +461,7 @@ class CommonReadTest(ReadTest): t = tar.next() with self.assertRaisesRegex(tarfile.ReadError, "unexpected end of data"): @@ -1522,7 +1522,7 @@ Content-Transfer-Encoding: 8bit with self.assertRaisesRegex(tarfile.ReadError, "unexpected end of data"): tar.extractfile(t).read() -@@ -609,16 +615,16 @@ class MiscReadTestBase(CommonReadTest): +@@ -610,16 +616,16 @@ class MiscReadTestBase(CommonReadTest): def test_extract_hardlink(self): # Test hardlink extraction (e.g. bug #857297). with tarfile.open(tarname, errorlevel=1, encoding="iso8859-1") as tar: @@ -1542,7 +1542,7 @@ Content-Transfer-Encoding: 8bit self.addCleanup(os_helper.unlink, os.path.join(TEMPDIR, "ustar/symtype")) with open(os.path.join(TEMPDIR, "ustar/symtype"), "rb") as f: data = f.read() -@@ -632,13 +638,14 @@ class MiscReadTestBase(CommonReadTest): +@@ -633,13 +639,14 @@ class MiscReadTestBase(CommonReadTest): os.mkdir(DIR) try: directories = [t for t in tar if t.isdir()] @@ -1559,7 +1559,7 @@ Content-Transfer-Encoding: 8bit def format_mtime(mtime): if isinstance(mtime, float): return "{} ({})".format(mtime, mtime.hex()) -@@ -661,7 +668,7 @@ class MiscReadTestBase(CommonReadTest): +@@ -662,7 +669,7 @@ class MiscReadTestBase(CommonReadTest): try: with tarfile.open(tarname, encoding="iso8859-1") as tar: tarinfo = tar.getmember(dirtype) @@ -1568,7 +1568,7 @@ Content-Transfer-Encoding: 8bit extracted = os.path.join(DIR, dirtype) self.assertEqual(os.path.getmtime(extracted), tarinfo.mtime) if sys.platform != "win32": -@@ -674,7 +681,7 @@ class MiscReadTestBase(CommonReadTest): +@@ -675,7 +682,7 @@ class MiscReadTestBase(CommonReadTest): with os_helper.temp_dir(DIR), \ tarfile.open(tarname, 
encoding="iso8859-1") as tar: directories = [t for t in tar if t.isdir()] @@ -1577,7 +1577,7 @@ Content-Transfer-Encoding: 8bit for tarinfo in directories: path = DIR / tarinfo.name self.assertEqual(os.path.getmtime(path), tarinfo.mtime) -@@ -685,7 +692,7 @@ class MiscReadTestBase(CommonReadTest): +@@ -686,7 +693,7 @@ class MiscReadTestBase(CommonReadTest): with os_helper.temp_dir(DIR), \ tarfile.open(tarname, encoding="iso8859-1") as tar: tarinfo = tar.getmember(dirtype) @@ -1586,7 +1586,7 @@ Content-Transfer-Encoding: 8bit extracted = DIR / dirtype self.assertEqual(os.path.getmtime(extracted), tarinfo.mtime) -@@ -1041,7 +1048,7 @@ class GNUReadTest(LongnameTest, ReadTest +@@ -1042,7 +1049,7 @@ class GNUReadTest(LongnameTest, ReadTest # an all platforms, and after that a test that will work only on # platforms/filesystems that prove to support sparse files. def _test_sparse_file(self, name): @@ -1595,7 +1595,7 @@ Content-Transfer-Encoding: 8bit filename = os.path.join(TEMPDIR, name) with open(filename, "rb") as fobj: data = fobj.read() -@@ -1408,7 +1415,8 @@ class WriteTest(WriteTestBase, unittest. +@@ -1409,7 +1416,8 @@ class WriteTest(WriteTestBase, unittest. 
with tarfile.open(temparchive, errorlevel=2) as tar: # this should not raise OSError: [Errno 17] File exists try: @@ -1605,7 +1605,7 @@ Content-Transfer-Encoding: 8bit except OSError: self.fail("extractall failed with symlinked files") finally: -@@ -2440,6 +2448,15 @@ class CommandLineTest(unittest.TestCase) +@@ -2441,6 +2449,15 @@ class CommandLineTest(unittest.TestCase) for tardata in files: tf.add(tardata, arcname=os.path.basename(tardata)) @@ -1621,7 +1621,7 @@ Content-Transfer-Encoding: 8bit def test_bad_use(self): rc, out, err = self.tarfilecmd_failure() self.assertEqual(out, b'') -@@ -2596,6 +2613,25 @@ class CommandLineTest(unittest.TestCase) +@@ -2597,6 +2614,25 @@ class CommandLineTest(unittest.TestCase) finally: os_helper.rmtree(tarextdir) @@ -1647,7 +1647,7 @@ Content-Transfer-Encoding: 8bit def test_extract_command_different_directory(self): self.make_simple_tarfile(tmpname) try: -@@ -2679,7 +2715,7 @@ class LinkEmulationTest(ReadTest, unitte +@@ -2680,7 +2716,7 @@ class LinkEmulationTest(ReadTest, unitte # symbolic or hard links tarfile tries to extract these types of members # as the regular files they point to. 
def _test_link_extraction(self, name): @@ -1656,7 +1656,7 @@ Content-Transfer-Encoding: 8bit with open(os.path.join(TEMPDIR, name), "rb") as f: data = f.read() self.assertEqual(sha256sum(data), sha256_regtype) -@@ -2811,8 +2847,10 @@ class NumericOwnerTest(unittest.TestCase +@@ -2812,8 +2848,10 @@ class NumericOwnerTest(unittest.TestCase mock_chown): with self._setup_test(mock_geteuid) as (tarfl, filename_1, _, filename_2): @@ -1669,7 +1669,7 @@ Content-Transfer-Encoding: 8bit # convert to filesystem paths f_filename_1 = os.path.join(TEMPDIR, filename_1) -@@ -2830,7 +2868,8 @@ class NumericOwnerTest(unittest.TestCase +@@ -2831,7 +2869,8 @@ class NumericOwnerTest(unittest.TestCase mock_chown): with self._setup_test(mock_geteuid) as (tarfl, filename_1, dirname_1, filename_2): @@ -1679,7 +1679,7 @@ Content-Transfer-Encoding: 8bit # convert to filesystem paths f_filename_1 = os.path.join(TEMPDIR, filename_1) -@@ -2855,7 +2894,8 @@ class NumericOwnerTest(unittest.TestCase +@@ -2856,7 +2895,8 @@ class NumericOwnerTest(unittest.TestCase def test_extract_without_numeric_owner(self, mock_geteuid, mock_chmod, mock_chown): with self._setup_test(mock_geteuid) as (tarfl, filename_1, _, _): @@ -1689,7 +1689,7 @@ Content-Transfer-Encoding: 8bit # convert to filesystem paths f_filename_1 = os.path.join(TEMPDIR, filename_1) -@@ -2869,6 +2909,890 @@ class NumericOwnerTest(unittest.TestCase +@@ -2870,6 +2910,890 @@ class NumericOwnerTest(unittest.TestCase tarfl.extract, filename_1, TEMPDIR, False, True) diff --git a/Python-3.10.10.tar.xz b/Python-3.10.10.tar.xz deleted file mode 100644 index bb9a380..0000000 --- a/Python-3.10.10.tar.xz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0419e9085bf51b7a672009b3f50dbf1859acdf18ba725d0ec19aa5c8503f0ea3 -size 19627028 diff --git a/Python-3.10.10.tar.xz.asc b/Python-3.10.10.tar.xz.asc deleted file mode 100644 index 0bd900c..0000000 --- a/Python-3.10.10.tar.xz.asc +++ /dev/null @@ -1,16 +0,0 @@ 
------BEGIN PGP SIGNATURE----- - -iQIzBAABCAAdFiEEz9yiRbEEPPKl+Xhl/+h0BBaL2EcFAmPiQfoACgkQ/+h0BBaL -2EcB8hAAmFEIHZopWn+A4tDxd001eViLrOmjygqPn1doAQ3dAgyESt4Z/HDtN6rB -+6z5rsx+qdcP9kfb/+3V0gKBh/3V4bEpnD+EQtpONWhKbCcqOfq1ok1V+uNH8uOF -ixxWkY+MWJzPPhlQiW/sm9FP6CdnaeriKf1JMCUt9aiganpo2CQv5gPE/0PlSGO5 -BEKjCcyHHPIEAxC6jLm/+33PSzbhGq+YstK/1tcqUrJfkifipovmSZeFyzULPonK -MATPyliOupo3ixPs3LoJUjNpGD4fH+p2Lg1ZOgYv7vGmeLcadNVanRlqRg76m+ke -zvp/MAqQg4Fr75m2+mfDG/Md+PrSMvz71i55a1Q1NcYdW6QR62m08FCZg7/+t5pD -H91ywhMqTv1nySsEZGfuETPTs7gMCtyBeDjIhXBMcfbhGivd7r5zZJ8MUD/FSASC -fQ/vEVeHWQeWpfFgxLfLmRnkjIS7JCGlM9z6zsZqbppWqeA94sBIf4ka2JG2DnGP -1Pvn+ragiHt1++i2yVhmoAB0t44/SgXacCce5AT3yB71brT21cOXQs0Gq80MwVPI -nVbzdOtuGNGcvEi2fbO2IEcgegSHaOHo9PvYTRropSz3V7A95x8mA1xjZf2y77H5 -/mfJ4687YIItCIcNE5Zzj6GspWlWP31OvRFIIefnKYf2JuU+qt8= -=B3xo ------END PGP SIGNATURE----- diff --git a/Python-3.10.11.tar.xz b/Python-3.10.11.tar.xz new file mode 100644 index 0000000..c07774c --- /dev/null +++ b/Python-3.10.11.tar.xz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c3bc3048303721c904a03eb8326b631e921f11cc3be2988456a42f115daf04c +size 19640792 diff --git a/Python-3.10.11.tar.xz.asc b/Python-3.10.11.tar.xz.asc new file mode 100644 index 0000000..d3012ea --- /dev/null +++ b/Python-3.10.11.tar.xz.asc @@ -0,0 +1,16 @@ +-----BEGIN PGP SIGNATURE----- + +iQIzBAABCAAdFiEEz9yiRbEEPPKl+Xhl/+h0BBaL2EcFAmQsoHwACgkQ/+h0BBaL +2Efs9BAAheWCnenhHhXi0m0DgyB6eEVH8xmZrBqA1WMgGQOqWVZmEnJdc0IXyFWQ +1A4C59d6rEvu8jvXTLvsqGEmehofKqq0bXB1tMUBn9CwSiELOm19WvCHc/Htwo2U +DsvAsXXO7vBkKBT9+CQ4BmkGzPUTrBLZRHsQX/M/tpx81jnQVunoMojyPK19sf1I +C+YnxE0cQVL9+INd0WtbVByJIwzBBDCLqTQWL//73CqFs8IO6PsjFXqmlVqVfpmz +aEXuGeRkRgy7kZaDdLcnhBq7a6vgaecfgfRUGyBgwgakfrHA5SOdsWdAonjA676J +6DHmFIf82R4wo7Vu0WAfFAq9jJfVxXN7n5Y/N/cxzqjhrfO341vCflN1c16VAFnu +ok7n50poENO/tMRerOEj5baL+mToi8Wh+cYHY6tNpaM2iP+bSyjoS+Ff225xhdNV +fqGuyaH7cPgGgoXECrSb7iTWYZxJxQV9S8OlR2gX8IlA+XrbGWQl0PvmErhO3FqN 
+W88gBmYrzrSl6+dzF62yn2gKFc2K5k6NmCcySFfjY87G7RhEf1ixPeDyMSvbKlVw +sJWeoXuCNPL+PQV+V76UAbn3bEvH87fyImxoYHNAIbHh8JaTvO5vIKDrrsw92siQ +6Pud3Oy6DcD5gWX2KcaAjQjruh18dljsbYN+2KVFfQHM8SYeXns= +=enP4 +-----END PGP SIGNATURE----- diff --git a/invalid-json.patch b/invalid-json.patch deleted file mode 100644 index d3a25ba..0000000 --- a/invalid-json.patch +++ /dev/null @@ -1,44 +0,0 @@ ---- - Doc/howto/logging-cookbook.rst | 24 ++++++++++++++---------- - 1 file changed, 14 insertions(+), 10 deletions(-) - ---- a/Doc/howto/logging-cookbook.rst -+++ b/Doc/howto/logging-cookbook.rst -@@ -340,10 +340,12 @@ adding a ``filters`` section parallel to - - .. code-block:: json - -- "filters": { -- "warnings_and_below": { -- "()" : "__main__.filter_maker", -- "level": "WARNING" -+ { -+ "filters": { -+ "warnings_and_below": { -+ "()" : "__main__.filter_maker", -+ "level": "WARNING" -+ } - } - } - -@@ -351,12 +353,14 @@ and changing the section on the ``stdout - - .. code-block:: json - -- "stdout": { -- "class": "logging.StreamHandler", -- "level": "INFO", -- "formatter": "simple", -- "stream": "ext://sys.stdout", -- "filters": ["warnings_and_below"] -+ { -+ "stdout": { -+ "class": "logging.StreamHandler", -+ "level": "INFO", -+ "formatter": "simple", -+ "stream": "ext://sys.stdout", -+ "filters": ["warnings_and_below"] -+ } - } - - A filter is just a function, so we can define the ``filter_maker`` (a factory diff --git a/python310.changes b/python310.changes index bae45db..1ff6218 100644 --- a/python310.changes +++ b/python310.changes @@ -1,10 +1,93 @@ ------------------------------------------------------------------- -Thu Apr 27 21:19:52 UTC 2023 - Matej Cepl +Thu Apr 27 21:23:19 UTC 2023 - Matej Cepl - Add CVE-2007-4559-filter-tarfile_extractall.patch to fix CVE-2007-4559 (bsc#1203750) by adding the filter for tarfile.extractall (PEP 706). 
+------------------------------------------------------------------- +Thu Apr 27 21:19:52 UTC 2023 - Matej Cepl + +- Update to 3.10.11: + - Core and Builtins + - gh-102416: Do not memoize incorrectly automatically + generated loop rules in the parser. Patch by Pablo Galindo. + - gh-102356: Fix a bug that caused a crash when deallocating + deeply nested filter objects. Patch by Marta Gómez Macías. + - gh-102397: Fix segfault from race condition in signal + handling during garbage collection. Patch by Kumar Aditya. + - gh-102126: Fix deadlock at shutdown when clearing thread + states if any finalizer tries to acquire the runtime head + lock. Patch by Kumar Aditya. + - gh-102027: Fix SSE2 and SSE3 detection in _blake2 internal + module. Patch by Max Bachmann. + - gh-101967: Fix possible segfault in + positional_only_passed_as_keyword function, when new list + created. + - gh-101765: Fix SystemError / segmentation fault in iter + __reduce__ when internal access of builtins.__dict__ keys + mutates the iter object. + - Library + - gh-102947: Improve traceback when dataclasses.fields() is + called on a non-dataclass. Patch by Alex Waygood + - gh-101979: Fix a bug where parentheses in the metavar + argument to argparse.ArgumentParser.add_argument() were + dropped. Patch by Yeojin Kim. + - gh-102179: Fix os.dup2() error message for negative fds. + - gh-101961: For the binary mode, fileinput.hookcompressed() + doesn’t set the encoding value even if the value is + None. Patch by Gihwan Kim. + - gh-101936: The default value of fp becomes io.BytesIO + if HTTPError is initialized without a designated fp + parameter. Patch by Long Vo. + - gh-101566: In zipfile, apply fix for extractall on the + underlying zipfile after being wrapped in Path. 
+ - gh-101997: Upgrade pip wheel bundled with ensurepip (pip + 23.0.1) + - gh-101892: Callable iterators no longer raise SystemError + when the callable object exhausts the iterator but forgets + to either return a sentinel value or raise StopIteration. + - gh-97786: Fix potential undefined behaviour in corner cases + of floating-point-to-time conversions. + - gh-101517: Fixed bug where bdb looks up the source line + with linecache with a lineno=None, which causes it to fail + with an unhandled exception. + - gh-101673: Fix a pdb bug where ll clears the changes to + local variables. + - gh-96931: Fix incorrect results from + ssl.SSLSocket.shared_ciphers() + - gh-88233: Correctly preserve “extra” fields in zipfile + regardless of their ordering relative to a zip64 “extra.” + - gh-95495: When built against OpenSSL 3.0, the ssl module + had a bug where it reported unauthenticated EOFs (i.e. + without close_notify) as a clean TLS-level EOF. It now + raises SSLEOFError, matching the behavior in previous + versions of OpenSSL. The options attribute on SSLContext + also no longer includes OP_IGNORE_UNEXPECTED_EOF by + default. This option may be set to specify the previous + OpenSSL 3.0 behavior. + - gh-94440: Fix a concurrent.futures.process bug where + ProcessPoolExecutor shutdown could hang after a future has + been quickly submitted and canceled. + - Documentation + - gh-103112: Add docstring to http.client.HTTPResponse.read() + to fix pydoc output. + - gh-85417: Update cmath documentation to clarify behaviour + on branch cuts. + - gh-97725: Fix asyncio.Task.print_stack() description for + file=None. Patch by Oleg Iarygin. + - Tests + - gh-102980: Improve test coverage on pdb. + - gh-102537: Adjust the error handling strategy in + test_zoneinfo.TzPathTest.python_tzpath_context. Patch by + Paul Ganssle. + - gh-101377: Improved test_locale_calendar_formatweekday of + calendar. + - Build + - gh-102711: Fix -Wstrict-prototypes compiler warnings. 
+- Removed upstreamed: + - invalid-json.patch + ------------------------------------------------------------------- Mon Mar 13 08:39:53 UTC 2023 - Matej Cepl diff --git a/python310.spec b/python310.spec index 045fccd..238bbf6 100644 --- a/python310.spec +++ b/python310.spec @@ -103,7 +103,7 @@ Obsoletes: python39%{?1:-%{1}} %define dynlib() %{sitedir}/lib-dynload/%{1}.cpython-%{abi_tag}-%{archname}-%{_os}%{?_gnu}%{?armsuffix}.so %bcond_without profileopt Name: %{python_pkg_name}%{psuffix} -Version: 3.10.10 +Version: 3.10.11 Release: 0 Summary: Python 3 Interpreter License: Python-2.0 @@ -170,12 +170,9 @@ Patch36: support-expat-CVE-2022-25236-patched.patch # blocklist bypass via the urllib.parse component when supplying # a URL that starts with blank characters Patch37: CVE-2023-24329-blank-URL-bypass.patch -# PATCH-FIX-UPSTREAM invalid-json.patch gh#python/cpython#102582 mcepl@suse.com -# We require valid JSON in documentation -Patch38: invalid-json.patch # PATCH-FIX-UPSTREAM CVE-2007-4559-filter-tarfile_extractall.patch bsc#1203750 mcepl@suse.com # PEP 706 – Filter for tarfile.extractall -Patch39: CVE-2007-4559-filter-tarfile_extractall.patch +Patch38: CVE-2007-4559-filter-tarfile_extractall.patch BuildRequires: autoconf-archive BuildRequires: automake BuildRequires: fdupes @@ -450,7 +447,6 @@ other applications. 
%patch36 -p1 %patch37 -p1 %patch38 -p1 -%patch39 -p1 # drop Autoconf version requirement sed -i 's/^AC_PREREQ/dnl AC_PREREQ/' configure.ac From d6d44792969c0f5be3e78354aeb01084c1dd39fc9049793df17e6d654410fcf2 Mon Sep 17 00:00:00 2001 From: Matej Cepl Date: Thu, 27 Apr 2023 22:49:00 +0000 Subject: [PATCH 5/8] There is no wasi in 3.10 OBS-URL: https://build.opensuse.org/package/show/devel:languages:python:Factory/python310?expand=0&rev=86 --- CVE-2007-4559-filter-tarfile_extractall.patch | 23 +++---------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/CVE-2007-4559-filter-tarfile_extractall.patch b/CVE-2007-4559-filter-tarfile_extractall.patch index 11e1e3e..9ce6bb4 100644 --- a/CVE-2007-4559-filter-tarfile_extractall.patch +++ b/CVE-2007-4559-filter-tarfile_extractall.patch @@ -13,9 +13,9 @@ Content-Transfer-Encoding: 8bit Lib/shutil.py | 17 Lib/tarfile.py | 361 +++ Lib/test/test_shutil.py | 41 - Lib/test/test_tarfile.py | 964 +++++++++- + Lib/test/test_tarfile.py | 947 +++++++++- Misc/NEWS.d/next/Library/2023-03-23-15-24-38.gh-issue-102953.YR4KaK.rst | 4 - 7 files changed, 1770 insertions(+), 98 deletions(-) + 7 files changed, 1753 insertions(+), 98 deletions(-) --- a/Doc/library/shutil.rst +++ b/Doc/library/shutil.rst @@ -1689,7 +1689,7 @@ Content-Transfer-Encoding: 8bit # convert to filesystem paths f_filename_1 = os.path.join(TEMPDIR, filename_1) -@@ -2870,6 +2910,890 @@ class NumericOwnerTest(unittest.TestCase +@@ -2870,6 +2910,873 @@ class NumericOwnerTest(unittest.TestCase tarfl.extract, filename_1, TEMPDIR, False, True) @@ -2018,18 +2018,6 @@ Content-Transfer-Encoding: 8bit + bio = io.BytesIO(self.contents) + return tarfile.open(fileobj=bio, **kwargs) + -+# Under WASI, `os_helper.can_symlink` is False to make -+# `skip_unless_symlink` skip symlink tests. " -+# But in the following tests we use can_symlink to *determine* which -+# behavior is expected. -+# Like other symlink tests, skip these on WASI for now. 
-+if support.is_wasi: -+ def symlink_test(f): -+ return unittest.skip("WASI: Skip symlink test for now")(f) -+else: -+ def symlink_test(f): -+ return f -+ + +class TestExtractionFilters(unittest.TestCase): + @@ -2140,7 +2128,6 @@ Content-Transfer-Encoding: 8bit + tarfile.AbsolutePathError, + """['"].*escaped.evil['"] has an absolute path""") + -+ @symlink_test + def test_parent_symlink(self): + # Test interplaying symlinks + # Inspired by 'dirsymlink2a' in jwilk/traversal-archives @@ -2184,7 +2171,6 @@ Content-Transfer-Encoding: 8bit + with self.check_context(arc.open(), 'data'): + self.expect_file('parent/evil') + -+ @symlink_test + def test_parent_symlink2(self): + # Test interplaying symlinks + # Inspired by 'dirsymlink2b' in jwilk/traversal-archives @@ -2219,7 +2205,6 @@ Content-Transfer-Encoding: 8bit + """'current/parent' would link to ['"].*['"], """ + + "which is outside the destination") + -+ @symlink_test + def test_absolute_symlink(self): + # Test symlink to an absolute path + # Inspired by 'dirsymlink' in jwilk/traversal-archives @@ -2249,7 +2234,6 @@ Content-Transfer-Encoding: 8bit + tarfile.AbsoluteLinkError, + "'parent' is a symlink to an absolute path") + -+ @symlink_test + def test_sly_relative0(self): + # Inspired by 'relative0' in jwilk/traversal-archives + with ArchiveMaker() as arc: @@ -2283,7 +2267,6 @@ Content-Transfer-Encoding: 8bit + + "'.*moo', which is outside " + + "the destination") + -+ @symlink_test + def test_sly_relative2(self): + # Inspired by 'relative2' in jwilk/traversal-archives + with ArchiveMaker() as arc: From 6a2f407ebc40f7ab42b71900726710515721db8bc6bc45bfd9f51f4bb8e98182 Mon Sep 17 00:00:00 2001 From: Matej Cepl Date: Thu, 27 Apr 2023 23:43:26 +0000 Subject: [PATCH 6/8] We can always chmod OBS-URL: https://build.opensuse.org/package/show/devel:languages:python:Factory/python310?expand=0&rev=87 --- CVE-2007-4559-filter-tarfile_extractall.patch | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/CVE-2007-4559-filter-tarfile_extractall.patch b/CVE-2007-4559-filter-tarfile_extractall.patch index 9ce6bb4..ab47bf9 100644 --- a/CVE-2007-4559-filter-tarfile_extractall.patch +++ b/CVE-2007-4559-filter-tarfile_extractall.patch @@ -2066,7 +2066,7 @@ Content-Transfer-Encoding: 8bit + path = pathlib.Path(os.path.normpath(self.destdir / name)) + self.assertIn(path, self.expected_paths) + self.expected_paths.remove(path) -+ if mode is not None and os_helper.can_chmod(): ++ if mode is not None: + got = stat.filemode(stat.S_IMODE(path.stat().st_mode)) + self.assertEqual(got, mode) + if type is None and isinstance(name, str) and name.endswith('/'): From 1ab2e0976bbe8ae513858f3a641f9e85d316da1961927201e01f1305e04165ae Mon Sep 17 00:00:00 2001 From: Matej Cepl Date: Sun, 30 Apr 2023 18:19:12 +0000 Subject: [PATCH 7/8] Why in the world we download from HTTP? OBS-URL: https://build.opensuse.org/package/show/devel:languages:python:Factory/python310?expand=0&rev=88 --- python310.changes | 5 +++++ python310.spec | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/python310.changes b/python310.changes index 1ff6218..d14b79a 100644 --- a/python310.changes +++ b/python310.changes @@ -1,3 +1,8 @@ +------------------------------------------------------------------- +Sun Apr 30 18:19:01 UTC 2023 - Matej Cepl + +- Download the source tarball and its signature over HTTPS instead of HTTP.
+ ------------------------------------------------------------------- Thu Apr 27 21:23:19 UTC 2023 - Matej Cepl diff --git a/python310.spec b/python310.spec index 238bbf6..c9cc36a 100644 --- a/python310.spec +++ b/python310.spec @@ -108,8 +108,8 @@ Release: 0 Summary: Python 3 Interpreter License: Python-2.0 URL: https://www.python.org/ -Source0: http://www.python.org/ftp/python/%{folderversion}/%{tarname}.tar.xz -Source1: http://www.python.org/ftp/python/%{folderversion}/%{tarname}.tar.xz.asc +Source0: https://www.python.org/ftp/python/%{folderversion}/%{tarname}.tar.xz +Source1: https://www.python.org/ftp/python/%{folderversion}/%{tarname}.tar.xz.asc Source2: baselibs.conf Source3: README.SUSE Source7: macros.python3 From 54a90c01cb733279022464757bef64b32623abe0cd88c851245471ab1fac89b7 Mon Sep 17 00:00:00 2001 From: Matej Cepl Date: Wed, 3 May 2023 14:07:47 +0000 Subject: [PATCH 8/8] Adjust CVE-2007-4559-filter-tarfile_extractall.patch. OBS-URL: https://build.opensuse.org/package/show/devel:languages:python:Factory/python310?expand=0&rev=89 --- CVE-2007-4559-filter-tarfile_extractall.patch | 376 +++++++++--------- 1 file changed, 199 insertions(+), 177 deletions(-) diff --git a/CVE-2007-4559-filter-tarfile_extractall.patch b/CVE-2007-4559-filter-tarfile_extractall.patch index ab47bf9..f0c85bf 100644 --- a/CVE-2007-4559-filter-tarfile_extractall.patch +++ b/CVE-2007-4559-filter-tarfile_extractall.patch @@ -1,25 +1,8 @@ -From cde089c808a2c21dd311905ba7f1b7e1004c0ada Mon Sep 17 00:00:00 2001 -From: Petr Viktorin -Date: Tue, 31 Jan 2023 14:40:52 +0100 -Subject: [PATCH 01/15] =?UTF-8?q?Implement=20PEP=20706=20=E2=80=93=20Filte?= - =?UTF-8?q?r=20for=20tarfile.extractall?= -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - ---- - Doc/library/shutil.rst | 24 - Doc/library/tarfile.rst | 457 ++++ - Lib/shutil.py | 17 - Lib/tarfile.py | 361 +++ - Lib/test/test_shutil.py | 41 - Lib/test/test_tarfile.py | 947 +++++++++- - 
Misc/NEWS.d/next/Library/2023-03-23-15-24-38.gh-issue-102953.YR4KaK.rst | 4 - 7 files changed, 1753 insertions(+), 98 deletions(-) - +diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst +index 311aae414ae..3864e03898d 100644 --- a/Doc/library/shutil.rst +++ b/Doc/library/shutil.rst -@@ -620,7 +620,7 @@ provided. They rely on the :mod:`zipfil +@@ -620,7 +620,7 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules. Remove the archive format *name* from the list of supported formats. @@ -28,12 +11,13 @@ Content-Transfer-Encoding: 8bit Unpack an archive. *filename* is the full path of the archive. -@@ -634,6 +634,14 @@ provided. They rely on the :mod:`zipfil +@@ -634,6 +634,15 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules. registered for that extension. In case none is found, a :exc:`ValueError` is raised. -+ The keyword-only *filter* argument is passed to the underlying unpacking -+ function. For zip files, *filter* is not accepted. ++ The keyword-only *filter* argument, which was added in Python 3.11.4, ++ is passed to the underlying unpacking function. ++ For zip files, *filter* is not accepted. + For tar files, it is recommended to set it to ``'data'``, + unless using features specific to tar and UNIX-like filesystems. + (See :ref:`tarfile-extraction-filter` for details.) @@ -43,26 +27,27 @@ Content-Transfer-Encoding: 8bit .. audit-event:: shutil.unpack_archive filename,extract_dir,format shutil.unpack_archive .. warning:: -@@ -646,6 +654,9 @@ provided. They rely on the :mod:`zipfil +@@ -646,6 +655,9 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules. .. versionchanged:: 3.7 Accepts a :term:`path-like object` for *filename* and *extract_dir*. -+ .. versionchanged:: 3.12 ++ .. versionchanged:: 3.11.4 + Added the *filter* argument. + .. function:: register_unpack_format(name, extensions, function[, extra_args[, description]]) Registers an unpack format. 
*name* is the name of the format and -@@ -653,11 +664,14 @@ provided. They rely on the :mod:`zipfil +@@ -653,11 +665,14 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules. ``.zip`` for Zip files. *function* is the callable that will be used to unpack archives. The - callable will receive the path of the archive, followed by the directory - the archive must be extracted to. -+ callable will receive: - +- - When provided, *extra_args* is a sequence of ``(name, value)`` tuples that - will be passed as keywords arguments to the callable. ++ callable will receive: ++ + - the path of the archive, as a positional argument; + - the directory the archive must be extracted to, as a positional argument; + - possibly a *filter* keyword argument, if it was given to @@ -72,23 +57,11 @@ Content-Transfer-Encoding: 8bit *description* can be provided to describe the format, and will be returned by the :func:`get_unpack_formats` function. +diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst +index 226513f5fc1..836444ebb34 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst -@@ -36,6 +36,13 @@ Some facts and figures: - .. versionchanged:: 3.3 - Added support for :mod:`lzma` compression. - -+.. versionchanged:: 3.12 -+ Archives are extracted using a :ref:`filter `, -+ which makes it possible to either limit surprising/dangerous features, -+ or to acknowledge that they are expected and the archive is fully trusted. -+ By default, archives are fully trusted, but this default is deprecated -+ and slated to change in Python 3.14. -+ - - .. function:: open(name=None, mode='r', fileobj=None, bufsize=10240, **kwargs) - -@@ -206,6 +213,38 @@ The :mod:`tarfile` module defines the fo +@@ -206,6 +206,38 @@ The :mod:`tarfile` module defines the following exceptions: Is raised by :meth:`TarInfo.frombuf` if the buffer it gets is invalid. @@ -127,7 +100,7 @@ Content-Transfer-Encoding: 8bit The following constants are available at the module level: .. 
data:: ENCODING -@@ -316,11 +355,8 @@ be finalized; only the internally used f +@@ -316,11 +348,8 @@ be finalized; only the internally used file object will be closed. See the *debug* can be set from ``0`` (no debug messages) up to ``3`` (all debug messages). The messages are written to ``sys.stderr``. @@ -141,7 +114,7 @@ Content-Transfer-Encoding: 8bit The *encoding* and *errors* arguments define the character encoding to be used for reading or writing the archive and how conversion errors are going -@@ -387,7 +423,7 @@ be finalized; only the internally used f +@@ -387,7 +416,7 @@ be finalized; only the internally used file object will be closed. See the available. @@ -150,12 +123,12 @@ Content-Transfer-Encoding: 8bit Extract all members from the archive to the current working directory or directory *path*. If optional *members* is given, it must be a subset of the -@@ -401,6 +437,12 @@ be finalized; only the internally used f +@@ -401,6 +430,12 @@ be finalized; only the internally used file object will be closed. See the are used to set the owner/group for the extracted files. Otherwise, the named values from the tarfile are used. -+ The *filter* argument specifies how ``members`` are modified or rejected -+ before extraction. ++ The *filter* argument, which was added in Python 3.11.4, specifies how ++ ``members`` are modified or rejected before extraction. + See :ref:`tarfile-extraction-filter` for details. + It is recommended to set this explicitly depending on which *tar* features + you need to support. @@ -163,7 +136,7 @@ Content-Transfer-Encoding: 8bit .. warning:: Never extract archives from untrusted sources without prior inspection. -@@ -408,14 +450,20 @@ be finalized; only the internally used f +@@ -408,14 +443,20 @@ be finalized; only the internally used file object will be closed. See the that have absolute filenames starting with ``"/"`` or filenames with two dots ``".."``. @@ -176,7 +149,7 @@ Content-Transfer-Encoding: 8bit .. 
versionchanged:: 3.6 The *path* parameter accepts a :term:`path-like object`. -+ .. versionchanged:: 3.12 ++ .. versionchanged:: 3.11.4 + Added the *filter* parameter. + @@ -185,7 +158,7 @@ Content-Transfer-Encoding: 8bit Extract a member from the archive to the current working directory, using its full name. Its file information is extracted as accurately as possible. *member* -@@ -423,9 +471,8 @@ be finalized; only the internally used f +@@ -423,9 +464,8 @@ be finalized; only the internally used file object will be closed. See the directory using *path*. *path* may be a :term:`path-like object`. File attributes (owner, mtime, mode) are set unless *set_attrs* is false. @@ -197,7 +170,7 @@ Content-Transfer-Encoding: 8bit .. note:: -@@ -436,6 +483,9 @@ be finalized; only the internally used f +@@ -436,6 +476,9 @@ be finalized; only the internally used file object will be closed. See the See the warning for :meth:`extractall`. @@ -207,17 +180,17 @@ Content-Transfer-Encoding: 8bit .. versionchanged:: 3.2 Added the *set_attrs* parameter. -@@ -445,6 +495,9 @@ be finalized; only the internally used f +@@ -445,6 +488,9 @@ be finalized; only the internally used file object will be closed. See the .. versionchanged:: 3.6 The *path* parameter accepts a :term:`path-like object`. -+ .. versionchanged:: 3.12 ++ .. versionchanged:: 3.11.4 + Added the *filter* parameter. + .. method:: TarFile.extractfile(member) -@@ -457,6 +510,55 @@ be finalized; only the internally used f +@@ -457,6 +503,57 @@ be finalized; only the internally used file object will be closed. See the .. versionchanged:: 3.3 Return an :class:`io.BufferedReader` object. @@ -244,7 +217,7 @@ Content-Transfer-Encoding: 8bit + +.. attribute:: TarFile.extraction_filter + -+ .. versionadded:: 3.12 ++ .. 
versionadded:: 3.11.4 + + The :ref:`extraction filter ` used + as a default for the *filter* argument of :meth:`~TarFile.extract` @@ -255,10 +228,12 @@ Content-Transfer-Encoding: 8bit + argument to :meth:`~TarFile.extract`. + + If ``extraction_filter`` is ``None`` (the default), -+ calling an extraction method without a *filter* argument will raise a -+ ``DeprecationWarning``, -+ and fall back to the :func:`fully_trusted ` filter, -+ whose dangerous behavior matches previous versions of Python. ++ calling an extraction method without a *filter* argument will ++ use the :func:`fully_trusted ` filter for ++ compatibility with previous Python versions. ++ ++ In Python 3.12+, leaving ``extraction_filter=None`` will emit a ++ ``DeprecationWarning``. + + In Python 3.14+, leaving ``extraction_filter=None`` will cause + extraction methods to use the :func:`data ` filter by default. @@ -273,14 +248,14 @@ Content-Transfer-Encoding: 8bit .. method:: TarFile.add(name, arcname=None, recursive=True, *, filter=None) -@@ -532,8 +634,23 @@ permissions, owner etc.), it provides so +@@ -532,7 +629,27 @@ permissions, owner etc.), it provides some useful methods to determine its type. It does *not* contain the file's data itself. :class:`TarInfo` objects are returned by :class:`TarFile`'s methods -:meth:`getmember`, :meth:`getmembers` and :meth:`gettarinfo`. +:meth:`~TarFile.getmember`, :meth:`~TarFile.getmembers` and +:meth:`~TarFile.gettarinfo`. - ++ +Modifying the objects returned by :meth:`~!TarFile.getmember` or +:meth:`~!TarFile.getmembers` will affect all subsequent +operations on the archive. @@ -295,10 +270,14 @@ Content-Transfer-Encoding: 8bit + ignore the corresponding metadata, leaving it set to a default. +- :meth:`~TarFile.addfile` will fail. +- :meth:`~TarFile.list` will print a placeholder string. ++ ++ ++.. versionchanged:: 3.11.4 ++ Added :meth:`~TarInfo.replace` and handling of ``None``. + .. 
class:: TarInfo(name="") - -@@ -566,24 +683,39 @@ A ``TarInfo`` object has the following p +@@ -566,24 +683,39 @@ A ``TarInfo`` object has the following public data attributes: .. attribute:: TarInfo.name @@ -320,7 +299,7 @@ Content-Transfer-Encoding: 8bit + as in :attr:`os.stat_result.st_mtime`. - Time of last modification. -+ .. versionchanged:: 3.12 ++ .. versionchanged:: 3.11.4 + Can be set to ``None`` for :meth:`~TarFile.extract` and + :meth:`~TarFile.extractall`, causing extraction to skip applying this @@ -332,7 +311,7 @@ Content-Transfer-Encoding: 8bit - Permission bits. + Permission bits, as for :func:`os.chmod`. -+ .. versionchanged:: 3.12 ++ .. versionchanged:: 3.11.4 + + Can be set to ``None`` for :meth:`~TarFile.extract` and + :meth:`~TarFile.extractall`, causing extraction to skip applying this @@ -340,7 +319,7 @@ Content-Transfer-Encoding: 8bit .. attribute:: TarInfo.type -@@ -595,35 +727,76 @@ A ``TarInfo`` object has the following p +@@ -595,35 +727,76 @@ A ``TarInfo`` object has the following public data attributes: .. attribute:: TarInfo.linkname @@ -355,7 +334,7 @@ Content-Transfer-Encoding: 8bit User ID of the user who originally stored this member. -+ .. versionchanged:: 3.12 ++ .. versionchanged:: 3.11.4 + + Can be set to ``None`` for :meth:`~TarFile.extract` and + :meth:`~TarFile.extractall`, causing extraction to skip applying this @@ -366,7 +345,7 @@ Content-Transfer-Encoding: 8bit Group ID of the user who originally stored this member. -+ .. versionchanged:: 3.12 ++ .. versionchanged:: 3.11.4 + + Can be set to ``None`` for :meth:`~TarFile.extract` and + :meth:`~TarFile.extractall`, causing extraction to skip applying this @@ -377,7 +356,7 @@ Content-Transfer-Encoding: 8bit User name. -+ .. versionchanged:: 3.12 ++ .. versionchanged:: 3.11.4 + + Can be set to ``None`` for :meth:`~TarFile.extract` and + :meth:`~TarFile.extractall`, causing extraction to skip applying this @@ -388,7 +367,7 @@ Content-Transfer-Encoding: 8bit Group name. -+ .. 
versionchanged:: 3.12 ++ .. versionchanged:: 3.11.4 + + Can be set to ``None`` for :meth:`~TarFile.extract` and + :meth:`~TarFile.extractall`, causing extraction to skip applying this @@ -403,7 +382,7 @@ Content-Transfer-Encoding: 8bit + uid=..., gid=..., uname=..., gname=..., + deep=True) + -+ .. versionadded:: 3.12 ++ .. versionadded:: 3.11.4 + + Return a *new* copy of the :class:`!TarInfo` object with the given attributes + changed. For example, to return a ``TarInfo`` with the group name set to @@ -417,7 +396,7 @@ Content-Transfer-Encoding: 8bit A :class:`TarInfo` object also provides some convenient query methods: -@@ -673,9 +846,258 @@ A :class:`TarInfo` object also provides +@@ -673,9 +846,259 @@ A :class:`TarInfo` object also provides some convenient query methods: Return :const:`True` if it is one of character device, block device or FIFO. @@ -426,7 +405,7 @@ Content-Transfer-Encoding: 8bit +Extraction filters +------------------ + -+.. versionadded:: 3.12 ++.. versionadded:: 3.11.4 + +The *tar* format is designed to capture all details of a UNIX-like filesystem, +which makes it very powerful. @@ -463,9 +442,10 @@ Content-Transfer-Encoding: 8bit + +* ``None`` (default): Use :attr:`TarFile.extraction_filter`. + -+ If that is also ``None`` (the default), raise a ``DeprecationWarning``, -+ and fall back to the ``'fully_trusted'`` filter, whose dangerous behavior -+ matches previous versions of Python. ++ If that is also ``None`` (the default), the ``'fully_trusted'`` ++ filter will be used (for compatibility with earlier versions of Python). ++ ++ In Python 3.12, the default will emit a ``DeprecationWarning``. + + In Python 3.14, the ``'data'`` filter will become the default instead. + It's possible to switch earlier; see :attr:`TarFile.extraction_filter`. 
@@ -602,7 +582,7 @@ Content-Transfer-Encoding: 8bit +Supporting older Python versions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + -+Extraction filters were added to Python 3.12, but may be backported to older ++Extraction filters were added to Python 3.12, and are backported to older +versions as security updates. +To check whether the feature is available, use e.g. +``hasattr(tarfile, 'data_filter')`` rather than checking the Python version. @@ -676,7 +656,7 @@ Content-Transfer-Encoding: 8bit Command-Line Interface ---------------------- -@@ -745,6 +1167,13 @@ Command-line options +@@ -745,6 +1168,15 @@ Command-line options Verbose output. @@ -686,22 +666,41 @@ Content-Transfer-Encoding: 8bit + See :ref:`tarfile-extraction-filter` for details. + Only string names are accepted (that is, ``fully_trusted``, ``tar``, + and ``data``). ++ ++ .. versionadded:: 3.11.4 + .. _tar-examples: Examples -@@ -754,7 +1183,7 @@ How to extract an entire tar archive to - - import tarfile - tar = tarfile.open("sample.tar.gz") -- tar.extractall() -+ tar.extractall(filter='data') - tar.close() - - How to extract a subset of a tar archive with :meth:`TarFile.extractall` using +diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst +index 47e38ae76ba..43da72aece9 100644 +--- a/Doc/whatsnew/3.10.rst ++++ b/Doc/whatsnew/3.10.rst +@@ -2332,3 +2332,19 @@ The deprecated :mod:`mailcap` module now refuses to inject unsafe text + text, it will warn and act as if a match was not found (or for test commands, + as if the test failed). + (Contributed by Petr Viktorin in :gh:`98966`.) ++ ++Notable Changes in 3.10.12 ++========================== ++ ++tarfile ++------- ++ ++* The extraction methods in :mod:`tarfile`, and :func:`shutil.unpack_archive`, ++ have a new a *filter* argument that allows limiting tar features than may be ++ surprising or dangerous, such as creating files outside the destination ++ directory. ++ See :ref:`tarfile-extraction-filter` for details. 
++ In Python 3.12, use without the *filter* argument will show a ++ :exc:`DeprecationWarning`. ++ In Python 3.14, the default will switch to ``'data'``. ++ (Contributed by Petr Viktorin in :pep:`706`.) +diff --git a/Lib/shutil.py b/Lib/shutil.py +index b7bffa3ea41..482ce95a7b2 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py -@@ -1222,7 +1222,7 @@ def _unpack_zipfile(filename, extract_di +@@ -1222,7 +1222,7 @@ def _unpack_zipfile(filename, extract_dir): finally: zip.close() @@ -710,7 +709,7 @@ Content-Transfer-Encoding: 8bit """Unpack tar/tar.gz/tar.bz2/tar.xz `filename` to `extract_dir` """ import tarfile # late import for breaking circular dependency -@@ -1232,7 +1232,7 @@ def _unpack_tarfile(filename, extract_di +@@ -1232,7 +1232,7 @@ def _unpack_tarfile(filename, extract_dir): raise ReadError( "%s is not a compressed or uncompressed tar file" % filename) try: @@ -728,7 +727,7 @@ Content-Transfer-Encoding: 8bit """Unpack an archive. `filename` is the name of the archive. -@@ -1279,6 +1279,9 @@ def unpack_archive(filename, extract_dir +@@ -1279,6 +1279,9 @@ def unpack_archive(filename, extract_dir=None, format=None): was registered for that extension. In case none is found, a ValueError is raised. 
@@ -738,7 +737,7 @@ Content-Transfer-Encoding: 8bit """ sys.audit("shutil.unpack_archive", filename, extract_dir, format) -@@ -1288,6 +1291,10 @@ def unpack_archive(filename, extract_dir +@@ -1288,6 +1291,10 @@ def unpack_archive(filename, extract_dir=None, format=None): extract_dir = os.fspath(extract_dir) filename = os.fspath(filename) @@ -749,7 +748,7 @@ Content-Transfer-Encoding: 8bit if format is not None: try: format_info = _UNPACK_FORMATS[format] -@@ -1295,7 +1302,7 @@ def unpack_archive(filename, extract_dir +@@ -1295,7 +1302,7 @@ def unpack_archive(filename, extract_dir=None, format=None): raise ValueError("Unknown unpack format '{0}'".format(format)) from None func = format_info[1] @@ -758,7 +757,7 @@ Content-Transfer-Encoding: 8bit else: # we need to look at the registered unpackers supported extensions format = _find_unpack_format(filename) -@@ -1303,7 +1310,7 @@ def unpack_archive(filename, extract_dir +@@ -1303,7 +1310,7 @@ def unpack_archive(filename, extract_dir=None, format=None): raise ReadError("Unknown archive format '{0}'".format(filename)) func = _UNPACK_FORMATS[format][1] @@ -767,9 +766,11 @@ Content-Transfer-Encoding: 8bit func(filename, extract_dir, **kwargs) +diff --git a/Lib/tarfile.py b/Lib/tarfile.py +index dea150e8dbb..40599f27bce 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py -@@ -46,6 +46,7 @@ import time +@@ -46,6 +46,7 @@ import struct import copy import re @@ -777,20 +778,15 @@ Content-Transfer-Encoding: 8bit try: import pwd -@@ -69,7 +70,11 @@ except NameError: - __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError", - "CompressionError", "StreamError", "ExtractError", "HeaderError", +@@ -71,6 +72,7 @@ "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT", -- "DEFAULT_FORMAT", "open"] -+ "DEFAULT_FORMAT", "open","fully_trusted_filter", "data_filter", -+ "tar_filter", "FilterError", "AbsoluteLinkError", -+ "OutsideDestinationError", "SpecialFileError", "AbsolutePathError", -+ "LinkOutsideDestinationError"] -+ + 
"DEFAULT_FORMAT", "open"] ++ #--------------------------------------------------------- # tar constants -@@ -158,6 +163,8 @@ else: + #--------------------------------------------------------- +@@ -158,6 +160,8 @@ def stn(s, length, encoding, errors): """Convert a string to a null-terminated bytes object. """ @@ -799,7 +795,7 @@ Content-Transfer-Encoding: 8bit s = s.encode(encoding, errors) return s[:length] + (length - len(s)) * NUL -@@ -709,9 +716,127 @@ class ExFileObject(io.BufferedReader): +@@ -709,9 +713,127 @@ def __init__(self, tarfile, tarinfo): super().__init__(fileobj) #class ExFileObject @@ -927,7 +923,7 @@ Content-Transfer-Encoding: 8bit class TarInfo(object): """Informational class which holds the details about an archive member given by a tar header block. -@@ -792,12 +917,44 @@ class TarInfo(object): +@@ -792,12 +914,44 @@ def linkpath(self, linkname): def __repr__(self): return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self)) @@ -973,7 +969,7 @@ Content-Transfer-Encoding: 8bit "uid": self.uid, "gid": self.gid, "size": self.size, -@@ -820,6 +977,9 @@ class TarInfo(object): +@@ -820,6 +974,9 @@ def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescap """Return a tar header as a string of 512 byte blocks. 
""" info = self.get_info() @@ -983,7 +979,7 @@ Content-Transfer-Encoding: 8bit if format == USTAR_FORMAT: return self.create_ustar_header(info, encoding, errors) -@@ -950,6 +1110,12 @@ class TarInfo(object): +@@ -950,6 +1107,12 @@ def _create_header(info, format, encoding, errors): devmajor = stn("", 8, encoding, errors) devminor = stn("", 8, encoding, errors) @@ -996,7 +992,7 @@ Content-Transfer-Encoding: 8bit parts = [ stn(info.get("name", ""), 100, encoding, errors), itn(info.get("mode", 0) & 0o7777, 8, format), -@@ -958,7 +1124,7 @@ class TarInfo(object): +@@ -958,7 +1121,7 @@ def _create_header(info, format, encoding, errors): itn(info.get("size", 0), 12, format), itn(info.get("mtime", 0), 12, format), b" ", # checksum field @@ -1005,7 +1001,7 @@ Content-Transfer-Encoding: 8bit stn(info.get("linkname", ""), 100, encoding, errors), info.get("magic", POSIX_MAGIC), stn(info.get("uname", ""), 32, encoding, errors), -@@ -1468,6 +1634,8 @@ class TarFile(object): +@@ -1468,6 +1631,8 @@ class TarFile(object): fileobject = ExFileObject # The file-object for extractfile(). 
@@ -1014,7 +1010,7 @@ Content-Transfer-Encoding: 8bit def __init__(self, name=None, mode="r", fileobj=None, format=None, tarinfo=None, dereference=None, ignore_zeros=None, encoding=None, errors="surrogateescape", pax_headers=None, debug=None, -@@ -1940,7 +2108,10 @@ class TarFile(object): +@@ -1940,7 +2105,10 @@ def list(self, verbose=True, *, members=None): members = self for tarinfo in members: if verbose: @@ -1026,7 +1022,7 @@ Content-Transfer-Encoding: 8bit _safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid, tarinfo.gname or tarinfo.gid)) if tarinfo.ischr() or tarinfo.isblk(): -@@ -1948,8 +2119,11 @@ class TarFile(object): +@@ -1948,8 +2116,11 @@ def list(self, verbose=True, *, members=None): ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor))) else: _safe_print("%10d" % tarinfo.size) @@ -1040,7 +1036,7 @@ Content-Transfer-Encoding: 8bit _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else "")) -@@ -2036,32 +2210,63 @@ class TarFile(object): +@@ -2036,32 +2207,58 @@ def addfile(self, tarinfo, fileobj=None): self.members.append(tarinfo) @@ -1049,11 +1045,6 @@ Content-Transfer-Encoding: 8bit + if filter is None: + filter = self.extraction_filter + if filter is None: -+ warnings.warn( -+ 'Python 3.14 will, by default, filter extracted tar ' -+ + 'archives and reject files or modify their metadata. ' -+ + 'Use the filter argument to control this behavior.', -+ DeprecationWarning) + return fully_trusted_filter + if isinstance(filter, str): + raise TypeError( @@ -1114,7 +1105,7 @@ Content-Transfer-Encoding: 8bit # Set correct owner, mtime and filemode on directories. for tarinfo in directories: -@@ -2071,12 +2276,10 @@ class TarFile(object): +@@ -2071,12 +2268,10 @@ def extractall(self, path=".", members=None, *, numeric_owner=False): self.utime(tarinfo, dirpath) self.chmod(tarinfo, dirpath) except ExtractError as e: @@ -1130,7 +1121,7 @@ Content-Transfer-Encoding: 8bit """Extract a member from the archive to the current working directory, using its full name. 
Its file information is extracted as accurately as possible. `member' may be a filename or a TarInfo object. You can -@@ -2084,35 +2287,70 @@ class TarFile(object): +@@ -2084,35 +2279,70 @@ def extract(self, member, path="", set_attrs=True, *, numeric_owner=False): mtime, mode) are set unless `set_attrs' is False. If `numeric_owner` is True, only the numbers for user/group names are used and not the names. @@ -1212,7 +1203,7 @@ Content-Transfer-Encoding: 8bit def extractfile(self, member): """Extract a member from the archive as a file object. `member' may be -@@ -2199,9 +2437,13 @@ class TarFile(object): +@@ -2199,9 +2429,13 @@ def makedir(self, tarinfo, targetpath): """Make a directory called targetpath. """ try: @@ -1229,7 +1220,7 @@ Content-Transfer-Encoding: 8bit except FileExistsError: pass -@@ -2244,6 +2486,9 @@ class TarFile(object): +@@ -2244,6 +2478,9 @@ def makedev(self, tarinfo, targetpath): raise ExtractError("special devices not supported by system") mode = tarinfo.mode @@ -1239,7 +1230,7 @@ Content-Transfer-Encoding: 8bit if tarinfo.isblk(): mode |= stat.S_IFBLK else: -@@ -2265,7 +2510,6 @@ class TarFile(object): +@@ -2265,7 +2502,6 @@ def makelink(self, tarinfo, targetpath): os.unlink(targetpath) os.symlink(tarinfo.linkname, targetpath) else: @@ -1247,7 +1238,7 @@ Content-Transfer-Encoding: 8bit if os.path.exists(tarinfo._link_target): os.link(tarinfo._link_target, targetpath) else: -@@ -2290,15 +2534,19 @@ class TarFile(object): +@@ -2290,15 +2526,19 @@ def chown(self, tarinfo, targetpath, numeric_owner): u = tarinfo.uid if not numeric_owner: try: @@ -1269,7 +1260,7 @@ Content-Transfer-Encoding: 8bit try: if tarinfo.issym() and hasattr(os, "lchown"): os.lchown(targetpath, u, g) -@@ -2310,6 +2558,8 @@ class TarFile(object): +@@ -2310,6 +2550,8 @@ def chown(self, tarinfo, targetpath, numeric_owner): def chmod(self, tarinfo, targetpath): """Set file permissions of targetpath according to tarinfo. 
""" @@ -1278,7 +1269,7 @@ Content-Transfer-Encoding: 8bit try: os.chmod(targetpath, tarinfo.mode) except OSError as e: -@@ -2318,10 +2568,13 @@ class TarFile(object): +@@ -2318,10 +2560,13 @@ def chmod(self, tarinfo, targetpath): def utime(self, tarinfo, targetpath): """Set modification time of targetpath according to tarinfo. """ @@ -1293,7 +1284,7 @@ Content-Transfer-Encoding: 8bit except OSError as e: raise ExtractError("could not change modification time") from e -@@ -2397,13 +2650,26 @@ class TarFile(object): +@@ -2397,13 +2642,26 @@ def _getmember(self, name, tarinfo=None, normalize=False): members = self.getmembers() # Limit the member search list up to tarinfo. @@ -1321,7 +1312,7 @@ Content-Transfer-Encoding: 8bit if normalize: member_name = os.path.normpath(member.name) else: -@@ -2412,6 +2678,10 @@ class TarFile(object): +@@ -2412,6 +2670,10 @@ def _getmember(self, name, tarinfo=None, normalize=False): if name == member_name: return member @@ -1332,7 +1323,7 @@ Content-Transfer-Encoding: 8bit def _load(self): """Read through the entire archive file and look for readable members. -@@ -2504,6 +2774,7 @@ class TarFile(object): +@@ -2504,6 +2766,7 @@ def __exit__(self, type, value, traceback): #-------------------- # exported functions #-------------------- @@ -1340,7 +1331,7 @@ Content-Transfer-Encoding: 8bit def is_tarfile(name): """Return True if name points to a tar archive that we are able to handle, else return False. 
-@@ -2530,6 +2801,10 @@ def main(): +@@ -2530,6 +2793,10 @@ def main(): parser = argparse.ArgumentParser(description=description) parser.add_argument('-v', '--verbose', action='store_true', default=False, help='Verbose output') @@ -1351,7 +1342,7 @@ Content-Transfer-Encoding: 8bit group = parser.add_mutually_exclusive_group(required=True) group.add_argument('-l', '--list', metavar='', help='Show listing of a tarfile') -@@ -2541,8 +2816,12 @@ def main(): +@@ -2541,8 +2808,12 @@ def main(): help='Create tarfile from sources') group.add_argument('-t', '--test', metavar='', help='Test if a tarfile is valid') @@ -1364,7 +1355,7 @@ Content-Transfer-Encoding: 8bit if args.test is not None: src = args.test if is_tarfile(src): -@@ -2573,7 +2852,7 @@ def main(): +@@ -2573,7 +2844,7 @@ def main(): if is_tarfile(src): with TarFile.open(src, 'r:*') as tf: @@ -1373,9 +1364,11 @@ Content-Transfer-Encoding: 8bit if args.verbose: if curdir == '.': msg = '{!r} file is extracted.'.format(src) +diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py +index 0935b60d4c2..72fb3afcbef 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py -@@ -32,6 +32,7 @@ except ImportError: +@@ -32,6 +32,7 @@ from test import support from test.support import os_helper from test.support.os_helper import TESTFN, FakePath @@ -1383,7 +1376,7 @@ Content-Transfer-Encoding: 8bit TESTFN2 = TESTFN + "2" TESTFN_SRC = TESTFN + "_SRC" -@@ -1610,12 +1611,14 @@ class TestArchives(BaseTest, unittest.Te +@@ -1610,12 +1611,14 @@ def test_register_archive_format(self): ### shutil.unpack_archive @@ -1403,7 +1396,7 @@ Content-Transfer-Encoding: 8bit root_dir, base_dir = self._create_files() expected = rlistdir(root_dir) expected.remove('outer') -@@ -1625,36 +1628,48 @@ class TestArchives(BaseTest, unittest.Te +@@ -1625,36 +1628,47 @@ def check_unpack_archive_with_converter(self, format, converter): # let's try to unpack it now tmpdir2 = self.mkdtemp() @@ -1428,8 +1421,7 @@ Content-Transfer-Encoding: 
8bit + def check_unpack_tarball(self, format): + self.check_unpack_archive(format, filter='fully_trusted') + self.check_unpack_archive(format, filter='data') -+ with warnings_helper.check_warnings( -+ ('Python 3.14', DeprecationWarning)): ++ with warnings_helper.check_no_warnings(self): + self.check_unpack_archive(format) def test_unpack_archive_tar(self): @@ -1460,14 +1452,12 @@ Content-Transfer-Encoding: 8bit def test_unpack_registry(self): +diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py +index 89f5a561b4a..0d8d91b4d03 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py -@@ -2,9 +2,13 @@ import sys - import os - import io - from hashlib import sha256 --from contextlib import contextmanager -+from contextlib import contextmanager, ExitStack +@@ -5,6 +5,10 @@ + from contextlib import contextmanager from random import Random import pathlib +import shutil @@ -1477,7 +1467,7 @@ Content-Transfer-Encoding: 8bit import unittest import unittest.mock -@@ -13,6 +17,7 @@ import tarfile +@@ -13,6 +17,7 @@ from test import support from test.support import os_helper from test.support import script_helper @@ -1485,7 +1475,7 @@ Content-Transfer-Encoding: 8bit # Check for our compression modules. 
try: -@@ -108,7 +113,7 @@ class UstarReadTest(ReadTest, unittest.T +@@ -108,7 +113,7 @@ def test_fileobj_regular_file(self): "regular file extraction failed") def test_fileobj_readlines(self): @@ -1494,7 +1484,7 @@ Content-Transfer-Encoding: 8bit tarinfo = self.tar.getmember("ustar/regtype") with open(os.path.join(TEMPDIR, "ustar/regtype"), "r") as fobj1: lines1 = fobj1.readlines() -@@ -126,7 +131,7 @@ class UstarReadTest(ReadTest, unittest.T +@@ -126,7 +131,7 @@ def test_fileobj_readlines(self): "fileobj.readlines() failed") def test_fileobj_iter(self): @@ -1503,7 +1493,7 @@ Content-Transfer-Encoding: 8bit tarinfo = self.tar.getmember("ustar/regtype") with open(os.path.join(TEMPDIR, "ustar/regtype"), "r") as fobj1: lines1 = fobj1.readlines() -@@ -136,7 +141,8 @@ class UstarReadTest(ReadTest, unittest.T +@@ -136,7 +141,8 @@ def test_fileobj_iter(self): "fileobj.__iter__() failed") def test_fileobj_seek(self): @@ -1513,7 +1503,7 @@ Content-Transfer-Encoding: 8bit with open(os.path.join(TEMPDIR, "ustar/regtype"), "rb") as fobj: data = fobj.read() -@@ -455,7 +461,7 @@ class CommonReadTest(ReadTest): +@@ -455,7 +461,7 @@ def test_premature_end_of_archive(self): t = tar.next() with self.assertRaisesRegex(tarfile.ReadError, "unexpected end of data"): @@ -1522,7 +1512,7 @@ Content-Transfer-Encoding: 8bit with self.assertRaisesRegex(tarfile.ReadError, "unexpected end of data"): tar.extractfile(t).read() -@@ -610,16 +616,16 @@ class MiscReadTestBase(CommonReadTest): +@@ -610,16 +616,16 @@ def test_find_members(self): def test_extract_hardlink(self): # Test hardlink extraction (e.g. bug #857297). 
with tarfile.open(tarname, errorlevel=1, encoding="iso8859-1") as tar: @@ -1542,7 +1532,7 @@ Content-Transfer-Encoding: 8bit self.addCleanup(os_helper.unlink, os.path.join(TEMPDIR, "ustar/symtype")) with open(os.path.join(TEMPDIR, "ustar/symtype"), "rb") as f: data = f.read() -@@ -633,13 +639,14 @@ class MiscReadTestBase(CommonReadTest): +@@ -633,13 +639,14 @@ def test_extractall(self): os.mkdir(DIR) try: directories = [t for t in tar if t.isdir()] @@ -1559,7 +1549,7 @@ Content-Transfer-Encoding: 8bit def format_mtime(mtime): if isinstance(mtime, float): return "{} ({})".format(mtime, mtime.hex()) -@@ -662,7 +669,7 @@ class MiscReadTestBase(CommonReadTest): +@@ -662,7 +669,7 @@ def test_extract_directory(self): try: with tarfile.open(tarname, encoding="iso8859-1") as tar: tarinfo = tar.getmember(dirtype) @@ -1568,7 +1558,7 @@ Content-Transfer-Encoding: 8bit extracted = os.path.join(DIR, dirtype) self.assertEqual(os.path.getmtime(extracted), tarinfo.mtime) if sys.platform != "win32": -@@ -675,7 +682,7 @@ class MiscReadTestBase(CommonReadTest): +@@ -675,7 +682,7 @@ def test_extractall_pathlike_name(self): with os_helper.temp_dir(DIR), \ tarfile.open(tarname, encoding="iso8859-1") as tar: directories = [t for t in tar if t.isdir()] @@ -1577,7 +1567,7 @@ Content-Transfer-Encoding: 8bit for tarinfo in directories: path = DIR / tarinfo.name self.assertEqual(os.path.getmtime(path), tarinfo.mtime) -@@ -686,7 +693,7 @@ class MiscReadTestBase(CommonReadTest): +@@ -686,7 +693,7 @@ def test_extract_pathlike_name(self): with os_helper.temp_dir(DIR), \ tarfile.open(tarname, encoding="iso8859-1") as tar: tarinfo = tar.getmember(dirtype) @@ -1586,7 +1576,7 @@ Content-Transfer-Encoding: 8bit extracted = DIR / dirtype self.assertEqual(os.path.getmtime(extracted), tarinfo.mtime) -@@ -1042,7 +1049,7 @@ class GNUReadTest(LongnameTest, ReadTest +@@ -1042,7 +1049,7 @@ class GNUReadTest(LongnameTest, ReadTest, unittest.TestCase): # an all platforms, and after that a test that will work 
only on # platforms/filesystems that prove to support sparse files. def _test_sparse_file(self, name): @@ -1595,7 +1585,7 @@ Content-Transfer-Encoding: 8bit filename = os.path.join(TEMPDIR, name) with open(filename, "rb") as fobj: data = fobj.read() -@@ -1409,7 +1416,8 @@ class WriteTest(WriteTestBase, unittest. +@@ -1409,7 +1416,8 @@ def test_extractall_symlinks(self): with tarfile.open(temparchive, errorlevel=2) as tar: # this should not raise OSError: [Errno 17] File exists try: @@ -1605,7 +1595,21 @@ Content-Transfer-Encoding: 8bit except OSError: self.fail("extractall failed with symlinked files") finally: -@@ -2441,6 +2449,15 @@ class CommandLineTest(unittest.TestCase) +@@ -2406,7 +2414,12 @@ def test__all__(self): + 'PAX_NUMBER_FIELDS', 'stn', 'nts', 'nti', 'itn', 'calc_chksums', + 'copyfileobj', 'filemode', 'EmptyHeaderError', + 'TruncatedHeaderError', 'EOFHeaderError', 'InvalidHeaderError', +- 'SubsequentHeaderError', 'ExFileObject', 'main'} ++ 'SubsequentHeaderError', 'ExFileObject', 'main', ++ "fully_trusted_filter", "data_filter", ++ "tar_filter", "FilterError", "AbsoluteLinkError", ++ "OutsideDestinationError", "SpecialFileError", "AbsolutePathError", ++ "LinkOutsideDestinationError", ++ } + support.check__all__(self, tarfile, not_exported=not_exported) + + def test_useful_error_message_when_modules_missing(self): +@@ -2441,6 +2454,15 @@ def make_simple_tarfile(self, tar_name): for tardata in files: tf.add(tardata, arcname=os.path.basename(tardata)) @@ -1621,7 +1625,7 @@ Content-Transfer-Encoding: 8bit def test_bad_use(self): rc, out, err = self.tarfilecmd_failure() self.assertEqual(out, b'') -@@ -2597,6 +2614,25 @@ class CommandLineTest(unittest.TestCase) +@@ -2597,6 +2619,25 @@ def test_extract_command_verbose(self): finally: os_helper.rmtree(tarextdir) @@ -1647,7 +1651,7 @@ Content-Transfer-Encoding: 8bit def test_extract_command_different_directory(self): self.make_simple_tarfile(tmpname) try: -@@ -2680,7 +2716,7 @@ class 
LinkEmulationTest(ReadTest, unitte +@@ -2680,7 +2721,7 @@ class LinkEmulationTest(ReadTest, unittest.TestCase): # symbolic or hard links tarfile tries to extract these types of members # as the regular files they point to. def _test_link_extraction(self, name): @@ -1656,7 +1660,7 @@ Content-Transfer-Encoding: 8bit with open(os.path.join(TEMPDIR, name), "rb") as f: data = f.read() self.assertEqual(sha256sum(data), sha256_regtype) -@@ -2812,8 +2848,10 @@ class NumericOwnerTest(unittest.TestCase +@@ -2812,8 +2853,10 @@ def test_extract_with_numeric_owner(self, mock_geteuid, mock_chmod, mock_chown): with self._setup_test(mock_geteuid) as (tarfl, filename_1, _, filename_2): @@ -1669,7 +1673,7 @@ Content-Transfer-Encoding: 8bit # convert to filesystem paths f_filename_1 = os.path.join(TEMPDIR, filename_1) -@@ -2831,7 +2869,8 @@ class NumericOwnerTest(unittest.TestCase +@@ -2831,7 +2874,8 @@ def test_extractall_with_numeric_owner(self, mock_geteuid, mock_chmod, mock_chown): with self._setup_test(mock_geteuid) as (tarfl, filename_1, dirname_1, filename_2): @@ -1679,7 +1683,7 @@ Content-Transfer-Encoding: 8bit # convert to filesystem paths f_filename_1 = os.path.join(TEMPDIR, filename_1) -@@ -2856,7 +2895,8 @@ class NumericOwnerTest(unittest.TestCase +@@ -2856,7 +2900,8 @@ def test_extractall_with_numeric_owner(self, mock_geteuid, mock_chmod, def test_extract_without_numeric_owner(self, mock_geteuid, mock_chmod, mock_chown): with self._setup_test(mock_geteuid) as (tarfl, filename_1, _, _): @@ -1689,7 +1693,7 @@ Content-Transfer-Encoding: 8bit # convert to filesystem paths f_filename_1 = os.path.join(TEMPDIR, filename_1) -@@ -2870,6 +2910,873 @@ class NumericOwnerTest(unittest.TestCase +@@ -2870,6 +2915,888 @@ def test_keyword_only(self, mock_geteuid): tarfl.extract, filename_1, TEMPDIR, False, True) @@ -1747,11 +1751,7 @@ Content-Transfer-Encoding: 8bit + tar = tarfile.open(tarname, mode='r', encoding="iso8859-1") + cls.control_dir = pathlib.Path(TEMPDIR) / 
"extractall_ctrl" + tar.errorlevel = 0 -+ with ExitStack() as cm: -+ if cls.extraction_filter is None: -+ cm.enter_context(warnings.catch_warnings( -+ action="ignore", category=DeprecationWarning)) -+ tar.extractall(cls.control_dir, filter=cls.extraction_filter) ++ tar.extractall(cls.control_dir, filter=cls.extraction_filter) + tar.close() + cls.control_paths = set( + p.relative_to(cls.control_dir) @@ -2295,15 +2295,35 @@ Content-Transfer-Encoding: 8bit + arc.add('exec_group_other', mode='?rw-rwxrwx') + arc.add('read_group_only', mode='?---r-----') + arc.add('no_bits', mode='?---------') -+ arc.add('dir/', mode='?---rwsrwt', type=tarfile.DIRTYPE) ++ arc.add('dir/', mode='?---rwsrwt') ++ ++ # On some systems, setting the sticky bit is a no-op. ++ # Check if that's the case. ++ tmp_filename = os.path.join(TEMPDIR, "tmp.file") ++ with open(tmp_filename, 'w'): ++ pass ++ os.chmod(tmp_filename, os.stat(tmp_filename).st_mode | stat.S_ISVTX) ++ have_sticky_files = (os.stat(tmp_filename).st_mode & stat.S_ISVTX) ++ os.unlink(tmp_filename) ++ ++ os.mkdir(tmp_filename) ++ os.chmod(tmp_filename, os.stat(tmp_filename).st_mode | stat.S_ISVTX) ++ have_sticky_dirs = (os.stat(tmp_filename).st_mode & stat.S_ISVTX) ++ os.rmdir(tmp_filename) + + with self.check_context(arc.open(), 'fully_trusted'): -+ self.expect_file('all_bits', mode='?rwsrwsrwt') ++ if have_sticky_files: ++ self.expect_file('all_bits', mode='?rwsrwsrwt') ++ else: ++ self.expect_file('all_bits', mode='?rwsrwsrwx') + self.expect_file('perm_bits', mode='?rwxrwxrwx') + self.expect_file('exec_group_other', mode='?rw-rwxrwx') + self.expect_file('read_group_only', mode='?---r-----') + self.expect_file('no_bits', mode='?---------') -+ self.expect_file('dir', type=tarfile.DIRTYPE, mode='?---rwsrwt') ++ if have_sticky_dirs: ++ self.expect_file('dir/', mode='?---rwsrwt') ++ else: ++ self.expect_file('dir/', mode='?---rwsrwx') + + with self.check_context(arc.open(), 'tar'): + self.expect_file('all_bits', mode='?rwxr-xr-x') @@ 
-2311,7 +2331,7 @@ Content-Transfer-Encoding: 8bit + self.expect_file('exec_group_other', mode='?rw-r-xr-x') + self.expect_file('read_group_only', mode='?---r-----') + self.expect_file('no_bits', mode='?---------') -+ self.expect_file('dir/', type=tarfile.DIRTYPE, mode='?---r-xr-x') ++ self.expect_file('dir/', mode='?---r-xr-x') + + with self.check_context(arc.open(), 'data'): + normal_dir_mode = stat.filemode(stat.S_IMODE( @@ -2321,7 +2341,7 @@ Content-Transfer-Encoding: 8bit + self.expect_file('exec_group_other', mode='?rw-r--r--') + self.expect_file('read_group_only', mode='?rw-r-----') + self.expect_file('no_bits', mode='?rw-------') -+ self.expect_file('dir/', type=tarfile.DIRTYPE, mode=normal_dir_mode) ++ self.expect_file('dir/', mode=normal_dir_mode) + + def test_pipe(self): + # Test handling of a special file @@ -2385,12 +2405,11 @@ Content-Transfer-Encoding: 8bit + self.assertIs(filtered.name, tarinfo.name) + self.assertIs(filtered.type, tarinfo.type) + -+ def test_default_filter_warns(self): -+ """Ensure the default filter warns""" ++ def test_default_filter_warns_not(self): ++ """Ensure the default filter does not warn (like in 3.12)""" + with ArchiveMaker() as arc: + arc.add('foo') -+ with warnings_helper.check_warnings( -+ ('Python 3.14', DeprecationWarning)): ++ with warnings_helper.check_no_warnings(self): + with self.check_context(arc.open(), None): + self.expect_file('foo') + @@ -2563,6 +2582,9 @@ Content-Transfer-Encoding: 8bit def setUpModule(): os_helper.unlink(TEMPDIR) os.makedirs(TEMPDIR) +diff --git a/Misc/NEWS.d/next/Library/2023-03-23-15-24-38.gh-issue-102953.YR4KaK.rst b/Misc/NEWS.d/next/Library/2023-03-23-15-24-38.gh-issue-102953.YR4KaK.rst +new file mode 100644 +index 00000000000..48a105a4a17 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-03-23-15-24-38.gh-issue-102953.YR4KaK.rst @@ -0,0 +1,4 @@