Forbid extracting files with absolute path from 'ar' archives (boo#1122683)

Also fix and modernize the code, add tests.
2025-09-07 13:48:43 +02:00 · 2024-05-29 16:12:46 +02:00
parent a3ed68508b
commit d61b781976
4 changed files with 176 additions and 25 deletions
--- a/osc/util/ar.py
+++ b/osc/util/ar.py
@@ -19,36 +19,32 @@ import re
 import stat
 import sys
 from io import BytesIO
-
-
-# workaround for python24
-if not hasattr(os, 'SEEK_SET'):
-    os.SEEK_SET = 0
+from typing import Union


 class ArError(Exception):
    """Base class for all ar related errors"""

-    def __init__(self, fn, msg):
+    def __init__(self, fn: bytes, msg: str):
        super().__init__()
        self.file = fn
        self.msg = msg

    def __str__(self):
-        return 'ar error: %s' % self.msg
+        return f"{self.msg}: {self.file.decode('utf-8')}"


 class ArHdr:
    """Represents an ar header entry"""

-    def __init__(self, fn, date, uid, gid, mode, size, fmag, off):
+    def __init__(self, fn: bytes, date: bytes, uid: bytes, gid: bytes, mode: bytes, size: bytes, fmag: bytes, off: bytes):
        self.file = fn.strip()
        self.date = date.strip()
        self.uid = uid.strip()
        self.gid = gid.strip()
        if not mode.strip():
            # provide a dummy mode for the ext_fn hdr
-            mode = '0'
+            mode = b"0"
        self.mode = stat.S_IMODE(int(mode, 8))
        self.size = int(size)
        self.fmag = fmag
@@ -75,9 +71,17 @@ class ArFile(BytesIO):
        working dir is used. Additionally it tries to set the owner/group
        and permissions.
        """
+        if self.name.startswith(b"/"):
+            raise ArError(self.name, "Extracting files with absolute paths is not supported for security reasons")
+
        if not dir:
            dir = os.getcwdb()
-        fn = os.path.join(dir, self.name)
+        fn = os.path.join(dir, self.name.decode("utf-8"))
+
+        dir_path, _ = os.path.split(fn)
+        if dir_path:
+            os.makedirs(dir_path, exist_ok=True)
+
        with open(fn, 'wb') as f:
            f.write(self.getvalue())
        os.chmod(fn, self.mode)
@@ -88,6 +92,7 @@ class ArFile(BytesIO):
        if gid not in os.getgroups() or os.getegid() != 0:
            gid = -1
        os.chown(fn, uid, gid)
+        return fn

    def __str__(self):
        return '%s %s %s %s' % (self.name, self.uid,
@@ -140,26 +145,30 @@ class Ar:
        data section. The end of such a filename is indicated with a trailing '/'.
        Another special file is the '/' which contains the symbol lookup table.
        """
+
+        # read extended header with long file names and then only seek into the right offsets
+        self.__file.seek(self.ext_fnhdr.dataoff, os.SEEK_SET)
+        ext_fnhdr_data = self.__file.read(self.ext_fnhdr.size)
+
        for h in self.hdrs:
            if h.file == b'/':
                continue
-            # remove slashes which are appended by ar
-            h.file = h.file.rstrip(b'/')
-            if not h.file.startswith(b'/'):
+
+            if h.file.endswith(b"/"):
+                # regular file name
+                h.file = h.file[:-1]
                continue
-            # handle long filename
-            off = int(h.file[1:len(h.file)])
-            start = self.ext_fnhdr.dataoff + off
-            self.__file.seek(start, os.SEEK_SET)
-            # XXX: is it safe to read all the data in one chunk? I assume the '//' data section
-            #      won't be too large
-            data = self.__file.read(self.ext_fnhdr.size)
-            end = data.find(b'/')
-            if end != -1:
-                h.file = data[0:end]
-            else:
+
+            # long file name
+            assert h.file[0:1] == b"/"
+            start = int(h.file[1:])
+            end = ext_fnhdr_data.find(b'/', start)
+
+            if end == -1:
                raise ArError(b'//', 'invalid data section - trailing slash (off: %d)' % start)

+            h.file = ext_fnhdr_data[start:end]
+
    def _get_file(self, hdr):
        self.__file.seek(hdr.dataoff, os.SEEK_SET)
        return ArFile(hdr.file, hdr.uid, hdr.gid, hdr.mode,
@@ -193,7 +202,10 @@ class Ar:
            pos += hdr.size + (hdr.size & 1)
        self._fixupFilenames()

-    def get_file(self, fn):
+    def get_file(self, fn: Union[str, bytes]):
+        # accept str for better user experience
+        if isinstance(fn, str):
+            fn = fn.encode("utf-8")
        for h in self.hdrs:
            if h.file == fn:
                return self._get_file(h)