- Added the ar module, which can be used to read ar archives. It will be used later to extract files from a Debian package, which uses this format. Currently only the GNU format is supported (note: something may still be missing, but it is sufficient for our needs).

2025-08-05 15:13:39 +02:00 · 2009-03-04 22:15:11 +00:00
parent affef176a6
commit 3b08741a80
2 changed files with 155 additions and 0 deletions
--- a/osc/util/__init__.py
+++ b/osc/util/__init__.py
@@ -0,0 +1 @@
+# names which are exported by a star-import of this module: only 'ar'
+# NOTE(review): assumes this file is the package's __init__ module - confirm
+__all__ = ['ar']
--- a/osc/util/ar.py
+++ b/osc/util/ar.py
@@ -0,0 +1,154 @@
+import os
+import re
+import sys
+import StringIO
+import stat
+
+class ArError(Exception):
+    """
+    Base class for all ar related errors.
+
+    fn is the name of the file the error refers to and msg is a human
+    readable description of the problem. Both are stored as attributes
+    (file and msg) and are also handed to Exception so that args carries
+    the same information.
+    """
+    def __init__(self, fn, msg):
+        # pass the arguments on to the base class: otherwise args stays
+        # empty, repr() shows no details and the exception can't be unpickled
+        Exception.__init__(self, fn, msg)
+        self.file = fn
+        self.msg = msg
+
+    def __str__(self):
+        return 'ar error: %s' % self.msg
+
+class ArHdr():
+    """
+    Represents an ar header entry.
+
+    fn, date, uid, gid, mode, size and fmag are the raw strings of the
+    header fields, off is the offset of the entry's data section in the
+    archive. file, date, uid and gid are stored as stripped strings, mode
+    is parsed as an octal number (only the permission bits are kept), size
+    and dataoff are stored as ints.
+
+    Raises a ValueError if mode (unless it is blank), size or off isn't
+    a number.
+    """
+    def __init__(self, fn, date, uid, gid, mode, size, fmag, off):
+        self.file = fn.strip()
+        self.date = date.strip()
+        self.uid = uid.strip()
+        self.gid = gid.strip()
+        # the header of the '//' long filename table can have a blank mode
+        # field; int() would raise a ValueError for it, so treat a blank
+        # field as mode 0
+        mode = mode.strip() or '0'
+        self.mode = stat.S_IMODE(int(mode, 8))
+        self.size = int(size)
+        self.fmag = fmag
+        # data section starts at off and ends at off + size
+        self.dataoff = int(off)
+
+    def __str__(self):
+        return '%16s %d' % (self.file, self.size)
+
+class ArFile(StringIO.StringIO):
+    """
+    Represents a file which resides in the archive.
+
+    The object itself is a StringIO which holds the file's content (buf).
+    The name, uid, gid and mode which were passed in are available as
+    attributes.
+    """
+    def __init__(self, fn, uid, gid, mode, buf):
+        StringIO.StringIO.__init__(self, buf)
+        self.name = fn
+        self.uid = uid
+        self.gid = gid
+        self.mode = mode
+
+    def saveTo(self, dir = None):
+        """
+        Writes the file to dir/name. If dir isn't specified the current
+        working directory is used. Afterwards the permissions of the new
+        file are set to mode and os.chown() is called for it.
+        """
+        if not dir:
+            dir = os.getcwd()
+        # NOTE(review): the name is joined unchecked - a name which contains
+        # path separators (untrusted archive) can point outside of dir
+        fn = os.path.join(dir, self.name)
+        f = open(fn, 'wb')
+        try:
+            f.write(self.getvalue())
+        finally:
+            # don't leak the file handle if the write fails
+            f.close()
+        os.chmod(fn, self.mode)
+        # -1 tells os.chown() to leave the respective id unchanged
+        # NOTE(review): Ar.get_file() passes uid and gid as strings, these
+        # never compare equal to the integer ids so both end up as -1
+        uid = self.uid
+        if uid != os.geteuid() or os.geteuid() != 0:
+            uid = -1
+        gid = self.gid
+        if not gid in os.getgroups() or os.getegid() != 0:
+            gid = -1
+        os.chown(fn, uid, gid)
+
+class Ar():
+    """
+    Represents an ar archive (only GNU format is supported).
+    Readonly access.
+
+    read() parses the headers of all archive members, afterwards get_file()
+    can be used to retrieve a member as an ArFile object.
+    """
+    # every member starts with a header of hdr_len bytes. Its fields are
+    # (in this order): name, date, uid, gid, mode, size and the magic string
+    hdr_len = 60
+    hdr_pat = re.compile('^(.{16})(.{12})(.{6})(.{6})(.{8})(.{10})(.{2})', re.DOTALL)
+
+    def __init__(self, fn):
+        self.filename = fn
+        self.hdrs = []
+        self.ext_fnhdr = None
+        # file object: will be closed in __del__()
+        self.__file = None
+
+    def __del__(self):
+        if self.__file:
+            self.__file.close()
+
+    def _appendHdr(self, hdr):
+        # GNU uses an internal '//' file to store very long filenames
+        if hdr.file.startswith('//'):
+            self.ext_fnhdr = hdr
+        else:
+            self.hdrs.append(hdr)
+
+    def _fixupFilenames(self):
+        """
+        support the GNU approach for very long filenames:
+        every filename which exceeds 16 bytes is stored in the data section of a special file ('//')
+        and the filename in the header of this long file specifies the offset in the special file's
+        data section. The end of such a filename is indicated with a trailing '/'.
+        Another special file is the '/' which contains the symbol lookup table.
+
+        Raises an ArError if a long filename cannot be resolved.
+        """
+        for h in self.hdrs:
+            if h.file == '/':
+                continue
+            # remove slashes which are appended by ar
+            h.file = h.file.rstrip('/')
+            if not h.file.startswith('/'):
+                continue
+            # handle long filename
+            if self.ext_fnhdr is None:
+                raise ArError(self.filename, 'long filename entry \'%s\' but no \'//\' section' % h.file)
+            try:
+                off = int(h.file[1:])
+            except ValueError:
+                raise ArError(self.filename, 'invalid long filename entry \'%s\'' % h.file)
+            if off < 0 or off >= self.ext_fnhdr.size:
+                raise ArError(self.filename, 'invalid long filename offset (off: %d)' % off)
+            start = self.ext_fnhdr.dataoff + off
+            self.__file.seek(start, os.SEEK_SET)
+            # only read up to the end of the '//' data section - otherwise a '/' from
+            # the data of the following file could be mistaken for the end of the name
+            # XXX: is it safe to read all the data in one chunk? I assume the '//' data section
+            #      won't be too large
+            data = self.__file.read(self.ext_fnhdr.size - off)
+            end = data.find('/')
+            if end != -1:
+                h.file = data[0:end]
+            else:
+                raise ArError('//', 'invalid data section - trailing slash (off: %d)' % start)
+
+    def read(self):
+        """
+        reads in the archive. It tries to use mmap due to performance reasons (in case of large files).
+        It can be called more than once: the headers are parsed from scratch each time.
+        Raises an ArError if the file is no ar archive or if it contains an unexpected header.
+        """
+        if not self.__file:
+            import errno
+            import mmap
+            self.__file = open(self.filename, 'rb')
+            try:
+                mapped = mmap.mmap(self.__file.fileno(), 0, prot=mmap.PROT_READ)
+            except EnvironmentError, e:
+                if e.errno == errno.ENODEV:
+                    print >>sys.stderr, 'cannot use mmap to read the file, falling back to the default io'
+                else:
+                    # plain raise keeps the original traceback
+                    raise
+            except ValueError:
+                # mmap refuses to map an empty file: keep the plain file object,
+                # the magic check below reports the problem as an ArError
+                pass
+            else:
+                # the mapping works on its own copy of the file descriptor
+                self.__file.close()
+                self.__file = mapped
+        else:
+            self.__file.seek(0, os.SEEK_SET)
+        # start from scratch so that a second read() doesn't duplicate the entries
+        self.hdrs = []
+        self.ext_fnhdr = None
+        data = self.__file.read(7)
+        if data != '!<arch>':
+            raise ArError(self.filename, 'no ar archive')
+        # skip the magic string and its trailing newline
+        pos = 8
+        while True:
+            try:
+                self.__file.seek(pos, os.SEEK_SET)
+            except ValueError:
+                # mmap doesn't allow to seek behind the end of the file (a plain
+                # file does and simply returns no data) - treat both cases alike
+                break
+            data = self.__file.read(self.hdr_len)
+            if not data:
+                break
+            pos += self.hdr_len
+            m = self.hdr_pat.search(data)
+            if not m:
+                raise ArError(self.filename, 'unexpected hdr entry')
+            args = m.groups() + (pos, )
+            try:
+                hdr = ArHdr(*args)
+            except ValueError:
+                # a numeric header field doesn't contain a number
+                raise ArError(self.filename, 'unexpected hdr entry')
+            if hdr.size < 0:
+                # a negative size would move pos backwards (endless loop)
+                raise ArError(self.filename, 'unexpected hdr entry')
+            self._appendHdr(hdr)
+            # data blocks are 2 bytes aligned - if they end on an odd
+            # offset ARFMAG[0] will be used for padding (according to the current binutils code)
+            pos += hdr.size + (hdr.size & 1)
+        self._fixupFilenames()
+
+    def get_file(self, fn):
+        """
+        returns the archive member with the name fn as an ArFile object or
+        None if the archive has no such member. read() has to be called first.
+        """
+        for h in self.hdrs:
+            if h.file == fn:
+                self.__file.seek(h.dataoff, os.SEEK_SET)
+                return ArFile(h.file, h.uid, h.gid, h.mode,
+                              self.__file.read(h.size))
+        return None