1
0
mirror of https://github.com/openSUSE/osc.git synced 2024-12-28 10:46:15 +01:00

Port the ar module to python3

Since an ar archive can contain arbitrary filenames (that is, a
filename need not be valid UTF-8; for instance,
"foo\xff\xffbar"), the ar module provides a bytes-only API. A
user can decode filenames as she wishes.
Note: if a "fn" parameter is passed to Ar.__init__, it should be a
bytes object (a str is also OK, but then be aware that an ArError's file
attribute might be either a str or a bytes object).
This commit is contained in:
Marcus Huewe 2019-01-15 16:56:46 +01:00
parent 68cf974c78
commit 6fdce86fc9

View File

@ -20,12 +20,8 @@ import re
import sys import sys
import stat import stat
#XXX: python 2.7 contains io.StringIO, which needs unicode instead of str from io import BytesIO
#therefor try to import old stuff before new one here
try:
from StringIO import StringIO
except ImportError:
from io import StringIO
# workaround for python24 # workaround for python24
if not hasattr(os, 'SEEK_SET'): if not hasattr(os, 'SEEK_SET'):
@ -60,10 +56,10 @@ class ArHdr:
def __str__(self): def __str__(self):
return '%16s %d' % (self.file, self.size) return '%16s %d' % (self.file, self.size)
class ArFile(StringIO): class ArFile(BytesIO):
"""Represents a file which resides in the archive""" """Represents a file which resides in the archive"""
def __init__(self, fn, uid, gid, mode, buf): def __init__(self, fn, uid, gid, mode, buf):
StringIO.__init__(self, buf) BytesIO.__init__(self, buf)
self.name = fn self.name = fn
self.uid = uid self.uid = uid
self.gid = gid self.gid = gid
@ -100,7 +96,8 @@ class Ar:
Readonly access. Readonly access.
""" """
hdr_len = 60 hdr_len = 60
hdr_pat = re.compile('^(.{16})(.{12})(.{6})(.{6})(.{8})(.{10})(.{2})', re.DOTALL) hdr_pat = re.compile(b'^(.{16})(.{12})(.{6})(.{6})(.{8})(.{10})(.{2})',
re.DOTALL)
def __init__(self, fn = None, fh = None): def __init__(self, fn = None, fh = None):
if fn == None and fh == None: if fn == None and fh == None:
@ -126,7 +123,7 @@ class Ar:
def _appendHdr(self, hdr): def _appendHdr(self, hdr):
# GNU uses an internal '//' file to store very long filenames # GNU uses an internal '//' file to store very long filenames
if hdr.file.startswith('//'): if hdr.file.startswith(b'//'):
self.ext_fnhdr = hdr self.ext_fnhdr = hdr
else: else:
self.hdrs.append(hdr) self.hdrs.append(hdr)
@ -140,11 +137,11 @@ class Ar:
Another special file is the '/' which contains the symbol lookup table. Another special file is the '/' which contains the symbol lookup table.
""" """
for h in self.hdrs: for h in self.hdrs:
if h.file == '/': if h.file == b'/':
continue continue
# remove slashes which are appended by ar # remove slashes which are appended by ar
h.file = h.file.rstrip('/') h.file = h.file.rstrip(b'/')
if not h.file.startswith('/'): if not h.file.startswith(b'/'):
continue continue
# handle long filename # handle long filename
off = int(h.file[1:len(h.file)]) off = int(h.file[1:len(h.file)])
@ -153,11 +150,11 @@ class Ar:
# XXX: is it safe to read all the data in one chunk? I assume the '//' data section # XXX: is it safe to read all the data in one chunk? I assume the '//' data section
# won't be too large # won't be too large
data = self.__file.read(self.ext_fnhdr.size) data = self.__file.read(self.ext_fnhdr.size)
end = data.find('/') end = data.find(b'/')
if end != -1: if end != -1:
h.file = data[0:end] h.file = data[0:end]
else: else:
raise ArError('//', 'invalid data section - trailing slash (off: %d)' % start) raise ArError(b'//', 'invalid data section - trailing slash (off: %d)' % start)
def _get_file(self, hdr): def _get_file(self, hdr):
self.__file.seek(hdr.dataoff, os.SEEK_SET) self.__file.seek(hdr.dataoff, os.SEEK_SET)
@ -172,7 +169,7 @@ class Ar:
self.__file.seek(0, os.SEEK_SET) self.__file.seek(0, os.SEEK_SET)
self._init_datastructs() self._init_datastructs()
data = self.__file.read(7) data = self.__file.read(7)
if data != '!<arch>': if data != b'!<arch>':
raise ArError(self.filename, 'no ar archive') raise ArError(self.filename, 'no ar archive')
pos = 8 pos = 8
while (len(data) != 0): while (len(data) != 0):
@ -200,7 +197,7 @@ class Ar:
def __iter__(self): def __iter__(self):
for h in self.hdrs: for h in self.hdrs:
if h.file == '/': if h.file == b'/':
continue continue
yield self._get_file(h) yield self._get_file(h)