import fcntl
import itertools
import logging
import os
import re
import struct
import sys
import tempfile
from urllib.parse import quote_plus

import osc.conf
from lxml import etree as ET
from osc.core import makeurl, http_GET
from osc.util.cpio import CpioHdr

logger = logging.getLogger('RepoMirror')


class RepoMirror:
    """
    Mirror the RPM headers of all binaries in a repo on OBS (full tree).

    Headers are stored in a destination directory as ``<hdrmd5>-<name>.rpm``.
    Debug packages are ignored by default, see the nameignore parameter of
    the constructor.
    """

    # Fixed-size part of a "new ascii" CPIO header: 6 byte magic followed by
    # thirteen 8 byte fields.
    cpio_struct = struct.Struct('6s8s8s8s8s8s8s8s8s8s8s8s8s8s')
    # Archive member names look like "<binary name>-<32 hex digit md5>".
    cpio_name_re = re.compile('^([^/]+)-([0-9a-f]{32})$')
    # Number of binaries requested per cpioheaders download.
    download_batch_size = 50

    def __init__(self, apiurl: str, nameignore: str = '-debug(info|source|info-32bit).rpm$'):
        """
        Class to mirror RPM headers of all binaries in a repo on OBS (full tree).
        Debug packages are ignored by default, see the nameignore parameter.

        :param apiurl: URL of the OBS API to talk to.
        :param nameignore: regular expression; binaries whose file name
            matches it (via ``re.search``) are not mirrored.
        """
        self.apiurl = apiurl
        self.nameignorere = re.compile(nameignore)

    def extract_cpio_stream(self, destdir: str, stream):
        """
        Unpack a CPIO archive read from ``stream`` into ``destdir``.

        Every member named ``<name>-<md5>`` is written to
        ``destdir/<md5>-<name>.rpm``. The stream must support ``read()`` and
        ``tell()``.

        :raises NotImplementedError: for a CPIO magic other than ``070701``
            or an archive member with an unexpected name.
        :raises RuntimeError: if the archive contains a ``.errors`` member or
            data follows the trailer.
        :raises struct.error: if the stream ends in the middle of a header.
        """
        def align():
            # The new-ascii format has padding for 4 byte alignment
            stream.read((4 - (stream.tell() % 4)) % 4)

        while True:
            # Read and parse the CPIO header
            hdrtuples = self.cpio_struct.unpack(stream.read(self.cpio_struct.size))
            if hdrtuples[0] != b'070701':
                raise NotImplementedError(f'CPIO format {hdrtuples[0]} not implemented')

            # NOTE(review): relies on CpioHdr exposing namesize/filesize as
            # integers - confirm against osc.util.cpio.
            hdr = CpioHdr(*hdrtuples)
            hdr.filename = stream.read(hdr.namesize - 1).decode('ascii')
            stream.read(1)  # Skip terminator
            align()

            if hdr.filename == '.errors':
                content = stream.read(hdr.filesize)
                raise RuntimeError('Download has errors: ' + content.decode('ascii'))

            if hdr.filename == 'TRAILER!!!':
                # Nothing may follow the trailer record.
                if stream.read(1):
                    raise RuntimeError('Expected end of CPIO')
                break

            binarymatch = self.cpio_name_re.match(hdr.filename)
            if not binarymatch:
                raise NotImplementedError(f'Unhandled file {hdr.filename} in archive')

            name = binarymatch.group(1)
            md5 = binarymatch.group(2)
            destpath = os.path.join(destdir, f'{md5}-{name}.rpm')
            # Write to a temporary file in the same directory and hard-link
            # it into place, so the final name never shows partial content.
            with tempfile.NamedTemporaryFile(mode='wb', dir=destdir) as tmpfile:
                # Probably not big enough to need chunking
                tmpfile.write(stream.read(hdr.filesize))
                os.link(tmpfile.name, destpath)
                # Would be nice to use O_TMPFILE + link here, but python passes
                # O_EXCL which breaks that.

            align()

    def _mirror(self, destdir: str, prj: str, repo: str, arch: str) -> None:
        """
        Using the _repositories endpoint, download all RPM headers into destdir.

        Files in destdir ending in ``.rpm`` that are no longer listed by the
        server are deleted; headers already present are not downloaded again.
        """
        logger.info(f'Mirroring {prj}/{repo}/{arch}')

        pkglistxml = http_GET(makeurl(self.apiurl, ['build', prj, repo, arch, '_repository'],
                                      query={'view': 'binaryversions', 'nometa': 1}))
        root = ET.parse(pkglistxml).getroot()

        # Maps the local file name to the binary name to request.
        remotebins: dict[str, str] = {}
        for binary in root.findall('binary'):
            name = binary.get('name')
            if name is None:
                # Entry without a name attribute, nothing to mirror.
                continue
            if name.endswith('.rpm') and not self.nameignorere.search(name):
                hdrmd5 = binary.get('hdrmd5')
                remotebins[f'{hdrmd5}-{name}'] = name[:-4]

        to_delete: list[str] = []
        for filename in os.listdir(destdir):
            if not filename.endswith('.rpm'):
                continue

            if filename in remotebins:
                del remotebins[filename]  # Already downloaded
            else:
                to_delete.append(os.path.join(destdir, filename))

        if to_delete:
            logger.info(f'Deleting {len(to_delete)} old packages')
            for path in to_delete:
                os.unlink(path)

        if remotebins:
            logger.info(f'Downloading {len(remotebins)} new packages')
            # Download in batches, consuming a single iterator chunk by chunk.
            pending = iter(remotebins.values())
            while True:
                batch = list(itertools.islice(pending, self.download_batch_size))
                if not batch:
                    break

                query = '&'.join(['view=cpioheaders'] + ['binary=' + quote_plus(binary) for binary in batch])
                req = http_GET(makeurl(self.apiurl, ['build', prj, repo, arch, '_repository'],
                                       query=query))
                self.extract_cpio_stream(destdir, req)

    def mirror(self, destdir: str, prj: str, repo: str, arch: str) -> None:
        """
        Creates destdir and locks destdir/.lock before mirroring.

        The exclusive lock is first attempted non-blocking so that waiting
        for it can be logged; it is held until mirroring has finished.
        """
        os.makedirs(destdir, exist_ok=True)

        with open(os.path.join(destdir, '.lock'), 'w') as lockfile:
            try:
                fcntl.flock(lockfile, fcntl.LOCK_EX | fcntl.LOCK_NB)
            except IOError:
                logger.info(f'{destdir} is locked, waiting...')
                fcntl.flock(lockfile, fcntl.LOCK_EX)
                logger.info('Lock acquired!')

            return self._mirror(destdir, prj, repo, arch)


if __name__ == '__main__':
    if len(sys.argv) != 6:
        print("Usage: apiurl destdir prj repo arch")
    else:
        osc.conf.get_config()
        rm = RepoMirror(sys.argv[1])
        rm.mirror(sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5])