From 96210b6dac16d71912949280e813f82cb168d94b Mon Sep 17 00:00:00 2001
From: Luke Imhoff
Date: Mon, 18 Jan 2010 09:12:10 -0600
Subject: [PATCH] Allow --prefer-pkgs to parse repodata

Any directory passed to --prefer-pkgs will be searched for a repodata
directory. If the directory does not contain a repodata directory, then each
ancestor directory is checked. This allows for the user error of specifying
an individual architecture directory (e.g. x86_64) instead of the parent
repository directory that contains the repodata:

    repository/
        x86_64/
            *.rpm
        repodata/
            *.xml.gz

The use case for this feature is it allows snapshots of the OBS repositories
to be offloaded to a network-attached filesystem. repodata directories are
used as the xml.gz files are faster to read than the 100s of rpms in a given
snapshot. These snapshots are used to track older rpm sets that may be
deployed for testing.
---
Review notes (ignored by git am):
 * Line structure and indentation were reconstructed from a
   whitespace-collapsed copy; indentation of unchanged context lines is a
   best guess and should be checked against the target tree.
 * The blob ids on the "index" lines predate the review fixes below.
 * Fixes: initialise `repositories` in get_prefer_pkgs, use the locally
   imported repodata/packagequery modules, store RpmQuery's path in
   self.__path instead of clobbering self.__file, and map the LT/GT
   dependency flags in repodata.OPERATOR_BY_FLAGS.

 osc/build.py             |  68 ++++++++++++-----
 osc/util/debquery.py     |   5 ++
 osc/util/packagequery.py |  42 +++++++++-
 osc/util/repodata.py     | 161 +++++++++++++++++++++++++++++++++++++++
 osc/util/rpmquery.py     |   4 +
 5 files changed, 260 insertions(+), 20 deletions(-)
 create mode 100644 osc/util/repodata.py

diff --git a/osc/build.py b/osc/build.py
index 91d172ce..7721c3d5 100644
--- a/osc/build.py
+++ b/osc/build.py
@@ -223,38 +223,68 @@ def get_built_files(pacdir, pactype):
                                    stdout=subprocess.PIPE).stdout.read().strip()
     return s_built, b_built
 
+def get_repo(path):
+    """Walks up path looking for any repodata directories.
+
+    @param path path to a directory
+    @return str path to repository directory containing repodata directory
+    """
+    oldDirectory = None
+    currentDirectory = os.path.abspath(path)
+    repositoryDirectory = None
+
+    # while there are still parent directories
+    while currentDirectory != oldDirectory:
+        children = os.listdir(currentDirectory)
+
+        if "repodata" in children:
+            repositoryDirectory = currentDirectory
+            break
+
+        # ascend
+        oldDirectory = currentDirectory
+        currentDirectory = os.path.abspath(os.path.join(oldDirectory,
+                                                        os.pardir))
+
+    return repositoryDirectory
 
 def get_prefer_pkgs(dirs, wanted_arch, type):
     import glob
-    from util import packagequery, cpio
-    # map debian arches to common obs arches
-    arch_map = {'i386': ['i586', 'i686'], 'amd64': ['x86_64']}
+    from util import repodata, packagequery, cpio
     paths = []
+    repositories = []
     suffix = '*.rpm'
     if type == 'dsc':
         suffix = '*.deb'
+
     for dir in dirs:
-        paths += glob.glob(os.path.join(os.path.abspath(dir), suffix))
-    prefer_pkgs = {}
-    pkgqs = {}
+        # check for repodata
+        repository = get_repo(dir)
+        if repository is None:
+            paths += glob.glob(os.path.join(os.path.abspath(dir), suffix))
+        else:
+            repositories.append(repository)
+
+    packageQueries = packagequery.PackageQueries(wanted_arch)
+
+    for repository in repositories:
+        repodataPackageQueries = repodata.queries(repository)
+
+        for packageQuery in repodataPackageQueries:
+            packageQueries.add(packageQuery)
+
     for path in paths:
         if path.endswith('src.rpm'):
             continue
         if path.find('-debuginfo-') > 0:
             continue
-        pkgq = packagequery.PackageQuery.query(path)
-        arch = pkgq.arch()
-        name = pkgq.name()
-        # instead of thip assumption, we should probably rather take the
-        # requested arch for this package from buildinfo
-        # also, it will ignore i686 packages, how to handle those?
-        if arch in [wanted_arch, 'noarch', 'all'] or wanted_arch in arch_map.get(arch, []):
-            curpkgq = pkgqs.get(name)
-            if curpkgq is not None and curpkgq.vercmp(pkgq) > 0:
-                continue
-            prefer_pkgs[name] = path
-            pkgqs[name] = pkgq
-    depfile = create_deps(pkgqs.values())
+        packageQuery = packagequery.PackageQuery.query(path)
+        packageQueries.add(packageQuery)
+
+    prefer_pkgs = dict((name, packageQuery.path())
+                       for (name, packageQuery) in packageQueries.iteritems())
+
+    depfile = create_deps(packageQueries.values())
     cpio = cpio.CpioWrite()
     cpio.add('deps', '\n'.join(depfile))
     return prefer_pkgs, cpio
diff --git a/osc/util/debquery.py b/osc/util/debquery.py
index 16fe1706..8e2539bf 100644
--- a/osc/util/debquery.py
+++ b/osc/util/debquery.py
@@ -1,4 +1,5 @@
 import ar
+import os.path
 import re
 import tarfile
 import packagequery
@@ -13,6 +14,7 @@ class DebQuery(packagequery.PackageQuery):
 
     def __init__(self, fh):
         self.__file = fh
+        self.__path = os.path.abspath(fh.name)
         self.filename_suffix = 'deb'
         self.fields = {}
 
@@ -93,6 +95,9 @@ class DebQuery(packagequery.PackageQuery):
     def description(self):
         return self.fields['description']
 
+    def path(self):
+        return self.__path
+
     def provides(self):
         return self.fields['provides']
 
diff --git a/osc/util/packagequery.py b/osc/util/packagequery.py
index a0a686dc..d2529caf 100644
--- a/osc/util/packagequery.py
+++ b/osc/util/packagequery.py
@@ -4,6 +4,43 @@ class PackageError(Exception):
         Exception.__init__(self)
         self.msg = msg
 
+class PackageQueries(dict):
+    """Dict of package name keys and package query values.  When assigning a
+    package query to a name, the package is evaluated to see if it matches the
+    wanted architecture and if it has a greater version than the current value.
+    """
+
+    # map debian arches to common obs arches
+    architectureMap = {'i386': ['i586', 'i686'], 'amd64': ['x86_64']}
+
+    def __init__(self, wantedArchitecture):
+        self.wantedArchitecture = wantedArchitecture
+        super(PackageQueries, self).__init__()
+
+    def add(self, query):
+        """Adds package query to dict if it is of the correct architecture and
+        is newer (has a greater version) than the currently assigned package.
+
+        @param query a PackageQuery
+        """
+        self.__setitem__(query.name(), query)
+
+    def __setitem__(self, name, query):
+        if name != query.name():
+            raise ValueError("key '%s' does not match "
+                             "package query name '%s'" % (name, query.name()))
+
+        architecture = query.arch()
+
+        if (architecture in [self.wantedArchitecture, 'noarch', 'all'] or
+            self.wantedArchitecture in self.architectureMap.get(architecture,
+                                                                [])):
+            currentQuery = self.get(name)
+
+            # if current query does not exist or is older than this new query
+            if currentQuery is None or currentQuery.vercmp(query) <= 0:
+                super(PackageQueries, self).__setitem__(name, query)
+
 class PackageQuery:
     """abstract base class for all package types"""
     def read(self, all_tags = False, *extra_tags):
@@ -26,7 +63,10 @@ class PackageQuery:
 
     def description(self):
         raise NotImplementedError
-    
+
+    def path(self):
+        raise NotImplementedError
+
     def provides(self):
         raise NotImplementedError
 
diff --git a/osc/util/repodata.py b/osc/util/repodata.py
new file mode 100644
index 00000000..ed540623
--- /dev/null
+++ b/osc/util/repodata.py
@@ -0,0 +1,161 @@
+"""Module for reading repodata directory (created with createrepo) for package
+information instead of scanning individual rpms."""
+
+# standard modules
+import gzip
+import os.path
+
+# cElementTree can be standard or 3rd-party depending on python version
+try:
+    from xml.etree import cElementTree as ET
+except ImportError:
+    import cElementTree as ET
+
+# project modules
+import osc.util.rpmquery
+
+def namespace(name):
+    return "{http://linux.duke.edu/metadata/%s}" % name
+
+OPERATOR_BY_FLAGS = {
+    "EQ" : "=",
+    "LE" : "<=", "LT" : "<",
+    "GE" : ">=", "GT" : ">"
+}
+
+def primaryPath(directory):
+    """Returns path to the primary repository data file.
+
+    @param directory repository directory that contains the repodata subdirectory
+    @return str path to primary repository data file
+    @raise IOError if repomd.xml contains no primary location
+    """
+    metaDataPath = os.path.join(directory, "repodata", "repomd.xml")
+    elementTree = ET.parse(metaDataPath)
+    root = elementTree.getroot()
+
+    for dataElement in root:
+        if dataElement.get("type") == "primary":
+            locationElement = dataElement.find(namespace("repo") + "location")
+            # even though the repomd.xml file is under repodata, the location
+            # href attribute is relative to the parent directory (directory).
+            primaryPath = os.path.join(directory, locationElement.get("href"))
+            break
+    else:
+        raise IOError("'%s' contains no primary location" % metaDataPath)
+
+    return primaryPath
+
+def queries(directory):
+    """Returns a list of RepoDataQueries constructed from the repodata under
+    the directory.
+
+    @param directory path to a repository directory (parent directory of
+                     repodata directory)
+    @return list of RepoDataQuery instances
+    @raise IOError if repomd.xml contains no primary location
+    """
+    path = primaryPath(directory)
+
+    gunzippedPrimary = gzip.GzipFile(path)
+    elementTree = ET.parse(gunzippedPrimary)
+    root = elementTree.getroot()
+
+    packageQueries = []
+    for packageElement in root:
+        packageQuery = RepoDataQuery(directory, packageElement)
+        packageQueries.append(packageQuery)
+
+    return packageQueries
+
+class RepoDataQuery(object):
+    """PackageQuery that reads in data from the repodata directory files."""
+
+    def __init__(self, directory, element):
+        """Creates a RepoDataQuery from a package Element under a metadata
+        Element in a primary.xml file.
+
+        @param directory repository directory path.  Used to convert relative
+                         paths to full paths.
+        @param element package Element
+        """
+        self.__directory = os.path.abspath(directory)
+        self.__element = element
+
+    def __formatElement(self):
+        return self.__element.find(namespace("common") + "format")
+
+    def __parseEntry(self, element):
+        entry = element.get("name")
+        flags = element.get("flags")
+
+        if flags is not None:
+            version = element.get("ver")
+            operator = OPERATOR_BY_FLAGS[flags]
+            entry += " %s %s" % (operator, version)
+
+            release = element.get("rel")
+            if release is not None:
+                entry += "-%s" % (release,)
+
+        return entry
+
+    def __parseEntryCollection(self, collection):
+        formatElement = self.__formatElement()
+        collectionElement = formatElement.find(namespace("rpm") + collection)
+
+        entries = []
+        if collectionElement is not None:
+            for entryElement in collectionElement.findall(namespace("rpm") +
+                                                          "entry"):
+                entry = self.__parseEntry(entryElement)
+                entries.append(entry)
+
+        return entries
+
+    def __versionElement(self):
+        return self.__element.find(namespace("common") + "version")
+
+    def arch(self):
+        return self.__element.find(namespace("common") + "arch").text
+
+    def description(self):
+        return self.__element.find(namespace("common") + "description").text
+
+    def distribution(self):
+        return None
+
+    def epoch(self):
+        return self.__versionElement().get("epoch")
+
+    def name(self):
+        return self.__element.find(namespace("common") + "name").text
+
+    def path(self):
+        locationElement = self.__element.find(namespace("common") + "location")
+        relativePath = locationElement.get("href")
+        absolutePath = os.path.join(self.__directory, relativePath)
+
+        return absolutePath
+
+    def provides(self):
+        return self.__parseEntryCollection("provides")
+
+    def release(self):
+        return self.__versionElement().get("rel")
+
+    def requires(self):
+        return self.__parseEntryCollection("requires")
+
+    def vercmp(self, other):
+        res = osc.util.rpmquery.RpmQuery.rpmvercmp(str(self.epoch()), str(other.epoch()))
+        if res != 0:
+            return res
+        res = osc.util.rpmquery.RpmQuery.rpmvercmp(self.version(), other.version())
+        if res != 0:
+            return res
+        res = osc.util.rpmquery.RpmQuery.rpmvercmp(self.release(), other.release())
+        return res
+
+    def version(self):
+        return self.__versionElement().get("ver")
diff --git a/osc/util/rpmquery.py b/osc/util/rpmquery.py
index 6c5638c1..970943d8 100644
--- a/osc/util/rpmquery.py
+++ b/osc/util/rpmquery.py
@@ -62,6 +62,7 @@ class RpmQuery(packagequery.PackageQuery):
 
     def __init__(self, fh):
         self.__file = fh
+        self.__path = os.path.abspath(fh.name)
         self.filename_suffix = 'rpm'
         self.header = None
 
@@ -207,6 +208,9 @@ class RpmQuery(packagequery.PackageQuery):
             return None
         return entry.data
 
+    def path(self):
+        return self.__path
+
     def provides(self):
         return self.__reqprov(1047, 1112, 1113)
 