1
0
mirror of https://github.com/openSUSE/osc.git synced 2024-12-27 10:16:14 +01:00

Allow --prefer-pkgs to parse repodata

Any directory passed to --prefer-pkgs will be searched for a repodata
directory.  If the directory does not contain a repodata directory, then
each ancestor directory is checked.  This allows for the user error of
specifying an individual architecture directory (e.g. x86_64) instead of the
parent repository directory that contains the repodata:

repository/
  x86_64/
    *.rpm
  repodata/
    *.xml.gz

The use case for this feature is that it allows snapshots of the OBS repositories
to be offloaded to a network-attached filesystem.  repodata directories are
used as the xml.gz files are faster to read than the 100s of rpms in a given
snapshot.  These snapshots are used to track older rpm sets that may be
deployed for testing.
This commit is contained in:
Luke Imhoff 2010-01-18 09:12:10 -06:00
parent 39ac814169
commit 96210b6dac
5 changed files with 260 additions and 20 deletions

View File

@ -223,38 +223,68 @@ def get_built_files(pacdir, pactype):
stdout=subprocess.PIPE).stdout.read().strip()
return s_built, b_built
def get_repo(path):
    """Walks up from path looking for a repository directory.

    A repository directory is a directory that contains a "repodata"
    subdirectory.  path itself is checked first, then each ancestor in turn,
    so passing an architecture directory (e.g. repository/x86_64) still finds
    the enclosing repository.

    @param path path to a directory
    @return str absolute path to the repository directory containing the
            repodata directory, or None if neither path nor any of its
            ancestors contains one
    """
    currentDirectory = os.path.abspath(path)

    while True:
        # Probe for the subdirectory directly instead of listing the whole
        # directory: a plain file that happens to be named "repodata" is not
        # a repository, and a missing or unreadable directory simply does not
        # match instead of raising OSError.
        if os.path.isdir(os.path.join(currentDirectory, "repodata")):
            return currentDirectory

        # ascend
        parentDirectory = os.path.abspath(os.path.join(currentDirectory,
                                                       os.pardir))

        # the filesystem root is its own parent: nothing left to check
        if parentDirectory == currentDirectory:
            return None

        currentDirectory = parentDirectory
def get_prefer_pkgs(dirs, wanted_arch, type):
# Collects the preferred packages found under dirs and returns a
# (prefer_pkgs, cpio) pair: prefer_pkgs maps package name to package path and
# cpio is a CpioWrite archive holding a 'deps' entry built by create_deps().
#
# NOTE(review): this span is rendered diff output.  Lines of the previous and
# of the new implementation appear together without +/- markers (e.g. the two
# `from util import` lines, and both `pkgqs` and `packageQueries`), so read it
# as a diff hunk rather than as runnable code.
import glob
from util import packagequery, cpio
# map debian arches to common obs arches
arch_map = {'i386': ['i586', 'i686'], 'amd64': ['x86_64']}
from util import repodata, packagequery, cpio
paths = []
# glob pattern for the package files; 'dsc' builds look for *.deb instead
suffix = '*.rpm'
if type == 'dsc':
suffix = '*.deb'
for dir in dirs:
paths += glob.glob(os.path.join(os.path.abspath(dir), suffix))
prefer_pkgs = {}
pkgqs = {}
# check for repodata
repository = get_repo(dir)
if repository is None:
# no repodata found for this directory: fall back to globbing package files
paths += glob.glob(os.path.join(os.path.abspath(dir), suffix))
else:
# NOTE(review): no initialisation of `repositories` is visible in this hunk --
# confirm it is defined before this loop.
repositories.append(repository)
packageQueries = osc.util.packagequery.PackageQueries(wanted_arch)
# packages described by repodata are added from the metadata
for repository in repositories:
repodataPackageQueries = osc.util.repodata.queries(repository)
for packageQuery in repodataPackageQueries:
packageQueries.add(packageQuery)
# packages found by globbing are queried file by file
for path in paths:
# source packages and debuginfo packages are skipped
if path.endswith('src.rpm'):
continue
if path.find('-debuginfo-') > 0:
continue
pkgq = packagequery.PackageQuery.query(path)
arch = pkgq.arch()
name = pkgq.name()
# instead of this assumption, we should probably rather take the
# requested arch for this package from buildinfo
# also, it will ignore i686 packages, how to handle those?
if arch in [wanted_arch, 'noarch', 'all'] or wanted_arch in arch_map.get(arch, []):
curpkgq = pkgqs.get(name)
if curpkgq is not None and curpkgq.vercmp(pkgq) > 0:
continue
prefer_pkgs[name] = path
pkgqs[name] = pkgq
depfile = create_deps(pkgqs.values())
packageQuery = packagequery.PackageQuery.query(path)
packageQueries.add(packageQuery)
prefer_pkgs = dict((name, packageQuery.path())
for (name, packageQuery) in packageQueries.iteritems())
depfile = create_deps(packageQueries.values())
# NOTE: rebinding `cpio` here shadows the cpio module imported above
cpio = cpio.CpioWrite()
cpio.add('deps', '\n'.join(depfile))
return prefer_pkgs, cpio

View File

@ -1,4 +1,5 @@
import ar
import os.path
import re
import tarfile
import packagequery
@ -13,6 +14,7 @@ class DebQuery(packagequery.PackageQuery):
def __init__(self, fh):
self.__file = fh
self.__path = os.path.abspath(fh.name)
self.filename_suffix = 'deb'
self.fields = {}
@ -93,6 +95,9 @@ class DebQuery(packagequery.PackageQuery):
def description(self):
    """Returns the 'description' entry of this package's fields dict."""
    fields = self.fields
    return fields['description']
def path(self):
    """Returns the absolute path of the package file recorded by __init__."""
    location = self.__path
    return location
def provides(self):
    """Returns the 'provides' entry of this package's fields dict."""
    fields = self.fields
    return fields['provides']

View File

@ -4,6 +4,43 @@ class PackageError(Exception):
Exception.__init__(self)
self.msg = msg
class PackageQueries(dict):
    """Dict keyed by package name whose values are package queries.

    Assignment is filtered: a query is only stored when its architecture
    matches the wanted architecture and it is not older than the query
    currently stored under the same name.
    """

    # map debian arches to common obs arches
    architectureMap = {'i386': ['i586', 'i686'], 'amd64': ['x86_64']}

    def __init__(self, wantedArchitecture):
        """Creates an empty mapping.

        @param wantedArchitecture architecture that stored packages must match
        """
        self.wantedArchitecture = wantedArchitecture
        super(PackageQueries, self).__init__()

    def add(self, query):
        """Stores query under its own name, subject to the architecture and
        version checks performed on assignment.

        @param query a PackageQuery
        """
        self[query.name()] = query

    def __setitem__(self, name, query):
        """Stores query under name if it passes the filters.

        @param name package name; must equal query.name()
        @param query a PackageQuery
        @raise ValueError if name differs from the query's package name
        """
        if name != query.name():
            raise ValueError("key '%s' does not match "
                             "package query name '%s'" % (name, query.name()))

        architecture = query.arch()
        wanted = self.wantedArchitecture

        # silently ignore packages built for another architecture
        if not (architecture in [wanted, 'noarch', 'all'] or
                wanted in self.architectureMap.get(architecture, [])):
            return

        existing = self.get(name)
        # replace only when nothing is stored yet or the stored query is not
        # newer than the one being assigned
        shouldStore = existing is None or existing.vercmp(query) <= 0

        if shouldStore:
            super(PackageQueries, self).__setitem__(name, query)
class PackageQuery:
"""abstract base class for all package types"""
def read(self, all_tags = False, *extra_tags):
@ -26,7 +63,10 @@ class PackageQuery:
def description(self):
    """Abstract; the base implementation always raises NotImplementedError."""
    raise NotImplementedError
def path(self):
    """Abstract; the base implementation always raises NotImplementedError."""
    raise NotImplementedError
def provides(self):
    """Abstract; the base implementation always raises NotImplementedError."""
    raise NotImplementedError

161
osc/util/repodata.py Normal file
View File

@ -0,0 +1,161 @@
"""Module for reading repodata directory (created with createrepo) for package
information instead of scanning individual rpms."""
# standard modules
import gzip
import os.path
# cElementTree can be standard or 3rd-party depending on python version
try:
from xml.etree import cElementTree as ET
except ImportError:
import cElementTree as ET
# project modules
import osc.util.rpmquery
def namespace(name):
    """Returns the "{uri}" prefix ElementTree uses for tags in the given
    metadata namespace.

    @param name last path component of the metadata namespace URI
    @return str namespace URI wrapped in curly braces
    """
    template = "{http://linux.duke.edu/metadata/%s}"
    return template % name
# Maps the "flags" attribute of an rpm:entry element to the comparison
# operator used when the entry is rendered as a dependency string.  The strict
# comparisons (LT, GT) are included so that entries carrying them do not fail
# the lookup with a KeyError.
OPERATOR_BY_FLAGS = {
    "EQ" : "=",
    "LT" : "<",
    "GT" : ">",
    "LE" : "<=",
    "GE" : ">="
}
def primaryPath(directory):
    """Returns path to the primary repository data file.

    @param directory repository directory that contains the repodata
                     subdirectory
    @return str path to primary repository data file
    @raise IOError if repomd.xml contains no primary location
    """
    metaDataPath = os.path.join(directory, "repodata", "repomd.xml")
    root = ET.parse(metaDataPath).getroot()
    locationTag = namespace("repo") + "location"

    for dataElement in root:
        if dataElement.get("type") != "primary":
            continue

        locationElement = dataElement.find(locationTag)

        # A primary entry without a location element or href cannot be
        # resolved; skip it so the documented IOError is raised below instead
        # of an AttributeError/TypeError leaking out of this function.
        if locationElement is None or locationElement.get("href") is None:
            continue

        # even though the repomd.xml file is under repodata, the location
        # attribute is relative to the parent directory (directory).
        return os.path.join(directory, locationElement.get("href"))

    raise IOError("'%s' contains no primary location" % metaDataPath)
def queries(directory):
    """Returns a list of RepoDataQueries constructed from the repodata under
    the directory.

    @param directory path to a repository directory (parent directory of
                     repodata directory)
    @return list of RepoDataQuery instances
    @raise IOError if repomd.xml contains no primary location
    """
    path = primaryPath(directory)

    gunzippedPrimary = gzip.GzipFile(path)
    try:
        # the whole document is parsed here, so the elements stay usable
        # after the file is closed
        root = ET.parse(gunzippedPrimary).getroot()
    finally:
        # release the file handle even if parsing fails
        gunzippedPrimary.close()

    return [RepoDataQuery(directory, packageElement)
            for packageElement in root]
class RepoDataQuery(object):
    """Package query backed by a package element read from the repodata
    primary file instead of by a package file."""

    def __init__(self, directory, element):
        """Creates a RepoDataQuery from a package Element found under the
        metadata Element of a primary.xml file.

        @param directory repository directory path.  Used to convert relative
                         paths to full paths.
        @param element package Element
        """
        self.__element = element
        self.__directory = os.path.abspath(directory)

    def __formatElement(self):
        """Returns the package's format child Element."""
        tag = namespace("common") + "format"
        return self.__element.find(tag)

    def __parseEntry(self, element):
        """Renders an rpm entry Element as "name", "name OP ver" or
        "name OP ver-rel"."""
        flags = element.get("flags")

        # an entry without flags carries no version constraint
        if flags is None:
            return element.get("name")

        parts = [element.get("name"),
                 " %s %s" % (OPERATOR_BY_FLAGS[flags], element.get("ver"))]

        release = element.get("rel")
        if release is not None:
            parts.append("-%s" % (release,))

        return "".join(parts)

    def __parseEntryCollection(self, collection):
        """Returns the rendered entries of the named rpm collection under the
        format Element, or an empty list if the collection is absent."""
        rpmNamespace = namespace("rpm")
        collectionElement = self.__formatElement().find(rpmNamespace +
                                                        collection)

        if collectionElement is None:
            return []

        return [self.__parseEntry(entryElement)
                for entryElement
                in collectionElement.findall(rpmNamespace + "entry")]

    def __versionElement(self):
        """Returns the package's version child Element."""
        tag = namespace("common") + "version"
        return self.__element.find(tag)

    def arch(self):
        """Returns the text of the package's arch Element."""
        archElement = self.__element.find(namespace("common") + "arch")
        return archElement.text

    def description(self):
        """Returns the text of the package's description Element."""
        descriptionElement = self.__element.find(namespace("common") +
                                                 "description")
        return descriptionElement.text

    def distribution(self):
        """Always returns None."""
        return None

    def epoch(self):
        """Returns the epoch attribute of the version Element."""
        versionElement = self.__versionElement()
        return versionElement.get("epoch")

    def name(self):
        """Returns the text of the package's name Element."""
        nameElement = self.__element.find(namespace("common") + "name")
        return nameElement.text

    def path(self):
        """Returns the package location joined onto the repository
        directory."""
        locationElement = self.__element.find(namespace("common") +
                                              "location")
        return os.path.join(self.__directory, locationElement.get("href"))

    def provides(self):
        """Returns the rendered entries of the provides collection."""
        return self.__parseEntryCollection("provides")

    def release(self):
        """Returns the rel attribute of the version Element."""
        versionElement = self.__versionElement()
        return versionElement.get("rel")

    def requires(self):
        """Returns the rendered entries of the requires collection."""
        return self.__parseEntryCollection("requires")

    def vercmp(self, other):
        """Compares this package to other by epoch, then version, then
        release, returning the first non-zero rpmvercmp result (or zero if
        all three compare equal).

        @param other package query to compare against
        """
        compare = osc.util.rpmquery.RpmQuery.rpmvercmp

        result = compare(str(self.epoch()), str(other.epoch()))
        if result == 0:
            result = compare(self.version(), other.version())
        if result == 0:
            result = compare(self.release(), other.release())

        return result

    def version(self):
        """Returns the ver attribute of the version Element."""
        versionElement = self.__versionElement()
        return versionElement.get("ver")

View File

@ -62,6 +62,7 @@ class RpmQuery(packagequery.PackageQuery):
def __init__(self, fh):
    """Sets up the query for an rpm file.

    @param fh file object of the rpm; its name attribute is used to record
              the absolute path of the file
    """
    self.__file = fh
    # Keep the absolute path in its own attribute: path() returns
    # self.__path, and assigning the path to self.__file would discard the
    # file object stored on the line above.
    self.__path = os.path.abspath(fh.name)
    self.filename_suffix = 'rpm'
    self.header = None
@ -207,6 +208,9 @@ class RpmQuery(packagequery.PackageQuery):
return None
return entry.data
def path(self):
    """Returns the value of this query's private __path attribute."""
    location = self.__path
    return location
def provides(self):
    """Returns the result of __reqprov for tags 1047, 1112 and 1113.

    NOTE(review): these look like the rpm header tags for provide names,
    flags and versions -- confirm against the rpm tag list.
    """
    nameTag, flagsTag, versionTag = 1047, 1112, 1113
    return self.__reqprov(nameTag, flagsTag, versionTag)