forked from importers/git-importer
Split out OBS, ProxySHA256 and Importer
This commit is contained in:
parent
bdc1dc0cc9
commit
e53b030dcf
@ -25,6 +25,7 @@ BINARY = {
|
|||||||
".zst",
|
".zst",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def is_binary_or_large(filename, size):
|
def is_binary_or_large(filename, size):
|
||||||
"""Decide if is a binary file based on the extension or size"""
|
"""Decide if is a binary file based on the extension or size"""
|
||||||
binary_suffix = BINARY
|
binary_suffix = BINARY
|
||||||
@ -83,4 +84,3 @@ def is_binary_or_large(filename, size):
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
592
git-importer.py
592
git-importer.py
@ -1,67 +1,14 @@
|
|||||||
#!/usr/bin/python3
|
#!/usr/bin/python3
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import errno
|
|
||||||
import functools
|
|
||||||
import hashlib
|
|
||||||
import logging
|
import logging
|
||||||
import pathlib
|
import pathlib
|
||||||
import shutil
|
import shutil
|
||||||
import sys
|
import sys
|
||||||
import time
|
|
||||||
import urllib.parse
|
|
||||||
import xml.etree.ElementTree as ET
|
|
||||||
from urllib.error import HTTPError
|
|
||||||
|
|
||||||
import osc.core
|
import osc.core
|
||||||
import requests
|
|
||||||
|
|
||||||
from request import Request
|
|
||||||
from git import Git
|
|
||||||
from history import History
|
|
||||||
from binary import is_binary_or_large
|
|
||||||
|
|
||||||
# Add a retry wrapper for some of the HTTP actions.
|
|
||||||
def retry(func, attempts=5, delay=0.5):
    """Wrap ``func`` so that transient network failures are retried.

    A call is retried when it raises:

    * ``HTTPError`` with a 5xx status code,
    * ``URLError`` whose ``reason`` carries ``ENETUNREACH`` or
      ``EADDRNOTAVAIL`` as ``errno``,
    * ``OSError`` with one of those two ``errno`` values.

    Any other error is re-raised immediately.  When every attempt fails
    the last error is re-raised instead of silently returning ``None``.

    Parameters:
        func: callable to wrap.
        attempts: maximum number of calls (values below 1 are treated as 1).
        delay: seconds to sleep between two attempts.

    Returns:
        The wrapping callable, which forwards all arguments to ``func``.
    """
    transient_errnos = (errno.ENETUNREACH, errno.EADDRNOTAVAIL)
    attempts = max(attempts, 1)

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Only used in log messages; the wrapped callable may be invoked
        # without any positional argument.
        target = args[0] if args else getattr(func, "__name__", func)
        last_error = None
        for attempt in range(1, attempts + 1):
            try:
                return func(*args, **kwargs)
            except HTTPError as e:
                if not 500 <= e.code <= 599:
                    raise
                last_error = e
                logging.warning(
                    f"HTTPError {e.code} -- Retrying {target} ({attempt})"
                )
            except urllib.error.URLError as e:
                # ``reason`` can be a plain string, which has no ``errno``.
                if getattr(e.reason, "errno", None) not in transient_errnos:
                    logging.warning(f"URLError {e} uncaught")
                    raise
                last_error = e
                logging.warning(f"URLError {e} -- Retrying {target} ({attempt})")
            except OSError as e:
                # ENETUNREACH / EADDRNOTAVAIL sporadically hit cloud VMs :(
                if e.errno not in transient_errnos:
                    logging.warning(f"OSError {e.errno} uncaught")
                    raise
                last_error = e
                logging.warning(f"OSError {e} -- Retrying {target} ({attempt})")
            if attempt < attempts:
                # TODO: remove when move to async
                time.sleep(delay)
        # All attempts failed with a retryable error: surface the last one.
        raise last_error

    return wrapper
|
|
||||||
|
|
||||||
|
|
||||||
# Monkey-patch osc's GET helper so that every request issued through
# osc.core.http_GET goes through the retry wrapper.
osc.core.http_GET = retry(osc.core.http_GET)
|
|
||||||
|
|
||||||
|
|
||||||
|
from importer import Importer
|
||||||
|
|
||||||
URL_OBS = "https://api.opensuse.org"
|
URL_OBS = "https://api.opensuse.org"
|
||||||
URL_IBS = "https://api.suse.de"
|
URL_IBS = "https://api.suse.de"
|
||||||
@ -94,543 +41,6 @@ PROJECTS = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def _hash(hash_alg, file_or_path):
    """Return the hexadecimal digest of some file content.

    ``hash_alg`` is called without arguments to create the hash object
    (for example ``hashlib.md5``).  ``file_or_path`` is either an object
    with a ``read()`` method, which is consumed directly, or an object
    with an ``open("rb")`` method (such as a ``pathlib.Path``), which is
    opened in binary mode for the duration of the call.
    """
    digest = hash_alg()

    def _feed(stream):
        # Consume the stream in 4 KiB blocks until read() returns nothing.
        while True:
            block = stream.read(1024 * 4)
            if not block:
                break
            digest.update(block)

    if not hasattr(file_or_path, "read"):
        with file_or_path.open("rb") as handle:
            _feed(handle)
    else:
        _feed(file_or_path)
    return digest.hexdigest()


# Ready-to-use hashers taking a file object or a path.
md5 = functools.partial(_hash, hashlib.md5)
sha256 = functools.partial(_hash, hashlib.sha256)
|
|
||||||
|
|
||||||
|
|
||||||
def _files_hash(hash_alg, dirpath):
    """Return ``(filename, digest)`` pairs for the regular files of a directory.

    ``hash_alg`` is called once per file with the file path and its return
    value is stored as the digest, so the result is not tied to MD5.
    Entries of ``dirpath`` that are not regular files are skipped, and the
    list is sorted by path so that the output does not depend on the
    directory iteration order.
    """
    # TODO: do it async or multithreaded
    return [
        (path.name, hash_alg(path))
        for path in sorted(dirpath.iterdir())
        if path.is_file()
    ]
|
|
||||||
|
|
||||||
|
|
||||||
# Directory-level variants of _files_hash, pre-bound to the md5 and sha256
# file hashers.
files_md5 = functools.partial(_files_hash, md5)
files_sha256 = functools.partial(_files_hash, sha256)
|
|
||||||
|
|
||||||
class OBS:
    """Small client for the OBS API built on top of ``osc.core``.

    The API URL can be given at construction time or set later with
    ``change_url``; it must be set before any request is issued.
    """

    def __init__(self, url=None):
        # Always define the attribute so that a missing URL is visible as
        # ``None`` instead of an AttributeError in the first request.
        self.url = None
        if url:
            self.change_url(url)

    def change_url(self, url):
        """Switch to another API URL and reload the osc configuration for it."""
        self.url = url
        osc.conf.get_config(override_apiurl=url)

    def _xml(self, url_path, **params):
        """GET ``url_path`` with ``params`` and return the parsed XML root."""
        url = osc.core.makeurl(self.url, [url_path], params)
        logging.debug(f"GET {url}")
        return ET.parse(osc.core.http_GET(url)).getroot()

    def _meta(self, project, package, **params):
        """Return the ``_meta`` XML root of a package, or None on HTTP error."""
        try:
            root = self._xml(f"source/{project}/{package}/_meta", **params)
        except HTTPError:
            logging.error(f"Package [{project}/{package} {params}] has no meta")
            return None
        return root

    def _history(self, project, package, **params):
        """Return the ``_history`` XML root of a package, or None on HTTP error."""
        try:
            root = self._xml(f"source/{project}/{package}/_history", **params)
        except HTTPError:
            logging.error(f"Package [{project}/{package} {params}] has no history")
            return None
        return root

    def _link(self, project, package, rev):
        """Return the ``_link`` XML root of a package revision.

        None is returned when the file cannot be fetched or cannot be parsed.
        """
        try:
            root = self._xml(f"source/{project}/{package}/_link", rev=rev)
        except HTTPError:
            logging.info("Package has no link")
            return None
        except ET.ParseError:
            logging.error(
                f"Package [{project}/{package} rev={rev}] _link can't be parsed"
            )
            # ``root`` was never bound in this case.
            return None
        return root

    def _request(self, requestid):
        """Return the XML root of a request, or None on HTTP error."""
        try:
            root = self._xml(f"request/{requestid}")
        except HTTPError:
            logging.warning(f"Cannot fetch request {requestid}")
            return None
        return root

    def exists(self, project, package):
        """Tell whether the package meta is available and belongs to ``project``."""
        root = self._meta(project, package)
        if root is None:
            return False
        return root.get("project") == project

    def devel_project(self, project, package):
        """Return the devel project named in the package meta, if any.

        None is returned when the meta cannot be fetched or has no
        ``devel`` element.
        """
        root = self._meta(project, package)
        if root is None:
            return None
        devel = root.find("devel")
        if devel is None:
            return None
        return devel.get("project")

    def request(self, requestid):
        """Return the parsed ``Request`` for ``requestid``, or None if unavailable."""
        root = self._request(requestid)
        if root is None:
            return None
        return Request().parse(root)

    def files(self, project, package, revision):
        """Return ``(name, size, md5)`` for each entry of an expanded revision."""
        root = self._xml(f"source/{project}/{package}", rev=revision, expand=1)
        return [
            (e.get("name"), int(e.get("size")), e.get("md5"))
            for e in root.findall("entry")
        ]

    def _download(self, project, package, name, revision):
        """Open the given source file of a revision and return the response."""
        url = osc.core.makeurl(
            self.url,
            ["source", project, package, urllib.parse.quote(name)],
            {"rev": revision, "expand": 1},
        )
        return osc.core.http_GET(url)

    def download(self, project, package, name, revision, dirpath):
        """Store the given source file of a revision as ``dirpath / name``."""
        with (dirpath / name).open("wb") as f:
            f.write(self._download(project, package, name, revision).read())
|
|
||||||
|
|
||||||
|
|
||||||
class ProxySHA256:
    """Cache of SHA256 digests of OBS files, backed by a redirector service.

    Entries are dictionaries with the keys ``"sha256"`` and ``"fsize"``.
    They are indexed by the string ``"<md5>-<name>"``, the key format used
    for look-ups, so that entries added locally are found again by
    ``get``.
    """

    def __init__(self, obs, url=None, enabled=True):
        self.obs = obs
        self.url = url if url else "http://source.dyn.cloud.suse.de"
        self.enabled = enabled
        # None means "not loaded yet"; see _ensure_loaded().
        self.hashes = None
        self.texts = set()

    @staticmethod
    def _key(file_md5, name):
        """Build the cache key shared by look-ups and insertions."""
        return f"{file_md5}-{name}"

    def load_package(self, package):
        """Fetch the known digests and text file names of ``package``.

        When the service does not answer with status 200 the cache is
        left empty, so later look-ups simply miss.
        """
        # _project is unreachable for the proxy - due to being a fake package
        if package == "_project":
            self.enabled = False
            self.texts = {"_config", "_service"}
            self.hashes = {}
            return
        logging.info("Retrieve all previously defined SHA256")
        response = requests.get(f"{self.url.rstrip('/')}/package/{package}")
        if response.status_code == 200:
            payload = response.json()
            self.hashes = payload["shas"]
            self.texts = set(payload["texts"])
        else:
            logging.warning(
                f"Cannot retrieve SHA256 for {package} ({response.status_code})"
            )
            self.hashes = {}

    def _ensure_loaded(self, package):
        """Initialise the cache on first use."""
        if self.hashes is not None:
            return
        if self.enabled:
            self.load_package(package)
        else:
            self.hashes = {}

    def _remember(self, file_md5, name, file_sha256, size):
        """Store a digest in the cache and return the stored entry."""
        if self.hashes is None:
            self.hashes = {}
        entry = {"sha256": file_sha256, "fsize": size}
        self.hashes[self._key(file_md5, name)] = entry
        return entry

    def get(self, package, name, file_md5):
        """Return the cached entry for a file, or None when unknown."""
        self._ensure_loaded(package)
        return self.hashes.get(self._key(file_md5, name), None)

    def _proxy_put(self, project, package, name, revision, file_md5, size):
        """Ask the redirector for the digest of a file and cache it.

        Raises ``Exception`` when the redirector does not answer with
        status 200.
        """
        quoted_name = urllib.parse.quote(name)
        url = f"{self.obs.url}/public/source/{project}/{package}/{quoted_name}?rev={revision}"
        response = requests.put(
            self.url,
            data={
                "hash": file_md5,
                "filename": name,
                "url": url,
                "package": package,
            },
        )
        if response.status_code != 200:
            raise Exception(f"Redirector error on {self.url} for {url}")

        return self._remember(
            file_md5, name, response.content.decode("utf-8"), size
        )

    def _obs_put(self, project, package, name, revision, file_md5, size):
        """Download the file from OBS, hash it locally and cache the digest."""
        file_sha256 = sha256(self.obs._download(project, package, name, revision))
        return self._remember(file_md5, name, file_sha256, size)

    def put(self, project, package, name, revision, file_md5, size):
        """Compute and cache the digest of a file, via the proxy when enabled."""
        self._ensure_loaded(package)
        if not self.enabled:
            return self._obs_put(project, package, name, revision, file_md5, size)
        return self._proxy_put(project, package, name, revision, file_md5, size)

    def is_text(self, filename):
        """Tell whether ``filename`` is in the loaded set of text file names."""
        return filename in self.texts

    def get_or_put(self, project, package, name, revision, file_md5, size):
        """Return the cached entry of a file, computing it when missing.

        Raises ``Exception`` when the recorded size differs from ``size``.
        """
        result = self.get(package, name, file_md5)
        if not result:
            result = self.put(project, package, name, revision, file_md5, size)

        # Sanity check
        if result["fsize"] != size:
            raise Exception(f"Redirector has different size for {name}")

        return result
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Importer:
    """Recreate the OBS revision history of one package as git commits."""

    def __init__(self, projects, package, repodir, search_ancestor, rebase_devel):
        """Prepare the git repository and the list of projects to import.

        ``projects`` is a list of ``(project, branch, api_url)`` tuples whose
        first entry must be "openSUSE:Factory".  When that package names a
        devel project, it is prepended to the list with the branch "devel".
        """
        # The idea is to create each commit in order, and draw the
        # same graph described by the revisions timeline.  For that we
        # need first to fetch all the revisions and sort them
        # linearly, based on the timestamp.
        #
        # After that we recreate the commits, and if one revision is a
        # request that contains a target inside the projects in the
        # "history", we create a merge commit.
        #
        # Optionally, if a flag is set, we will try to find a common
        # "Initial commit" from a reference branch (the first one in
        # "projects", that is safe to assume to be "openSUSE:Factory").
        # This is not always a good idea.  For example, in a normal
        # situation the "devel" project history is older than
        # "factory", and we can root the tree on it.  But for some
        # other projects we lost partially the "devel" history project
        # (could be moved), and "factory" is not the root.

        self.package = package
        self.search_ancestor = search_ancestor
        self.rebase_devel = rebase_devel

        self.obs = OBS()
        self.git = Git(
            repodir,
            committer="Git OBS Bridge",
            committer_email="obsbridge@suse.de",
        ).create()
        self.proxy_sha256 = ProxySHA256(self.obs, enabled=True)

        self.history = History(self.obs, self.package)

        # Add the "devel" project
        (project, branch, api_url) = projects[0]
        assert project == "openSUSE:Factory"
        self.obs.change_url(api_url)
        devel_project = self.obs.devel_project(project, package)
        if devel_project:
            self.projects = [(devel_project, "devel", api_url)] + projects
        else:
            self.projects = projects

        # Associate the branch and api_url information per project
        self.projects_info = {
            project: (branch, api_url) for (project, branch, api_url) in self.projects
        }

    def download(self, revision):
        """Bring the git working directory in line with the files of ``revision``.

        Raises ``Exception`` when a downloaded file does not match the MD5
        announced by OBS.
        """
        obs_files = self.obs.files(revision.project, revision.package, revision.srcmd5)
        # ".gitattributes" is managed below and must not be treated as
        # package content.  Compare the name for equality: a membership
        # test against the bare string would be a substring check.
        git_files = {
            (f.name, f.stat().st_size, md5(f))
            for f in self.git.path.iterdir()
            if f.is_file() and f.name != ".gitattributes"
        }

        # Overwrite ".gitattributes" with the default LFS attributes
        self.git.add_default_lfs_gitattributes(force=True)

        # Download each file in OBS if it is not a binary (or large)
        # file
        for (name, size, file_md5) in obs_files:
            # this file creates easily 100k commits and is just useless data :(
            # unfortunately it's stored in the same meta package as the project config
            if revision.package == "_project" and name == "_staging_workflow":
                continue
            # have such files been detected as text mimetype before?
            is_text = self.proxy_sha256.is_text(name)
            if not is_text and is_binary_or_large(name, size):
                file_sha256 = self.proxy_sha256.get_or_put(
                    revision.project,
                    revision.package,
                    name,
                    revision.srcmd5,
                    file_md5,
                    size,
                )
                self.git.add_lfs(name, file_sha256["sha256"], size)
            else:
                if (name, size, file_md5) not in git_files:
                    print(f"Download {name}")
                    self.obs.download(
                        revision.project,
                        revision.package,
                        name,
                        revision.srcmd5,
                        self.git.path,
                    )
                    # Validate the MD5 of the downloaded file
                    if md5(self.git.path / name) != file_md5:
                        raise Exception(f"Download error in {name}")
                    self.git.add(name)

        # Remove extra files
        obs_names = {n for (n, _, _) in obs_files}
        git_names = {n for (n, _, _) in git_files}
        for name in git_names - obs_names:
            print(f"Remove {name}")
            self.git.remove(name)

    def import_all_revisions(self, gc):
        """Import every revision of every project, in the sorted order.

        ``gc`` is the number of revisions between two ``git gc`` runs; a
        falsy value disables them.
        """
        # Fetch all the requests and sort them.  Ideally we should
        # build the graph here, to avoid new commits before the merge.
        # For now we will sort them and invalidate the commits if
        # "rebase_devel" is set.
        self.history.fetch_all_revisions(self.projects)
        revisions = self.history.sort_all_revisions()

        logging.debug(f"Selected import order for {self.package}")
        for revision in revisions:
            logging.debug(revision)

        gc_cnt = gc
        for revision in revisions:
            gc_cnt -= 1
            if gc_cnt <= 0 and gc:
                self.git.gc()
                gc_cnt = gc
            self.import_revision(revision)

    def import_new_revision_with_request(self, revision, request):
        """Create a new branch as a result of a merge"""

        submitted_revision = self.history.find_revision(
            request.source, request.revisionid, revision.time
        )
        if not submitted_revision:
            logging.warning(f"Request {request} does not connect to a known revision")
            return False

        if not submitted_revision.commit:
            # If the revision appointed by the request is not part of
            # the git history, we can have an ordering problem.  One
            # example is "premake4".
            self.import_revision(submitted_revision)

        assert submitted_revision.commit is not None

        project = revision.project
        branch, _ = self.projects_info[project]

        # TODO: add an empty commit marking the acceptance of the request (see discussion in PR 2858)
        self.git.branch(branch, submitted_revision.commit)
        self.git.clean()
        self.git.checkout(branch)

        logging.info(f"Create new branch based on {submitted_revision.commit}")
        revision.commit = submitted_revision.commit

    def _rebase_branch_history(self, project, revision):
        """Re-import the committed revisions of ``project`` that follow ``revision``."""
        branch, _ = self.projects_info[project]
        history = self.history[project]
        revision_index = history.index(revision)
        for index in range(revision_index + 1, len(history)):
            revision = history[index]
            # We are done when we have one non-committed revision
            if not revision.commit:
                return
            logging.info(f"Rebasing {revision} from {branch}")
            revision.commit = None
            self.import_revision(revision)

    def import_revision_with_request(self, revision, request):
        """Import a single revision via a merge

        Returns True when a merge commit was created, False when the
        revision could not be imported as a merge.
        """

        submitted_revision = self.history.find_revision(
            request.source, request.revisionid, revision.time
        )
        if not submitted_revision:
            logging.warning(f"Request {request} does not connect to a known revision")
            return False
        assert submitted_revision.commit is not None

        # TODO: detect a revision, case in point
        # Base:System/bash/284 -> rq683701 -> accept O:F/151
        #   -> autocommit Base:System/bash/285
        # Revert lead to openSUSE:Factory/bash/152
        # Base:System/286 restored the reverted code in devel project
        # rq684575 was created and accepted as O:F/153
        # But the 284-285 and the 285-286 changeset is seen as empty
        # as the revert was never in Base:System, so the
        # submitted_revision of 684575 has no commit
        if submitted_revision.commit == "EMPTY":
            logging.warning("Empty commit submitted?!")
            return False

        message = (
            f"Accepting request {revision.requestid}: {revision.comment}\n\n{revision}"
        )
        commit = self.git.merge(
            # TODO: revision.userid or request.creator?
            f"OBS User {revision.userid}",
            "null@suse.de",
            revision.time,
            message,
            submitted_revision.commit,
        )

        if commit == "EMPTY":
            logging.warning("Empty merge. Ignoring the revision and the request")
            self.git.merge_abort()
            revision.commit = commit
            return False

        if commit == "CONFLICT":
            logging.info("Merge conflict. Downloading revision")
            self.download(revision)
            message = f"CONFLICT {message}"
            commit = self.git.merge(
                f"OBS User {revision.userid}",
                "null@suse.de",
                revision.time,
                message,
                submitted_revision.commit,
                merged=True,
            )

        assert commit and commit != "CONFLICT"
        logging.info(f"Merge with {submitted_revision.commit} into {commit}")
        revision.commit = commit

        # TODO: There are more checks to do, like for example, the
        # last commit into the non-devel branch should be a merge from
        # the devel branch
        if self.rebase_devel:
            branch, _ = self.projects_info.get(request.source, (None, None))
            if branch == "devel":
                self.git.repo.references[f"refs/heads/{branch}"].set_target(commit)
                self._rebase_branch_history(request.source, submitted_revision)

        return True

    def matching_request(self, revision):
        """Return the request behind ``revision`` when it has to be handled.

        None is returned when the request cannot be fetched, or when it is
        a submit request that comes from a project that is not imported,
        targets another project than the one of ``revision``, or stays
        within a single project.  Requests of any other type are returned
        as they are.
        """
        request = self.obs.request(revision.requestid)
        if not request:
            return None

        # to be handled by the caller
        if request.type() != "submit":
            return request

        if request.source not in self.projects_info:
            logging.info("Request from a non exported project")
            return None

        if request.target != revision.project:
            # This seems to happen when the devel project gets
            # reinitialized (for example, SR#943593 in 7zip, or
            # SR#437901 in ColorFull)
            logging.info("Request target different from current project")
            return None

        if request.source == request.target:
            # this is not a merge, but a different way to do a
            # contribution to the (devel) project - see bindfs's rev 1
            logging.info("Request within the same project")
            return None

        return request

    def import_revision(self, revision):
        """Import a single revision into git"""
        project = revision.project
        branch, api_url = self.projects_info[project]

        logging.info(f"Importing [{revision}] to {branch}")

        self.obs.change_url(api_url)

        # Populate linkrev and replace srcmd5 from the linked
        # revision.  If the expansion fails, the revision will be ignored
        # and not imported.
        if not revision.check_expanded():
            logging.warning("Broken revision")
            revision.ignored = True
            return

        # When doing a SR, we see also a revision in the origin
        # project with the outgoing request, but without changes in
        # the project.  We can ignore them.
        #
        # If there is a request ID, it will be filtered out later,
        # when the target project is different from itself.
        if revision.userid == "autobuild" and not revision.requestid:
            logging.info("Ignoring autocommit")
            revision.ignored = True
            return

        if revision.userid == "buildservice-autocommit":
            logging.info("Ignoring autocommit")
            revision.ignored = True
            return

        # Create the reference if the branch is new.  If so return
        # True.
        new_branch = self.git.checkout(branch)

        if revision.requestid:
            request = self.matching_request(revision)
            if request:
                if request.type() == "delete":
                    # TODO: after this comes a restore, this should be collapsed
                    # before even hitting git
                    logging.info("Delete request ignored")
                    revision.ignored = True
                    return

                logging.debug(f"Found matching request: #{revision.project} #{request}")
                if new_branch:
                    self.import_new_revision_with_request(revision, request)
                    return
                if self.import_revision_with_request(revision, request):
                    return

        # Import revision as a single commit (without merging)
        self.download(revision)

        if new_branch or self.git.is_dirty():
            commit = self.git.commit(
                f"OBS User {revision.userid}",
                "null@suse.de",
                revision.time,
                # TODO: Normalize better the commit message
                f"{revision.comment}\n\n{revision}",
                # Create an empty commit only if is a new branch
                allow_empty=new_branch,
            )
            revision.commit = commit
            logging.info(f"Commit {commit}")
        else:
            logging.info("Skip empty commit")
            revision.ignored = True
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = argparse.ArgumentParser(description="OBS history importer into git")
|
parser = argparse.ArgumentParser(description="OBS history importer into git")
|
||||||
parser.add_argument("package", help="OBS package name")
|
parser.add_argument("package", help="OBS package name")
|
||||||
|
1
git.py
1
git.py
@ -8,6 +8,7 @@ from binary import BINARY
|
|||||||
|
|
||||||
LFS_SUFFIX = "filter=lfs diff=lfs merge=lfs -text"
|
LFS_SUFFIX = "filter=lfs diff=lfs merge=lfs -text"
|
||||||
|
|
||||||
|
|
||||||
class Git:
|
class Git:
|
||||||
"""Local git repository"""
|
"""Local git repository"""
|
||||||
|
|
||||||
|
354
importer.py
Normal file
354
importer.py
Normal file
@ -0,0 +1,354 @@
|
|||||||
|
import functools
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from git import Git
|
||||||
|
from history import History
|
||||||
|
from binary import is_binary_or_large
|
||||||
|
from proxy_sha256 import ProxySHA256, md5, sha256
|
||||||
|
from obs import OBS
|
||||||
|
|
||||||
|
|
||||||
|
def _files_hash(hash_alg, dirpath):
    """Return ``(filename, digest)`` pairs for the regular files of a directory.

    ``hash_alg`` is called once per file with the file path and its return
    value is stored as the digest, so the result is not tied to MD5.
    Entries of ``dirpath`` that are not regular files are skipped, and the
    list is sorted by path so that the output does not depend on the
    directory iteration order.
    """
    # TODO: do it async or multithreaded
    return [
        (path.name, hash_alg(path))
        for path in sorted(dirpath.iterdir())
        if path.is_file()
    ]
|
||||||
|
|
||||||
|
|
||||||
|
# Directory-level variants of _files_hash, pre-bound to the md5 and sha256
# file hashers imported from proxy_sha256.
files_md5 = functools.partial(_files_hash, md5)
files_sha256 = functools.partial(_files_hash, sha256)
|
||||||
|
|
||||||
|
|
||||||
|
class Importer:
|
||||||
|
def __init__(self, projects, package, repodir, search_ancestor, rebase_devel):
|
||||||
|
# The idea is to create each commit in order, and draw the
|
||||||
|
# same graph described by the revisions timeline. For that we
|
||||||
|
# need first to fetch all the revisions and sort them
|
||||||
|
# linearly, based on the timestamp.
|
||||||
|
#
|
||||||
|
# After that we recreate the commits, and if one revision is a
|
||||||
|
# request that contains a target inside the projects in the
|
||||||
|
# "history", we create a merge commit.
|
||||||
|
#
|
||||||
|
# Optionally, if a flag is set, we will try to find a common
|
||||||
|
# "Initial commit" from a reference branch (the first one in
|
||||||
|
# "projects", that is safe to assume to be "openSUSE:Factory".
|
||||||
|
# This is not always a good idea. For example, in a normal
|
||||||
|
# situation the "devel" project history is older than
|
||||||
|
# "factory", and we can root the tree on it. But for some
|
||||||
|
# other projects we lost partially the "devel" history project
|
||||||
|
# (could be moved), and "factory" is not the root.
|
||||||
|
|
||||||
|
self.package = package
|
||||||
|
self.search_ancestor = search_ancestor
|
||||||
|
self.rebase_devel = rebase_devel
|
||||||
|
|
||||||
|
self.obs = OBS()
|
||||||
|
self.git = Git(
|
||||||
|
repodir,
|
||||||
|
committer="Git OBS Bridge",
|
||||||
|
committer_email="obsbridge@suse.de",
|
||||||
|
).create()
|
||||||
|
self.proxy_sha256 = ProxySHA256(self.obs, enabled=True)
|
||||||
|
|
||||||
|
self.history = History(self.obs, self.package)
|
||||||
|
|
||||||
|
# Add the "devel" project
|
||||||
|
(project, branch, api_url) = projects[0]
|
||||||
|
assert project == "openSUSE:Factory"
|
||||||
|
self.obs.change_url(api_url)
|
||||||
|
devel_project = self.obs.devel_project(project, package)
|
||||||
|
if devel_project:
|
||||||
|
self.projects = [(devel_project, "devel", api_url)] + projects
|
||||||
|
else:
|
||||||
|
self.projects = projects
|
||||||
|
|
||||||
|
# Associate the branch and api_url information per project
|
||||||
|
self.projects_info = {
|
||||||
|
project: (branch, api_url) for (project, branch, api_url) in self.projects
|
||||||
|
}
|
||||||
|
|
||||||
|
def download(self, revision):
|
||||||
|
obs_files = self.obs.files(revision.project, revision.package, revision.srcmd5)
|
||||||
|
git_files = {
|
||||||
|
(f.name, f.stat().st_size, md5(f))
|
||||||
|
for f in self.git.path.iterdir()
|
||||||
|
if f.is_file() and f.name not in (".gitattributes")
|
||||||
|
}
|
||||||
|
|
||||||
|
# Overwrite ".gitattributes" with the
|
||||||
|
self.git.add_default_lfs_gitattributes(force=True)
|
||||||
|
|
||||||
|
# Download each file in OBS if it is not a binary (or large)
|
||||||
|
# file
|
||||||
|
for (name, size, file_md5) in obs_files:
|
||||||
|
# this file creates easily 100k commits and is just useless data :(
|
||||||
|
# unfortunately it's stored in the same meta package as the project config
|
||||||
|
if revision.package == "_project" and name == "_staging_workflow":
|
||||||
|
continue
|
||||||
|
# have such files been detected as text mimetype before?
|
||||||
|
is_text = self.proxy_sha256.is_text(name)
|
||||||
|
if not is_text and is_binary_or_large(name, size):
|
||||||
|
file_sha256 = self.proxy_sha256.get_or_put(
|
||||||
|
revision.project,
|
||||||
|
revision.package,
|
||||||
|
name,
|
||||||
|
revision.srcmd5,
|
||||||
|
file_md5,
|
||||||
|
size,
|
||||||
|
)
|
||||||
|
self.git.add_lfs(name, file_sha256["sha256"], size)
|
||||||
|
else:
|
||||||
|
if (name, size, file_md5) not in git_files:
|
||||||
|
print(f"Download {name}")
|
||||||
|
self.obs.download(
|
||||||
|
revision.project,
|
||||||
|
revision.package,
|
||||||
|
name,
|
||||||
|
revision.srcmd5,
|
||||||
|
self.git.path,
|
||||||
|
)
|
||||||
|
# Validate the MD5 of the downloaded file
|
||||||
|
if md5(self.git.path / name) != file_md5:
|
||||||
|
raise Exception(f"Download error in {name}")
|
||||||
|
self.git.add(name)
|
||||||
|
|
||||||
|
# Remove extra files
|
||||||
|
obs_names = {n for (n, _, _) in obs_files}
|
||||||
|
git_names = {n for (n, _, _) in git_files}
|
||||||
|
for name in git_names - obs_names:
|
||||||
|
print(f"Remove {name}")
|
||||||
|
self.git.remove(name)
|
||||||
|
|
||||||
|
def import_all_revisions(self, gc):
|
||||||
|
# Fetch all the requests and sort them. Ideally we should
|
||||||
|
# build the graph here, to avoid new commits before the merge.
|
||||||
|
# For now we will sort them and invalidate the commits if
|
||||||
|
# "rebase_devel" is set.
|
||||||
|
self.history.fetch_all_revisions(self.projects)
|
||||||
|
revisions = self.history.sort_all_revisions()
|
||||||
|
|
||||||
|
logging.debug(f"Selected import order for {self.package}")
|
||||||
|
for revision in revisions:
|
||||||
|
logging.debug(revision)
|
||||||
|
|
||||||
|
gc_cnt = gc
|
||||||
|
for revision in revisions:
|
||||||
|
gc_cnt -= 1
|
||||||
|
if gc_cnt <= 0 and gc:
|
||||||
|
self.git.gc()
|
||||||
|
gc_cnt = gc
|
||||||
|
self.import_revision(revision)
|
||||||
|
|
||||||
|
def import_new_revision_with_request(self, revision, request):
|
||||||
|
"""Create a new branch as a result of a merge"""
|
||||||
|
|
||||||
|
submitted_revision = self.history.find_revision(
|
||||||
|
request.source, request.revisionid, revision.time
|
||||||
|
)
|
||||||
|
if not submitted_revision:
|
||||||
|
logging.warning(f"Request {request} does not connect to a known revision")
|
||||||
|
return False
|
||||||
|
|
||||||
|
if not submitted_revision.commit:
|
||||||
|
# If the revision appointed by the request is not part of
|
||||||
|
# the git history, we can have an ordering problem. One
|
||||||
|
# example is "premake4".
|
||||||
|
self.import_revision(submitted_revision)
|
||||||
|
|
||||||
|
assert submitted_revision.commit is not None
|
||||||
|
|
||||||
|
project = revision.project
|
||||||
|
branch, _ = self.projects_info[project]
|
||||||
|
|
||||||
|
# TODO: add an empty commit marking the acceptenace of the request (see discussion in PR 2858)
|
||||||
|
self.git.branch(branch, submitted_revision.commit)
|
||||||
|
self.git.clean()
|
||||||
|
self.git.checkout(branch)
|
||||||
|
|
||||||
|
logging.info(f"Create new branch based on {submitted_revision.commit}")
|
||||||
|
revision.commit = submitted_revision.commit
|
||||||
|
|
||||||
|
def _rebase_branch_history(self, project, revision):
|
||||||
|
branch, _ = self.projects_info[project]
|
||||||
|
history = self.history[project]
|
||||||
|
revision_index = history.index(revision)
|
||||||
|
for index in range(revision_index + 1, len(history)):
|
||||||
|
revision = history[index]
|
||||||
|
# We are done when we have one non-commited revision
|
||||||
|
if not revision.commit:
|
||||||
|
return
|
||||||
|
logging.info(f"Rebasing {revision} from {branch}")
|
||||||
|
revision.commit = None
|
||||||
|
self.import_revision(revision)
|
||||||
|
|
||||||
|
def import_revision_with_request(self, revision, request):
    """Import a single revision via a merge.

    Merges the commit of the revision submitted by ``request`` and stores
    the resulting commit in ``revision.commit``.

    Returns True when a merge commit was recorded. Returns False when the
    request does not connect to a known revision, when the submitted
    revision's commit is "EMPTY", or when the merge itself is "EMPTY".
    """
    submitted_revision = self.history.find_revision(
        request.source, request.revisionid, revision.time
    )
    if not submitted_revision:
        logging.warning(f"Request {request} does not connect to a known revision")
        return False
    assert submitted_revision.commit is not None

    # TODO: detect a revision, case in point
    # Base:System/bash/284 -> rq683701 -> accept O:F/151
    #   -> autocommit Base:System/bash/285
    # Revert lead to openSUSE:Factory/bash/152
    # Base:System/286 restored the reverted code in devel project
    # rq684575 was created and accepted as O:F/153
    # But the 284-285 and the 285-286 changeset is seen as empty
    # as the revert was never in Base:System, so the
    # submitted_revision of 684575 has no commit
    if submitted_revision.commit == "EMPTY":
        logging.warning("Empty commit submitted?!")
        return False

    def run_merge(text, **extra):
        # Both merge attempts share author, e-mail, time and parent commit.
        # TODO: revision.userid or request.creator?
        return self.git.merge(
            f"OBS User {revision.userid}",
            "null@suse.de",
            revision.time,
            text,
            submitted_revision.commit,
            **extra,
        )

    message = (
        f"Accepting request {revision.requestid}: {revision.comment}\n\n{revision}"
    )
    commit = run_merge(message)

    if commit == "EMPTY":
        logging.warning("Empty merge. Ignoring the revision and the request")
        self.git.merge_abort()
        revision.commit = commit
        return False

    if commit == "CONFLICT":
        # Take the files of the revision as downloaded and record the
        # merge with the message flagged as a conflict.
        logging.info("Merge conflict. Downloading revision")
        self.download(revision)
        message = f"CONFLICT {message}"
        commit = run_merge(message, merged=True)

    assert commit and commit != "CONFLICT"
    logging.info(f"Merge with {submitted_revision.commit} into {commit}")
    revision.commit = commit

    # TODO: There are more checks to do, like for example, the
    # last commit into the non-devel branch should be a merge from
    # the devel branch
    if self.rebase_devel:
        branch, _ = self.projects_info.get(request.source, (None, None))
        if branch == "devel":
            self.git.repo.references[f"refs/heads/{branch}"].set_target(commit)
            self._rebase_branch_history(request.source, submitted_revision)

    return True
|
||||||
|
|
||||||
|
def matching_request(self, revision):
    """Return the request behind ``revision`` if it is usable, else None.

    The request is fetched by ``revision.requestid``. Requests whose type
    is not "submit" are returned untouched so the caller can deal with
    them. A submit request is only returned when its source is one of the
    known projects, its target is the project of the revision, and source
    and target differ.
    """
    request = self.obs.request(revision.requestid)
    if not request:
        return None

    if request.type() != "submit":
        # to be handled by the caller
        return request

    reason = None
    if request.source not in self.projects_info:
        reason = "Request from a non exported project"
    elif request.target != revision.project:
        # This seems to happen when the devel project gets
        # reinitialized (for example, SR#943593 in 7zip, or
        # SR#437901 in ColorFull)
        reason = "Request target different from current project"
    elif request.source == request.target:
        # this is not a merge, but a different way to do a
        # contribution to the (devel) project - see bindfs's rev 1
        reason = "Request within the same project"

    if reason is not None:
        logging.info(reason)
        return None
    return request
|
||||||
|
|
||||||
|
def import_revision(self, revision):
    """Import a single revision into git.

    The revision ends up in one of three states:

    * ``revision.ignored`` is set to True (broken link expansion,
      autocommits, delete requests, or no changes to commit),
    * ``revision.commit`` is set by one of the request-based importers
      (new branch or merge),
    * ``revision.commit`` is set to a plain commit created from the
      downloaded files.

    When the revision belongs to a request but the request-based import
    reports failure, the revision is imported as a plain commit instead.
    """
    project = revision.project
    branch, api_url = self.projects_info[project]

    logging.info(f"Importing [{revision}] to {branch}")

    self.obs.change_url(api_url)

    # Populate linkrev and replace srcmd5 from the linked
    # revision. If the expansion fails, the revision will be ignored
    # and not imported.
    if not revision.check_expanded():
        logging.warning("Broken revision")
        revision.ignored = True
        return

    # When doing a SR, we see also a revision in the origin
    # project with the outgoing request, but without changes in
    # the project. We can ignore them.
    #
    # If there is a request ID, it will be filtered out later,
    # when the target project is different from itself.
    autobuild_without_request = (
        revision.userid == "autobuild" and not revision.requestid
    )
    if autobuild_without_request or revision.userid == "buildservice-autocommit":
        logging.info("Ignoring autocommit")
        revision.ignored = True
        return

    # Create the reference if the branch is new. If so return
    # True.
    new_branch = self.git.checkout(branch)

    if revision.requestid:
        request = self.matching_request(revision)
        if request:
            if request.type() == "delete":
                # TODO: after this comes a restore, this should be collapsed
                # before even hitting git
                logging.info("Delete request ignored")
                revision.ignored = True
                return

            logging.debug(f"Found matching request: #{revision.project} #{request}")
            if new_branch:
                # Only an explicit False signals failure here; in that
                # case nothing was done yet, so fall through and import
                # the revision as a plain commit instead of dropping it
                # without a commit and without the ignored flag.
                outcome = self.import_new_revision_with_request(revision, request)
                if outcome is not False:
                    return
            elif self.import_revision_with_request(revision, request):
                return

    # Import revision as a single commit (without merging)
    self.download(revision)

    if new_branch or self.git.is_dirty():
        commit = self.git.commit(
            f"OBS User {revision.userid}",
            "null@suse.de",
            revision.time,
            # TODO: Normalize better the commit message
            f"{revision.comment}\n\n{revision}",
            # Create an empty commit only if is a new branch
            allow_empty=new_branch,
        )
        revision.commit = commit
        logging.info(f"Commit {commit}")
    else:
        logging.info("Skip empty commit")
        revision.ignored = True
|
140
obs.py
Normal file
140
obs.py
Normal file
@ -0,0 +1,140 @@
|
|||||||
|
import osc.core
|
||||||
|
import xml.etree.ElementTree as ET
|
||||||
|
import logging
|
||||||
|
import urllib.parse
|
||||||
|
from urllib.error import HTTPError
|
||||||
|
import time
|
||||||
|
import errno
|
||||||
|
|
||||||
|
from request import Request
|
||||||
|
|
||||||
|
# Add a retry wrapper for some of the HTTP actions.
|
||||||
|
def retry(func, *, attempts=5, delay=0.5):
    """Wrap ``func`` so that transient network failures are retried.

    A call is repeated when it raises
      * an HTTPError with a 5xx status code,
      * a URLError whose ``reason`` carries errno ENETUNREACH or
        EADDRNOTAVAIL,
      * an OSError with one of those errno values.
    Any other error is re-raised immediately.

    ``attempts`` is the maximum number of calls (at least one is always
    made) and ``delay`` the pause in seconds between two calls. When every
    attempt fails, the last error is re-raised instead of silently
    returning None.
    """
    import functools  # local import: only needed by this decorator

    transient_errnos = (errno.ENETUNREACH, errno.EADDRNOTAVAIL)
    total = max(attempts, 1)

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Used for log messages only; the wrapped call may have no
        # positional argument at all.
        target = args[0] if args else func
        last_error = None
        for attempt in range(1, total + 1):
            try:
                return func(*args, **kwargs)
            except HTTPError as e:
                if not 500 <= e.code <= 599:
                    raise
                last_error = e
                logging.warning(
                    f"HTTPError {e.code} -- Retrying {target} ({attempt})"
                )
            except urllib.error.URLError as e:
                # ``reason`` may be a plain string without an errno.
                if getattr(e.reason, "errno", None) not in transient_errnos:
                    logging.warning(f"URLError {e.errno} uncaught")
                    raise
                last_error = e
                logging.warning(f"URLError {e} -- Retrying {target} ({attempt})")
            except OSError as e:
                # sporadically hits cloud VMs :(
                if e.errno not in transient_errnos:
                    logging.warning(f"OSError {e.errno} uncaught")
                    raise
                last_error = e
                logging.warning(f"OSError {e} -- Retrying {target} ({attempt})")

            if attempt < total:
                # TODO: remove when move to async
                time.sleep(delay)

        # Out of attempts: surface the failure to the caller.
        raise last_error

    return wrapper
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level side effect: replace osc.core.http_GET with its retry-wrapped
# version, so code that looks the function up on osc.core gets the retries.
osc.core.http_GET = retry(osc.core.http_GET)
|
||||||
|
|
||||||
|
|
||||||
|
class OBS:
    """Small helper around osc.core for reading data from an OBS API URL.

    The lookup helpers (``_meta``, ``_history``, ``_link``, ``_request``)
    return the parsed XML root element, or None when the data cannot be
    fetched.
    """

    def __init__(self, url=None):
        # Define the attribute even without a URL, so that using the
        # object before change_url() does not fail with AttributeError.
        self.url = None
        if url:
            self.change_url(url)

    def change_url(self, url):
        """Remember ``url`` and make it the API URL of the osc configuration."""
        self.url = url
        osc.conf.get_config(override_apiurl=url)

    def _xml(self, url_path, **params):
        """GET ``url_path`` (with ``params`` as query) and return the XML root."""
        url = osc.core.makeurl(self.url, [url_path], params)
        logging.debug(f"GET {url}")
        return ET.parse(osc.core.http_GET(url)).getroot()

    def _meta(self, project, package, **params):
        """Return the ``_meta`` root of the package, or None on HTTPError."""
        try:
            root = self._xml(f"source/{project}/{package}/_meta", **params)
        except HTTPError:
            logging.error(f"Package [{project}/{package} {params}] has no meta")
            return None
        return root

    def _history(self, project, package, **params):
        """Return the ``_history`` root of the package, or None on HTTPError."""
        try:
            root = self._xml(f"source/{project}/{package}/_history", **params)
        except HTTPError:
            logging.error(f"Package [{project}/{package} {params}] has no history")
            return None
        return root

    def _link(self, project, package, rev):
        """Return the ``_link`` root for ``rev``.

        Returns None when the request fails with HTTPError or when the
        answer is not parseable XML.
        """
        try:
            root = self._xml(f"source/{project}/{package}/_link", rev=rev)
        except HTTPError:
            logging.info("Package has no link")
            return None
        except ET.ParseError:
            logging.error(
                f"Package [{project}/{package} rev={rev}] _link can't be parsed"
            )
            # Without this return the function would go on to use a
            # ``root`` that was never assigned.
            return None
        return root

    def _request(self, requestid):
        """Return the XML root of request ``requestid``, or None on HTTPError."""
        try:
            root = self._xml(f"request/{requestid}")
        except HTTPError:
            logging.warning(f"Cannot fetch request {requestid}")
            return None
        return root

    def exists(self, project, package):
        """Tell whether the package meta exists and names ``project``."""
        root = self._meta(project, package)
        if root is None:
            return False
        return root.get("project") == project

    def devel_project(self, project, package):
        """Return the ``project`` attribute of the ``devel`` meta element.

        Returns None when the meta cannot be fetched or has no ``devel``
        element.
        """
        root = self._meta(project, package)
        if root is None:
            return None
        devel = root.find("devel")
        if devel is None:
            return None
        return devel.get("project")

    def request(self, requestid):
        """Return the parsed Request for ``requestid``, or None if not fetched."""
        root = self._request(requestid)
        if root is None:
            return None
        return Request().parse(root)

    def files(self, project, package, revision):
        """List ``(name, size, md5)`` for each entry of the expanded revision."""
        root = self._xml(f"source/{project}/{package}", rev=revision, expand=1)
        return [
            (e.get("name"), int(e.get("size")), e.get("md5"))
            for e in root.findall("entry")
        ]

    def _download(self, project, package, name, revision):
        """Open file ``name`` of the expanded revision and return the response."""
        url = osc.core.makeurl(
            self.url,
            ["source", project, package, urllib.parse.quote(name)],
            {"rev": revision, "expand": 1},
        )
        return osc.core.http_GET(url)

    def download(self, project, package, name, revision, dirpath):
        """Write file ``name`` of the revision to ``dirpath / name``."""
        with (dirpath / name).open("wb") as f:
            f.write(self._download(project, package, name, revision).read())
|
105
proxy_sha256.py
Normal file
105
proxy_sha256.py
Normal file
@ -0,0 +1,105 @@
|
|||||||
|
import logging
|
||||||
|
import requests
|
||||||
|
import urllib
|
||||||
|
import functools
|
||||||
|
import hashlib
|
||||||
|
|
||||||
|
|
||||||
|
def _hash(hash_alg, file_or_path):
    """Return the hex digest of ``file_or_path`` computed with ``hash_alg``.

    ``hash_alg`` is a hash constructor (for example ``hashlib.md5``).
    ``file_or_path`` is either an object with a ``read`` method, which is
    consumed from its current position, or an object with an ``open``
    method (such as a ``pathlib.Path``), which is opened in binary mode.
    """
    digest = hash_alg()
    chunk_size = 1024 * 4

    def feed(stream):
        # Hash in small pieces so the content is never held in memory whole.
        chunk = stream.read(chunk_size)
        while chunk:
            digest.update(chunk)
            chunk = stream.read(chunk_size)

    if not hasattr(file_or_path, "read"):
        with file_or_path.open("rb") as stream:
            feed(stream)
    else:
        feed(file_or_path)

    return digest.hexdigest()


# Ready-made helpers: md5(file_or_path) and sha256(file_or_path)
md5 = functools.partial(_hash, hashlib.md5)
sha256 = functools.partial(_hash, hashlib.sha256)
|
||||||
|
|
||||||
|
|
||||||
|
class ProxySHA256:
    """Cache of SHA256 sums for package files, keyed by MD5 and file name.

    Sums come either from the redirector service at ``self.url`` or, when
    the proxy is disabled, from hashing the file as downloaded through
    ``obs``. ``self.hashes`` stays None until the first lookup or store.
    """

    def __init__(self, obs, url=None, enabled=True):
        self.obs = obs
        self.url = url if url else "http://source.dyn.cloud.suse.de"
        self.enabled = enabled
        self.hashes = None
        self.texts = set()

    @staticmethod
    def _key(file_md5, name):
        """Build the cache key; lookups and stores must use the same form."""
        return f"{file_md5}-{name}"

    def _ensure_loaded(self, package):
        """Initialise ``self.hashes`` on first use."""
        if self.hashes is None:
            if self.enabled:
                self.load_package(package)
            else:
                self.hashes = {}

    def load_package(self, package):
        """Fetch the known hashes and text file names of ``package``.

        If the service does not answer with status 200, the cache starts
        empty instead of staying None.
        """
        # _project is unreachable for the proxy - due to being a fake package
        if package == "_project":
            self.enabled = False
            self.texts = {"_config", "_service"}
            self.hashes = {}
            return
        logging.info("Retrieve all previously defined SHA256")
        # Use the configured URL, not a hard-coded copy of the default.
        response = requests.get(f"{self.url}/package/{package}")
        if response.status_code == 200:
            payload = response.json()
            self.hashes = payload["shas"]
            self.texts = set(payload["texts"])
        else:
            logging.warning(
                f"Cannot load SHA256 of {package} from {self.url} "
                f"(status {response.status_code})"
            )
            self.hashes = {}

    def get(self, package, name, file_md5):
        """Return the cached entry for the file, or None when unknown."""
        self._ensure_loaded(package)
        return self.hashes.get(self._key(file_md5, name), None)

    def _proxy_put(self, project, package, name, revision, file_md5, size):
        """Ask the redirector for the SHA256 of the file and cache it.

        Raises Exception when the redirector does not answer with 200.
        """
        quoted_name = urllib.parse.quote(name)
        url = f"{self.obs.url}/public/source/{project}/{package}/{quoted_name}?rev={revision}"
        response = requests.put(
            self.url,
            data={
                "hash": file_md5,
                "filename": name,
                "url": url,
                "package": package,
            },
        )
        if response.status_code != 200:
            raise Exception(f"Redirector error on {self.url} for {url}")

        # Same key form as get(), otherwise the entry is never found again.
        key = self._key(file_md5, name)
        self.hashes[key] = {
            "sha256": response.content.decode("utf-8"),
            "fsize": size,
        }
        return self.hashes[key]

    def _obs_put(self, project, package, name, revision, file_md5, size):
        """Hash the file as downloaded through ``obs`` and cache the result."""
        key = self._key(file_md5, name)
        self.hashes[key] = {
            "sha256": sha256(self.obs._download(project, package, name, revision)),
            "fsize": size,
        }
        return self.hashes[key]

    def put(self, project, package, name, revision, file_md5, size):
        """Compute and cache the entry for the file, then return it."""
        # put() may be the first call on this object.
        self._ensure_loaded(package)
        if not self.enabled:
            return self._obs_put(project, package, name, revision, file_md5, size)
        return self._proxy_put(project, package, name, revision, file_md5, size)

    def is_text(self, filename):
        """Tell whether ``filename`` is among the known text files."""
        return filename in self.texts

    def get_or_put(self, project, package, name, revision, file_md5, size):
        """Return the cached entry, creating it first when missing.

        Raises Exception when the recorded size differs from ``size``.
        """
        result = self.get(package, name, file_md5)
        if not result:
            result = self.put(project, package, name, revision, file_md5, size)

        # Sanity check
        if result["fsize"] != size:
            raise Exception(f"Redirector has different size for {name}")

        return result
|
Loading…
Reference in New Issue
Block a user