#!/usr/bin/env python3
import argparse
import asyncio
import datetime
import errno
import fnmatch
import functools
import hashlib
import itertools
import logging
import os
import pathlib
import re
import shutil
import subprocess
import sys
import time
import urllib.parse
import xml.etree.ElementTree as ET
from urllib.error import HTTPError

import osc.conf
import osc.core
import pygit2
import requests
from osclib.cache import Cache


# Add a retry wrapper for some of the HTTP actions.
def retry(func):
    def wrapper(*args, **kwargs):
        retry = 0
        while retry < 5:
            try:
                return func(*args, **kwargs)
            except HTTPError as e:
                if 500 <= e.code <= 599:
                    retry += 1
                    logging.warning(
                        f"HTTPError {e.code} -- Retrying {args[0]} ({retry})"
                    )
                    # TODO: remove when move to async
                    time.sleep(0.5)
                else:
                    raise
            except urllib.error.URLError as e:
                if e.reason.errno in (errno.ENETUNREACH, errno.EADDRNOTAVAIL):
                    retry += 1
                    logging.warning(f"URLError {e} -- Retrying {args[0]} ({retry})")
                    time.sleep(0.5)
                else:
                    logging.warning(f"URLError {e.errno} uncaught")
                    raise
            except OSError as e:
                if e.errno in (errno.ENETUNREACH, errno.EADDRNOTAVAIL):
                    # sporadically hits cloud VMs :(
                    retry += 1
                    logging.warning(f"OSError {e} -- Retrying {args[0]} ({retry})")
                    # TODO: remove when move to async
                    time.sleep(0.5)
                else:
                    logging.warning(f"OSError {e.errno} uncaught")
                    raise

    return wrapper


osc.core.http_GET = retry(osc.core.http_GET)

BINARY = {
    ".7z",
    ".bsp",
    ".bz2",
    ".gem",
    ".gz",
    ".jar",
    ".lz",
    ".lzma",
    ".obscpio",
    ".oxt",
    ".pdf",
    ".png",
    ".rpm",
    ".tbz",
    ".tbz2",
    ".tgz",
    ".ttf",
    ".txz",
    ".whl",
    ".xz",
    ".zip",
    ".zst",
}

LFS_SUFFIX = "filter=lfs diff=lfs merge=lfs -text"

URL_OBS = "https://api.opensuse.org"
URL_IBS = "https://api.suse.de"

# The order is relevant (from older to newer initial codebase)
PROJECTS = [
    ("openSUSE:Factory", "factory", URL_OBS),
    # ("SUSE:SLE-12:GA", "SLE_12", URL_IBS),
    # ("SUSE:SLE-12:Update", "SLE_12", URL_IBS),
    # ("SUSE:SLE-12-SP1:GA", "SLE_12_SP1", URL_IBS),
    # ("SUSE:SLE-12-SP1:Update", "SLE_12_SP1", URL_IBS),
    # ("SUSE:SLE-12-SP2:GA", "SLE_12_SP2", URL_IBS),
    # ("SUSE:SLE-12-SP2:Update", "SLE_12_SP2", URL_IBS),
    # ("SUSE:SLE-12-SP3:GA", "SLE_12_SP3", URL_IBS),
    # ("SUSE:SLE-12-SP3:Update", "SLE_12_SP3", URL_IBS),
    # ("SUSE:SLE-12-SP4:GA", "SLE_12_SP4", URL_IBS),
    # ("SUSE:SLE-12-SP4:Update", "SLE_12_SP4", URL_IBS),
    # ("SUSE:SLE-12-SP5:GA", "SLE_12_SP5", URL_IBS),
    # ("SUSE:SLE-12-SP5:Update", "SLE_12_SP5", URL_IBS),
    # ("SUSE:SLE-15:GA", "SLE_15", URL_IBS),
    # ("SUSE:SLE-15:Update", "SLE_15", URL_IBS),
    # ("SUSE:SLE-15-SP1:GA", "SLE_15_SP1", URL_IBS),
    # ("SUSE:SLE-15-SP1:Update", "SLE_15_SP1", URL_IBS),
    # ("SUSE:SLE-15-SP2:GA", "SLE_15_SP2", URL_IBS),
    # ("SUSE:SLE-15-SP2:Update", "SLE_15_SP2", URL_IBS),
    # ("SUSE:SLE-15-SP3:GA", "SLE_15_SP3", URL_IBS),
    # ("SUSE:SLE-15-SP3:Update", "SLE_15_SP3", URL_IBS),
    # ("SUSE:SLE-15-SP4:GA", "SLE_15_SP4", URL_IBS),
    # ("SUSE:SLE-15-SP4:Update", "SLE_15_SP4", URL_IBS),
]


def is_binary_or_large(filename, size):
    """Decide if a file is binary (or too large) based on its extension or size"""
    binary_suffix = BINARY
    non_binary_suffix = {
        ".1",
        ".8",
        ".SUSE",
        ".asc",
        ".c",
        ".cabal",
        ".cfg",
        ".changes",
        ".conf",
        ".desktop",
        ".dif",
        ".diff",
        ".dsc",
        ".el",
        ".html",
        ".in",
        ".init",
        ".install",
        ".keyring",
        ".kiwi",
        ".logrotate",
        ".macros",
        ".md",
        ".obsinfo",
        ".pamd",
        ".patch",
        ".pl",
        ".pom",
        ".py",
        ".rpmlintrc",
        ".rules",
        ".script",
        ".service",
        ".sh",
        ".sig",
        ".sign",
        ".spec",
        ".sysconfig",
        ".test",
        ".txt",
        ".xml",
        ".yml",
    }

    suffix = pathlib.Path(filename).suffix
    if suffix in binary_suffix:
        return True
    if suffix in non_binary_suffix:
        return False
    if size >= 6 * 1024:
        return True
    return False
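# Examples of the heuristic in is_binary_or_large() (illustrative only):
#   is_binary_or_large("foo.tar.gz", 100)        -> True   (binary suffix)
#   is_binary_or_large("foo.spec", 50 * 1024)    -> False  (known text suffix)
#   is_binary_or_large("foo.unknown", 10 * 1024) -> True   (>= 6 KiB size cutoff)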
def _hash(hash_alg, file_or_path):
    h = hash_alg()

    def __hash(f):
        while chunk := f.read(1024 * 4):
            h.update(chunk)

    if hasattr(file_or_path, "read"):
        __hash(file_or_path)
    else:
        with file_or_path.open("rb") as f:
            __hash(f)

    return h.hexdigest()


md5 = functools.partial(_hash, hashlib.md5)
sha256 = functools.partial(_hash, hashlib.sha256)


def _files_hash(hash_alg, dirpath):
    """List of (filename, hash) for the files in a directory"""
    # TODO: do it async or multithreaded
    files = [f for f in dirpath.iterdir() if f.is_file()]
    return [(f.parts[-1], hash_alg(f)) for f in files]


files_md5 = functools.partial(_files_hash, md5)
files_sha256 = functools.partial(_files_hash, sha256)


class Git:
    """Local git repository"""

    def __init__(self, path, committer=None, committer_email=None):
        self.path = pathlib.Path(path)
        self.committer = committer
        self.committer_email = committer_email

        self.repo = None

    def is_open(self):
        return self.repo is not None

    # TODO: Extend it to packages and files
    def exists(self):
        """Check if the path is a valid git repository"""
        return (self.path / ".git").exists()

    def create(self):
        """Create a local git repository"""
        self.path.mkdir(parents=True, exist_ok=True)
        # Convert the path to string, to avoid some limitations in
        # older pygit2
        self.repo = pygit2.init_repository(str(self.path))
        return self

    def is_dirty(self):
        """Check if there is something to commit"""
        assert self.is_open()

        return self.repo.status()

    def branches(self):
        return list(self.repo.branches)

    def branch(self, branch, commit=None):
        if not commit:
            commit = self.repo.head
        else:
            commit = self.repo.get(commit)
        self.repo.branches.local.create(branch, commit)

    def checkout(self, branch):
        """Checkout into the branch HEAD"""
        new_branch = False
        ref = f"refs/heads/{branch}"
        if branch not in self.branches():
            self.repo.references["HEAD"].set_target(ref)
            new_branch = True
        else:
            self.repo.checkout(ref)
        return new_branch

    def commit(
        self,
        user,
        user_email,
        user_time,
        message,
        parents=None,
        committer=None,
        committer_email=None,
        committer_time=None,
        allow_empty=False,
    ):
        """Add all the files and create a new commit in the current HEAD"""
        assert allow_empty or self.is_dirty()

        if not committer:
            # Fall back to the configured committer, or to the commit author
            committer = self.committer if self.committer else user
            committer_email = (
                self.committer_email if self.committer_email else user_email
            )
        committer_time = committer_time if committer_time else user_time

        try:
            self.repo.index.add_all()
        except pygit2.GitError as e:
            if not allow_empty:
                raise e

        self.repo.index.write()
        author = pygit2.Signature(user, user_email, int(user_time.timestamp()))
        committer = pygit2.Signature(
            committer, committer_email, int(committer_time.timestamp())
        )
        if not parents:
            try:
                parents = [self.repo.head.target]
            except pygit2.GitError as e:
                parents = []
                if not allow_empty:
                    raise e
        tree = self.repo.index.write_tree()
        return self.repo.create_commit(
            "HEAD", author, committer, message, tree, parents
        )
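    # merge() below either returns a commit id, or one of the sentinel
    # strings "CONFLICT" / "EMPTY" that callers are expected to check for.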
    def merge(
        self,
        user,
        user_email,
        user_time,
        message,
        commit,
        committer=None,
        committer_email=None,
        committer_time=None,
        clean_on_conflict=True,
        merged=False,
        allow_empty=False,
    ):
        new_branch = False

        if not merged:
            try:
                self.repo.merge(commit)
            except KeyError:
                # If it is the first commit, we will have a missing
                # "HEAD", but the files will be there. We can proceed
                # to the commit directly.
                new_branch = True

        if not merged and self.repo.index.conflicts:
            for conflict in self.repo.index.conflicts:
                conflict = [c for c in conflict if c]
                if conflict:
                    logging.info(f"CONFLICT {conflict[0].path}")

            if clean_on_conflict:
                self.clean()
            # Now I miss Rust enums
            return "CONFLICT"

        # Some merges are empty in OBS (no changes, not sure
        # why), for now we signal them
        if not allow_empty and not self.is_dirty():
            # I really really do miss Rust enums
            return "EMPTY"

        if new_branch:
            parents = [commit]
        else:
            parents = [
                self.repo.head.target,
                commit,
            ]
        commit = self.commit(
            user,
            user_email,
            user_time,
            message,
            parents,
            committer,
            committer_email,
            committer_time,
            allow_empty=allow_empty,
        )

        return commit

    def merge_abort(self):
        self.repo.state_cleanup()

    def last_commit(self):
        try:
            return self.repo.head.target
        except pygit2.GitError:
            return None

    def gc(self):
        logging.info(f"Garbage collect and repack {self.path}")
        subprocess.run(
            ["git", "gc", "--auto"],
            cwd=self.path,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )

    def clean(self):
        for path, _ in self.repo.status().items():
            logging.debug(f"Cleaning {path}")
            try:
                (self.path / path).unlink()
                self.repo.index.remove(path)
            except Exception as e:
                logging.warning(f"Error removing file {path}: {e}")

    def add(self, filename):
        self.repo.index.add(filename)

    def add_default_lfs_gitattributes(self, force=False):
        if not (self.path / ".gitattributes").exists() or force:
            with (self.path / ".gitattributes").open("w") as f:
                content = ["## Default LFS"]
                content += [f"*{b} {LFS_SUFFIX}" for b in sorted(BINARY)]
                f.write("\n".join(content))
                f.write("\n")
            self.add(".gitattributes")

    def add_specific_lfs_gitattributes(self, binaries):
        self.add_default_lfs_gitattributes(force=True)
        if binaries:
            with (self.path / ".gitattributes").open("a") as f:
                content = ["## Specific LFS patterns"]
                content += [f"{b} {LFS_SUFFIX}" for b in sorted(binaries)]
                f.write("\n".join(content))
                f.write("\n")
        self.add(".gitattributes")

    def get_specific_lfs_gitattributes(self):
        with (self.path / ".gitattributes").open() as f:
            patterns = [
                line.split()[0]
                for line in f
                if line.strip() and not line.startswith("#")
            ]
        binary = {f"*{b}" for b in BINARY}
        return [p for p in patterns if p not in binary]

    def add_lfs(self, filename, sha256, size):
        with (self.path / filename).open("w") as f:
            f.write("version https://git-lfs.github.com/spec/v1\n")
            f.write(f"oid sha256:{sha256}\n")
            f.write(f"size {size}\n")
        self.add(filename)
        if not self.is_lfs_tracked(filename):
            logging.debug(f"Add specific LFS file {filename}")
            specific_patterns = self.get_specific_lfs_gitattributes()
            specific_patterns.append(filename)
            self.add_specific_lfs_gitattributes(specific_patterns)

    def is_lfs_tracked(self, filename):
        with (self.path / ".gitattributes").open() as f:
            patterns = (
                line.split()[0]
                for line in f
                if line.strip() and not line.startswith("#")
            )
            return any(fnmatch.fnmatch(filename, pattern) for pattern in patterns)

    def remove(self, filename):
        self.repo.index.remove(filename)
        (self.path / filename).unlink()
        patterns = self.get_specific_lfs_gitattributes()
        if filename in patterns:
            patterns.remove(filename)
            self.add_specific_lfs_gitattributes(patterns)


class OBS:
    def __init__(self, url=None):
        if url:
            self.change_url(url)

    def change_url(self, url):
        self.url = url
        osc.conf.get_config(override_apiurl=url)

    def _xml(self, url_path, **params):
        url = osc.core.makeurl(self.url, [url_path], params)
        logging.debug(f"GET {url}")
        return ET.parse(osc.core.http_GET(url)).getroot()
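    # Illustrative request built by _xml() above:
    #   self._xml("source/openSUSE:Factory/bash", rev="42", expand=1)
    # performs GET <apiurl>/source/openSUSE:Factory/bash?rev=42&expand=1 and
    # returns the parsed XML root element.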
    def _meta(self, project, package, **params):
        try:
            root = self._xml(f"source/{project}/{package}/_meta", **params)
        except HTTPError:
            logging.error(f"Package [{project}/{package} {params}] has no meta")
            return None
        return root

    def _history(self, project, package, **params):
        try:
            root = self._xml(f"source/{project}/{package}/_history", **params)
        except HTTPError:
            logging.error(f"Package [{project}/{package} {params}] has no history")
            return None
        return root

    def _link(self, project, package, rev):
        try:
            root = self._xml(f"source/{project}/{package}/_link", rev=rev)
        except HTTPError:
            logging.info("Package has no link")
            return None
        except ET.ParseError:
            logging.error(
                f"Package [{project}/{package} rev={rev}] _link can't be parsed"
            )
            return None
        return root

    def _request(self, requestid):
        try:
            root = self._xml(f"request/{requestid}")
        except HTTPError:
            logging.warning(f"Cannot fetch request {requestid}")
            return None
        return root

    def exists(self, project, package):
        root = self._meta(project, package)
        if root is None:
            return False
        return root.get("project") == project

    def devel_project(self, project, package):
        root = self._meta(project, package)
        if root is None:
            return None
        devel = root.find("devel")
        if devel is None:
            return None
        return devel.get("project")

    def request(self, requestid):
        root = self._request(requestid)
        if root is not None:
            return Request().parse(root)

    def files(self, project, package, revision):
        root = self._xml(f"source/{project}/{package}", rev=revision, expand=1)
        return [
            (e.get("name"), int(e.get("size")), e.get("md5"))
            for e in root.findall("entry")
        ]

    def _download(self, project, package, name, revision):
        url = osc.core.makeurl(
            self.url,
            ["source", project, package, urllib.parse.quote(name)],
            {"rev": revision, "expand": 1},
        )
        return osc.core.http_GET(url)

    def download(self, project, package, name, revision, dirpath):
        with (dirpath / name).open("wb") as f:
            f.write(self._download(project, package, name, revision).read())


class ProxySHA256:
    def __init__(self, obs, url=None, enabled=True):
        self.obs = obs
        self.url = url if url else "http://source.dyn.cloud.suse.de"
        self.enabled = enabled
        self.hashes = None
        self.texts = set()

    def load_package(self, package):
        # _project is unreachable for the proxy - due to being a fake package
        if package == "_project":
            self.enabled = False
            self.texts = set(["_config", "_service"])
            self.hashes = dict()
            return
        logging.info("Retrieve all previously defined SHA256")
        response = requests.get(f"http://source.dyn.cloud.suse.de/package/{package}")
        if response.status_code == 200:
            json = response.json()
            self.hashes = json["shas"]
            self.texts = set(json["texts"])

    def get(self, package, name, file_md5):
        key = f"{file_md5}-{name}"
        if self.hashes is None:
            if self.enabled:
                self.load_package(package)
            else:
                self.hashes = {}
        return self.hashes.get(key, None)

    def _proxy_put(self, project, package, name, revision, file_md5, size):
        quoted_name = urllib.parse.quote(name)
        url = f"{self.obs.url}/public/source/{project}/{package}/{quoted_name}?rev={revision}"
        response = requests.put(
            self.url,
            data={
                "hash": file_md5,
                "filename": name,
                "url": url,
                "package": package,
            },
        )
        if response.status_code != 200:
            raise Exception(f"Redirector error on {self.url} for {url}")

        # Use the same key format as get(), so later lookups hit the cache
        key = f"{file_md5}-{name}"
        self.hashes[key] = {
            "sha256": response.content.decode("utf-8"),
            "fsize": size,
        }
        return self.hashes[key]

    def _obs_put(self, project, package, name, revision, file_md5, size):
        key = f"{file_md5}-{name}"
        self.hashes[key] = {
            "sha256": sha256(self.obs._download(project, package, name, revision)),
            "fsize": size,
        }
        return self.hashes[key]
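    # put() below prefers the redirector proxy when it is enabled; with the
    # proxy disabled the file is downloaded from OBS and hashed locally, which
    # is slower but needs no external service.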
    def put(self, project, package, name, revision, file_md5, size):
        if not self.enabled:
            return self._obs_put(project, package, name, revision, file_md5, size)
        return self._proxy_put(project, package, name, revision, file_md5, size)

    def is_text(self, filename):
        return filename in self.texts

    def get_or_put(self, project, package, name, revision, file_md5, size):
        result = self.get(package, name, file_md5)
        if not result:
            result = self.put(project, package, name, revision, file_md5, size)

        # Sanity check
        if result["fsize"] != size:
            raise Exception(f"Redirector has different size for {name}")

        return result


class Request:
    def parse(self, xml):
        self.requestid = int(xml.get("id"))
        self.creator = xml.get("creator")
        self.type_ = xml.find("action").get("type")

        if self.type_ == "delete":
            # not much to do
            return self

        self.source = xml.find("action/source").get("project")
        # expanded MD5 or commit revision
        self.revisionid = xml.find("action/source").get("rev")

        self.target = xml.find("action/target").get("project")
        self.state = xml.find("state").get("name")

        # TODO: support multi-action requests
        # TODO: parse review history
        # TODO: add description

        return self

    def type(self):
        return self.type_

    def __str__(self):
        return f"Req {self.requestid} {self.creator} {self.type_} {self.source}->{self.target} {self.state}"

    def __repr__(self):
        return f"[{self.__str__()}]"


class Revision:
    def __init__(self, obs, history, project, package):
        self.obs = obs
        self.history = history
        self.project = project
        self.package = package

        self.commit = None
        self.ignored = False

    def parse(self, xml):
        self.rev = int(xml.get("rev"))
        # Replaced in check_expanded
        self.srcmd5 = xml.find("srcmd5").text
        self.version = xml.find("version").text

        time = int(xml.find("time").text)
        self.time = datetime.datetime.fromtimestamp(time)

        userid = xml.find("user")
        if userid is not None:
            self.userid = userid.text
        else:
            self.userid = "unknown"

        comment = xml.find("comment")
        if comment is not None:
            self.comment = comment.text or ""
        else:
            self.comment = ""

        # Populated by check_link
        self.linkrev = None

        self.requestid = None
        requestid = xml.find("requestid")
        if requestid is not None:
            self.requestid = int(requestid.text)
        else:
            # Sometimes requestid is missing, but can be extracted
            # from "comment"
            matched = re.match(
                r"^Copy from .* based on submit request (\d+) from user .*$",
                self.comment,
            )
            if matched:
                self.requestid = int(matched.group(1))
        return self

    def __str__(self):
        return f"Rev {self.project}/{self.rev} Md5 {self.srcmd5} {self.time} {self.userid} {self.requestid}"

    def __repr__(self):
        return f"[{self.__str__()}]"
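    # check_link() below resolves the revision of the link target (if the
    # package is a _link) into self.linkrev; check_expanded() then replaces
    # self.srcmd5 with the expanded source md5, which is what submit requests
    # reference.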
    def check_link(self):
        """Add the 'linkrev' attribute to the revision.

        Returns False if the link is invalid.
        """
        try:
            root = self.obs._xml(
                f"source/{self.project}/{self.package}/_link", rev=self.srcmd5
            )
        except HTTPError as e:
            if e.code == 404:
                logging.debug("No _link for the revision")
                return True
            raise e
        except ET.ParseError:
            logging.error(
                f"_link can't be parsed [{self.project}/{self.package} rev={self.srcmd5}]"
            )
            return False
        target_project = root.get("project")
        rev = self.history.find_last_rev_after_time(target_project, self.time)
        if rev:
            logging.debug(f"Linkrev found: {rev}")
            self.linkrev = rev.srcmd5
        return True

    def check_expanded(self):
        # Even if it's not a link we still need to check the expanded
        # srcmd5, as it is possibly used in submit requests
        if not self.check_link():
            return False

        # If there is a "linkrev", "rev" is ignored
        params = {"rev": self.srcmd5, "expand": "1"}
        if self.linkrev:
            params["linkrev"] = self.linkrev

        try:
            root = self.obs._xml(f"source/{self.project}/{self.package}", **params)
        except HTTPError as e:
            if e.code == 400:
                logging.error(
                    f"Package [{self.project}/{self.package} {params}] can't be expanded: {e}"
                )
                return False
            raise e

        self.srcmd5 = root.get("srcmd5")
        return True


class History:
    """Store the history of revisions of a package in different projects."""

    def __init__(self, obs, package):
        self.obs = obs
        self.package = package

        self.revisions = {}

    def __contains__(self, project):
        return project in self.revisions

    def __getitem__(self, project):
        return self.revisions[project]

    def _extract_copypac(self, comment):
        original_project = re.findall(
            r"osc copypac from project:(.*) package:", comment
        )
        return original_project[0] if original_project else None

    def _fetch_revisions(self, project, **params):
        root = self.obs._history(project, self.package, **params)
        if root is not None:
            return [
                Revision(self.obs, self, project, self.package).parse(r)
                for r in root.findall("revision")
            ]

    def fetch_revisions(self, project, follow_copypac=False):
        """Get the revision history of a package"""
        if project in self:
            return

        revs = self._fetch_revisions(project)
        self.revisions[project] = revs
        # while (
        #     revs
        #     and follow_copypac
        #     and (copypac_project := self._extract_copypac(revs[0].comment))
        # ):
        #     # Add the history pre-copypac
        #     # TODO: missing the old project name
        #     revs = self._fetch_revisions(copypac_project, deleted=1)
        #     self.revisions[project] = (
        #         revs + self.revisions[project]
        #     )

    def fetch_all_revisions(self, projects):
        """Pre-populate the history"""
        for project, _, api_url in projects:
            self.obs.change_url(api_url)
            self.fetch_revisions(project)

    def sort_all_revisions(self):
        """Sort revisions for all projects, from older to newer"""
        return sorted(itertools.chain(*self.revisions.values()), key=lambda x: x.time)
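    # find_revision() below resolves the "rev" attribute of a request (either
    # a revision number or an expanded md5) against the fetched history;
    # ignored or unmatched revisions fall back to the last committed one.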
    def find_revision(self, project, revisionid, accepted_at):
        last_committed_revision = None
        for r in self.revisions.get(project, []):
            logging.debug(f"Find revision {revisionid} [{accepted_at}]: {r}")
            if str(r.rev) == str(revisionid) or r.srcmd5 == revisionid:
                if r.ignored:
                    logging.debug(
                        f"{r} fits but is ignored, returning {last_committed_revision}"
                    )
                    return last_committed_revision
                else:
                    logging.debug(f"{r} fits")
                    return r
            if r.time > accepted_at:
                # if we can't find the right revision, we take the last
                # commit. Before ~2012 the data was tracked really loosely
                # (e.g. using different timezones and the state field was
                # only introduced in 2016...)
                logging.warning(
                    f"Deploying workaround for missing request revision - returning {last_committed_revision}"
                )
                return last_committed_revision
            if r.commit:
                last_committed_revision = r
        logging.info("No committed revision found, returning None")
        return None

    def find_last_rev_after_time(self, project, time):
        # revs = self.projects.get(project, [])
        # return next((r for r in reversed(revs) if r.time <= time), None)
        prev = None
        for rev in self.revisions.get(project, []):
            if rev.time > time:
                return prev
            if rev.time == time:
                return rev
            prev = rev
        return prev


class Importer:
    def __init__(self, projects, package, repodir, search_ancestor, rebase_devel):
        # The idea is to create each commit in order, and draw the
        # same graph described by the revisions timeline. For that we
        # need first to fetch all the revisions and sort them
        # linearly, based on the timestamp.
        #
        # After that we recreate the commits, and if one revision is a
        # request that contains a target inside the projects in the
        # "history", we create a merge commit.
        #
        # Optionally, if a flag is set, we will try to find a common
        # "Initial commit" from a reference branch (the first one in
        # "projects", that is safe to assume to be "openSUSE:Factory").
        # This is not always a good idea. For example, in a normal
        # situation the "devel" project history is older than
        # "factory", and we can root the tree on it. But for some
        # other projects we lost partially the "devel" history project
        # (could be moved), and "factory" is not the root.

        self.package = package
        self.search_ancestor = search_ancestor
        self.rebase_devel = rebase_devel

        self.obs = OBS()
        self.git = Git(
            repodir,
            committer="Git OBS Bridge",
            committer_email="obsbridge@suse.de",
        ).create()
        self.proxy_sha256 = ProxySHA256(self.obs, enabled=True)
        self.history = History(self.obs, self.package)

        # Add the "devel" project
        (project, branch, api_url) = projects[0]
        assert project == "openSUSE:Factory"
        self.obs.change_url(api_url)
        devel_project = self.obs.devel_project(project, package)
        if devel_project:
            self.projects = [(devel_project, "devel", api_url)] + projects
        else:
            self.projects = projects

        # Associate the branch and api_url information per project
        self.projects_info = {
            project: (branch, api_url) for (project, branch, api_url) in self.projects
        }
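    # download() below synchronizes the git working tree with a single OBS
    # revision: small text files are fetched and added directly, binary or
    # large files become LFS pointer stubs, and files that no longer exist in
    # OBS are removed from the tree.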
    def download(self, revision):
        obs_files = self.obs.files(revision.project, revision.package, revision.srcmd5)
        git_files = {
            (f.name, f.stat().st_size, md5(f))
            for f in self.git.path.iterdir()
            if f.is_file() and f.name != ".gitattributes"
        }

        # Overwrite ".gitattributes" with the default LFS patterns
        self.git.add_default_lfs_gitattributes(force=True)

        # Download each file in OBS if it is not a binary (or large)
        # file
        for (name, size, file_md5) in obs_files:
            # this file creates easily 100k commits and is just useless data :(
            # unfortunately it's stored in the same meta package as the project config
            if revision.package == "_project" and name == "_staging_workflow":
                continue

            # have such files been detected as text mimetype before?
            is_text = self.proxy_sha256.is_text(name)
            if not is_text and is_binary_or_large(name, size):
                file_sha256 = self.proxy_sha256.get_or_put(
                    revision.project,
                    revision.package,
                    name,
                    revision.srcmd5,
                    file_md5,
                    size,
                )
                self.git.add_lfs(name, file_sha256["sha256"], size)
            else:
                if (name, size, file_md5) not in git_files:
                    print(f"Download {name}")
                    self.obs.download(
                        revision.project,
                        revision.package,
                        name,
                        revision.srcmd5,
                        self.git.path,
                    )
                    # Validate the MD5 of the downloaded file
                    if md5(self.git.path / name) != file_md5:
                        raise Exception(f"Download error in {name}")
                self.git.add(name)

        # Remove extra files
        obs_names = {n for (n, _, _) in obs_files}
        git_names = {n for (n, _, _) in git_files}
        for name in git_names - obs_names:
            print(f"Remove {name}")
            self.git.remove(name)

    def import_all_revisions(self, gc):
        # Fetch all the requests and sort them. Ideally we should
        # build the graph here, to avoid new commits before the merge.
        # For now we will sort them and invalidate the commits if
        # "rebase_devel" is set.
        self.history.fetch_all_revisions(self.projects)
        revisions = self.history.sort_all_revisions()

        logging.debug(f"Selected import order for {self.package}")
        for revision in revisions:
            logging.debug(revision)

        gc_cnt = gc
        for revision in revisions:
            gc_cnt -= 1
            if gc_cnt <= 0 and gc:
                self.git.gc()
                gc_cnt = gc

            self.import_revision(revision)
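    # The two *_with_request() methods below handle an accepted submit
    # request: import_new_revision_with_request() when it opens a brand-new
    # branch, import_revision_with_request() when it merges into an existing
    # one.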
    def import_new_revision_with_request(self, revision, request):
        """Create a new branch as a result of a merge"""
        submitted_revision = self.history.find_revision(
            request.source, request.revisionid, revision.time
        )
        if not submitted_revision:
            logging.warning(f"Request {request} does not connect to a known revision")
            return False

        if not submitted_revision.commit:
            # If the revision appointed by the request is not part of
            # the git history, we can have an ordering problem. One
            # example is "premake4".
            self.import_revision(submitted_revision)

        assert submitted_revision.commit is not None

        project = revision.project
        branch, _ = self.projects_info[project]

        # TODO: add an empty commit marking the acceptance of the request (see discussion in PR 2858)
        self.git.branch(branch, submitted_revision.commit)
        self.git.clean()
        self.git.checkout(branch)

        logging.info(f"Create new branch based on {submitted_revision.commit}")
        revision.commit = submitted_revision.commit

    def _rebase_branch_history(self, project, revision):
        branch, _ = self.projects_info[project]
        history = self.history[project]
        revision_index = history.index(revision)
        for index in range(revision_index + 1, len(history)):
            revision = history[index]
            # We are done when we have one non-committed revision
            if not revision.commit:
                return
            logging.info(f"Rebasing {revision} from {branch}")
            revision.commit = None
            self.import_revision(revision)

    def import_revision_with_request(self, revision, request):
        """Import a single revision via a merge"""
        submitted_revision = self.history.find_revision(
            request.source, request.revisionid, revision.time
        )
        if not submitted_revision:
            logging.warning(f"Request {request} does not connect to a known revision")
            return False
        assert submitted_revision.commit is not None

        # TODO: detect a revision, case in point
        # Base:System/bash/284 -> rq683701 -> accept O:F/151
        #   -> autocommit Base:System/bash/285
        # Revert led to openSUSE:Factory/bash/152
        # Base:System/286 restored the reverted code in devel project
        # rq684575 was created and accepted as O:F/153
        # But the 284-285 and the 285-286 changeset is seen as empty
        # as the revert was never in Base:System, so the
        # submitted_revision of 684575 has no commit
        if submitted_revision.commit == "EMPTY":
            logging.warning("Empty commit submitted?!")
            return False

        message = (
            f"Accepting request {revision.requestid}: {revision.comment}\n\n{revision}"
        )
        commit = self.git.merge(
            # TODO: revision.userid or request.creator?
            f"OBS User {revision.userid}",
            "null@suse.de",
            revision.time,
            message,
            submitted_revision.commit,
        )

        if commit == "EMPTY":
            logging.warning("Empty merge. Ignoring the revision and the request")
            self.git.merge_abort()
            revision.commit = commit
            return False

        if commit == "CONFLICT":
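            # Conflicts are resolved in favor of the accepted (target) state:
            # re-download the revision on top of the merge and commit again
            # with merged=True so the merge is not attempted a second time.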
            logging.info("Merge conflict. Downloading revision")
            self.download(revision)
            message = f"CONFLICT {message}"
            commit = self.git.merge(
                f"OBS User {revision.userid}",
                "null@suse.de",
                revision.time,
                message,
                submitted_revision.commit,
                merged=True,
            )

        assert commit and commit != "CONFLICT"
        logging.info(f"Merge with {submitted_revision.commit} into {commit}")
        revision.commit = commit

        # TODO: There are more checks to do, like for example, the
        # last commit into the non-devel branch should be a merge from
        # the devel branch
        if self.rebase_devel:
            branch, _ = self.projects_info.get(request.source, (None, None))
            if branch == "devel":
                self.git.repo.references[f"refs/heads/{branch}"].set_target(commit)
                self._rebase_branch_history(request.source, submitted_revision)

        return True

    def matching_request(self, revision):
        request = self.obs.request(revision.requestid)
        if not request:
            return None

        # to be handled by the caller
        if request.type() != "submit":
            return request

        if request.source not in self.projects_info:
            logging.info("Request from a non-exported project")
            return None

        if request.target != revision.project:
            # This seems to happen when the devel project gets
            # reinitialized (for example, SR#943593 in 7zip, or
            # SR#437901 in ColorFull)
            logging.info("Request target different from current project")
            return None

        if request.source == request.target:
            # this is not a merge, but a different way to do a
            # contribution to the (devel) project - see bindfs's rev 1
            logging.info("Request within the same project")
            return None

        return request

    def import_revision(self, revision):
        """Import a single revision into git"""
        project = revision.project
        branch, api_url = self.projects_info[project]

        logging.info(f"Importing [{revision}] to {branch}")

        self.obs.change_url(api_url)

        # Populate linkrev and replace srcmd5 from the linked
        # revision. If the expansion fails, the revision will be ignored
        # and not imported.
        if not revision.check_expanded():
            logging.warning("Broken revision")
            revision.ignored = True
            return

        # When doing a SR, we see also a revision in the origin
        # project with the outgoing request, but without changes in
        # the project. We can ignore them.
        #
        # If there is a request ID, it will be filtered out later,
        # when the target project is different from itself.
        if revision.userid == "autobuild" and not revision.requestid:
            logging.info("Ignoring autocommit")
            revision.ignored = True
            return

        if revision.userid == "buildservice-autocommit":
            logging.info("Ignoring autocommit")
            revision.ignored = True
            return

        # Create the reference if the branch is new. If so return
        # True.
        new_branch = self.git.checkout(branch)

        if revision.requestid:
            request = self.matching_request(revision)
            if request:
                if request.type() == "delete":
                    # TODO: after this comes a restore, this should be collapsed
                    # before even hitting git
                    logging.info("Delete request ignored")
                    revision.ignored = True
                    return

                logging.debug(f"Found matching request: #{revision.project} #{request}")
                if new_branch:
                    self.import_new_revision_with_request(revision, request)
                    return
                if self.import_revision_with_request(revision, request):
                    return

        # Import revision as a single commit (without merging)
        self.download(revision)

        if new_branch or self.git.is_dirty():
            commit = self.git.commit(
                f"OBS User {revision.userid}",
                "null@suse.de",
                revision.time,
                # TODO: Normalize better the commit message
                f"{revision.comment}\n\n{revision}",
                # Create an empty commit only if is a new branch
                allow_empty=new_branch,
            )
            revision.commit = commit
            logging.info(f"Commit {commit}")
        else:
            logging.info("Skip empty commit")
            revision.ignored = True


def main():
    parser = argparse.ArgumentParser(description="OBS history importer into git")
    parser.add_argument("package", help="OBS package name")
    parser.add_argument(
        "-r",
        "--repodir",
        required=False,
        type=pathlib.Path,
        help="Local git repository directory",
    )
    parser.add_argument(
        "-f",
        "--force",
        action="store_true",
        help="If the repository directory exists, remove it",
    )
    parser.add_argument(
        "-a",
        "--search-ancestor",
        action="store_true",
        help="Search closest ancestor candidate for initial commit",
    )
    parser.add_argument(
        "-d",
        "--rebase-devel",
        action="store_true",
        help="The devel project will be rebased after a merge",
    )
    parser.add_argument(
        "-g",
        "--gc",
        metavar="N",
        type=int,
        default=200,
        help="Garbage collect and pack the git history every N commits",
    )
    parser.add_argument(
        "--level",
        "-l",
        default="INFO",
        help="logging level",
    )

    args = parser.parse_args()

    if args.level:
        numeric_level = getattr(logging, args.level.upper(), None)
        if not isinstance(numeric_level, int):
            print(f"Invalid log level: {args.level}")
            sys.exit(-1)
        logging.basicConfig(level=numeric_level)
        if numeric_level == logging.DEBUG:
            osc.conf.config["debug"] = True
            requests_log = logging.getLogger("requests.packages.urllib3")
            requests_log.setLevel(logging.DEBUG)
            requests_log.propagate = True

    if not args.repodir:
        args.repodir = pathlib.Path(args.package)

    if args.repodir.exists() and not args.force:
        print(f"Repository {args.repodir} already present")
        sys.exit(-1)
    elif args.repodir.exists() and args.force:
        logging.info(f"Removing old repository {args.repodir}")
        shutil.rmtree(args.repodir)

    Cache.init()

    # TODO: use a CLI parameter to describe the projects
    importer = Importer(
        PROJECTS, args.package, args.repodir, args.search_ancestor, args.rebase_devel
    )
    importer.import_all_revisions(args.gc)


if __name__ == "__main__":
    main()