From ed4b7367ebf84964ccc7224e310380c088aae9a3 Mon Sep 17 00:00:00 2001 From: Stephan Kulow Date: Thu, 3 Nov 2022 10:37:26 +0100 Subject: [PATCH] Reset branch if the devel branch is based on Factory This happens in packages that change their devel project over time. Then the commit in the devel project no longer has its parent in the devel branch but is based on Factory. --- git-importer.py | 4 ++- lib/git.py | 3 ++ lib/git_exporter.py | 84 ++++++++++++++++++++++----------------------- lib/obs.py | 5 ++- lib/proxy_sha256.py | 2 +- 5 files changed, 50 insertions(+), 48 deletions(-) diff --git a/git-importer.py b/git-importer.py index 12b1910..d5c17f6 100755 --- a/git-importer.py +++ b/git-importer.py @@ -105,7 +105,9 @@ def main(): importer = Importer(URL_OBS, "openSUSE:Factory", args.package) importer.import_into_db() - exporter = GitExporter(URL_OBS, "openSUSE:Factory", args.package, args.repodir, args.cachedir) + exporter = GitExporter( + URL_OBS, "openSUSE:Factory", args.package, args.repodir, args.cachedir + ) exporter.set_gc_interval(args.gc) exporter.export_as_git() diff --git a/lib/git.py b/lib/git.py index 01d0a15..a02b3ec 100644 --- a/lib/git.py +++ b/lib/git.py @@ -184,6 +184,9 @@ class Git: def branch_head(self, branch): return self.repo.references["refs/heads/" + branch].target + def set_branch_head(self, branch, commit): + self.repo.references["refs/heads/" + branch].set_target(commit) + def gc(self): logging.info(f"Garbage recollect and repackage {self.path}") subprocess.run( diff --git a/lib/git_exporter.py b/lib/git_exporter.py index 88cfc23..93d1065 100644 --- a/lib/git_exporter.py +++ b/lib/git_exporter.py @@ -28,22 +28,6 @@ class GitExporter: self.gc_interval = 200 self.cachedir = cachedir - def download(self, revision): - obs_files = self.obs.files(revision.project, revision.package, revision.srcmd5) - git_files = { - (f.name, f.stat().st_size, md5(f)) - for f in self.git.path.iterdir() - if f.is_file() and f.name not in (".gitattributes") - } - - 
# Overwrite ".gitattributes" with the - self.git.add_default_lfs_gitattributes(force=True) - - # Download each file in OBS if it is not a binary (or large) - # file - for name in obs_files: - self.git.add(name) - def set_gc_interval(self, gc): self.gc_interval = gc @@ -85,44 +69,58 @@ class GitExporter: logging.debug(f"Committing {flat}") self.commit_flat(flat, branch_state) + def commit_file(self, flat, file, size, md5): + # have such files been detected as text mimetype before? + is_text = self.proxy_sha256.is_text(file.name) + if not is_text and is_binary_or_large(file.name, size): + file_sha256 = self.proxy_sha256.get_or_put( + flat.commit.project, + flat.commit.package, + file.name, + flat.commit.expanded_srcmd5, + md5, + size, + ) + self.git.add_lfs(file.name, file_sha256["sha256"], size) + else: + self.obs.download( + flat.commit.project, + flat.commit.package, + file.name, + flat.commit.expanded_srcmd5, + self.git.path, + self.cachedir, + file_md5=md5, + ) + self.git.add(file) + + def branch_fits_parent1(self, flat, branch_state): + if branch_state[flat.branch] is None: + # everything fits nothing + return True + return flat.parent1 == branch_state[flat.branch] + def commit_flat(self, flat, branch_state): parents = [] self.git.checkout(flat.branch) + if flat.parent1: + if not self.branch_fits_parent1(flat, branch_state): + logging.info(f"Reset {flat.branch} onto {flat.parent1.short_string()}") + self.git.set_branch_head(flat.branch, flat.parent1.git_commit) + self.git.checkout(flat.branch) + parents.append(flat.parent1.git_commit) + if flat.parent2: + parents.append(flat.parent2.git_commit) + # Overwrite ".gitattributes" with the self.git.add_default_lfs_gitattributes(force=True) - if flat.parent1: - parents.append(flat.parent1.git_commit) - if flat.parent2: - parents.append(flat.parent2.git_commit) to_download, to_delete = flat.commit.calc_delta(branch_state[flat.branch]) for file in to_delete: self.git.remove(file) for file, size, md5 in to_download: - # 
have such files been detected as text mimetype before? - is_text = self.proxy_sha256.is_text(file.name) - if not is_text and is_binary_or_large(file.name, size): - file_sha256 = self.proxy_sha256.get_or_put( - flat.commit.project, - flat.commit.package, - file.name, - flat.commit.expanded_srcmd5, - md5, - size, - ) - self.git.add_lfs(file.name, file_sha256["sha256"], size) - else: - self.obs.download( - flat.commit.project, - flat.commit.package, - file.name, - flat.commit.expanded_srcmd5, - self.git.path, - self.cachedir, - file_md5=md5, - ) - self.git.add(file) + self.commit_file(flat, file, size, md5) commit = self.git.commit( f"OBS User {flat.commit.userid}", diff --git a/lib/obs.py b/lib/obs.py index 73e6d8e..dbca9ea 100644 --- a/lib/obs.py +++ b/lib/obs.py @@ -5,11 +5,10 @@ import time import urllib.parse import xml.etree.ElementTree as ET from urllib.error import HTTPError -from pathlib import Path -from lib.proxy_sha256 import md5 import osc.core +from lib.proxy_sha256 import md5 from lib.request import Request from lib.user import User @@ -164,7 +163,7 @@ class OBS: cachedir: str, file_md5: str, ) -> None: - + cached_file = self._path_from_md5(name, cachedir, file_md5) if not self.in_cache(name, cachedir, file_md5): with (dirpath / name).open("wb") as f: diff --git a/lib/proxy_sha256.py b/lib/proxy_sha256.py index 26b4340..2864351 100644 --- a/lib/proxy_sha256.py +++ b/lib/proxy_sha256.py @@ -40,7 +40,7 @@ class ProxySHA256: self.texts = set(["_config", "_service"]) self.hashes = dict() return - logging.info("Retrieve all previously defined SHA256") + logging.debug("Retrieve all previously defined SHA256") response = requests.get(f"http://source.dyn.cloud.suse.de/package/{package}") if response.status_code == 200: json = response.json()