diff --git a/git-importer.py b/git-importer.py index 12b1910..d5c17f6 100755 --- a/git-importer.py +++ b/git-importer.py @@ -105,7 +105,9 @@ def main(): importer = Importer(URL_OBS, "openSUSE:Factory", args.package) importer.import_into_db() - exporter = GitExporter(URL_OBS, "openSUSE:Factory", args.package, args.repodir, args.cachedir) + exporter = GitExporter( + URL_OBS, "openSUSE:Factory", args.package, args.repodir, args.cachedir + ) exporter.set_gc_interval(args.gc) exporter.export_as_git() diff --git a/lib/git.py b/lib/git.py index 01d0a15..a02b3ec 100644 --- a/lib/git.py +++ b/lib/git.py @@ -184,6 +184,9 @@ class Git: def branch_head(self, branch): return self.repo.references["refs/heads/" + branch].target + def set_branch_head(self, branch, commit): + self.repo.references["refs/heads/" + branch].set_target(commit) + def gc(self): logging.info(f"Garbage recollect and repackage {self.path}") subprocess.run( diff --git a/lib/git_exporter.py b/lib/git_exporter.py index 88cfc23..93d1065 100644 --- a/lib/git_exporter.py +++ b/lib/git_exporter.py @@ -28,22 +28,6 @@ class GitExporter: self.gc_interval = 200 self.cachedir = cachedir - def download(self, revision): - obs_files = self.obs.files(revision.project, revision.package, revision.srcmd5) - git_files = { - (f.name, f.stat().st_size, md5(f)) - for f in self.git.path.iterdir() - if f.is_file() and f.name not in (".gitattributes") - } - - # Overwrite ".gitattributes" with the - self.git.add_default_lfs_gitattributes(force=True) - - # Download each file in OBS if it is not a binary (or large) - # file - for name in obs_files: - self.git.add(name) - def set_gc_interval(self, gc): self.gc_interval = gc @@ -85,44 +69,58 @@ class GitExporter: logging.debug(f"Committing {flat}") self.commit_flat(flat, branch_state) + def commit_file(self, flat, file, size, md5): + # have such files been detected as text mimetype before? + is_text = self.proxy_sha256.is_text(file.name) + if not is_text and is_binary_or_large(file.name, size): + file_sha256 = self.proxy_sha256.get_or_put( + flat.commit.project, + flat.commit.package, + file.name, + flat.commit.expanded_srcmd5, + md5, + size, + ) + self.git.add_lfs(file.name, file_sha256["sha256"], size) + else: + self.obs.download( + flat.commit.project, + flat.commit.package, + file.name, + flat.commit.expanded_srcmd5, + self.git.path, + self.cachedir, + file_md5=md5, + ) + self.git.add(file) + + def branch_fits_parent1(self, flat, branch_state): + if branch_state[flat.branch] is None: + # everything fits nothing + return True + return flat.parent1 == branch_state[flat.branch] + def commit_flat(self, flat, branch_state): parents = [] self.git.checkout(flat.branch) + if flat.parent1: + if not self.branch_fits_parent1(flat, branch_state): + logging.info(f"Reset {flat.branch} onto {flat.parent1.short_string()}") + self.git.set_branch_head(flat.branch, flat.parent1.git_commit) + self.git.checkout(flat.branch) + parents.append(flat.parent1.git_commit) + if flat.parent2: + parents.append(flat.parent2.git_commit) + # Overwrite ".gitattributes" with the self.git.add_default_lfs_gitattributes(force=True) - if flat.parent1: - parents.append(flat.parent1.git_commit) - if flat.parent2: - parents.append(flat.parent2.git_commit) to_download, to_delete = flat.commit.calc_delta(branch_state[flat.branch]) for file in to_delete: self.git.remove(file) for file, size, md5 in to_download: - # have such files been detected as text mimetype before? - is_text = self.proxy_sha256.is_text(file.name) - if not is_text and is_binary_or_large(file.name, size): - file_sha256 = self.proxy_sha256.get_or_put( - flat.commit.project, - flat.commit.package, - file.name, - flat.commit.expanded_srcmd5, - md5, - size, - ) - self.git.add_lfs(file.name, file_sha256["sha256"], size) - else: - self.obs.download( - flat.commit.project, - flat.commit.package, - file.name, - flat.commit.expanded_srcmd5, - self.git.path, - self.cachedir, - file_md5=md5, - ) - self.git.add(file) + self.commit_file(flat, file, size, md5) commit = self.git.commit( f"OBS User {flat.commit.userid}", diff --git a/lib/obs.py b/lib/obs.py index 73e6d8e..dbca9ea 100644 --- a/lib/obs.py +++ b/lib/obs.py @@ -5,11 +5,10 @@ import time import urllib.parse import xml.etree.ElementTree as ET from urllib.error import HTTPError -from pathlib import Path -from lib.proxy_sha256 import md5 import osc.core +from lib.proxy_sha256 import md5 from lib.request import Request from lib.user import User @@ -164,7 +163,7 @@ class OBS: cachedir: str, file_md5: str, ) -> None: - + cached_file = self._path_from_md5(name, cachedir, file_md5) if not self.in_cache(name, cachedir, file_md5): with (dirpath / name).open("wb") as f: diff --git a/lib/proxy_sha256.py b/lib/proxy_sha256.py index 26b4340..2864351 100644 --- a/lib/proxy_sha256.py +++ b/lib/proxy_sha256.py @@ -40,7 +40,7 @@ class ProxySHA256: self.texts = set(["_config", "_service"]) self.hashes = dict() return - logging.info("Retrieve all previously defined SHA256") + logging.debug("Retrieve all previously defined SHA256") response = requests.get(f"http://source.dyn.cloud.suse.de/package/{package}") if response.status_code == 200: json = response.json()