import logging
import os
from pathlib import Path

import yaml

from lib.binary import is_binary_or_large
from lib.db import DB
from lib.git import Git
from lib.obs import OBS
from lib.proxy_sha256 import ProxySHA256, md5
from lib.tree_builder import TreeBuilder


class GitExporter:
    """Export the history of one OBS package into a git repository.

    The exporter keeps a small YAML state file inside the repository's
    ``.git`` directory that records, per branch, the database id of the
    last commit that was exported, so that a later run can resume.
    """

    # Branches whose progress is tracked in the state file.
    _BRANCHES = ("factory", "devel")

    def __init__(self, api_url, project, package, repodir):
        """Set up the OBS client, the sha256 proxy and the git repository.

        Args:
            api_url: URL handed to ``OBS.change_url``.
            project: Project name used when building the commit tree.
            package: Package name used when building the commit tree.
            repodir: Location handed to ``Git`` for the repository.
        """
        self.obs = OBS()
        self.project = project
        self.package = package
        # TODO: Store the api url in the revision
        self.obs.change_url(api_url)
        self.proxy_sha256 = ProxySHA256(self.obs, enabled=True)
        self.git = Git(
            repodir,
            committer="Git OBS Bridge",
            committer_email="obsbridge@suse.de",
        ).create()
        self.state_file = os.path.join(self.git.path, ".git", "_flat_state.yaml")
        # Number of commits between two "git gc" runs in export_as_git().
        self.gc_interval = 200

    def download(self, revision):
        """Synchronise the working tree with the files of ``revision``.

        Files that are binary or large (and were not previously detected
        as text) are registered as LFS entries; every other file is
        downloaded when its (name, size, md5) triple is not already
        present in the working tree. Files present in the working tree
        but not listed by OBS are removed.

        Args:
            revision: Object providing ``project``, ``package`` and
                ``srcmd5`` attributes.

        Raises:
            Exception: If the MD5 of a downloaded file does not match the
                MD5 reported by OBS.
        """
        obs_files = self.obs.files(revision.project, revision.package, revision.srcmd5)
        # NOTE: compare the name for equality. The previous
        # `not in (".gitattributes")` was a substring test against a plain
        # string (no trailing comma, so not a tuple) and wrongly skipped
        # files such as ".git" or "attributes".
        git_files = {
            (f.name, f.stat().st_size, md5(f))
            for f in self.git.path.iterdir()
            if f.is_file() and f.name != ".gitattributes"
        }

        # Overwrite ".gitattributes" with the default LFS attributes
        self.git.add_default_lfs_gitattributes(force=True)

        # Download each file in OBS if it is not a binary (or large)
        # file
        for (name, size, file_md5) in obs_files:
            # this file creates easily 100k commits and is just useless data :(
            # unfortunately it's stored in the same meta package as the project config
            if revision.package == "_project" and name == "_staging_workflow":
                continue

            # have such files been detected as text mimetype before?
            is_text = self.proxy_sha256.is_text(name)
            if not is_text and is_binary_or_large(name, size):
                file_sha256 = self.proxy_sha256.get_or_put(
                    revision.project,
                    revision.package,
                    name,
                    revision.srcmd5,
                    file_md5,
                    size,
                )
                self.git.add_lfs(name, file_sha256["sha256"], size)
                continue

            if (name, size, file_md5) in git_files:
                # Identical file already in the working tree.
                continue

            logging.debug(f"Download {name}")
            self.obs.download(
                revision.project,
                revision.package,
                name,
                revision.srcmd5,
                self.git.path,
                file_md5=file_md5,
            )
            # Validate the MD5 of the downloaded file
            if md5(self.git.path / name) != file_md5:
                raise Exception(f"Download error in {name}")
            self.git.add(name)

        # Remove extra files
        obs_names = {n for (n, _, _) in obs_files}
        git_names = {n for (n, _, _) in git_files}
        for name in git_names - obs_names:
            logging.debug(f"Remove {name}")
            self.git.remove(name)

    def set_gc_interval(self, gc):
        """Set how many commits are created between two ``git gc`` runs.

        A falsy value (``0`` or ``None``) disables the periodic run in
        ``export_as_git``; the single run before the first commit is not
        affected.
        """
        self.gc_interval = gc

    def export_as_git(self):
        """Commit every flat entry that has not been exported yet.

        The flat list built from the database is walked in reverse order.
        Whenever an entry matches the database id stored in the state
        file for a branch, that entry becomes the branch's known state
        and everything collected before it is dropped from the list of
        pending entries. The remaining entries are then committed in the
        order they were collected.
        """
        db = DB()
        tree = TreeBuilder(db).build(self.project, self.package)
        flats = tree.as_flat_list()

        branch_state = {branch: None for branch in self._BRANCHES}
        state_data = {}
        if os.path.exists(self.state_file):
            with open(self.state_file, "r", encoding="utf-8") as f:
                state_data = yaml.safe_load(f)
            # An empty or malformed state file must not break the export.
            if not isinstance(state_data, dict):
                state_data = {}

        left_to_commit = []
        for flat in reversed(flats):
            found_state = False
            for branch in self._BRANCHES:
                if flat.commit.dbid == state_data.get(branch):
                    branch_state[branch] = flat.commit
                    flat.commit.git_commit = self.git.branch_head(branch)
                    logging.debug(
                        f"Found {self.git.path}'s {branch} branch in state {flat}"
                    )
                    # Everything collected so far precedes the saved state.
                    left_to_commit = []
                    found_state = True
            if not found_state:
                left_to_commit.append(flat)

        gc_cnt = self.gc_interval
        if left_to_commit:
            self.git.gc()
        for flat in left_to_commit:
            gc_cnt -= 1
            # A falsy gc_interval disables the periodic gc.
            if gc_cnt <= 0 and self.gc_interval:
                self.git.gc()
                gc_cnt = self.gc_interval
            logging.debug(f"Committing {flat}")
            self.commit_flat(flat, branch_state)

    def limit_download(self, file: Path):
        """Return True when ``file`` has a ``.spec`` or ``.changes`` suffix."""
        return file.suffix in (".spec", ".changes")

    def commit_flat(self, flat, branch_state):
        """Create the git commit for ``flat`` and persist the new state.

        Only files accepted by ``limit_download`` are removed from or
        downloaded into the working tree. After the commit,
        ``branch_state`` is updated in place and the state file is
        rewritten with the database ids of the current branch states.

        Args:
            flat: Entry providing ``branch``, ``commit``, ``parent1`` and
                ``parent2``.
            branch_state: Mapping of branch name to the last exported
                commit (or ``None``); modified in place.
        """
        self.git.checkout(flat.branch)

        parents = []
        if flat.parent1:
            parents.append(flat.parent1.git_commit)
        if flat.parent2:
            parents.append(flat.parent2.git_commit)

        to_download, to_delete = flat.commit.calc_delta(branch_state[flat.branch])
        for file in to_delete:
            if self.limit_download(file):
                self.git.remove(file)
        # "file_md5" rather than "md5" so the imported md5() helper is not
        # shadowed inside this method.
        for file, file_md5 in to_download:
            if not self.limit_download(file):
                continue
            self.obs.download(
                flat.commit.project,
                flat.commit.package,
                file.name,
                flat.commit.expanded_srcmd5,
                self.git.path,
                file_md5=file_md5,
            )
            self.git.add(file)

        git_commit = self.git.commit(
            f"OBS User {flat.commit.userid}",
            "null@suse.de",
            flat.commit.commit_time,
            # TODO: Normalize better the commit message
            f"{flat.commit.comment}\n\n{flat.commit}",
            allow_empty=True,
            parents=parents,
        )
        flat.commit.git_commit = git_commit
        branch_state[flat.branch] = flat.commit

        # Build the mapping first so the file is only opened for the write.
        data = {
            branch: branch_state[branch].dbid
            for branch in self._BRANCHES
            if branch_state[branch]
        }
        with open(self.state_file, "w", encoding="utf-8") as f:
            yaml.dump(data, f)