import logging
import os
from urllib.parse import parse_qs

import psycopg
import yaml
from urllib3.util import url

from lib import db
from lib.binary import is_binary_or_large
from lib.db import DB
from lib.git import Git
from lib.lfs_oid import LFSOid
from lib.obs import OBS
from lib.proxy_sha256 import ProxySHA256
from lib.tree_builder import TreeBuilder
from lib.user import User


def is_number(s):
    """Return True if ``float(s)`` succeeds, False otherwise.

    Within this module it decides whether a ``rev`` value is looked up in
    the ``rev`` column or in the ``expanded_srcmd5`` column.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        # TypeError covers None and other non-string, non-numeric inputs.
        return False
    return True


class GitExporter:
    """Export the revisions of one OBS package into a git repository.

    The last exported database id of the ``factory`` and ``devel`` branches
    is persisted in a YAML state file inside the repository's ``.git``
    directory, so later runs only commit what is still missing.
    """

    # Marker preceding the OBS link in the text returned by
    # ``Git.branch_commit`` (presumably the commit message -- confirm).
    _OBS_URL_PREFIX = "OBS-URL: "
    _FACTORY_PROJECT = "openSUSE:Factory"

    _SQL_ID_BY_REV = (
        "SELECT id FROM revisions WHERE project=%s AND package=%s AND rev=%s"
    )
    _SQL_ID_BY_SRCMD5 = (
        "SELECT id FROM revisions WHERE project=%s AND package=%s"
        " AND expanded_srcmd5=%s"
    )

    def __init__(self, api_url, project, package, repodir, cachedir):
        """Open (or create) the git repository for ``package``.

        Args:
            api_url: URL handed to the ``OBS`` client.
            project: OBS project to export from.
            package: package name; also the repository directory name.
            repodir: directory holding the repositories; must support the
                ``/`` operator (e.g. ``pathlib.Path``).
            cachedir: cache directory passed on to ``OBS.download``.
        """
        self.obs = OBS(api_url)
        self.project = project
        self.package = package
        self.db = DB()
        self.proxy_sha256 = ProxySHA256(self.obs, self.db)
        self.git = Git(
            repodir / package,
            committer="Git OBS Bridge",
            committer_email="obsbridge@suse.de",
        )
        if self.git.exists():
            self.git.open()
        else:
            self.git.create()
            # NOTE(review): the remote is only added for freshly created
            # repositories -- confirm this matches the intended layout.
            self.git.add_gitea_remote(package)
        self.state_file = os.path.join(self.git.path, ".git", "_flat_state.yaml")
        self.gc_interval = 200
        # Countdown until the next gc; reset by run_gc().
        self.gc_cnt = self.gc_interval
        self.cachedir = cachedir

    def set_gc_interval(self, gc):
        """Set how many commits may pass between two ``git gc`` runs.

        A falsy value disables the periodic gc in the commit loops.
        """
        self.gc_interval = gc

    def _obs_revs(self, branch, project):
        """Yield every ``rev`` referenced by OBS-URL lines of ``branch``.

        Only links whose path is ``/package/show/<project>/<package>`` and
        that carry a non-empty ``rev`` query parameter are considered.
        """
        expected_path = f"/package/show/{project}/{self.package}"
        prefix = self._OBS_URL_PREFIX
        for line in self.git.branch_commit(branch).splitlines():
            if not line.startswith(prefix):
                continue
            # Slice the prefix off: str.strip(prefix) would treat it as a
            # character set and could also eat trailing URL characters.
            parsed = url.parse_url(line[len(prefix):].strip())
            if parsed.path != expected_path or not parsed.query:
                continue
            revs = parse_qs(parsed.query).get("rev")
            if revs:
                yield revs[0]

    def _find_revision_id(self, project, rev):
        """Return the ``revisions.id`` for ``rev`` of this package, or None.

        Numeric values are matched against ``rev``, everything else against
        ``expanded_srcmd5``.

        Raises:
            psycopg.Error: if the query fails; the caller is responsible
                for rolling back the connection.
        """
        query = self._SQL_ID_BY_REV if is_number(rev) else self._SQL_ID_BY_SRCMD5
        with self.db.cursor() as cur:
            cur.execute(query, (project, self.package, rev))
            row = cur.fetchone()
        return row[0] if row else None

    @staticmethod
    def _fallback_devel_to_factory(state_data):
        """Point ``devel`` at the factory id, if a factory id is known."""
        factory_id = state_data.get("factory")
        if factory_id is not None:
            state_data["devel"] = factory_id

    def reconstruct_state(self, flats):
        """Rebuild the branch state from OBS-URL lines in the repository.

        Used when no state file exists. Returns a dict that may contain
        the keys ``factory`` and ``devel`` mapped to revision database ids;
        a key is absent when the id could not be determined.
        """
        state_data = dict()

        for rev in self._obs_revs("factory", self._FACTORY_PROJECT):
            try:
                dbid = self._find_revision_id(self._FACTORY_PROJECT, rev)
            except psycopg.Error as e:
                logging.error(e)
                self.db.conn.rollback()
                # Nothing can be fetched after a failed query; treat it
                # like an unknown revision.
                return state_data
            if dbid is None:
                return state_data
            state_data["factory"] = dbid

        try:
            logging.debug("devel reconstruct")
            devel = self.devel_rev(flats)
            if devel is not None:
                prj = devel.commit.project
                for rev in self._obs_revs("devel", prj):
                    logging.debug(f"finding id for ({prj, self.package, rev}")
                    try:
                        dbid = self._find_revision_id(prj, rev)
                    except psycopg.Error as e:
                        logging.error(e)
                        self.db.conn.rollback()
                        self._fallback_devel_to_factory(state_data)
                        continue
                    if dbid is None:
                        logging.info(
                            " ** cannot find revision for devel branch: %s", rev
                        )
                        return state_data
                    state_data["devel"] = dbid
        except Exception:
            # Best effort: any failure while inspecting the devel branch
            # falls back to the factory state instead of aborting.
            logging.debug(
                "could not reconstruct devel state, falling back to factory",
                exc_info=True,
            )
            self._fallback_devel_to_factory(state_data)
        return state_data

    def check_repo_state(self, flats, branch_state, branch):
        """Return the entries of ``flats`` that still have to be committed.

        The known state comes from the state file if it exists, otherwise
        from ``reconstruct_state``. ``flats`` is walked in reverse order;
        when an entry matches the recorded id of ``branch``, it is stored
        in ``branch_state[branch]``, gets the current branch head as its
        git commit, and everything collected before it is dropped.
        """
        if os.path.exists(self.state_file):
            with open(self.state_file) as f:
                state_data = yaml.safe_load(f)
            if not isinstance(state_data, dict):
                # Empty or malformed state file.
                state_data = {}
        else:
            state_data = self.reconstruct_state(flats)

        logging.debug(f"state data: {state_data}")
        known_id = state_data.get(branch)
        left_to_commit = []
        for flat in reversed(flats):
            if flat.commit.dbid == known_id:
                branch_state[branch] = flat.commit
                flat.commit.git_commit = self.git.branch_head(branch)
                logging.debug(
                    f"Found {self.git.path}'s {branch} branch in state {flat}"
                )
                left_to_commit = []
            else:
                left_to_commit.append(flat)
        return left_to_commit

    def devel_rev(self, tree):
        """Return the first entry of ``tree`` on the devel branch, or None."""
        return next((flat for flat in tree if flat.branch == "devel"), None)

    def _commit_flats(self, flats, branch_state, users):
        """Commit ``flats`` in order and return True if any were committed.

        ``users`` caches ``User.find`` results by user id across calls.
        """
        committed = False
        for flat in flats:
            userid = flat.commit.userid
            if userid not in users:
                users[userid] = User.find(self.db, userid)
            flat.user = users[userid]
            self.gc_cnt -= 1
            if self.gc_cnt <= 0 and self.gc_interval:
                self.run_gc()
            logging.debug(f"Committing {flat}")
            self.commit_flat(flat, branch_state)
            committed = True
        return committed

    def export_as_git(self):
        """Export all not yet committed revisions of the package into git.

        Commits the factory history first. For ``openSUSE:Factory`` the
        revision chain of the devel project is then committed on the
        ``devel`` branch. If no devel state exists afterwards, ``devel``
        is set to the head of ``factory``.
        """
        if os.getenv("CHECK_ALL_LFS"):
            LFSOid.check_all(self.db, self.package)
        tree = TreeBuilder(self.db).build(self.project, self.package)
        added_commits = False

        if tree is None:  # eg. python-M2Crypto errors
            return
        flats = tree.as_flat_list()

        branch_state = {"factory": None, "devel": None}
        left_to_commit = self.check_repo_state(flats, branch_state, "factory")
        logging.info(f"Commiting into {self.git.path} {len(left_to_commit)} into factory")
        self.run_gc()
        users = dict()

        if self._commit_flats(left_to_commit, branch_state, users):
            added_commits = True

        # export the devel_tree head commits based on the devel branch
        if self.project == self._FACTORY_PROJECT:
            devel_head = self.devel_rev(flats)
            if devel_head is not None:
                devel_project = devel_head.commit.project
                logging.debug(f"building devel revisions chain for {devel_project} / {self.package}")
                flat_devel = (
                    TreeBuilder(self.db)
                    .revisions_chain(devel_project, self.package)
                    .as_flat_list()
                )
                for f in flat_devel:
                    f.branch = "devel"

                left_to_commit = self.check_repo_state(flat_devel, branch_state, "devel")
                logging.debug(branch_state)
                logging.debug(f"appending {len(left_to_commit)} items on top of devel")
                if self._commit_flats(left_to_commit, branch_state, users):
                    added_commits = True

        # make sure that we create devel branch
        if not branch_state["devel"]:
            logging.debug("force creating devel")
            self.git.set_branch_head("devel", self.git.branch_head("factory"))

        # if added_commits:
        #     self.git.push(force=True)

    def run_gc(self):
        """Reset the gc countdown and run ``git gc`` on the repository."""
        self.gc_cnt = self.gc_interval
        self.git.gc()

    def is_lfs_file(self, package, filename, size):
        """Return True if the file should be stored via LFS.

        A file qualifies when ``is_binary_or_large`` accepts it and the
        sha256 proxy does not know it as a text file.
        """
        if not is_binary_or_large(filename, size):
            return False
        return not self.proxy_sha256.is_text(package, filename)

    def commit_file(self, flat, file, size, md5):
        """Add one file of ``flat`` to the index, via LFS where applicable."""
        # don't export imported _service: files, if any
        if file.name.startswith("_service:"):
            return

        # have such files been detected as text mimetype before?
        if self.is_lfs_file(flat.commit.package, file.name, size):
            file_sha256 = self.proxy_sha256.get_or_put(
                flat.commit.project,
                flat.commit.package,
                file.name,
                flat.commit.expanded_srcmd5,
                md5,
                size,
            )
            # as it's newly registered, it might be a text file now, so double check
            if not self.proxy_sha256.is_text(flat.commit.package, file.name):
                self.git.add_lfs(file.name, file_sha256, size)
                return
        self.commit_non_lfs_file(flat, file, md5)

    def commit_non_lfs_file(self, flat, file, md5):
        """Download ``file`` into the work tree and add it to the index."""
        self.obs.change_url(flat.commit.api_url)
        self.obs.download(
            flat.commit.project,
            flat.commit.package,
            file.name,
            flat.commit.expanded_srcmd5,
            self.git.path,
            self.cachedir,
            file_md5=md5,
        )
        self.git.add(file)

    def branch_fits_parent1(self, flat, branch_state):
        """Return True if the branch can take ``flat`` without a reset.

        That is the case when the branch has no recorded state yet, or when
        its recorded state equals the first parent of ``flat``.
        """
        current = branch_state[flat.branch]
        if current is None:
            # everything fits nothing
            return True
        return flat.parent1 == current

    def _write_state(self, branch_state):
        """Persist the database ids of the known branch heads as YAML."""
        data = {}
        for branch in ("factory", "devel"):
            head = branch_state[branch]
            if head:
                data[branch] = head.dbid
        with open(self.state_file, "w") as f:
            yaml.dump(data, f)

    def commit_flat(self, flat, branch_state):
        """Create the git commit for ``flat`` and record the new state.

        Checks out the branch of ``flat``, resets it onto the first parent
        if the branch state does not match, applies the file delta, commits
        with the user's identity and rewrites the state file.
        """
        parents = []
        self.git.checkout(flat.branch)

        if flat.parent1:
            if not self.branch_fits_parent1(flat, branch_state):
                logging.debug(f"Reset {flat.branch} onto {flat.parent1.short_string()}")
                assert flat.parent1.git_commit
                self.git.set_branch_head(flat.branch, flat.parent1.git_commit)
                self.git.checkout(flat.branch)
            parents.append(flat.parent1.git_commit)
            if flat.parent2:
                assert flat.parent2.git_commit
                parents.append(flat.parent2.git_commit)

        # create the default files if they do not exist yet
        self.git.add_default_lfs_gitattributes(force=False)
        self.git.add_default_gitignore()

        to_download, to_delete = flat.commit.calc_delta(branch_state[flat.branch])
        for file in to_delete:
            self.git.remove(file)
        for file, size, md5 in to_download:
            self.commit_file(flat, file, size, md5)

        commit = self.git.commit(
            flat.user.realname,
            flat.user.email,
            flat.commit.commit_time,
            flat.commit.git_commit_message(),
            parents=parents,
        )
        flat.commit.git_commit = commit
        branch_state[flat.branch] = flat.commit

        self._write_state(branch_state)