def branch_commit(self, branch="HEAD"):
    """Return the raw commit object that *branch* points at, as text.

    Runs ``git cat-file commit <branch>`` through ``self.git_run`` and
    returns its standard output decoded as UTF-8 with surrounding
    whitespace stripped.

    Args:
        branch: ref name handed to ``git cat-file``; defaults to ``HEAD``.

    Returns:
        The decoded commit text, or an empty string when ``git_run`` or the
        decoding raises an ordinary exception.
    """
    try:
        result = self.git_run(
            ["cat-file", "commit", branch], stdout=subprocess.PIPE
        )
        return result.stdout.decode("utf-8").strip()
    # Deliberately best-effort, but catch Exception rather than everything:
    # a bare except would also swallow KeyboardInterrupt and SystemExit.
    except Exception:
        return ""
def reconstruct_state(self, flats):
    """Rebuild the exported-state mapping from the git branch heads.

    The head commits of the ``factory`` and ``devel`` branches are read via
    ``self.git.branch_commit`` and scanned for ``OBS-URL: `` lines.  A line
    counts only when its URL path is ``/package/show/<project>/<package>``
    and its query carries a ``rev`` value.  That value is looked up in the
    ``revisions`` table: by ``rev`` when it consists of digits only, by
    ``expanded_srcmd5`` otherwise.

    Args:
        flats: flat node list handed to ``self.devel_rev`` to find the node
            whose commit names the devel project.

    Returns:
        A dict that may contain ``"factory"`` and/or ``"devel"`` mapped to
        the matching ``revisions.id``.  When the devel lookup fails with an
        error, ``"devel"`` falls back to the factory id if one was found.
        A recorded revision that has no database row ends the
        reconstruction early and returns what was collected so far.
    """
    # Function-scope import: urlsplit is stdlib and avoids handing a
    # possibly-None query to the membership test.
    from urllib.parse import parse_qs, urlsplit

    prefix = "OBS-URL: "
    factory_project = "openSUSE:Factory"
    state_data = {}

    def recorded_revs(branch, project):
        """Yield the rev of each OBS-URL line on *branch* for *project*."""
        wanted_path = f"/package/show/{project}/{self.package}"
        for line in self.git.branch_commit(branch).splitlines():
            if not line.startswith(prefix):
                continue
            # Slice the prefix off; str.strip(prefix) would strip a
            # *character set* from both ends instead.
            try:
                parts = urlsplit(line[len(prefix):].strip())
            except ValueError:
                continue
            if parts.path != wanted_path:
                continue
            revs = parse_qs(parts.query).get("rev")
            if revs:
                yield revs[0]

    def find_dbid(project, rev):
        """Return the revisions.id for *rev*, or None when no row matches."""
        # isdigit() instead of float(): float() also accepts "nan", "inf"
        # and exponent forms such as "1e5".
        if rev.isdigit():
            query = "SELECT id FROM revisions WHERE project=%s AND package=%s AND rev=%s"
        else:
            query = "SELECT id FROM revisions WHERE project=%s AND package=%s AND expanded_srcmd5=%s"
        with self.db.cursor() as cur:
            cur.execute(query, (project, self.package, rev))
            row = cur.fetchone()
        return row[0] if row else None

    for rev in recorded_revs("factory", factory_project):
        try:
            dbid = find_dbid(factory_project, rev)
        except psycopg.Error as e:
            logging.error(e)
            self.db.conn.rollback()
            # Do not touch the cursor after a failed statement.
            dbid = None
        if dbid is None:
            return state_data
        state_data["factory"] = dbid

    # .get(): the factory id may legitimately be missing at this point.
    factory_id = state_data.get("factory")
    try:
        logging.debug("devel reconstruct")
        devel = self.devel_rev(flats)
        if devel is not None:
            prj = devel.commit.project
            for rev in recorded_revs("devel", prj):
                logging.debug(f"finding id for ({prj, self.package, rev}")
                try:
                    dbid = find_dbid(prj, rev)
                except psycopg.Error as e:
                    logging.error(e)
                    self.db.conn.rollback()
                    if factory_id is not None:
                        state_data["devel"] = factory_id
                    continue
                if dbid is None:
                    logging.info(" ** cannot find revision for devel branch: %s", rev)
                    return state_data
                state_data["devel"] = dbid
    except Exception:
        # Best effort: any failure while inspecting devel falls back to the
        # factory state, but the reason is logged rather than hidden.
        logging.exception("could not reconstruct devel state")
        if factory_id is not None:
            state_data["devel"] = factory_id
    return state_data
def devel_rev(self, tree):
    """Return the first node of *tree* whose ``branch`` is "devel".

    Returns None when no node in *tree* is on the devel branch.
    """
    devel_nodes = (node for node in tree if node.branch == "devel")
    return next(devel_nodes, None)
self.commit_flat(flat, branch_state) + added_commits = True # make sure that we create devel branch if not branch_state["devel"]: logging.debug("force creating devel") self.git.set_branch_head("devel", self.git.branch_head("factory")) - self.git.push(force=True) + #if added_commits: + # self.git.push(force=True) def run_gc(self): self.gc_cnt = self.gc_interval @@ -103,6 +197,10 @@ class GitExporter: return not self.proxy_sha256.is_text(package, filename) def commit_file(self, flat, file, size, md5): + # don't export imported _service: files, if any + if file.name[0:9] == '_service:': + return + # have such files been detected as text mimetype before? if self.is_lfs_file(flat.commit.package, file.name, size): file_sha256 = self.proxy_sha256.get_or_put( @@ -172,10 +270,3 @@ class GitExporter: ) flat.commit.git_commit = commit branch_state[flat.branch] = flat.commit - with open(self.state_file, "w") as f: - data = {} - for branch in ["factory", "devel"]: - commit = branch_state[branch] - if commit: - data[branch] = commit.dbid - yaml.dump(data, f) diff --git a/lib/importer.py b/lib/importer.py index b82faa8..455f038 100644 --- a/lib/importer.py +++ b/lib/importer.py @@ -42,6 +42,8 @@ class Importer: def update_db_package(self, project, package): root = self.obs._history(project, package) if root is None: + if self.project == "openSUSE:Factory" and project == self.project: + exit(10) return latest = DBRevision.max_rev(self.db, project, package) for r in root.findall("revision"): @@ -217,7 +219,7 @@ class Importer: logging.debug(f"Refresh {project}/{package}") self.refreshed_packages.add(key) if self.has_scmsync(project) or self.has_scmsync(key): - self.packages_with_scmsync.add(package) + self.packages_with_scmsync.add((project, package)) logging.debug(f"{project}/{package} already in Git - skipping") return self.update_db_package(project, package) @@ -268,15 +270,12 @@ class Importer: return self.scmsync_cache[key] root = self.obs._meta(key) - scmsync = None 
def _download(self, project, package, name, revision):
    """Fetch one source file of a package revision from the OBS API.

    Requests ``source/<project>/<package>/<name>`` with ``rev=<revision>``
    and ``expand=1``.  If that request fails with HTTP 404, it is retried
    once with ``deleted=1`` added to the query.
    NOTE(review): presumably so files of since-deleted packages can still
    be fetched -- confirm against the OBS API documentation.

    Returns:
        Whatever ``osc.core.http_GET`` returns for the successful request.

    Raises:
        HTTPError: for any status other than 404 on the first request, and
            for any failure of the retry.
    """
    path = ["source", project, package, name]
    query = {"rev": revision, "expand": 1}
    try:
        return osc.core.http_GET(osc.core.makeurl(self.url, path, query))
    except HTTPError as e:
        # Only a 404 warrants the retry; swallowing other errors would make
        # this method silently return None.
        if e.code != 404:
            raise
    query["deleted"] = 1
    return osc.core.http_GET(osc.core.makeurl(self.url, path, query))
sha.hexdigest(), size, mimetype, file_md5 diff --git a/lib/tree_builder.py b/lib/tree_builder.py index 8f179d5..a92121c 100644 --- a/lib/tree_builder.py +++ b/lib/tree_builder.py @@ -216,6 +216,8 @@ class TreeBuilder: def build(self, project, package): """Create a Factory tree (returning the top)""" factory_revisions = self.revisions_chain(project, package) + if factory_revisions == None: + return None self.add_merge_points(factory_revisions) # factory_revisions.print() self.prune_loose_end(factory_revisions)