From ab38332642701f8b885dd9d95f5ce382985c582e Mon Sep 17 00:00:00 2001 From: Stephan Kulow Date: Thu, 3 Nov 2022 20:14:56 +0100 Subject: [PATCH] Allow to import multiple packages in one go This way we avoid duplicating all startup and SQL queries --- git-importer.py | 24 +++++++++-------- lib/git.py | 65 +-------------------------------------------- lib/git_exporter.py | 2 +- lib/importer.py | 19 ++++++++----- 4 files changed, 28 insertions(+), 82 deletions(-) diff --git a/git-importer.py b/git-importer.py index d5c17f6..0d7d98f 100755 --- a/git-importer.py +++ b/git-importer.py @@ -44,11 +44,12 @@ PROJECTS = [ def main(): parser = argparse.ArgumentParser(description="OBS history importer into git") - parser.add_argument("package", help="OBS package name") + parser.add_argument("packages", help="OBS package names", nargs="*") parser.add_argument( "-r", "--repodir", required=False, + default=pathlib.Path("repos"), type=pathlib.Path, help="Local git repository directory", ) @@ -94,22 +95,23 @@ def main(): requests_log.propagate = True if args.export: - TestExporter(args.package).run() + if len(args.packages) != 1: + print("Can only export one package") + sys.exit(1) + TestExporter(args.packages[0]).run() return - if not args.repodir: - args.repodir = pathlib.Path("repos") / args.package - if not args.cachedir: args.cachedir = pathlib.Path("~/.cache/git-import/").expanduser() - importer = Importer(URL_OBS, "openSUSE:Factory", args.package) + importer = Importer(URL_OBS, "openSUSE:Factory", args.packages) importer.import_into_db() - exporter = GitExporter( - URL_OBS, "openSUSE:Factory", args.package, args.repodir, args.cachedir - ) - exporter.set_gc_interval(args.gc) - exporter.export_as_git() + for package in args.packages: + exporter = GitExporter( + URL_OBS, "openSUSE:Factory", package, args.repodir, args.cachedir + ) + exporter.set_gc_interval(args.gc) + exporter.export_as_git() if __name__ == "__main__": diff --git a/lib/git.py b/lib/git.py index a02b3ec..56b1ee3 
100644 --- a/lib/git.py +++ b/lib/git.py @@ -109,69 +109,6 @@ class Git: "HEAD", author, committer, message, tree, parents ) - def merge( - self, - user, - user_email, - user_time, - message, - commit, - committer=None, - committer_email=None, - committer_time=None, - clean_on_conflict=True, - merged=False, - allow_empty=False, - ): - new_branch = False - - if not merged: - try: - self.repo.merge(commit) - except KeyError: - # If it is the first commit, we will have a missing - # "HEAD", but the files will be there. We can proceed - # to the commit directly. - new_branch = True - - if not merged and self.repo.index.conflicts: - for conflict in self.repo.index.conflicts: - conflict = [c for c in conflict if c] - if conflict: - logging.info(f"CONFLICT {conflict[0].path}") - - if clean_on_conflict: - self.clean() - # Now I miss Rust enums - return "CONFLICT" - - # Some merges are empty in OBS (no changes, not sure - # why), for now we signal them - if not allow_empty and not self.is_dirty(): - # I really really do miss Rust enums - return "EMPTY" - - if new_branch: - parents = [commit] - else: - parents = [ - self.repo.head.target, - commit, - ] - commit = self.commit( - user, - user_email, - user_time, - message, - parents, - committer, - committer_email, - committer_time, - allow_empty=allow_empty, - ) - - return commit - def merge_abort(self): self.repo.state_cleanup() @@ -188,7 +125,7 @@ class Git: self.repo.references["refs/heads/" + branch].set_target(commit) def gc(self): - logging.info(f"Garbage recollect and repackage {self.path}") + logging.debug(f"Garbage recollect and repackage {self.path}") subprocess.run( ["git", "gc", "--auto"], cwd=self.path, diff --git a/lib/git_exporter.py b/lib/git_exporter.py index f606946..d1b7da8 100644 --- a/lib/git_exporter.py +++ b/lib/git_exporter.py @@ -20,7 +20,7 @@ class GitExporter: self.obs.change_url(api_url) self.proxy_sha256 = ProxySHA256(self.obs, enabled=True) self.git = Git( - repodir, + repodir / package, 
committer="Git OBS Bridge", committer_email="obsbridge@suse.de", ).create() diff --git a/lib/importer.py b/lib/importer.py index b5431b4..0b77c04 100644 --- a/lib/importer.py +++ b/lib/importer.py @@ -9,9 +9,9 @@ from lib.user import User class Importer: - def __init__(self, api_url, project, package): - # Import a Factory package into the database - self.package = package + def __init__(self, api_url, project, packages): + # Import multiple Factory packages into the database + self.packages = packages self.project = project self.obs = OBS() @@ -191,23 +191,30 @@ class Importer: def import_into_db(self): db = DB() - self.refresh_package(db, self.project, self.package) + for package in self.packages: + self.refresh_package(db, self.project, package) + db.conn.commit() + for number in DBRevision.requests_to_fetch(db): self.obs.request(number).import_into_db(db) + db.conn.commit() + with db.cursor() as cur: cur.execute( """SELECT DISTINCT source_project,source_package FROM requests - WHERE id IN (SELECT request_id FROM revisions WHERE project=%s and package=%s);""", - (self.project, self.package), + WHERE id IN (SELECT request_id FROM revisions WHERE project=%s and package = ANY(%s));""", + (self.project, self.packages), ) for project, package in cur.fetchall(): self.refresh_package(db, project, package) + db.conn.commit() missing_users = User.missing_users(db) for userid in missing_users: missing_user = self.obs.user(userid) if missing_user: missing_user.import_into_db(db) + db.conn.commit() self.fill_file_lists(db) db.conn.commit()