diff --git a/git-importer.py b/git-importer.py index 1a2173d..2f763b8 100755 --- a/git-importer.py +++ b/git-importer.py @@ -77,15 +77,10 @@ def main(): default="INFO", help="logging level", ) - parser.add_argument( - "--db", - action="store_true", - help="Import revisions into database only", - ) parser.add_argument( "--export", action="store_true", - help="Export database fields for the given package as JSON", + help="Export database fields for the given package as YAML", ) args = parser.parse_args() @@ -113,12 +108,9 @@ def main(): importer = Importer( PROJECTS, args.package, args.repodir, args.search_ancestor, args.rebase_devel ) - - if args.db: - importer.import_into_db() - importer.export_as_git() - return - importer.import_all_revisions(args.gc) + importer.set_gc_interval(args.gc) + importer.import_into_db() + importer.export_as_git() if __name__ == "__main__": diff --git a/lib/db_revision.py b/lib/db_revision.py index 4b23324..bea541d 100644 --- a/lib/db_revision.py +++ b/lib/db_revision.py @@ -1,3 +1,4 @@ +import logging from hashlib import md5 from lib.db import DB @@ -213,11 +214,11 @@ class DBRevision: old_files[entry["name"]] = f"{entry['md5']}-{entry['size']}" for entry in self.files_list(db): if old_files.get(entry["name"], "") != f"{entry['md5']}-{entry['size']}": - print("Download", entry["name"]) + logging.debug(f"Download {entry['name']}") to_download.append(entry["name"]) old_files.pop(entry["name"], None) for entry in old_files.keys(): - print("Delete", entry) + logging.debug(f"Delete {entry}") to_delete.append(entry) return to_download, to_delete diff --git a/lib/git.py b/lib/git.py index 70e0f4b..53007cb 100644 --- a/lib/git.py +++ b/lib/git.py @@ -185,7 +185,7 @@ class Git: return self.repo.references["refs/heads/" + branch].target def gc(self): - logging.info(f"Garbage recollec and repackage {self.path}") + logging.info(f"Garbage recollect and repackage {self.path}") subprocess.run( ["git", "gc", "--auto"], cwd=self.path, diff --git a/lib/history.py b/lib/history.py index 94ea7b3..1feab02 100644 --- a/lib/history.py +++ b/lib/history.py @@ -29,39 +29,6 @@ class History: ) return original_project[0] if original_project else None - def _fetch_revisions(self, project, **params): - root = self.obs._history(project, self.package, **params) - if root is not None: - return [ - OBSRevision(self.obs, self, project, self.package).parse(r) - for r in root.findall("revision") - ] - - def fetch_revisions(self, project, follow_copypac=False): - """Get the revision history of a package""" - if project in self: - return - - revs = self._fetch_revisions(project) - self.revisions[project] = revs - # while ( - # revs - # and follow_copypac - # and (copypac_project := self._extract_copypac(revs[0].comment)) - # ): - # # Add the history pre-copypac - # # TODO: missing the old project name - # revs = self._fetch_revisions(copypac_project, deleted=1) - # self.revisions[project] = ( - # revs + self.revisions[project] - # ) - - def fetch_all_revisions(self, projects): - """Pre-populate the history""" - for project, _, api_url in projects: - self.obs.change_url(api_url) - self.fetch_revisions(project) - def sort_all_revisions(self): """Sort revisions for all projects, from older to newer""" return sorted(itertools.chain(*self.revisions.values()), key=lambda x: x.time) diff --git a/lib/importer.py b/lib/importer.py index e4141b0..f6884ce 100644 --- a/lib/importer.py +++ b/lib/importer.py @@ -60,6 +60,7 @@ class Importer: ).create() self.state_file = os.path.join(self.git.path, ".git", "_flat_state.yaml") self.proxy_sha256 = ProxySHA256(self.obs, enabled=True) + self.gc_interval = 200 self.history = History(self.obs, self.package) @@ -110,7 +111,7 @@ class Importer: self.git.add_lfs(name, file_sha256["sha256"], size) else: if (name, size, file_md5) not in git_files: - print(f"Download {name}") + logging.debug(f"Download {name}") self.obs.download( revision.project, revision.package, @@ -127,9 +128,12 @@ class Importer: obs_names = {n for (n, _, _) in obs_files} git_names = {n for (n, _, _) in git_files} for name in git_names - obs_names: - print(f"Remove {name}") + logging.debug(f"Remove {name}") self.git.remove(name) + def set_gc_interval(self, gc): + self.gc_interval = gc + def update_db_package(self, db, project, package): root = self.obs._history(project, package) if root is None: @@ -344,7 +348,15 @@ class Importer: found_state = True if not found_state: left_to_commit.append(flat) + + gc_cnt = self.gc_interval + if len(left_to_commit) > 0: + self.git.gc() for flat in left_to_commit: + gc_cnt -= 1 + if gc_cnt <= 0 and self.gc_interval: + self.git.gc() + gc_cnt = self.gc_interval logging.debug(f"Committing {flat}") self.commit_flat(db, flat, branch_state) @@ -441,26 +453,6 @@ class Importer: db.conn.commit() - def import_all_revisions(self, gc): - # Fetch all the requests and sort them. Ideally we should - # build the graph here, to avoid new commits before the merge. - # For now we will sort them and invalidate the commits if - # "rebase_devel" is set. - self.history.fetch_all_revisions(self.projects) - revisions = self.history.sort_all_revisions() - - logging.debug(f"Selected import order for {self.package}") - for revision in revisions: - logging.debug(revision) - - gc_cnt = gc - for revision in revisions: - gc_cnt -= 1 - if gc_cnt <= 0 and gc: - self.git.gc() - gc_cnt = gc - self.import_revision(revision) - def import_new_revision_with_request(self, revision, request): """Create a new branch as a result of a merge"""