From 9bd6643e8a188540692ac4c592d6426bcf382e60 Mon Sep 17 00:00:00 2001 From: Stephan Kulow Date: Wed, 26 Oct 2022 22:18:16 +0200 Subject: [PATCH] Fix files_hash calculation and fake revisions --- lib/importer.py | 27 +++++++++------------------ lib/tree_builder.py | 2 +- 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/lib/importer.py b/lib/importer.py index 89e8aac..65adb44 100644 --- a/lib/importer.py +++ b/lib/importer.py @@ -167,18 +167,6 @@ class Importer: (rev.dbid, linked_rev.dbid), ) - def calculate_file_hashes(self, db): - with db.cursor() as cur: - cur.execute( - "SELECT * from revisions where files_hash IS NULL AND broken is FALSE" - ) - for row in cur.fetchall(): - rev = DBRevision(row) - md5 = rev.calculate_files_hash(db) - cur.execute( - "UPDATE revisions SET files_hash=%s WHERE id=%s", (md5, rev.dbid) - ) - def fetch_all_linked_packages(self, db, project, package): with db.cursor() as cur: cur.execute( @@ -193,12 +181,10 @@ class Importer: def find_fake_revisions(self, db): with db.cursor() as cur: cur.execute( - """SELECT * from revisions WHERE - id in (SELECT revision_id from linked_revs WHERE considered=FALSE) AND - id not in (SELECT revision_id FROM fake_revs) ORDER by project,package,rev""" + "SELECT * from revisions WHERE id in (SELECT revision_id from linked_revs WHERE considered=FALSE)" ) for row in cur.fetchall(): - DBRevision(row) + self._find_fake_revision(db, DBRevision(row)) def _find_fake_revision(self, db, rev): prev = rev.previous_commit(db) @@ -211,7 +197,7 @@ class Importer: return with db.cursor() as cur: cur.execute( - """SELECT * from revisions where id in + """SELECT * FROM revisions WHERE id IN (SELECT revision_id from linked_revs WHERE linked_id=%s) AND commit_time <= %s ORDER BY commit_time""", (prev.dbid, rev.commit_time), @@ -305,13 +291,18 @@ class Importer: ) if list: rev.import_dir_list(db, list) + md5 = rev.calculate_files_hash(db) + with db.cursor() as cur: + cur.execute( + "UPDATE revisions SET 
files_hash=%s WHERE id=%s", + (md5, rev.dbid), + ) else: rev.set_broken(db) for number in DBRevision.requests_to_fetch(db, project, self.package): self.obs.request(number).import_into_db(db) - self.calculate_file_hashes(db) db.conn.commit() TreeBuilder(db).build(self.package) diff --git a/lib/tree_builder.py b/lib/tree_builder.py index cc2a2e8..9779d57 100644 --- a/lib/tree_builder.py +++ b/lib/tree_builder.py @@ -38,4 +38,4 @@ class TreeBuilder: if rev2.commit_time > rev.commit_time: continue if rev2.files_hash == rev.files_hash: - print(" ", rev2) + print(" ", rev2, rev2.files_hash)