# Patch summary (descriptive preamble; the patch text itself is unchanged below).
#
# NOTE: in this copy the patch has lost its original line breaks and
# indentation, so statement nesting (for example, which statements of
# import_into_db sit inside the per-project loop) cannot be read from it.
#
# lib/db.py
#   * Adds schema step 19: "CREATE INDEX ON files(revision_id);" followed by
#     "UPDATE scheme SET version=19".
#
# lib/db_revision.py
#   * DBRevision.requests_to_fetch(db, project, package) becomes
#     requests_to_fetch(db): the "project=%s AND package=%s" filter and its
#     parameter tuple are removed, so the query now returns every non-NULL
#     revisions.request_number that has no matching row in requests.
#     NOTE(review): this signature change is not backward-compatible; the only
#     caller visible in this patch (Importer.import_into_db) is updated.
#
# lib/importer.py
#   * Removes Importer.calculate_file_hashes and its call in import_into_db.
#   * find_fake_revisions: the query now selects revisions whose id is a
#     linked_revs.linked_id with considered=FALSE (it used revision_id before),
#     and each row is passed to self._find_fake_revision(db, DBRevision(row))
#     instead of only constructing a DBRevision.
#     NOTE(review): the previous "id not in (SELECT revision_id FROM fake_revs)"
#     exclusion and the "ORDER by project,package,rev" clause are dropped;
#     confirm both removals are intended.
#   * The hunk at old line 211 (the query parameterised with prev.dbid and
#     rev.commit_time) only changes SQL keyword capitalisation.
#   * Adds revisions_without_files(db): returns a DBRevision for every
#     revisions row with broken=FALSE AND expanded_srcmd5 IS NULL.
#   * import_into_db: iterates self.revisions_without_files(db) instead of
#     DBRevision.all_revisions(db, project, self.package) with an in-Python
#     skip; passes rev.project / rev.package to self.obs.list; after
#     rev.import_dir_list it computes rev.calculate_files_hash(db) and writes it
#     with "UPDATE revisions SET files_hash=%s WHERE id=%s"; calls
#     DBRevision.requests_to_fetch(db) without project/package.
#     NOTE(review): the local variable named `list` shadows the builtin; a
#     follow-up rename (e.g. file_list) would be safer.
#
# lib/tree_builder.py
#   * The debug print of matching revisions now also prints rev2.files_hash.
#
diff --git a/lib/db.py b/lib/db.py index da9c837..23e54bd 100644 --- a/lib/db.py +++ b/lib/db.py @@ -206,6 +206,10 @@ class DB: "ALTER TABLE linked_revs ADD COLUMN considered BOOLEAN DEFAULT FALSE", "UPDATE scheme SET version=18", ) + schemes[19] = ( + "CREATE INDEX ON files(revision_id);", + "UPDATE scheme SET version=19", + ) schema_version = self.schema_version() if (schema_version + 1) not in schemes: return diff --git a/lib/db_revision.py b/lib/db_revision.py index 06c3c98..eaebae6 100644 --- a/lib/db_revision.py +++ b/lib/db_revision.py @@ -200,13 +200,12 @@ class DBRevision: return self._files @staticmethod - def requests_to_fetch(db, project, package): + def requests_to_fetch(db): with db.cursor() as cur: cur.execute( """SELECT request_number FROM revisions revs LEFT JOIN requests reqs ON reqs.number=revs.request_number WHERE reqs.id is null AND - revs.request_number IS NOT NULL and project=%s AND package=%s;""", - (project, package), + revs.request_number IS NOT NULL""", ) return [row[0] for row in cur.fetchall()] diff --git a/lib/importer.py b/lib/importer.py index 14c3a9f..249d5a2 100644 --- a/lib/importer.py +++ b/lib/importer.py @@ -167,18 +167,6 @@ class Importer: (rev.dbid, linked_rev.dbid), ) - def calculate_file_hashes(self, db): - with db.cursor() as cur: - cur.execute( - "SELECT * from revisions where files_hash IS NULL AND broken is FALSE" - ) - for row in cur.fetchall(): - rev = DBRevision(row) - md5 = rev.calculate_files_hash(db) - cur.execute( - "UPDATE revisions SET files_hash=%s WHERE id=%s", (md5, rev.dbid) - ) - def fetch_all_linked_packages(self, db, project, package): with db.cursor() as cur: cur.execute( @@ -193,12 +181,10 @@ class Importer: def find_fake_revisions(self, db): with db.cursor() as cur: cur.execute( - """SELECT * from revisions WHERE - id in (SELECT revision_id from linked_revs WHERE considered=FALSE) AND - id not in (SELECT revision_id FROM fake_revs) ORDER by project,package,rev""" + "SELECT * from revisions WHERE id in 
(SELECT linked_id from linked_revs WHERE considered=FALSE)" ) for row in cur.fetchall(): - DBRevision(row) + self._find_fake_revision(db, DBRevision(row)) def _find_fake_revision(self, db, rev): prev = rev.previous_commit(db) @@ -211,7 +197,7 @@ class Importer: return with db.cursor() as cur: cur.execute( - """SELECT * from revisions where id in + """SELECT * FROM revisions WHERE id IN (SELECT revision_id from linked_revs WHERE linked_id=%s) AND commit_time <= %s ORDER BY commit_time""", (prev.dbid, rev.commit_time), @@ -267,6 +253,13 @@ class Importer: (rev.dbid, linked.dbid), ) + def revisions_without_files(self, db): + with db.cursor() as cur: + cur.execute( + "SELECT * FROM revisions WHERE broken=FALSE AND expanded_srcmd5 IS NULL" + ) + return [DBRevision(row) for row in cur.fetchall()] + def import_into_db(self): db = DB() for project, _, api_url in self.projects: @@ -275,39 +268,41 @@ class Importer: self.fetch_all_linked_packages(db, project, self.package) # all remaining, no filtering here self.find_linked_revs(db) - self.find_fake_revisions(db) - missing_users = User.missing_users(db) - for userid in missing_users: - missing_user = self.obs.user(userid) - if missing_user: - missing_user.import_into_db(db) + missing_users = User.missing_users(db) + for userid in missing_users: + missing_user = self.obs.user(userid) + if missing_user: + missing_user.import_into_db(db) - for rev in DBRevision.all_revisions(db, project, self.package): - # TODO move into SELECT - if rev.broken or rev.expanded_srcmd5: - continue + self.find_fake_revisions(db) + for rev in self.revisions_without_files(db): + with db.cursor() as cur: + cur.execute( + """SELECT unexpanded_srcmd5 from revisions WHERE + id=(SELECT linked_id FROM linked_revs WHERE revision_id=%s)""", + (rev.dbid,), + ) + linked_rev = cur.fetchone() + if linked_rev: + linked_rev = linked_rev[0] + list = self.obs.list( + rev.project, rev.package, rev.unexpanded_srcmd5, linked_rev + ) + if list: + rev.import_dir_list(db, 
list) + md5 = rev.calculate_files_hash(db) with db.cursor() as cur: cur.execute( - """SELECT unexpanded_srcmd5 from revisions WHERE - id=(SELECT linked_id FROM linked_revs WHERE revision_id=%s)""", - (rev.dbid,), + "UPDATE revisions SET files_hash=%s WHERE id=%s", + (md5, rev.dbid), ) - linked_rev = cur.fetchone() - if linked_rev: - linked_rev = linked_rev[0] - list = self.obs.list( - project, self.package, rev.unexpanded_srcmd5, linked_rev - ) - if list: - rev.import_dir_list(db, list) - else: - rev.set_broken(db) + else: + rev.set_broken(db) - for number in DBRevision.requests_to_fetch(db, project, self.package): - self.obs.request(number).import_into_db(db) + for number in DBRevision.requests_to_fetch(db): + self.obs.request(number).import_into_db(db) - self.calculate_file_hashes(db) db.conn.commit() TreeBuilder(db).build(self.package) diff --git a/lib/tree_builder.py b/lib/tree_builder.py index cc2a2e8..9779d57 100644 --- a/lib/tree_builder.py +++ b/lib/tree_builder.py @@ -38,4 +38,4 @@ class TreeBuilder: if rev2.commit_time > rev.commit_time: continue if rev2.files_hash == rev.files_hash: - print(" ", rev2) + print(" ", rev2, rev2.files_hash)