diff --git a/lib/db_revision.py b/lib/db_revision.py index 8876e7a..11dfecb 100644 --- a/lib/db_revision.py +++ b/lib/db_revision.py @@ -22,6 +22,7 @@ class DBRevision: ) = row self.rev = int(self.rev) self._files = None + self._hash = None def __str__(self): return f"Rev {self.project}/{self.rev} Md5 {self.unexpanded_srcmd5} {self.commit_time} {self.userid} {self.request_number}" @@ -159,7 +160,9 @@ class DBRevision: ), ) - def fileshash(self, db): + def files_hash(self, db): + if self._hash: + return self._hash m = md5() for file_dict in self.files_list(db): m.update( @@ -171,7 +174,8 @@ class DBRevision: + str(file_dict["size"]) ).encode("utf-8") ) - return m.hexdigest() + self._hash = m.hexdigest() + return self._hash def files_list(self, db): if self._files: diff --git a/lib/importer.py b/lib/importer.py index 34c5f10..8bd08bf 100644 --- a/lib/importer.py +++ b/lib/importer.py @@ -9,10 +9,9 @@ from lib.history import History from lib.obs import OBS from lib.obs_revision import OBSRevision from lib.proxy_sha256 import ProxySHA256, md5, sha256 -from lib.request import Request import xml.etree.ElementTree as ET from lib.user import User - +from lib.tree_builder import TreeBuilder def _files_hash(hash_alg, dirpath): """List of (filepath, md5) for a directory""" @@ -185,6 +184,7 @@ class Importer: self.obs.request(number).import_into_db(db) db.conn.commit() + TreeBuilder(db).build(self.package) def import_all_revisions(self, gc): # Fetch all the requests and sort them. Ideally we should diff --git a/lib/tree_builder.py b/lib/tree_builder.py new file mode 100644 index 0000000..69d260d --- /dev/null +++ b/lib/tree_builder.py @@ -0,0 +1,38 @@ +from lib.db_revision import DBRevision +from lib.request import Request + + +class TreeBuilder: + def __init__(self, db): + self.db = db + + def filtered_revisions(self, project, package): + revisions = DBRevision.all_revisions(self.db, project, package) + revisions.sort() + ret = [] + prev = None + for rev in revisions: + if rev.broken: continue + if prev and prev.files_hash(self.db) == rev.files_hash(self.db): + continue + ret.append(rev) + prev = rev + return ret + + + def build(self, package): + factory_revisions = self.filtered_revisions("openSUSE:Factory", package) + source_revisions = dict() + for rev in factory_revisions: + print(rev, rev.files_hash(self.db)) + if rev.request_id: + req = Request.find(self.db, rev.request_id) + print(" ", req) + key = f"{req.source_project}/{req.source_package}" + if key not in source_revisions: + source_revisions[key] = self.filtered_revisions( + req.source_project, req.source_package + ) + for rev2 in source_revisions.get(key): + if rev2.files_hash(self.db) == rev.files_hash(self.db): + print(" ", rev2) diff --git a/tests/tree_test.py b/tests/tree_test.py index 401ec68..03cd73d 100644 --- a/tests/tree_test.py +++ b/tests/tree_test.py @@ -5,7 +5,7 @@ import yaml from lib.db import DB from lib.db_revision import DBRevision -from lib.request import Request +from lib.tree_builder import TreeBuilder class TestTreeMethods(unittest.TestCase): @@ -18,22 +18,7 @@ class TestTreeMethods(unittest.TestCase): DBRevision.import_fixture_dict(self.db, rev) def test_create_tree(self): - factory_revisions = DBRevision.all_revisions(self.db, "openSUSE:Factory", "zsh") - source_revisions = dict() - for rev in sorted(factory_revisions): - print(rev, rev.fileshash(self.db)) - if rev.request_id: - req = Request.find(self.db, rev.request_id) - print(" ", req) - key = f"{req.source_project}/{req.source_package}" - if key not in source_revisions: - source_revisions[key] = sorted( - DBRevision.all_revisions( - self.db, req.source_project, req.source_package - ) - ) - for rev2 in source_revisions.get(key): - print(rev2, rev2.fileshash(self.db)) + TreeBuilder(self.db).build("zsh") if __name__ == "__main__":