forked from importers/git-importer
		
	Create fake revisions for every commit in the base of a linked package
This is a rather complex operation, but whenever a package changes in Factory, the inherited package gets a shadow revision consisting of the 3-way merge. If this happens due to a request being accepted, it is in most cases also committed by 'buildservice-autocommit', so we make sure this always happens (and actually duplicate revisions in cases that we later filter out as empty commits). To differentiate the fake revisions from the real revisions, I add a fractional part to the revision number.
This commit is contained in:
		| @@ -1,3 +1,5 @@ | |||||||
| sudo zypper in python3-psycopg2 | sudo zypper in python3-psycopg2 | ||||||
| sudo su - postgres | sudo su - postgres | ||||||
| # `createdb -O <LOCAL_USER> imported_git` | # `createdb -O <LOCAL_USER> imported_git` | ||||||
|  |  | ||||||
|  | To reset the database, drop table scheme | ||||||
|   | |||||||
							
								
								
									
										34
									
								
								lib/db.py
									
									
									
									
									
								
							
							
						
						
									
										34
									
								
								lib/db.py
									
									
									
									
									
								
							| @@ -1,4 +1,7 @@ | |||||||
|  | import logging | ||||||
|  |  | ||||||
| import psycopg2 | import psycopg2 | ||||||
|  | from psycopg2.extras import LoggingConnection | ||||||
|  |  | ||||||
| from lib.config import config | from lib.config import config | ||||||
|  |  | ||||||
| @@ -14,7 +17,9 @@ class DB: | |||||||
|             # read the connection parameters |             # read the connection parameters | ||||||
|             params = config(section=self.config_section) |             params = config(section=self.config_section) | ||||||
|             # connect to the PostgreSQL server |             # connect to the PostgreSQL server | ||||||
|             self.conn = psycopg2.connect(**params) |             self.conn = psycopg2.connect(connection_factory=LoggingConnection, **params) | ||||||
|  |             logger = logging.getLogger(__name__) | ||||||
|  |             self.conn.initialize(logger) | ||||||
|  |  | ||||||
|         except (Exception, psycopg2.DatabaseError) as error: |         except (Exception, psycopg2.DatabaseError) as error: | ||||||
|             print(error) |             print(error) | ||||||
| @@ -96,6 +101,7 @@ class DB: | |||||||
|             "UPDATE scheme SET version=4", |             "UPDATE scheme SET version=4", | ||||||
|         ) |         ) | ||||||
|         schemes[5] = ( |         schemes[5] = ( | ||||||
|  |             """DROP TABLE IF EXISTS files""", | ||||||
|             """ |             """ | ||||||
|             CREATE TABLE files ( |             CREATE TABLE files ( | ||||||
|                 id              SERIAL PRIMARY KEY, |                 id              SERIAL PRIMARY KEY, | ||||||
| @@ -109,6 +115,7 @@ class DB: | |||||||
|             "UPDATE scheme SET version=5", |             "UPDATE scheme SET version=5", | ||||||
|         ) |         ) | ||||||
|         schemes[6] = ( |         schemes[6] = ( | ||||||
|  |             """DROP TABLE IF EXISTS requests""", | ||||||
|             """ |             """ | ||||||
|             CREATE TABLE requests ( |             CREATE TABLE requests ( | ||||||
|                 id              SERIAL PRIMARY KEY, |                 id              SERIAL PRIMARY KEY, | ||||||
| @@ -154,6 +161,7 @@ class DB: | |||||||
|             "UPDATE scheme SET version=12", |             "UPDATE scheme SET version=12", | ||||||
|         ) |         ) | ||||||
|         schemes[13] = ( |         schemes[13] = ( | ||||||
|  |             """DROP TABLE IF EXISTS linked_revs""", | ||||||
|             """ |             """ | ||||||
|             CREATE TABLE users ( |             CREATE TABLE users ( | ||||||
|                 id              SERIAL PRIMARY KEY, |                 id              SERIAL PRIMARY KEY, | ||||||
| @@ -174,6 +182,30 @@ class DB: | |||||||
|             """, |             """, | ||||||
|             "UPDATE scheme SET version=14", |             "UPDATE scheme SET version=14", | ||||||
|         ) |         ) | ||||||
|  |         schemes[14] = ( | ||||||
|  |             "ALTER TABLE revisions ALTER COLUMN rev TYPE real USING rev::real", | ||||||
|  |             "UPDATE scheme SET version=14", | ||||||
|  |         ) | ||||||
|  |         schemes[15] = ( | ||||||
|  |             """DROP TABLE IF EXISTS fake_revs""", | ||||||
|  |             """ | ||||||
|  |             CREATE TABLE fake_revs ( | ||||||
|  |                 id              SERIAL PRIMARY KEY, | ||||||
|  |                 revision_id     INTEGER NOT NULL, | ||||||
|  |                 linked_id       INTEGER NOT NULL | ||||||
|  |             ) | ||||||
|  |             """, | ||||||
|  |             "create index revs_linked on fake_revs (revision_id,linked_id)", | ||||||
|  |             "UPDATE scheme SET version=15", | ||||||
|  |         ) | ||||||
|  |         schemes[16] = ( | ||||||
|  |             "ALTER TABLE revisions ADD COLUMN files_hash VARCHAR(40)", | ||||||
|  |             "UPDATE scheme SET version=16", | ||||||
|  |         ) | ||||||
|  |         schemes[17] = ( | ||||||
|  |             "ALTER TABLE linked_revs ADD COLUMN considered BOOLEAN DEFAULT FALSE", | ||||||
|  |             "UPDATE scheme SET version=17", | ||||||
|  |         ) | ||||||
|         schema_version = self.schema_version() |         schema_version = self.schema_version() | ||||||
|         if (schema_version + 1) not in schemes: |         if (schema_version + 1) not in schemes: | ||||||
|             return |             return | ||||||
|   | |||||||
| @@ -19,10 +19,10 @@ class DBRevision: | |||||||
|             self.expanded_srcmd5, |             self.expanded_srcmd5, | ||||||
|             self.request_number, |             self.request_number, | ||||||
|             self.request_id, |             self.request_id, | ||||||
|  |             self.files_hash, | ||||||
|         ) = row |         ) = row | ||||||
|         self.rev = int(self.rev) |         self.rev = float(self.rev) | ||||||
|         self._files = None |         self._files = None | ||||||
|         self._hash = None |  | ||||||
|  |  | ||||||
|     def __str__(self): |     def __str__(self): | ||||||
|         return f"Rev {self.project}/{self.package}/{self.rev} Md5 {self.unexpanded_srcmd5} {self.commit_time} {self.userid} {self.request_number}" |         return f"Rev {self.project}/{self.package}/{self.rev} Md5 {self.unexpanded_srcmd5} {self.commit_time} {self.userid} {self.request_number}" | ||||||
| @@ -52,6 +52,7 @@ class DBRevision: | |||||||
|             "comment": self.comment, |             "comment": self.comment, | ||||||
|             "broken": self.broken, |             "broken": self.broken, | ||||||
|             "expanded_srcmd5": self.expanded_srcmd5, |             "expanded_srcmd5": self.expanded_srcmd5, | ||||||
|  |             "files_hash": self.files_hash, | ||||||
|             "files": self.files_list(db), |             "files": self.files_list(db), | ||||||
|         } |         } | ||||||
|         if self.request_id: |         if self.request_id: | ||||||
| @@ -92,6 +93,7 @@ class DBRevision: | |||||||
|                 (project, package, str(rev)), |                 (project, package, str(rev)), | ||||||
|             ) |             ) | ||||||
|             row = cur.fetchone() |             row = cur.fetchone() | ||||||
|  |         if row: | ||||||
|             return DBRevision(row) |             return DBRevision(row) | ||||||
|  |  | ||||||
|     @staticmethod |     @staticmethod | ||||||
| @@ -103,7 +105,7 @@ class DBRevision: | |||||||
|             ) |             ) | ||||||
|             max = cur.fetchone()[0] |             max = cur.fetchone()[0] | ||||||
|         if max: |         if max: | ||||||
|             return DBRevision.fetch_revision(db, project, package, int(max)) |             return DBRevision.fetch_revision(db, project, package, max) | ||||||
|         return None |         return None | ||||||
|  |  | ||||||
|     @staticmethod |     @staticmethod | ||||||
| @@ -144,7 +146,6 @@ class DBRevision: | |||||||
|         with db.cursor() as cur: |         with db.cursor() as cur: | ||||||
|             cur.execute("UPDATE revisions SET broken=TRUE where id=%s", (self.dbid,)) |             cur.execute("UPDATE revisions SET broken=TRUE where id=%s", (self.dbid,)) | ||||||
|  |  | ||||||
|  |  | ||||||
|     def import_dir_list(self, db, xml): |     def import_dir_list(self, db, xml): | ||||||
|         with db.cursor() as cur: |         with db.cursor() as cur: | ||||||
|             cur.execute( |             cur.execute( | ||||||
| @@ -164,9 +165,13 @@ class DBRevision: | |||||||
|                     ), |                     ), | ||||||
|                 ) |                 ) | ||||||
|  |  | ||||||
|     def files_hash(self, db): |     def previous_commit(self, db): | ||||||
|         if self._hash: |         return self.fetch_revision(db, self.project, self.package, int(self.rev) - 1) | ||||||
|             return self._hash |  | ||||||
|  |     def next_commit(self, db): | ||||||
|  |         return self.fetch_revision(db, self.project, self.package, int(self.rev) + 1) | ||||||
|  |  | ||||||
|  |     def calculate_files_hash(self, db): | ||||||
|         m = md5() |         m = md5() | ||||||
|         for file_dict in self.files_list(db): |         for file_dict in self.files_list(db): | ||||||
|             m.update( |             m.update( | ||||||
| @@ -178,8 +183,7 @@ class DBRevision: | |||||||
|                     + str(file_dict["size"]) |                     + str(file_dict["size"]) | ||||||
|                 ).encode("utf-8") |                 ).encode("utf-8") | ||||||
|             ) |             ) | ||||||
|         self._hash = m.hexdigest() |         return m.hexdigest() | ||||||
|         return self._hash |  | ||||||
|  |  | ||||||
|     def files_list(self, db): |     def files_list(self, db): | ||||||
|         if self._files: |         if self._files: | ||||||
| @@ -194,7 +198,7 @@ class DBRevision: | |||||||
|         return self._files |         return self._files | ||||||
|  |  | ||||||
|     @staticmethod |     @staticmethod | ||||||
|     def requests_to_fetch(self, db, project, package): |     def requests_to_fetch(db, project, package): | ||||||
|         with db.cursor() as cur: |         with db.cursor() as cur: | ||||||
|             cur.execute( |             cur.execute( | ||||||
|                 """SELECT request_number FROM revisions revs LEFT JOIN requests |                 """SELECT request_number FROM revisions revs LEFT JOIN requests | ||||||
| @@ -209,8 +213,9 @@ class DBRevision: | |||||||
|         """Used in test cases to read a revision from fixtures into the test database""" |         """Used in test cases to read a revision from fixtures into the test database""" | ||||||
|         with db.cursor() as cur: |         with db.cursor() as cur: | ||||||
|             cur.execute( |             cur.execute( | ||||||
|                 """INSERT INTO revisions (project, package, rev, unexpanded_srcmd5, expanded_srcmd5, commit_time, userid, comment, broken)   |                 """INSERT INTO revisions (project, package, rev, unexpanded_srcmd5, expanded_srcmd5,  | ||||||
|                         VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s) RETURNING id""", |                                         commit_time, userid, comment, broken, files_hash)   | ||||||
|  |                         VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s) RETURNING id""", | ||||||
|                 ( |                 ( | ||||||
|                     rev_dict["project"], |                     rev_dict["project"], | ||||||
|                     rev_dict["package"], |                     rev_dict["package"], | ||||||
| @@ -221,6 +226,7 @@ class DBRevision: | |||||||
|                     rev_dict["userid"], |                     rev_dict["userid"], | ||||||
|                     rev_dict["comment"], |                     rev_dict["comment"], | ||||||
|                     rev_dict["broken"], |                     rev_dict["broken"], | ||||||
|  |                     rev_dict["files_hash"], | ||||||
|                 ), |                 ), | ||||||
|             ) |             ) | ||||||
|             rev_id = cur.fetchone()[0] |             rev_id = cur.fetchone()[0] | ||||||
|   | |||||||
| @@ -13,7 +13,10 @@ class Exporter: | |||||||
|     def run(self): |     def run(self): | ||||||
|         db = DB() |         db = DB() | ||||||
|         cur = db.cursor() |         cur = db.cursor() | ||||||
|         cur.execute("SELECT * from revisions where package=%s", (self.package,)) |         cur.execute( | ||||||
|  |             "SELECT * from revisions where package=%s ORDER BY project,rev", | ||||||
|  |             (self.package,), | ||||||
|  |         ) | ||||||
|         data = {"revisions": []} |         data = {"revisions": []} | ||||||
|         for row in cur.fetchall(): |         for row in cur.fetchall(): | ||||||
|             data["revisions"].append(DBRevision(row).as_dict(db)) |             data["revisions"].append(DBRevision(row).as_dict(db)) | ||||||
|   | |||||||
| @@ -2,6 +2,8 @@ import functools | |||||||
| import logging | import logging | ||||||
| import xml.etree.ElementTree as ET | import xml.etree.ElementTree as ET | ||||||
|  |  | ||||||
|  | import psycopg2 | ||||||
|  |  | ||||||
| from lib.binary import is_binary_or_large | from lib.binary import is_binary_or_large | ||||||
| from lib.db import DB | from lib.db import DB | ||||||
| from lib.db_revision import DBRevision | from lib.db_revision import DBRevision | ||||||
| @@ -165,6 +167,18 @@ class Importer: | |||||||
|                 (rev.dbid, linked_rev.dbid), |                 (rev.dbid, linked_rev.dbid), | ||||||
|             ) |             ) | ||||||
|  |  | ||||||
|  |     def calculate_file_hashes(self, db): | ||||||
|  |         cur = db.cursor() | ||||||
|  |         cur.execute( | ||||||
|  |             "SELECT * from revisions where files_hash IS NULL AND broken is FALSE" | ||||||
|  |         ) | ||||||
|  |         for row in cur.fetchall(): | ||||||
|  |             rev = DBRevision(row) | ||||||
|  |             md5 = rev.calculate_files_hash(db) | ||||||
|  |             cur.execute( | ||||||
|  |                 "UPDATE revisions SET files_hash=%s WHERE id=%s", (md5, rev.dbid) | ||||||
|  |             ) | ||||||
|  |  | ||||||
|     def fetch_all_linked_packages(self, db, project, package): |     def fetch_all_linked_packages(self, db, project, package): | ||||||
|         cur = db.cursor() |         cur = db.cursor() | ||||||
|         cur.execute( |         cur.execute( | ||||||
| @@ -176,19 +190,86 @@ class Importer: | |||||||
|             (lproject, lpackage) = row |             (lproject, lpackage) = row | ||||||
|             self.update_db_package(db, lproject, lpackage) |             self.update_db_package(db, lproject, lpackage) | ||||||
|  |  | ||||||
|  |     def find_fake_revisions(self, db): | ||||||
|  |         cur = db.cursor() | ||||||
|  |         cur.execute( | ||||||
|  |             """SELECT * from revisions WHERE | ||||||
|  |                         id in (SELECT revision_id from linked_revs WHERE considered=FALSE) AND | ||||||
|  |                         id not in (SELECT revision_id FROM fake_revs) ORDER by project,package,rev""" | ||||||
|  |         ) | ||||||
|  |         for row in cur.fetchall(): | ||||||
|  |             rev = DBRevision(row) | ||||||
|  |             prev = rev.previous_commit(db) | ||||||
|  |             if not prev: | ||||||
|  |                 cur.execute( | ||||||
|  |                     "UPDATE linked_revs SET considered=TRUE where revision_id=%s", | ||||||
|  |                     (rev.dbid,), | ||||||
|  |                 ) | ||||||
|  |                 continue | ||||||
|  |             cur2 = db.cursor() | ||||||
|  |             cur2.execute( | ||||||
|  |                 """SELECT * from revisions where id in  | ||||||
|  |                             (SELECT revision_id from linked_revs WHERE linked_id=%s)  | ||||||
|  |                             AND commit_time <= %s ORDER BY commit_time""", | ||||||
|  |                 (prev.dbid, rev.commit_time), | ||||||
|  |             ) | ||||||
|  |             last_linked = None | ||||||
|  |             for linked in cur2.fetchall(): | ||||||
|  |                 linked = DBRevision(linked) | ||||||
|  |                 nextrev = linked.next_commit(db) | ||||||
|  |                 if nextrev and nextrev.commit_time < rev.commit_time: | ||||||
|  |                     continue | ||||||
|  |                 last_linked = linked | ||||||
|  |             cur.execute( | ||||||
|  |                 "UPDATE linked_revs SET considered=TRUE where revision_id=%s", | ||||||
|  |                 (rev.dbid,), | ||||||
|  |             ) | ||||||
|  |             if last_linked: | ||||||
|  |                 linked = last_linked | ||||||
|  |                 cur2.execute( | ||||||
|  |                     "SELECT 1 FROM fake_revs where revision_id=%s AND linked_id=%s", | ||||||
|  |                     (rev.dbid, linked.dbid), | ||||||
|  |                 ) | ||||||
|  |                 if cur2.fetchone(): | ||||||
|  |                     cur.execute( | ||||||
|  |                         "UPDATE linked_revs SET considered=TRUE where revision_id=%s", | ||||||
|  |                         (rev.dbid,), | ||||||
|  |                     ) | ||||||
|  |                     continue | ||||||
|  |                 fake_rev = linked.rev + rev.rev / 1000.0 | ||||||
|  |                 comment = f"Updating link to change in {rev.project}/{rev.package} revision {rev.rev}" | ||||||
|  |                 cur2.execute( | ||||||
|  |                     """INSERT INTO revisions (project,package,rev,unexpanded_srcmd5,  | ||||||
|  |                                 commit_time, userid, comment) VALUES(%s,%s,%s,%s,%s,%s,%s) RETURNING id""", | ||||||
|  |                     ( | ||||||
|  |                         linked.project, | ||||||
|  |                         linked.package, | ||||||
|  |                         fake_rev, | ||||||
|  |                         linked.unexpanded_srcmd5, | ||||||
|  |                         rev.commit_time, | ||||||
|  |                         "buildservice-autocommit", | ||||||
|  |                         comment, | ||||||
|  |                     ), | ||||||
|  |                 ) | ||||||
|  |                 new_id = cur2.fetchone()[0] | ||||||
|  |                 cur2.execute( | ||||||
|  |                     """INSERT INTO linked_revs (revision_id, linked_id) VALUES (%s,%s)""", | ||||||
|  |                     (new_id, rev.dbid), | ||||||
|  |                 ) | ||||||
|  |                 cur2.execute( | ||||||
|  |                     """INSERT INTO fake_revs (revision_id, linked_id) VALUES (%s,%s)""", | ||||||
|  |                     (rev.dbid, linked.dbid), | ||||||
|  |                 ) | ||||||
|  |  | ||||||
|     def import_into_db(self): |     def import_into_db(self): | ||||||
|         db = DB() |         db = DB() | ||||||
|         for project, _, api_url in self.projects: |         for project, _, api_url in self.projects: | ||||||
|             self.obs.change_url(api_url) |             self.obs.change_url(api_url) | ||||||
|             self.update_db_package(db, project, self.package) |             self.update_db_package(db, project, self.package) | ||||||
|             with db.cursor() as cur: |             self.fetch_all_linked_packages(db, project, self.package) | ||||||
|                 cur.execute( |             # all remaining, no filtering here | ||||||
|                     "SELECT DISTINCT l.project, l.package from links l join revisions r on r.id=l.revision_id WHERE r.project=%s AND r.package=%s", |             self.find_linked_revs(db) | ||||||
|                     (project, self.package), |             self.find_fake_revisions(db) | ||||||
|                 ) |  | ||||||
|                 for row in cur.fetchall(): |  | ||||||
|                     (lproject, lpackage) = row |  | ||||||
|                     self.update_db_package(db, lproject, lpackage) |  | ||||||
|  |  | ||||||
|             missing_users = User.missing_users(db) |             missing_users = User.missing_users(db) | ||||||
|             for userid in missing_users: |             for userid in missing_users: | ||||||
| @@ -203,7 +284,7 @@ class Importer: | |||||||
|                 with db.cursor() as cur: |                 with db.cursor() as cur: | ||||||
|                     cur.execute( |                     cur.execute( | ||||||
|                         """SELECT unexpanded_srcmd5 from revisions WHERE |                         """SELECT unexpanded_srcmd5 from revisions WHERE | ||||||
|                                 id=(SELECT linked_id FROM linked_revs WHERE revision_id=%s""", |                                 id=(SELECT linked_id FROM linked_revs WHERE revision_id=%s)""", | ||||||
|                         (rev.dbid,), |                         (rev.dbid,), | ||||||
|                     ) |                     ) | ||||||
|                     linked_rev = cur.fetchone() |                     linked_rev = cur.fetchone() | ||||||
|   | |||||||
| @@ -56,7 +56,7 @@ class OBSRevision: | |||||||
|         return self |         return self | ||||||
|  |  | ||||||
|     def __str__(self): |     def __str__(self): | ||||||
|         return f"Rev {self.project}/{self.rev} Md5 {self.srcmd5} {self.time} {self.userid} {self.request_number}" |         return f"Rev {self.project}/{self.package}/{self.rev}.0 Md5 {self.srcmd5} {self.time} {self.userid} {self.request_number}" | ||||||
|  |  | ||||||
|     def __repr__(self): |     def __repr__(self): | ||||||
|         return f"[{self.__str__()}]" |         return f"[{self.__str__()}]" | ||||||
|   | |||||||
| @@ -12,10 +12,10 @@ class TreeBuilder: | |||||||
|         ret = [] |         ret = [] | ||||||
|         prev = None |         prev = None | ||||||
|         for rev in revisions: |         for rev in revisions: | ||||||
|             print(rev, rev.files_hash(self.db)) |             print(rev, rev.files_hash) | ||||||
|             if rev.broken: |             if rev.broken: | ||||||
|                 continue |                 continue | ||||||
|             if prev and prev.files_hash(self.db) == rev.files_hash(self.db): |             if prev and prev.files_hash == rev.files_hash: | ||||||
|                 continue |                 continue | ||||||
|             ret.append(rev) |             ret.append(rev) | ||||||
|             prev = rev |             prev = rev | ||||||
| @@ -25,7 +25,7 @@ class TreeBuilder: | |||||||
|         factory_revisions = self.filtered_revisions("openSUSE:Factory", package) |         factory_revisions = self.filtered_revisions("openSUSE:Factory", package) | ||||||
|         source_revisions = dict() |         source_revisions = dict() | ||||||
|         for rev in factory_revisions: |         for rev in factory_revisions: | ||||||
|             print(rev, rev.files_hash(self.db)) |             print(rev, rev.files_hash) | ||||||
|             if rev.request_id: |             if rev.request_id: | ||||||
|                 req = Request.find(self.db, rev.request_id) |                 req = Request.find(self.db, rev.request_id) | ||||||
|                 print("  ", req) |                 print("  ", req) | ||||||
| @@ -35,5 +35,5 @@ class TreeBuilder: | |||||||
|                         req.source_project, req.source_package |                         req.source_project, req.source_package | ||||||
|                     ) |                     ) | ||||||
|                 for rev2 in source_revisions.get(key): |                 for rev2 in source_revisions.get(key): | ||||||
|                     if rev2.files_hash(self.db) == rev.files_hash(self.db): |                     if rev2.files_hash == rev.files_hash: | ||||||
|                         print("    ", rev2) |                         print("    ", rev2) | ||||||
|   | |||||||
							
								
								
									
										43125
									
								
								tests/fixtures/zsh-data.yaml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										43125
									
								
								tests/fixtures/zsh-data.yaml
									
									
									
									
										vendored
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Reference in New Issue
	
	Block a user