forked from importers/git-importer
		
	Refresh the packages in multiple threads
This commit is contained in:
		| @@ -111,13 +111,18 @@ class DBRevision: | |||||||
|             return DBRevision(db, row) |             return DBRevision(db, row) | ||||||
|  |  | ||||||
|     @staticmethod |     @staticmethod | ||||||
|     def latest_revision(db, project, package): |     def max_rev(db, project, package): | ||||||
|         with db.cursor() as cur: |         with db.cursor() as cur: | ||||||
|             cur.execute( |             cur.execute( | ||||||
|                 "SELECT MAX(rev) FROM revisions where project=%s and package=%s", |                 "SELECT MAX(rev) FROM revisions where project=%s and package=%s", | ||||||
|                 (project, package), |                 (project, package), | ||||||
|             ) |             ) | ||||||
|             max = cur.fetchone()[0] |             return cur.fetchone()[0] | ||||||
|  |         return None | ||||||
|  |  | ||||||
|  |     @staticmethod | ||||||
|  |     def latest_revision(db, project, package): | ||||||
|  |         max = DBRevision.max_rev(db, project, package) | ||||||
|         if max: |         if max: | ||||||
|             return DBRevision.fetch_revision(db, project, package, max) |             return DBRevision.fetch_revision(db, project, package, max) | ||||||
|         return None |         return None | ||||||
|   | |||||||
| @@ -60,6 +60,7 @@ class GitExporter: | |||||||
|  |  | ||||||
|         gc_cnt = self.gc_interval |         gc_cnt = self.gc_interval | ||||||
|         if len(left_to_commit) > 0: |         if len(left_to_commit) > 0: | ||||||
|  |             logging.info(f"Commiting into {self.git.path}") | ||||||
|             self.git.gc() |             self.git.gc() | ||||||
|         for flat in left_to_commit: |         for flat in left_to_commit: | ||||||
|             gc_cnt -= 1 |             gc_cnt -= 1 | ||||||
|   | |||||||
							
								
								
									
										130
									
								
								lib/importer.py
									
									
									
									
									
								
							
							
						
						
									
										130
									
								
								lib/importer.py
									
									
									
									
									
								
							| @@ -1,3 +1,4 @@ | |||||||
|  | import concurrent.futures | ||||||
| import logging | import logging | ||||||
| import xml.etree.ElementTree as ET | import xml.etree.ElementTree as ET | ||||||
|  |  | ||||||
| @@ -8,26 +9,42 @@ from lib.obs_revision import OBSRevision | |||||||
| from lib.user import User | from lib.user import User | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def refresh_package(importer, project, package): | ||||||
|  |     importer.refresh_package(project, package) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def import_request(importer, number): | ||||||
|  |     importer.import_request(number) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def import_rev(importer, rev): | ||||||
|  |     importer.import_rev(rev) | ||||||
|  |  | ||||||
|  |  | ||||||
| class Importer: | class Importer: | ||||||
|     def __init__(self, api_url, project, packages): |     def __init__(self, api_url, project, packages): | ||||||
|         # Import multiple Factory packages into the database |         # Import multiple Factory packages into the database | ||||||
|         self.packages = packages |         self.packages = packages | ||||||
|         self.project = project |         self.project = project | ||||||
|  |  | ||||||
|  |         self.db = DB() | ||||||
|         self.obs = OBS() |         self.obs = OBS() | ||||||
|         assert project == "openSUSE:Factory" |         assert project == "openSUSE:Factory" | ||||||
|         self.obs.change_url(api_url) |         self.obs.change_url(api_url) | ||||||
|         self.refreshed_packages = set() |         self.refreshed_packages = set() | ||||||
|  |  | ||||||
|     def update_db_package(self, db, project, package): |     def import_request(self, number): | ||||||
|  |         self.obs.request(number).import_into_db(self.db) | ||||||
|  |  | ||||||
|  |     def update_db_package(self, project, package): | ||||||
|         root = self.obs._history(project, package) |         root = self.obs._history(project, package) | ||||||
|         if root is None: |         if root is None: | ||||||
|             return |             return | ||||||
|         latest = DBRevision.latest_revision(db, project, package) |         latest = DBRevision.max_rev(self.db, project, package) | ||||||
|         for r in root.findall("revision"): |         for r in root.findall("revision"): | ||||||
|             rev = OBSRevision(self.obs, project, package).parse(r) |             rev = OBSRevision(self.obs, project, package).parse(r) | ||||||
|             if not latest or rev.rev > latest.rev: |             if not latest or rev.rev > latest: | ||||||
|                 dbrev = DBRevision.import_obs_rev(db, rev) |                 dbrev = DBRevision.import_obs_rev(self.db, rev) | ||||||
|                 try: |                 try: | ||||||
|                     root = rev.read_link() |                     root = rev.read_link() | ||||||
|                 except ET.ParseError: |                 except ET.ParseError: | ||||||
| @@ -38,15 +55,15 @@ class Importer: | |||||||
|                     tpkg = root.get("package") or package |                     tpkg = root.get("package") or package | ||||||
|                     dbrev.links_to(tprj, tpkg) |                     dbrev.links_to(tprj, tpkg) | ||||||
|  |  | ||||||
|     def find_linked_revs(self, db): |     def find_linked_revs(self): | ||||||
|         with db.cursor() as cur: |         with self.db.cursor() as cur: | ||||||
|             cur.execute( |             cur.execute( | ||||||
|                 """SELECT * from revisions WHERE id in (SELECT l.revision_id FROM links l |                 """SELECT * from revisions WHERE id in (SELECT l.revision_id FROM links l | ||||||
|                             LEFT JOIN linked_revs lrevs ON lrevs.revision_id=l.revision_id |                             LEFT JOIN linked_revs lrevs ON lrevs.revision_id=l.revision_id | ||||||
|                             WHERE lrevs.id IS NULL) and broken is FALSE;""" |                             WHERE lrevs.id IS NULL) and broken is FALSE;""" | ||||||
|             ) |             ) | ||||||
|             for row in cur.fetchall(): |             for row in cur.fetchall(): | ||||||
|                 rev = DBRevision(db, row) |                 rev = DBRevision(self.db, row) | ||||||
|                 linked_rev = rev.linked_rev() |                 linked_rev = rev.linked_rev() | ||||||
|                 if not linked_rev: |                 if not linked_rev: | ||||||
|                     logging.debug(f"No link {rev}") |                     logging.debug(f"No link {rev}") | ||||||
| @@ -57,8 +74,8 @@ class Importer: | |||||||
|                     (rev.dbid, linked_rev.dbid), |                     (rev.dbid, linked_rev.dbid), | ||||||
|                 ) |                 ) | ||||||
|  |  | ||||||
|     def fetch_all_linked_packages(self, db, project, package): |     def fetch_all_linked_packages(self, project, package): | ||||||
|         with db.cursor() as cur: |         with self.db.cursor() as cur: | ||||||
|             cur.execute( |             cur.execute( | ||||||
|                 """SELECT DISTINCT l.project, l.package from links l JOIN revisions r |                 """SELECT DISTINCT l.project, l.package from links l JOIN revisions r | ||||||
|                     on r.id=l.revision_id WHERE r.project=%s AND r.package=%s""", |                     on r.id=l.revision_id WHERE r.project=%s AND r.package=%s""", | ||||||
| @@ -67,26 +84,26 @@ class Importer: | |||||||
|             for row in cur.fetchall(): |             for row in cur.fetchall(): | ||||||
|                 (lproject, lpackage) = row |                 (lproject, lpackage) = row | ||||||
|                 # recurse |                 # recurse | ||||||
|                 self.refresh_package(db, lproject, lpackage) |                 self.refresh_package(lproject, lpackage) | ||||||
|  |  | ||||||
|     def find_fake_revisions(self, db): |     def find_fake_revisions(self): | ||||||
|         with db.cursor() as cur: |         with self.db.cursor() as cur: | ||||||
|             cur.execute( |             cur.execute( | ||||||
|                 "SELECT * from revisions WHERE id in (SELECT linked_id from linked_revs WHERE considered=FALSE)" |                 "SELECT * from revisions WHERE id in (SELECT linked_id from linked_revs WHERE considered=FALSE)" | ||||||
|             ) |             ) | ||||||
|             for row in cur.fetchall(): |             for row in cur.fetchall(): | ||||||
|                 self._find_fake_revision(db, DBRevision(db, row)) |                 self._find_fake_revision(DBRevision(self.db, row)) | ||||||
|  |  | ||||||
|     def _find_fake_revision(self, db, rev): |     def _find_fake_revision(self, rev): | ||||||
|         prev = rev.previous_commit() |         prev = rev.previous_commit() | ||||||
|         if not prev: |         if not prev: | ||||||
|             with db.cursor() as cur: |             with self.db.cursor() as cur: | ||||||
|                 cur.execute( |                 cur.execute( | ||||||
|                     "UPDATE linked_revs SET considered=TRUE where linked_id=%s", |                     "UPDATE linked_revs SET considered=TRUE where linked_id=%s", | ||||||
|                     (rev.dbid,), |                     (rev.dbid,), | ||||||
|                 ) |                 ) | ||||||
|             return |             return | ||||||
|         with db.cursor() as cur: |         with self.db.cursor() as cur: | ||||||
|             cur.execute( |             cur.execute( | ||||||
|                 """SELECT * FROM revisions WHERE id IN |                 """SELECT * FROM revisions WHERE id IN | ||||||
|                             (SELECT revision_id from linked_revs WHERE linked_id=%s)  |                             (SELECT revision_id from linked_revs WHERE linked_id=%s)  | ||||||
| @@ -95,7 +112,7 @@ class Importer: | |||||||
|             ) |             ) | ||||||
|             last_linked = None |             last_linked = None | ||||||
|             for linked in cur.fetchall(): |             for linked in cur.fetchall(): | ||||||
|                 linked = DBRevision(db, linked) |                 linked = DBRevision(self.db, linked) | ||||||
|                 nextrev = linked.next_commit() |                 nextrev = linked.next_commit() | ||||||
|                 if nextrev and nextrev.commit_time < rev.commit_time: |                 if nextrev and nextrev.commit_time < rev.commit_time: | ||||||
|                     continue |                     continue | ||||||
| @@ -107,7 +124,7 @@ class Importer: | |||||||
|         if not last_linked: |         if not last_linked: | ||||||
|             return |             return | ||||||
|  |  | ||||||
|         with db.cursor() as cur: |         with self.db.cursor() as cur: | ||||||
|             linked = last_linked |             linked = last_linked | ||||||
|             cur.execute( |             cur.execute( | ||||||
|                 "SELECT 1 FROM fake_revs where revision_id=%s AND linked_id=%s", |                 "SELECT 1 FROM fake_revs where revision_id=%s AND linked_id=%s", | ||||||
| @@ -144,19 +161,15 @@ class Importer: | |||||||
|                 (rev.dbid, linked.dbid), |                 (rev.dbid, linked.dbid), | ||||||
|             ) |             ) | ||||||
|  |  | ||||||
|     def revisions_without_files(self, db): |     def revisions_without_files(self): | ||||||
|         with db.cursor() as cur: |         with self.db.cursor() as cur: | ||||||
|             cur.execute( |             cur.execute( | ||||||
|                 "SELECT * FROM revisions WHERE broken=FALSE AND expanded_srcmd5 IS NULL" |                 "SELECT * FROM revisions WHERE broken=FALSE AND expanded_srcmd5 IS NULL" | ||||||
|             ) |             ) | ||||||
|             return [DBRevision(db, row) for row in cur.fetchall()] |             return [DBRevision(self.db, row) for row in cur.fetchall()] | ||||||
|  |  | ||||||
|     def fill_file_lists(self, db): |     def import_rev(self, rev): | ||||||
|         self.find_linked_revs(db) |         with self.db.cursor() as cur: | ||||||
|  |  | ||||||
|         self.find_fake_revisions(db) |  | ||||||
|         for rev in self.revisions_without_files(db): |  | ||||||
|             with db.cursor() as cur: |  | ||||||
|             cur.execute( |             cur.execute( | ||||||
|                 """SELECT unexpanded_srcmd5 from revisions WHERE |                 """SELECT unexpanded_srcmd5 from revisions WHERE | ||||||
|                         id=(SELECT linked_id FROM linked_revs WHERE revision_id=%s)""", |                         id=(SELECT linked_id FROM linked_revs WHERE revision_id=%s)""", | ||||||
| @@ -171,7 +184,7 @@ class Importer: | |||||||
|         if list: |         if list: | ||||||
|             rev.import_dir_list(list) |             rev.import_dir_list(list) | ||||||
|             md5 = rev.calculate_files_hash() |             md5 = rev.calculate_files_hash() | ||||||
|                 with db.cursor() as cur: |             with self.db.cursor() as cur: | ||||||
|                 cur.execute( |                 cur.execute( | ||||||
|                     "UPDATE revisions SET files_hash=%s WHERE id=%s", |                     "UPDATE revisions SET files_hash=%s WHERE id=%s", | ||||||
|                     (md5, rev.dbid), |                     (md5, rev.dbid), | ||||||
| @@ -179,42 +192,65 @@ class Importer: | |||||||
|         else: |         else: | ||||||
|             rev.set_broken() |             rev.set_broken() | ||||||
|  |  | ||||||
|     def refresh_package(self, db, project, package): |     def fill_file_lists(self): | ||||||
|  |         self.find_linked_revs() | ||||||
|  |  | ||||||
|  |         self.find_fake_revisions() | ||||||
|  |         with concurrent.futures.ThreadPoolExecutor() as executor: | ||||||
|  |             fs = [ | ||||||
|  |                 executor.submit(import_rev, self, rev) | ||||||
|  |                 for rev in self.revisions_without_files() | ||||||
|  |             ] | ||||||
|  |         concurrent.futures.wait(fs) | ||||||
|  |  | ||||||
|  |     def refresh_package(self, project, package): | ||||||
|         key = f"{project}/{package}" |         key = f"{project}/{package}" | ||||||
|         if key in self.refreshed_packages: |         if key in self.refreshed_packages: | ||||||
|             # refreshing once is good enough |             # refreshing once is good enough | ||||||
|             return |             return | ||||||
|  |         logging.info(f"Refresh {project}/{package}") | ||||||
|         self.refreshed_packages.add(key) |         self.refreshed_packages.add(key) | ||||||
|         self.update_db_package(db, project, package) |         self.update_db_package(project, package) | ||||||
|         self.fetch_all_linked_packages(db, project, package) |         self.fetch_all_linked_packages(project, package) | ||||||
|  |  | ||||||
|     def import_into_db(self): |     def import_into_db(self): | ||||||
|         db = DB() |  | ||||||
|  |  | ||||||
|         for package in self.packages: |         with concurrent.futures.ThreadPoolExecutor() as executor: | ||||||
|             self.refresh_package(db, self.project, package) |             fs = [ | ||||||
|         db.conn.commit() |                 executor.submit(refresh_package, self, self.project, package) | ||||||
|  |                 for package in self.packages | ||||||
|  |             ] | ||||||
|  |             concurrent.futures.wait(fs) | ||||||
|  |  | ||||||
|         for number in DBRevision.requests_to_fetch(db): |             self.db.conn.commit() | ||||||
|             self.obs.request(number).import_into_db(db) |  | ||||||
|         db.conn.commit() |  | ||||||
|  |  | ||||||
|         with db.cursor() as cur: |             fs = [ | ||||||
|  |                 executor.submit(import_request, self, number) | ||||||
|  |                 for number in DBRevision.requests_to_fetch(self.db) | ||||||
|  |             ] | ||||||
|  |             concurrent.futures.wait(fs) | ||||||
|  |  | ||||||
|  |             self.db.conn.commit() | ||||||
|  |  | ||||||
|  |             with self.db.cursor() as cur: | ||||||
|                 cur.execute( |                 cur.execute( | ||||||
|                     """SELECT DISTINCT source_project,source_package FROM requests  |                     """SELECT DISTINCT source_project,source_package FROM requests  | ||||||
|                             WHERE id IN (SELECT request_id FROM revisions WHERE project=%s and package = ANY(%s));""", |                             WHERE id IN (SELECT request_id FROM revisions WHERE project=%s and package = ANY(%s));""", | ||||||
|                     (self.project, self.packages), |                     (self.project, self.packages), | ||||||
|                 ) |                 ) | ||||||
|             for project, package in cur.fetchall(): |                 fs = [ | ||||||
|                 self.refresh_package(db, project, package) |                     executor.submit(refresh_package, self, project, package) | ||||||
|         db.conn.commit() |                     for project, package in cur.fetchall() | ||||||
|  |                 ] | ||||||
|  |                 concurrent.futures.wait(fs) | ||||||
|  |         self.db.conn.commit() | ||||||
|  |  | ||||||
|         missing_users = User.missing_users(db) |         missing_users = User.missing_users(self.db) | ||||||
|         for userid in missing_users: |         for userid in missing_users: | ||||||
|             missing_user = self.obs.user(userid) |             missing_user = self.obs.user(userid) | ||||||
|             if missing_user: |             if missing_user: | ||||||
|                 missing_user.import_into_db(db) |                 missing_user.import_into_db(self.db) | ||||||
|         db.conn.commit() |         self.db.conn.commit() | ||||||
|  |  | ||||||
|         self.fill_file_lists(db) |         self.fill_file_lists() | ||||||
|         db.conn.commit() |         self.db.conn.commit() | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user