forked from importers/git-importer
Refresh the packages in multiple threads
This commit is contained in:
parent
ab38332642
commit
d21ce571f5
@ -111,13 +111,18 @@ class DBRevision:
|
|||||||
return DBRevision(db, row)
|
return DBRevision(db, row)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def latest_revision(db, project, package):
|
def max_rev(db, project, package):
|
||||||
with db.cursor() as cur:
|
with db.cursor() as cur:
|
||||||
cur.execute(
|
cur.execute(
|
||||||
"SELECT MAX(rev) FROM revisions where project=%s and package=%s",
|
"SELECT MAX(rev) FROM revisions where project=%s and package=%s",
|
||||||
(project, package),
|
(project, package),
|
||||||
)
|
)
|
||||||
max = cur.fetchone()[0]
|
return cur.fetchone()[0]
|
||||||
|
return None
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def latest_revision(db, project, package):
|
||||||
|
max = DBRevision.max_rev(db, project, package)
|
||||||
if max:
|
if max:
|
||||||
return DBRevision.fetch_revision(db, project, package, max)
|
return DBRevision.fetch_revision(db, project, package, max)
|
||||||
return None
|
return None
|
||||||
|
@ -60,6 +60,7 @@ class GitExporter:
|
|||||||
|
|
||||||
gc_cnt = self.gc_interval
|
gc_cnt = self.gc_interval
|
||||||
if len(left_to_commit) > 0:
|
if len(left_to_commit) > 0:
|
||||||
|
logging.info(f"Commiting into {self.git.path}")
|
||||||
self.git.gc()
|
self.git.gc()
|
||||||
for flat in left_to_commit:
|
for flat in left_to_commit:
|
||||||
gc_cnt -= 1
|
gc_cnt -= 1
|
||||||
|
130
lib/importer.py
130
lib/importer.py
@ -1,3 +1,4 @@
|
|||||||
|
import concurrent.futures
|
||||||
import logging
|
import logging
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
|
|
||||||
@ -8,26 +9,42 @@ from lib.obs_revision import OBSRevision
|
|||||||
from lib.user import User
|
from lib.user import User
|
||||||
|
|
||||||
|
|
||||||
|
def refresh_package(importer, project, package):
|
||||||
|
importer.refresh_package(project, package)
|
||||||
|
|
||||||
|
|
||||||
|
def import_request(importer, number):
|
||||||
|
importer.import_request(number)
|
||||||
|
|
||||||
|
|
||||||
|
def import_rev(importer, rev):
|
||||||
|
importer.import_rev(rev)
|
||||||
|
|
||||||
|
|
||||||
class Importer:
|
class Importer:
|
||||||
def __init__(self, api_url, project, packages):
|
def __init__(self, api_url, project, packages):
|
||||||
# Import multiple Factory packages into the database
|
# Import multiple Factory packages into the database
|
||||||
self.packages = packages
|
self.packages = packages
|
||||||
self.project = project
|
self.project = project
|
||||||
|
|
||||||
|
self.db = DB()
|
||||||
self.obs = OBS()
|
self.obs = OBS()
|
||||||
assert project == "openSUSE:Factory"
|
assert project == "openSUSE:Factory"
|
||||||
self.obs.change_url(api_url)
|
self.obs.change_url(api_url)
|
||||||
self.refreshed_packages = set()
|
self.refreshed_packages = set()
|
||||||
|
|
||||||
def update_db_package(self, db, project, package):
|
def import_request(self, number):
|
||||||
|
self.obs.request(number).import_into_db(self.db)
|
||||||
|
|
||||||
|
def update_db_package(self, project, package):
|
||||||
root = self.obs._history(project, package)
|
root = self.obs._history(project, package)
|
||||||
if root is None:
|
if root is None:
|
||||||
return
|
return
|
||||||
latest = DBRevision.latest_revision(db, project, package)
|
latest = DBRevision.max_rev(self.db, project, package)
|
||||||
for r in root.findall("revision"):
|
for r in root.findall("revision"):
|
||||||
rev = OBSRevision(self.obs, project, package).parse(r)
|
rev = OBSRevision(self.obs, project, package).parse(r)
|
||||||
if not latest or rev.rev > latest.rev:
|
if not latest or rev.rev > latest:
|
||||||
dbrev = DBRevision.import_obs_rev(db, rev)
|
dbrev = DBRevision.import_obs_rev(self.db, rev)
|
||||||
try:
|
try:
|
||||||
root = rev.read_link()
|
root = rev.read_link()
|
||||||
except ET.ParseError:
|
except ET.ParseError:
|
||||||
@ -38,15 +55,15 @@ class Importer:
|
|||||||
tpkg = root.get("package") or package
|
tpkg = root.get("package") or package
|
||||||
dbrev.links_to(tprj, tpkg)
|
dbrev.links_to(tprj, tpkg)
|
||||||
|
|
||||||
def find_linked_revs(self, db):
|
def find_linked_revs(self):
|
||||||
with db.cursor() as cur:
|
with self.db.cursor() as cur:
|
||||||
cur.execute(
|
cur.execute(
|
||||||
"""SELECT * from revisions WHERE id in (SELECT l.revision_id FROM links l
|
"""SELECT * from revisions WHERE id in (SELECT l.revision_id FROM links l
|
||||||
LEFT JOIN linked_revs lrevs ON lrevs.revision_id=l.revision_id
|
LEFT JOIN linked_revs lrevs ON lrevs.revision_id=l.revision_id
|
||||||
WHERE lrevs.id IS NULL) and broken is FALSE;"""
|
WHERE lrevs.id IS NULL) and broken is FALSE;"""
|
||||||
)
|
)
|
||||||
for row in cur.fetchall():
|
for row in cur.fetchall():
|
||||||
rev = DBRevision(db, row)
|
rev = DBRevision(self.db, row)
|
||||||
linked_rev = rev.linked_rev()
|
linked_rev = rev.linked_rev()
|
||||||
if not linked_rev:
|
if not linked_rev:
|
||||||
logging.debug(f"No link {rev}")
|
logging.debug(f"No link {rev}")
|
||||||
@ -57,8 +74,8 @@ class Importer:
|
|||||||
(rev.dbid, linked_rev.dbid),
|
(rev.dbid, linked_rev.dbid),
|
||||||
)
|
)
|
||||||
|
|
||||||
def fetch_all_linked_packages(self, db, project, package):
|
def fetch_all_linked_packages(self, project, package):
|
||||||
with db.cursor() as cur:
|
with self.db.cursor() as cur:
|
||||||
cur.execute(
|
cur.execute(
|
||||||
"""SELECT DISTINCT l.project, l.package from links l JOIN revisions r
|
"""SELECT DISTINCT l.project, l.package from links l JOIN revisions r
|
||||||
on r.id=l.revision_id WHERE r.project=%s AND r.package=%s""",
|
on r.id=l.revision_id WHERE r.project=%s AND r.package=%s""",
|
||||||
@ -67,26 +84,26 @@ class Importer:
|
|||||||
for row in cur.fetchall():
|
for row in cur.fetchall():
|
||||||
(lproject, lpackage) = row
|
(lproject, lpackage) = row
|
||||||
# recurse
|
# recurse
|
||||||
self.refresh_package(db, lproject, lpackage)
|
self.refresh_package(lproject, lpackage)
|
||||||
|
|
||||||
def find_fake_revisions(self, db):
|
def find_fake_revisions(self):
|
||||||
with db.cursor() as cur:
|
with self.db.cursor() as cur:
|
||||||
cur.execute(
|
cur.execute(
|
||||||
"SELECT * from revisions WHERE id in (SELECT linked_id from linked_revs WHERE considered=FALSE)"
|
"SELECT * from revisions WHERE id in (SELECT linked_id from linked_revs WHERE considered=FALSE)"
|
||||||
)
|
)
|
||||||
for row in cur.fetchall():
|
for row in cur.fetchall():
|
||||||
self._find_fake_revision(db, DBRevision(db, row))
|
self._find_fake_revision(DBRevision(self.db, row))
|
||||||
|
|
||||||
def _find_fake_revision(self, db, rev):
|
def _find_fake_revision(self, rev):
|
||||||
prev = rev.previous_commit()
|
prev = rev.previous_commit()
|
||||||
if not prev:
|
if not prev:
|
||||||
with db.cursor() as cur:
|
with self.db.cursor() as cur:
|
||||||
cur.execute(
|
cur.execute(
|
||||||
"UPDATE linked_revs SET considered=TRUE where linked_id=%s",
|
"UPDATE linked_revs SET considered=TRUE where linked_id=%s",
|
||||||
(rev.dbid,),
|
(rev.dbid,),
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
with db.cursor() as cur:
|
with self.db.cursor() as cur:
|
||||||
cur.execute(
|
cur.execute(
|
||||||
"""SELECT * FROM revisions WHERE id IN
|
"""SELECT * FROM revisions WHERE id IN
|
||||||
(SELECT revision_id from linked_revs WHERE linked_id=%s)
|
(SELECT revision_id from linked_revs WHERE linked_id=%s)
|
||||||
@ -95,7 +112,7 @@ class Importer:
|
|||||||
)
|
)
|
||||||
last_linked = None
|
last_linked = None
|
||||||
for linked in cur.fetchall():
|
for linked in cur.fetchall():
|
||||||
linked = DBRevision(db, linked)
|
linked = DBRevision(self.db, linked)
|
||||||
nextrev = linked.next_commit()
|
nextrev = linked.next_commit()
|
||||||
if nextrev and nextrev.commit_time < rev.commit_time:
|
if nextrev and nextrev.commit_time < rev.commit_time:
|
||||||
continue
|
continue
|
||||||
@ -107,7 +124,7 @@ class Importer:
|
|||||||
if not last_linked:
|
if not last_linked:
|
||||||
return
|
return
|
||||||
|
|
||||||
with db.cursor() as cur:
|
with self.db.cursor() as cur:
|
||||||
linked = last_linked
|
linked = last_linked
|
||||||
cur.execute(
|
cur.execute(
|
||||||
"SELECT 1 FROM fake_revs where revision_id=%s AND linked_id=%s",
|
"SELECT 1 FROM fake_revs where revision_id=%s AND linked_id=%s",
|
||||||
@ -144,19 +161,15 @@ class Importer:
|
|||||||
(rev.dbid, linked.dbid),
|
(rev.dbid, linked.dbid),
|
||||||
)
|
)
|
||||||
|
|
||||||
def revisions_without_files(self, db):
|
def revisions_without_files(self):
|
||||||
with db.cursor() as cur:
|
with self.db.cursor() as cur:
|
||||||
cur.execute(
|
cur.execute(
|
||||||
"SELECT * FROM revisions WHERE broken=FALSE AND expanded_srcmd5 IS NULL"
|
"SELECT * FROM revisions WHERE broken=FALSE AND expanded_srcmd5 IS NULL"
|
||||||
)
|
)
|
||||||
return [DBRevision(db, row) for row in cur.fetchall()]
|
return [DBRevision(self.db, row) for row in cur.fetchall()]
|
||||||
|
|
||||||
def fill_file_lists(self, db):
|
def import_rev(self, rev):
|
||||||
self.find_linked_revs(db)
|
with self.db.cursor() as cur:
|
||||||
|
|
||||||
self.find_fake_revisions(db)
|
|
||||||
for rev in self.revisions_without_files(db):
|
|
||||||
with db.cursor() as cur:
|
|
||||||
cur.execute(
|
cur.execute(
|
||||||
"""SELECT unexpanded_srcmd5 from revisions WHERE
|
"""SELECT unexpanded_srcmd5 from revisions WHERE
|
||||||
id=(SELECT linked_id FROM linked_revs WHERE revision_id=%s)""",
|
id=(SELECT linked_id FROM linked_revs WHERE revision_id=%s)""",
|
||||||
@ -171,7 +184,7 @@ class Importer:
|
|||||||
if list:
|
if list:
|
||||||
rev.import_dir_list(list)
|
rev.import_dir_list(list)
|
||||||
md5 = rev.calculate_files_hash()
|
md5 = rev.calculate_files_hash()
|
||||||
with db.cursor() as cur:
|
with self.db.cursor() as cur:
|
||||||
cur.execute(
|
cur.execute(
|
||||||
"UPDATE revisions SET files_hash=%s WHERE id=%s",
|
"UPDATE revisions SET files_hash=%s WHERE id=%s",
|
||||||
(md5, rev.dbid),
|
(md5, rev.dbid),
|
||||||
@ -179,42 +192,65 @@ class Importer:
|
|||||||
else:
|
else:
|
||||||
rev.set_broken()
|
rev.set_broken()
|
||||||
|
|
||||||
def refresh_package(self, db, project, package):
|
def fill_file_lists(self):
|
||||||
|
self.find_linked_revs()
|
||||||
|
|
||||||
|
self.find_fake_revisions()
|
||||||
|
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||||
|
fs = [
|
||||||
|
executor.submit(import_rev, self, rev)
|
||||||
|
for rev in self.revisions_without_files()
|
||||||
|
]
|
||||||
|
concurrent.futures.wait(fs)
|
||||||
|
|
||||||
|
def refresh_package(self, project, package):
|
||||||
key = f"{project}/{package}"
|
key = f"{project}/{package}"
|
||||||
if key in self.refreshed_packages:
|
if key in self.refreshed_packages:
|
||||||
# refreshing once is good enough
|
# refreshing once is good enough
|
||||||
return
|
return
|
||||||
|
logging.info(f"Refresh {project}/{package}")
|
||||||
self.refreshed_packages.add(key)
|
self.refreshed_packages.add(key)
|
||||||
self.update_db_package(db, project, package)
|
self.update_db_package(project, package)
|
||||||
self.fetch_all_linked_packages(db, project, package)
|
self.fetch_all_linked_packages(project, package)
|
||||||
|
|
||||||
def import_into_db(self):
|
def import_into_db(self):
|
||||||
db = DB()
|
|
||||||
|
|
||||||
for package in self.packages:
|
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||||
self.refresh_package(db, self.project, package)
|
fs = [
|
||||||
db.conn.commit()
|
executor.submit(refresh_package, self, self.project, package)
|
||||||
|
for package in self.packages
|
||||||
|
]
|
||||||
|
concurrent.futures.wait(fs)
|
||||||
|
|
||||||
for number in DBRevision.requests_to_fetch(db):
|
self.db.conn.commit()
|
||||||
self.obs.request(number).import_into_db(db)
|
|
||||||
db.conn.commit()
|
|
||||||
|
|
||||||
with db.cursor() as cur:
|
fs = [
|
||||||
|
executor.submit(import_request, self, number)
|
||||||
|
for number in DBRevision.requests_to_fetch(self.db)
|
||||||
|
]
|
||||||
|
concurrent.futures.wait(fs)
|
||||||
|
|
||||||
|
self.db.conn.commit()
|
||||||
|
|
||||||
|
with self.db.cursor() as cur:
|
||||||
cur.execute(
|
cur.execute(
|
||||||
"""SELECT DISTINCT source_project,source_package FROM requests
|
"""SELECT DISTINCT source_project,source_package FROM requests
|
||||||
WHERE id IN (SELECT request_id FROM revisions WHERE project=%s and package = ANY(%s));""",
|
WHERE id IN (SELECT request_id FROM revisions WHERE project=%s and package = ANY(%s));""",
|
||||||
(self.project, self.packages),
|
(self.project, self.packages),
|
||||||
)
|
)
|
||||||
for project, package in cur.fetchall():
|
fs = [
|
||||||
self.refresh_package(db, project, package)
|
executor.submit(refresh_package, self, project, package)
|
||||||
db.conn.commit()
|
for project, package in cur.fetchall()
|
||||||
|
]
|
||||||
|
concurrent.futures.wait(fs)
|
||||||
|
self.db.conn.commit()
|
||||||
|
|
||||||
missing_users = User.missing_users(db)
|
missing_users = User.missing_users(self.db)
|
||||||
for userid in missing_users:
|
for userid in missing_users:
|
||||||
missing_user = self.obs.user(userid)
|
missing_user = self.obs.user(userid)
|
||||||
if missing_user:
|
if missing_user:
|
||||||
missing_user.import_into_db(db)
|
missing_user.import_into_db(self.db)
|
||||||
db.conn.commit()
|
self.db.conn.commit()
|
||||||
|
|
||||||
self.fill_file_lists(db)
|
self.fill_file_lists()
|
||||||
db.conn.commit()
|
self.db.conn.commit()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user