2022-10-17 15:17:25 +02:00
|
|
|
import logging
|
2022-12-02 11:00:31 +01:00
|
|
|
import pathlib
|
2022-10-24 12:01:28 +02:00
|
|
|
import xml.etree.ElementTree as ET
|
2022-10-17 15:17:25 +02:00
|
|
|
|
2022-10-17 20:39:03 +02:00
|
|
|
from lib.db import DB
|
2022-10-18 13:13:52 +02:00
|
|
|
from lib.db_revision import DBRevision
|
2022-10-17 19:54:47 +02:00
|
|
|
from lib.obs import OBS
|
2022-10-18 12:17:43 +02:00
|
|
|
from lib.obs_revision import OBSRevision
|
2022-11-02 07:59:25 +01:00
|
|
|
from lib.user import User
|
2022-11-01 13:09:27 +01:00
|
|
|
|
2022-11-02 08:50:54 +01:00
|
|
|
|
2022-11-03 22:04:45 +01:00
|
|
|
def refresh_package(importer, project, package):
    """Refresh one package through the given importer.

    Module-level wrapper that forwards to ``importer.refresh_package``.

    Args:
        importer: Object exposing ``refresh_package(project, package)``.
        project: Project name passed through unchanged.
        package: Package name passed through unchanged.
    """
    importer.refresh_package(project, package)
|
|
|
|
|
|
|
|
|
|
|
|
def import_request(importer, number):
    """Import one request through the given importer.

    Module-level wrapper that forwards to ``importer.import_request``.

    Args:
        importer: Object exposing ``import_request(number)``.
        number: Request identifier passed through unchanged.
    """
    importer.import_request(number)
|
|
|
|
|
|
|
|
|
|
|
|
def import_rev(importer, rev):
    """Import the file list of one revision through the given importer.

    Module-level wrapper that forwards to ``importer.import_rev``.

    Args:
        importer: Object exposing ``import_rev(rev)``.
        rev: Revision object passed through unchanged.
    """
    importer.import_rev(rev)
|
|
|
|
|
|
|
|
|
2022-10-17 15:17:25 +02:00
|
|
|
class Importer:
    """Import multiple Factory packages into the database."""

    def __init__(self, api_url, project, packages):
        """Set up the database and OBS connections for an import run.

        Args:
            api_url: URL handed to ``OBS``.
            project: Project whose packages are imported.
            packages: Package names to import from ``project``.

        Raises:
            AssertionError: If the project itself carries an ``scmsync``
                element in its meta data.
        """
        self.packages = packages
        self.project = project
        # has_scmsync() results, keyed by "project" or "project/package".
        self.scmsync_cache = {}
        # Package names that refresh_package() skipped because of scmsync.
        self.packages_with_scmsync = set()

        self.db = DB()
        self.obs = OBS(api_url)
        assert not self.has_scmsync(project), f"{project} has scmsync set"
        # "project/package" keys already handled by refresh_package().
        self.refreshed_packages = set()
        # Lazily loaded by package_gone(); None means "not loaded yet".
        self.gone_packages_set = None

    def import_request(self, number):
        """Fetch request ``number`` from OBS and store it in the database."""
        self.obs.request(number).import_into_db(self.db)

    def update_db_package(self, project, package):
        """Import revisions newer than the latest one already in the database.

        Every imported revision is checked for a link; revisions whose link
        cannot be parsed are marked broken, and valid links are recorded with
        their target project/package (defaulting to the revision's own).
        """
        root = self.obs._history(project, package)
        if root is None:
            return
        latest = DBRevision.max_rev(self.db, project, package)
        for r in root.findall("revision"):
            rev = OBSRevision(self.obs, project, package).parse(r)
            if latest and rev.rev <= latest:
                # Already imported earlier.
                continue
            dbrev = DBRevision.import_obs_rev(self.db, rev)
            try:
                link = rev.read_link()
            except ET.ParseError:
                dbrev.set_broken()
                continue
            if link is not None:
                tprj = link.get("project") or project
                tpkg = link.get("package") or package
                dbrev.links_to(tprj, tpkg)

    def find_linked_revs(self):
        """Fill ``linked_revs`` for link revisions that have no entry yet."""
        with self.db.cursor() as cur:
            cur.execute(
                """SELECT * from revisions WHERE id in (SELECT l.revision_id FROM links l
                LEFT JOIN linked_revs lrevs ON lrevs.revision_id=l.revision_id
                WHERE lrevs.id IS NULL) and broken is FALSE;"""
            )
            for row in cur.fetchall():
                rev = DBRevision(self.db, row)
                linked_rev = rev.linked_rev()
                if not linked_rev:
                    logging.debug(f"No link {rev}")
                    continue
                cur.execute(
                    """INSERT INTO linked_revs (revision_id, linked_id)
                    VALUES (%s,%s)""",
                    (rev.dbid, linked_rev.dbid),
                )

    def fetch_all_linked_packages(self, project, package):
        """Refresh every package that revisions of ``project/package`` link to."""
        with self.db.cursor() as cur:
            cur.execute(
                """SELECT DISTINCT l.project, l.package from links l JOIN revisions r
                on r.id=l.revision_id WHERE r.project=%s AND r.package=%s""",
                (project, package),
            )
            for lproject, lpackage in cur.fetchall():
                # recurse
                self.refresh_package(lproject, lpackage)

    def find_fake_revisions(self):
        """Process every linked-to revision not yet marked as considered."""
        with self.db.cursor() as cur:
            cur.execute(
                "SELECT * from revisions WHERE id in (SELECT linked_id from linked_revs WHERE considered=FALSE)"
            )
            for row in cur.fetchall():
                self._find_fake_revision(DBRevision(self.db, row))

    @staticmethod
    def _mark_considered(cur, rev):
        """Flag all ``linked_revs`` rows pointing at ``rev`` as considered."""
        cur.execute(
            "UPDATE linked_revs SET considered=TRUE where linked_id=%s",
            (rev.dbid,),
        )

    def _find_fake_revision(self, rev):
        """Insert a synthetic revision in the linking package for ``rev``.

        Looks for the last revision that linked to the predecessor of ``rev``
        and was still current at ``rev.commit_time``. If one exists and no
        ``fake_revs`` entry is recorded for the pair yet, a revision row is
        inserted in the linking package and wired to ``rev``. In every case
        ``rev`` ends up marked as considered.
        """
        prev = rev.previous_commit()
        if not prev:
            with self.db.cursor() as cur:
                self._mark_considered(cur, rev)
            return

        with self.db.cursor() as cur:
            cur.execute(
                """SELECT * FROM revisions WHERE id IN
                (SELECT revision_id from linked_revs WHERE linked_id=%s)
                AND commit_time <= %s ORDER BY commit_time""",
                (prev.dbid, rev.commit_time),
            )
            last_linked = None
            for row in cur.fetchall():
                candidate = DBRevision(self.db, row)
                nextrev = candidate.next_commit()
                if nextrev and nextrev.commit_time < rev.commit_time:
                    # Superseded before rev was committed.
                    continue
                last_linked = candidate
            self._mark_considered(cur, rev)
        if not last_linked:
            return

        with self.db.cursor() as cur:
            linked = last_linked
            cur.execute(
                "SELECT 1 FROM fake_revs where revision_id=%s AND linked_id=%s",
                (rev.dbid, linked.dbid),
            )
            if cur.fetchone():
                self._mark_considered(cur, rev)
                return
            # Fractional revision number: linking revision plus rev/1000.
            fake_rev = linked.rev + rev.rev / 1000.0
            comment = f"Updating link to change in {rev.project}/{rev.package} revision {int(rev.rev)}"
            cur.execute(
                """INSERT INTO revisions (project,package,rev,unexpanded_srcmd5,
                commit_time, userid, comment, api_url) VALUES(%s,%s,%s,%s,%s,%s,%s,%s) RETURNING id""",
                (
                    linked.project,
                    linked.package,
                    fake_rev,
                    linked.unexpanded_srcmd5,
                    rev.commit_time,
                    "buildservice-autocommit",
                    comment,
                    linked.api_url,
                ),
            )
            new_id = cur.fetchone()[0]
            cur.execute(
                """INSERT INTO linked_revs (revision_id, linked_id) VALUES (%s,%s)""",
                (new_id, rev.dbid),
            )
            cur.execute(
                """INSERT INTO fake_revs (revision_id, linked_id) VALUES (%s,%s)""",
                (rev.dbid, linked.dbid),
            )

    def revisions_without_files(self, package):
        """Return non-broken revisions of ``package`` lacking an expanded srcmd5."""
        logging.debug(f"revisions_without_files({package})")
        with self.db.cursor() as cur:
            cur.execute(
                "SELECT * FROM revisions WHERE package=%s AND broken=FALSE AND expanded_srcmd5 IS NULL",
                (package,),
            )
            return [DBRevision(self.db, row) for row in cur.fetchall()]

    def import_rev(self, rev):
        """Fetch the directory listing of ``rev`` from OBS and store it.

        When OBS returns a listing, it is imported and the revision's
        ``files_hash`` is updated; otherwise the revision is marked broken.
        """
        with self.db.cursor() as cur:
            cur.execute(
                """SELECT unexpanded_srcmd5 from revisions WHERE
                id=(SELECT linked_id FROM linked_revs WHERE revision_id=%s)""",
                (rev.dbid,),
            )
            linked_rev = cur.fetchone()
        if linked_rev:
            # Unwrap the single selected column.
            linked_rev = linked_rev[0]
        obs_dir_list = self.obs.list(
            rev.project, rev.package, rev.unexpanded_srcmd5, linked_rev
        )
        if not obs_dir_list:
            rev.set_broken()
            return
        rev.import_dir_list(obs_dir_list)
        md5 = rev.calculate_files_hash()
        with self.db.cursor() as cur:
            cur.execute(
                "UPDATE revisions SET files_hash=%s WHERE id=%s",
                (md5, rev.dbid),
            )

    def fill_file_lists(self):
        """Resolve links and fake revisions, then import missing file lists."""
        self.find_linked_revs()

        self.find_fake_revisions()
        for package in self.packages:
            for rev in self.revisions_without_files(package):
                print(f"rev {rev} is without files")
                self.import_rev(rev)

    def refresh_package(self, project, package):
        """Update the database for ``project/package`` and what it links to.

        Does nothing for packages already refreshed by this importer or
        reported by package_gone(). Packages (or projects) with scmsync are
        recorded in ``packages_with_scmsync`` and skipped.
        """
        key = f"{project}/{package}"
        if key in self.refreshed_packages:
            # refreshing once is good enough
            return
        if self.package_gone(key):
            return
        logging.debug(f"Refresh {project}/{package}")
        self.refreshed_packages.add(key)
        if self.has_scmsync(project) or self.has_scmsync(key):
            self.packages_with_scmsync.add(package)
            logging.debug(f"{project}/{package} already in Git - skipping")
            return
        self.update_db_package(project, package)
        self.fetch_all_linked_packages(project, package)

    def import_into_db(self):
        """Run the full import, committing after each stage.

        Stages: configured packages, pending requests, source packages of
        those requests, missing users, and finally file lists.
        """
        for package in self.packages:
            self.refresh_package(self.project, package)

        self.db.conn.commit()

        for number in DBRevision.requests_to_fetch(self.db):
            self.import_request(number)

        self.db.conn.commit()

        with self.db.cursor() as cur:
            cur.execute(
                """SELECT DISTINCT source_project,source_package FROM requests
                WHERE id IN (SELECT request_id FROM revisions WHERE project=%s and package = ANY(%s));""",
                (self.project, self.packages),
            )
            for project, package in cur.fetchall():
                self.refresh_package(project, package)

        self.db.conn.commit()

        missing_users = User.missing_users(self.db)
        for userid in missing_users:
            missing_user = self.obs.user(userid)
            if missing_user:
                missing_user.import_into_db(self.db)
        self.db.conn.commit()

        self.fill_file_lists()
        self.db.conn.commit()

    def package_gone(self, key) -> bool:
        """Return True if ``key`` is listed in ``gone-packages.txt``.

        The file sits one directory above the one containing this module and
        is read once, on first use. ``key`` is compared against each stripped
        line (refresh_package() passes "project/package").
        """
        # Compare against None: an empty file yields an empty (falsy) set,
        # which must not trigger a re-read on every call.
        if self.gone_packages_set is None:
            gone_file = pathlib.Path(__file__).parent.parent / "gone-packages.txt"
            with open(gone_file) as f:
                self.gone_packages_set = {line.strip() for line in f}
        return key in self.gone_packages_set

    def has_scmsync(self, key) -> bool:
        """Return True if the OBS meta data for ``key`` has an ``scmsync`` element.

        ``key`` is a project name or a "project/package" string. Results,
        including negative ones, are cached per key.
        """
        if key in self.scmsync_cache:
            return self.scmsync_cache[key]

        root = self.obs._meta(key)
        scmsync_exists = False
        if root is not None:
            scmsync_exists = root.find("scmsync") is not None
        self.scmsync_cache[key] = scmsync_exists
        return scmsync_exists

    def package_with_scmsync(self, package) -> bool:
        """Return True if refresh_package() skipped ``package`` due to scmsync."""
        return package in self.packages_with_scmsync
|
|
|
|
|