git-importer/lib/importer.py
Stephan Kulow ab38332642 Allow to import multiple packages in one go
This way we avoid duplicating all startup and SQL queries
2022-11-03 20:14:56 +01:00

221 lines
8.2 KiB
Python

import logging
import xml.etree.ElementTree as ET
from lib.db import DB
from lib.db_revision import DBRevision
from lib.obs import OBS
from lib.obs_revision import OBSRevision
from lib.user import User
class Importer:
def __init__(self, api_url, project, packages):
# Import multiple Factory packages into the database
self.packages = packages
self.project = project
self.obs = OBS()
assert project == "openSUSE:Factory"
self.obs.change_url(api_url)
self.refreshed_packages = set()
def update_db_package(self, db, project, package):
root = self.obs._history(project, package)
if root is None:
return
latest = DBRevision.latest_revision(db, project, package)
for r in root.findall("revision"):
rev = OBSRevision(self.obs, project, package).parse(r)
if not latest or rev.rev > latest.rev:
dbrev = DBRevision.import_obs_rev(db, rev)
try:
root = rev.read_link()
except ET.ParseError:
dbrev.set_broken()
continue
if root is not None:
tprj = root.get("project") or project
tpkg = root.get("package") or package
dbrev.links_to(tprj, tpkg)
def find_linked_revs(self, db):
with db.cursor() as cur:
cur.execute(
"""SELECT * from revisions WHERE id in (SELECT l.revision_id FROM links l
LEFT JOIN linked_revs lrevs ON lrevs.revision_id=l.revision_id
WHERE lrevs.id IS NULL) and broken is FALSE;"""
)
for row in cur.fetchall():
rev = DBRevision(db, row)
linked_rev = rev.linked_rev()
if not linked_rev:
logging.debug(f"No link {rev}")
continue
cur.execute(
"""INSERT INTO linked_revs (revision_id, linked_id)
VALUES (%s,%s)""",
(rev.dbid, linked_rev.dbid),
)
def fetch_all_linked_packages(self, db, project, package):
with db.cursor() as cur:
cur.execute(
"""SELECT DISTINCT l.project, l.package from links l JOIN revisions r
on r.id=l.revision_id WHERE r.project=%s AND r.package=%s""",
(project, package),
)
for row in cur.fetchall():
(lproject, lpackage) = row
# recurse
self.refresh_package(db, lproject, lpackage)
def find_fake_revisions(self, db):
with db.cursor() as cur:
cur.execute(
"SELECT * from revisions WHERE id in (SELECT linked_id from linked_revs WHERE considered=FALSE)"
)
for row in cur.fetchall():
self._find_fake_revision(db, DBRevision(db, row))
def _find_fake_revision(self, db, rev):
prev = rev.previous_commit()
if not prev:
with db.cursor() as cur:
cur.execute(
"UPDATE linked_revs SET considered=TRUE where linked_id=%s",
(rev.dbid,),
)
return
with db.cursor() as cur:
cur.execute(
"""SELECT * FROM revisions WHERE id IN
(SELECT revision_id from linked_revs WHERE linked_id=%s)
AND commit_time <= %s ORDER BY commit_time""",
(prev.dbid, rev.commit_time),
)
last_linked = None
for linked in cur.fetchall():
linked = DBRevision(db, linked)
nextrev = linked.next_commit()
if nextrev and nextrev.commit_time < rev.commit_time:
continue
last_linked = linked
cur.execute(
"UPDATE linked_revs SET considered=TRUE where linked_id=%s",
(rev.dbid,),
)
if not last_linked:
return
with db.cursor() as cur:
linked = last_linked
cur.execute(
"SELECT 1 FROM fake_revs where revision_id=%s AND linked_id=%s",
(rev.dbid, linked.dbid),
)
if cur.fetchone():
cur.execute(
"UPDATE linked_revs SET considered=TRUE where linked_id=%s",
(rev.dbid,),
)
return
fake_rev = linked.rev + rev.rev / 1000.0
comment = f"Updating link to change in {rev.project}/{rev.package} revision {rev.rev}"
cur.execute(
"""INSERT INTO revisions (project,package,rev,unexpanded_srcmd5,
commit_time, userid, comment) VALUES(%s,%s,%s,%s,%s,%s,%s) RETURNING id""",
(
linked.project,
linked.package,
fake_rev,
linked.unexpanded_srcmd5,
rev.commit_time,
"buildservice-autocommit",
comment,
),
)
new_id = cur.fetchone()[0]
cur.execute(
"""INSERT INTO linked_revs (revision_id, linked_id) VALUES (%s,%s)""",
(new_id, rev.dbid),
)
cur.execute(
"""INSERT INTO fake_revs (revision_id, linked_id) VALUES (%s,%s)""",
(rev.dbid, linked.dbid),
)
def revisions_without_files(self, db):
with db.cursor() as cur:
cur.execute(
"SELECT * FROM revisions WHERE broken=FALSE AND expanded_srcmd5 IS NULL"
)
return [DBRevision(db, row) for row in cur.fetchall()]
def fill_file_lists(self, db):
self.find_linked_revs(db)
self.find_fake_revisions(db)
for rev in self.revisions_without_files(db):
with db.cursor() as cur:
cur.execute(
"""SELECT unexpanded_srcmd5 from revisions WHERE
id=(SELECT linked_id FROM linked_revs WHERE revision_id=%s)""",
(rev.dbid,),
)
linked_rev = cur.fetchone()
if linked_rev:
linked_rev = linked_rev[0]
list = self.obs.list(
rev.project, rev.package, rev.unexpanded_srcmd5, linked_rev
)
if list:
rev.import_dir_list(list)
md5 = rev.calculate_files_hash()
with db.cursor() as cur:
cur.execute(
"UPDATE revisions SET files_hash=%s WHERE id=%s",
(md5, rev.dbid),
)
else:
rev.set_broken()
def refresh_package(self, db, project, package):
key = f"{project}/{package}"
if key in self.refreshed_packages:
# refreshing once is good enough
return
self.refreshed_packages.add(key)
self.update_db_package(db, project, package)
self.fetch_all_linked_packages(db, project, package)
def import_into_db(self):
db = DB()
for package in self.packages:
self.refresh_package(db, self.project, package)
db.conn.commit()
for number in DBRevision.requests_to_fetch(db):
self.obs.request(number).import_into_db(db)
db.conn.commit()
with db.cursor() as cur:
cur.execute(
"""SELECT DISTINCT source_project,source_package FROM requests
WHERE id IN (SELECT request_id FROM revisions WHERE project=%s and package = ANY(%s));""",
(self.project, self.packages),
)
for project, package in cur.fetchall():
self.refresh_package(db, project, package)
db.conn.commit()
missing_users = User.missing_users(db)
for userid in missing_users:
missing_user = self.obs.user(userid)
if missing_user:
missing_user.import_into_db(db)
db.conn.commit()
self.fill_file_lists(db)
db.conn.commit()