git-importer/lib/db_revision.py

340 lines
12 KiB
Python
Raw Permalink Normal View History

from __future__ import annotations
from hashlib import md5
2022-11-02 20:55:09 +01:00
from pathlib import Path
from lib.db import DB
from lib.obs_revision import OBSRevision
from lib.request import Request
2022-10-18 12:17:43 +02:00
class DBRevision:
    """In-memory view of one row of the ``revisions`` table.

    Instances are built from a full ``SELECT *`` row and keep a reference to
    the database handle so that related rows (``files``, ``links``,
    ``requests``) can be queried lazily.

    Equality and hashing are based on the database id only; ordering is by
    project, then package, then revision number.
    """

    def __init__(self, db: DB, row: tuple):
        """Unpack a ``revisions`` row.

        Args:
            db: database handle; only its ``cursor()`` context manager is used.
            row: a full row of the ``revisions`` table, in schema order.
        """
        # need to stay in sync with the schema creation in db.py
        (
            self.dbid,
            self.project,
            self.package,
            self.rev,
            self.unexpanded_srcmd5,
            self.commit_time,
            self.userid,
            self.comment,
            self.broken,
            self.expanded_srcmd5,
            self.request_number,
            self.request_id,
            self.files_hash,
            self.api_url,
        ) = row
        # Revisions are compared numerically and may be non-integral
        # (see git_commit_message), hence float rather than int.
        self.rev = float(self.rev)
        # Lazily filled cache for files_list(); None means "not loaded yet".
        self._files = None
        self.db = db
        self.git_commit = None

    def short_string(self) -> str:
        """Return ``project/package/rev`` for compact log output."""
        return f"{self.project}/{self.package}/{self.rev}"

    def __str__(self) -> str:
        return (
            f"Rev {self.project}/{self.package}/{self.rev} Md5 {self.unexpanded_srcmd5}"
            f" {self.commit_time} {self.userid} {self.request_number}"
        )

    def __repr__(self) -> str:
        return f"[{self.__str__()}]"

    def __eq__(self, other):
        """Two revisions are equal when they refer to the same database row."""
        if not isinstance(other, DBRevision):
            return NotImplemented
        return self.dbid == other.dbid

    def __hash__(self) -> int:
        # Defining __eq__ alone would make instances unhashable; hash on the
        # same key that __eq__ compares.
        return hash(self.dbid)

    def __lt__(self, other):
        """Order by project, then package, then revision number."""
        if not isinstance(other, DBRevision):
            return NotImplemented
        if self.project != other.project:
            return self.project < other.project
        if self.package != other.package:
            return self.package < other.package
        return self.rev < other.rev

    def _web_url(self) -> str:
        """Return the web base URL, derived by replacing ``api.`` with ``build.``."""
        return self.api_url.replace("api.", "build.")

    def _stripped_comment(self) -> str:
        """Return the commit comment without surrounding whitespace ('' if unset)."""
        return (self.comment or "").strip()

    def request_accept_message(self) -> str:
        """Build the commit message for a revision created by accepting a request.

        Looks up the request through ``Request.find`` using ``self.request_id``.
        """
        request = Request.find(self.db, self.request_id)
        msg = f"Accepting request {request.number} from {request.source_project}\n\n"
        msg += self._stripped_comment()
        msg += f"\n\nOBS-URL: {self._web_url()}/request/show/{self.request_number}"
        return msg

    def git_commit_message(self) -> str:
        """Build the git commit message for this revision.

        The message is the request-accept text when a request is attached,
        otherwise the stripped comment, followed by an ``OBS-URL:`` trailer
        pointing at the package revision.
        """
        if self.request_id:
            msg = self.request_accept_message()
        else:
            msg = self._stripped_comment() + "\n"

        if self.rev == int(self.rev):
            rev_ref = int(self.rev)
        else:
            # do not link to fake revisions: a non-integral revision number
            # is referenced through its expanded srcmd5 instead.
            rev_ref = self.expanded_srcmd5
        msg += (
            f"\nOBS-URL: {self._web_url()}/package/show/"
            f"{self.project}/{self.package}?expand=0&rev={rev_ref}"
        )
        return msg

    def as_dict(self) -> dict:
        """Return a dict we can put into YAML for test cases"""
        ret = {
            "project": self.project,
            "package": self.package,
            "rev": self.rev,
            "unexpanded_srcmd5": self.unexpanded_srcmd5,
            "commit_time": self.commit_time,
            "userid": self.userid,
            "comment": self.comment,
            "broken": self.broken,
            "expanded_srcmd5": self.expanded_srcmd5,
            "api_url": self.api_url,
            "files_hash": self.files_hash,
            "files": self.files_list(),
        }
        if self.request_id:
            ret["request"] = Request.find(self.db, self.request_id).as_dict()
        return ret

    def links_to(self, project: str, package: str) -> None:
        """Record in the ``links`` table that this revision links to project/package."""
        with self.db.cursor() as cur:
            cur.execute(
                "INSERT INTO links (revision_id, project, package) VALUES (%s,%s,%s)",
                (self.dbid, project, package),
            )

    @staticmethod
    def import_obs_rev(db: DB, revision: OBSRevision):
        """Insert an OBS revision into ``revisions`` and return it as a DBRevision."""
        with db.cursor() as cur:
            cur.execute(
                """INSERT INTO revisions (project, package, rev, unexpanded_srcmd5, commit_time, userid, comment, request_number, api_url)
                        VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s)""",
                (
                    revision.project,
                    revision.package,
                    revision.rev,
                    revision.unexpanded_srcmd5,
                    revision.time,
                    revision.userid,
                    revision.comment,
                    revision.request_number,
                    revision.obs.url,
                ),
            )
        return DBRevision.fetch_revision(
            db, revision.project, revision.package, revision.rev
        )

    @staticmethod
    def fetch_revision(db, project, package, rev) -> DBRevision | None:
        """Technically we would need the api_url as well, but we assume projects are unique
        (e.g. not importing SLE from obs)

        Returns None when no matching row exists.
        """
        with db.cursor() as cur:
            cur.execute(
                "SELECT * FROM revisions where project=%s and package=%s and rev=%s",
                (project, package, str(rev)),
            )
            row = cur.fetchone()
        if row:
            return DBRevision(db, row)
        return None

    @staticmethod
    def max_rev(db, project, package):
        """Return ``MAX(rev)`` for the package (None when it has no revisions)."""
        with db.cursor() as cur:
            cur.execute(
                "SELECT MAX(rev) FROM revisions where project=%s and package=%s",
                (project, package),
            )
            return cur.fetchone()[0]

    @staticmethod
    def latest_revision(db, project, package) -> DBRevision | None:
        """Return the revision with the highest rev, or None if there is none."""
        newest = DBRevision.max_rev(db, project, package)
        if newest is None:
            return None
        return DBRevision.fetch_revision(db, project, package, newest)

    @staticmethod
    def all_revisions(db, project, package) -> list[DBRevision]:
        """Return every stored revision of the package, in database order."""
        with db.cursor() as cur:
            cur.execute(
                "SELECT * FROM revisions where project=%s and package=%s",
                (project, package),
            )
            return [DBRevision(db, row) for row in cur.fetchall()]

    def linked_rev(self) -> DBRevision | None:
        """Return the revision this one links to, if any.

        The target is the newest revision of the linked project/package whose
        commit time is not later than this revision's. When a link is recorded
        but no such target exists, the revision is marked broken.
        """
        if self.broken:
            return None
        with self.db.cursor() as cur:
            cur.execute(
                "SELECT project,package FROM links where revision_id=%s", (self.dbid,)
            )
            link = cur.fetchone()
            if not link:
                return None
            project, package = link
            cur.execute(
                "SELECT * FROM revisions where project=%s and package=%s and commit_time <= %s ORDER BY commit_time DESC LIMIT 1",
                (project, package, self.commit_time),
            )
            # The query is limited to one row, so a single fetch is enough.
            target = cur.fetchone()
            if target:
                return DBRevision(self.db, target)
            self.set_broken()
        return None

    def set_broken(self) -> None:
        """Mark this revision as broken, both in the database and in memory."""
        with self.db.cursor() as cur:
            cur.execute("UPDATE revisions SET broken=TRUE where id=%s", (self.dbid,))
        # Keep the object consistent with the row that was just updated.
        self.broken = True

    def import_dir_list(self, xml) -> None:
        """Store the expanded srcmd5 and the file entries of a directory listing.

        Args:
            xml: element exposing ``get("srcmd5")`` and ``findall("entry")``
                (presumably an ElementTree directory element -- confirm
                against the caller).
        """
        with self.db.cursor() as cur:
            cur.execute(
                "UPDATE revisions SET expanded_srcmd5=%s where id=%s",
                (xml.get("srcmd5"), self.dbid),
            )
            for entry in xml.findall("entry"):
                # this file creates easily 100k commits and is just useless data :(
                # unfortunately it's stored in the same meta package as the project config
                if (
                    entry.get("name") == "_staging_workflow"
                    and self.package == "_project"
                ):
                    continue
                cur.execute(
                    """INSERT INTO files (name, md5, size, mtime, revision_id)
                            VALUES (%s,%s,%s,%s,%s)""",
                    (
                        entry.get("name"),
                        entry.get("md5"),
                        entry.get("size"),
                        entry.get("mtime"),
                        self.dbid,
                    ),
                )

    def previous_commit(self) -> DBRevision | None:
        """Return the revision numbered ``int(rev) - 1`` of the same package."""
        return DBRevision.fetch_revision(
            self.db, self.project, self.package, int(self.rev) - 1
        )

    def next_commit(self) -> DBRevision | None:
        """Return the revision numbered ``int(rev) + 1`` of the same package."""
        return DBRevision.fetch_revision(
            self.db, self.project, self.package, int(self.rev) + 1
        )

    def calculate_files_hash(self) -> str:
        """Return an md5 hex digest over ``name/md5/size`` of all files, sorted by name."""
        digest = md5()
        for file_dict in self.files_list():
            digest.update(
                (
                    file_dict["name"]
                    + "/"
                    + file_dict["md5"]
                    + "/"
                    + str(file_dict["size"])
                ).encode("utf-8")
            )
        return digest.hexdigest()

    def files_list(self) -> list[dict]:
        """Return the files of this revision as dicts, sorted by name.

        Each dict has the keys ``md5``, ``size``, ``mtime`` and ``name``.
        The result is cached on the instance after the first call, including
        when the revision has no files.
        """
        if self._files is not None:
            return self._files
        with self.db.cursor() as cur:
            # Name the columns explicitly so the unpacking below does not
            # depend on the physical column order of the table.
            cur.execute(
                "SELECT name, md5, size, mtime FROM files WHERE revision_id=%s",
                (self.dbid,),
            )
            rows = cur.fetchall()
        self._files = sorted(
            (
                {"md5": digest, "size": size, "mtime": mtime, "name": name}
                for name, digest, size, mtime in rows
            ),
            key=lambda entry: entry["name"],
        )
        return self._files

    def calc_delta(self, current_rev: DBRevision | None):
        """Calculate the list of files to download and to delete.
        Param current_rev is the revision that's currently checked out.
        If it's None, the repository is empty.

        Returns a tuple ``(to_download, to_delete)``: ``to_download`` holds
        ``(Path, size, md5)`` for every file that is new or whose md5/size
        changed; ``to_delete`` holds the ``Path`` of every file present in
        ``current_rev`` but absent from this revision.
        """
        if current_rev:
            old_files = {
                e["name"]: f"{e['md5']}-{e['size']}" for e in current_rev.files_list()
            }
        else:
            old_files = {}

        to_download = []
        for entry in self.files_list():
            if old_files.get(entry["name"]) != f"{entry['md5']}-{entry['size']}":
                to_download.append((Path(entry["name"]), entry["size"], entry["md5"]))
            # Whatever is left in old_files afterwards no longer exists here.
            old_files.pop(entry["name"], None)
        to_delete = [Path(name) for name in old_files]
        return to_download, to_delete

    @staticmethod
    def requests_to_fetch(db) -> list:
        """Return request numbers referenced by revisions but missing from ``requests``."""
        with db.cursor() as cur:
            cur.execute(
                """SELECT request_number FROM revisions revs LEFT JOIN requests
                        reqs ON reqs.number=revs.request_number WHERE reqs.id is null AND
                        revs.request_number IS NOT NULL""",
            )
            return [row[0] for row in cur.fetchall()]

    @staticmethod
    def import_fixture_dict(db, rev_dict) -> None:
        """Used in test cases to read a revision from fixtures into the test database"""
        with db.cursor() as cur:
            cur.execute(
                """INSERT INTO revisions (project, package, rev, unexpanded_srcmd5, expanded_srcmd5,
                        commit_time, userid, comment, broken, files_hash, api_url)
                        VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) RETURNING id""",
                (
                    rev_dict["project"],
                    rev_dict["package"],
                    rev_dict["rev"],
                    rev_dict["unexpanded_srcmd5"],
                    rev_dict["expanded_srcmd5"],
                    rev_dict["commit_time"],
                    rev_dict["userid"],
                    rev_dict["comment"],
                    rev_dict["broken"],
                    rev_dict["files_hash"],
                    rev_dict.get("api_url", "https://api.opensuse.org"),
                ),
            )
            rev_id = cur.fetchone()[0]
            for file_dict in rev_dict["files"]:
                cur.execute(
                    "INSERT INTO files (md5, mtime, name, size, revision_id) VALUES(%s, %s, %s, %s, %s)",
                    (
                        file_dict["md5"],
                        file_dict["mtime"],
                        file_dict["name"],
                        file_dict["size"],
                        rev_id,
                    ),
                )
            request = rev_dict.get("request")
            if request:
                cur.execute(
                    """INSERT INTO requests (creator, number, source_project, source_package,
                            source_rev, state, type) VALUES (%s, %s, %s, %s, %s, %s, %s) RETURNING id""",
                    (
                        request["creator"],
                        request["number"],
                        request.get("source_project"),
                        request.get("source_package"),
                        request.get("source_rev"),
                        request["state"],
                        request["type"],
                    ),
                )
                request_id = cur.fetchone()[0]
                cur.execute(
                    "UPDATE revisions SET request_id=%s, request_number=%s WHERE id=%s",
                    (request_id, request["number"], rev_id),
                )