diff --git a/lib/db.py b/lib/db.py index 53d5d3a..7958c50 100644 --- a/lib/db.py +++ b/lib/db.py @@ -22,20 +22,19 @@ class DB: def schema_version(self): # create a cursor - cur = self.conn.cursor() + with self.conn.cursor() as cur: - # execute a statement - try: - cur.execute("SELECT MAX(version) from scheme") - except psycopg2.errors.UndefinedTable as error: - cur.close() - self.close() - self.connect() - return 0 + # execute a statement + try: + cur.execute("SELECT MAX(version) from scheme") + except psycopg2.errors.UndefinedTable as error: + cur.close() + self.close() + self.connect() + return 0 - db_version = cur.fetchone() + db_version = cur.fetchone() - cur.close() return db_version[0] def close(self): @@ -154,20 +153,29 @@ class DB: "create index request_number_idx2 on requests(number)", "UPDATE scheme SET version=12", ) + schemes[13] = ( + """ + CREATE TABLE users ( + id SERIAL PRIMARY KEY, + userid VARCHAR(255) NOT NULL, + email VARCHAR(255) NOT NULL, + realname VARCHAR(255) NOT NULL + ) + """, + "UPDATE scheme SET version=13", + ) schema_version = self.schema_version() if (schema_version + 1) not in schemes: return try: - cur = self.conn.cursor() - # create table one by one - for version, commands in schemes.items(): - if version <= schema_version: - continue - for command in commands: - cur.execute(command) - # close communication with the PostgreSQL database server - cur.close() + with self.conn.cursor() as cur: + # create table one by one + for version, commands in schemes.items(): + if version <= schema_version: + continue + for command in commands: + cur.execute(command) # commit the changes self.conn.commit() except (Exception, psycopg2.DatabaseError) as error: diff --git a/lib/db_revision.py b/lib/db_revision.py index b283652..dbe874d 100644 --- a/lib/db_revision.py +++ b/lib/db_revision.py @@ -23,122 +23,115 @@ class DBRevision: return f"[{self.__str__()}]" def links_to(self, db, project, package): - db.cursor().execute( - "INSERT INTO links (revision_id, project, package) VALUES (%s,%s,%s)", - (self.dbid, project, package), - ) + with db.cursor() as cur: + cur.execute( + "INSERT INTO links (revision_id, project, package) VALUES (%s,%s,%s)", + (self.dbid, project, package), + ) @classmethod def import_obs_rev(cls, db, revision): - cur = db.cursor() - cur.execute( - """INSERT INTO revisions (project, package, rev, unexpanded_srcmd5, commit_time, userid, comment, request_number) - VALUES(%s, %s, %s, %s, %s, %s, %s, %s)""", - ( - revision.project, - revision.package, - revision.rev, - revision.unexpanded_srcmd5, - revision.time, - revision.userid, - revision.comment, - revision.request_number, - ), - ) - cur.close() + with db.cursor() as cur: + cur.execute( + """INSERT INTO revisions (project, package, rev, unexpanded_srcmd5, commit_time, userid, comment, request_number) + VALUES(%s, %s, %s, %s, %s, %s, %s, %s)""", + ( + revision.project, + revision.package, + revision.rev, + revision.unexpanded_srcmd5, + revision.time, + revision.userid, + revision.comment, + revision.request_number, + ), + ) return cls.fetch_revision(db, revision.project, revision.package, revision.rev) - @classmethod - def fetch_revision(cls, db, project, package, rev): - cur = db.cursor() - cur.execute( - "SELECT * FROM revisions where project=%s and package=%s and rev=%s", - (project, package, str(rev)), - ) - row = cur.fetchone() - cur.close() + @staticmethod + def fetch_revision(db, project, package, rev): + with db.cursor() as cur: + cur.execute( + "SELECT * FROM revisions where project=%s and package=%s and rev=%s", + (project, package, str(rev)), + ) + row = cur.fetchone() return DBRevision(row) - @classmethod - def latest_revision(cls, db, project, package): - cur = db.cursor() - cur.execute( - "SELECT MAX(rev) FROM revisions where project=%s and package=%s", - (project, package), - ) - max = cur.fetchone()[0] - cur.close() + @staticmethod + def latest_revision(db, project, package): + with db.cursor() as cur: + cur.execute( + "SELECT MAX(rev) FROM revisions where project=%s and package=%s", + (project, package), + ) + max = cur.fetchone()[0] if max: return DBRevision.fetch_revision(db, project, package, int(max)) return None - @classmethod - def all_revisions(cls, db, project, package): - cur = db.cursor() - cur.execute( - "SELECT * FROM revisions where project=%s and package=%s", - (project, package), - ) - ret = [] - for row in cur.fetchall(): - ret.append(DBRevision(row)) - cur.close() + @staticmethod + def all_revisions(db, project, package): + with db.cursor() as cur: + cur.execute( + "SELECT * FROM revisions where project=%s and package=%s", + (project, package), + ) + ret = [] + for row in cur.fetchall(): + ret.append(DBRevision(row)) return ret def linked_rev(self, db): - cur = db.cursor() - cur.execute( - "SELECT project,package FROM links where revision_id=%s", (self.dbid,) - ) - row = cur.fetchone() - if not row: - cur.close() - return None - project, package = row - cur.execute( - "SELECT * FROM revisions where project=%s and package=%s and commit_time <= %s ORDER BY commit_time DESC LIMIT 1", - (project, package, self.commit_time), - ) - revisions = [DBRevision(row) for row in cur.fetchall()] - cur.close() + with db.cursor() as cur: + cur.execute( + "SELECT project,package FROM links where revision_id=%s", (self.dbid,) + ) + row = cur.fetchone() + if not row: + return None + project, package = row + cur.execute( + "SELECT * FROM revisions where project=%s and package=%s and commit_time <= %s ORDER BY commit_time DESC LIMIT 1", + (project, package, self.commit_time), + ) + revisions = [DBRevision(row) for row in cur.fetchall()] if revisions: return revisions[0] return None def set_broken(self, db): - cur = db.cursor() - cur.execute("UPDATE revisions SET broken=TRUE where id=%s", (self.dbid,)) - cur.close() + with db.cursor() as cur: + cur.execute("UPDATE revisions SET broken=TRUE where id=%s", (self.dbid,)) + def import_dir_list(self, db, xml): - cur = db.cursor() - cur.execute( - "UPDATE revisions SET expanded_srcmd5=%s where id=%s", - (xml.get("srcmd5"), self.dbid), - ) - for entry in xml.findall("entry"): + with db.cursor() as cur: cur.execute( - """INSERT INTO files (name, md5, size, mtime, revision_id) - VALUES (%s,%s,%s,%s,%s)""", - ( - entry.get("name"), - entry.get("md5"), - entry.get("size"), - entry.get("mtime"), - self.dbid, - ), + "UPDATE revisions SET expanded_srcmd5=%s where id=%s", + (xml.get("srcmd5"), self.dbid), ) - cur.close() + for entry in xml.findall("entry"): + cur.execute( + """INSERT INTO files (name, md5, size, mtime, revision_id) + VALUES (%s,%s,%s,%s,%s)""", + ( + entry.get("name"), + entry.get("md5"), + entry.get("size"), + entry.get("mtime"), + self.dbid, + ), + ) - @classmethod - def requests_to_fetch(self, db, project, package): - cur = db.cursor() - cur.execute( - """SELECT request_number FROM revisions revs left join requests - reqs on reqs.number=revs.request_number where reqs.id is null and - revs.request_number is not null and project=%s and package=%s;""", - (project, package), - ) - ret = [row[0] for row in cur.fetchall()] - cur.close() + @staticmethod + def requests_to_fetch(db, project, package): + with db.cursor() as cur: + cur.execute( + """SELECT request_number FROM revisions revs left join requests + reqs on reqs.number=revs.request_number where reqs.id is null and + revs.request_number is not null and project=%s and package=%s;""", + (project, package), + ) + ret = [row[0] for row in cur.fetchall()] return ret diff --git a/lib/importer.py b/lib/importer.py index e38fa1a..34c5f10 100644 --- a/lib/importer.py +++ b/lib/importer.py @@ -10,6 +10,8 @@ from lib.obs import OBS from lib.obs_revision import OBSRevision from lib.proxy_sha256 import ProxySHA256, md5, sha256 from lib.request import Request +import xml.etree.ElementTree as ET +from lib.user import User def _files_hash(hash_alg, dirpath): @@ -149,14 +151,20 @@ class Importer: for project, _, api_url in self.projects: self.obs.change_url(api_url) self.update_db_package(db, project, self.package) - cur = db.cursor() - cur.execute( - "SELECT DISTINCT l.project, l.package from links l join revisions r on r.id=l.revision_id WHERE r.project=%s AND r.package=%s", - (project, self.package), - ) - for row in cur.fetchall(): - (lproject, lpackage) = row - self.update_db_package(db, lproject, lpackage) + with db.cursor() as cur: + cur.execute( + "SELECT DISTINCT l.project, l.package from links l join revisions r on r.id=l.revision_id WHERE r.project=%s AND r.package=%s", + (project, self.package), + ) + for row in cur.fetchall(): + (lproject, lpackage) = row + self.update_db_package(db, lproject, lpackage) + + missing_users = User.missing_users(db) + for userid in missing_users: + missing_user = self.obs.user(userid) + if missing_user: + missing_user.import_into_db(db) for rev in DBRevision.all_revisions(db, project, self.package): # TODO move into SELECT @@ -173,10 +181,9 @@ class Importer: else: rev.set_broken(db) - for number in DBRevision.requests_to_fetch(db, project, self.package): - self.obs.request(number).import_into_db(db) + for number in DBRevision.requests_to_fetch(db, project, self.package): + self.obs.request(number).import_into_db(db) - cur.close() db.conn.commit() def import_all_revisions(self, gc): diff --git a/lib/obs.py b/lib/obs.py index 77c7c7a..51e72e3 100644 --- a/lib/obs.py +++ b/lib/obs.py @@ -8,6 +8,7 @@ from urllib.error import HTTPError import osc.core from lib.request import Request +from lib.user import User # Add a retry wrapper for some of the HTTP actions. @@ -84,6 +85,14 @@ class OBS: return None return root + def _user(self, userid, **params): + try: + root = self._xml(f"/person/{userid}", **params) + except HTTPError: + logging.error(f"UserID {userid} not found") + return None + return root + def _link(self, project, package, rev): try: root = self._xml(f"source/{project}/{package}/_link", rev=rev) @@ -122,6 +131,11 @@ class OBS: if root is not None: return Request().parse(root) + def user(self, userid): + root = self._user(userid) + if root is not None: + return User().parse(root, userid) + def files(self, project, package, revision): root = self._xml(f"source/{project}/{package}", rev=revision, expand=1) return [ diff --git a/lib/request.py b/lib/request.py index 33c62c1..f5ee787 100644 --- a/lib/request.py +++ b/lib/request.py @@ -34,24 +34,23 @@ class Request: return f"[{self.__str__()}]" def import_into_db(self, db): - cur = db.cursor() - cur.execute( - """INSERT INTO requests (number, creator, type, state, - source_project, source_package, source_rev) - VALUES (%s,%s,%s,%s,%s,%s,%s) RETURNING id""", - ( - self.number, - self.creator, - self.type_, - self.state, - self.source_project, - self.source_package, - self.source_rev, - ), - ) - rowid = cur.fetchone()[0] - cur.execute( - "UPDATE revisions SET request_id=%s WHERE request_number=%s", - (rowid, self.number), - ) - cur.close() + with db.cursor() as cur: + cur.execute( + """INSERT INTO requests (number, creator, type, state, + source_project, source_package, source_rev) + VALUES (%s,%s,%s,%s,%s,%s,%s) RETURNING id""", + ( + self.number, + self.creator, + self.type_, + self.state, + self.source_project, + self.source_package, + self.source_rev, + ), + ) + rowid = cur.fetchone()[0] + cur.execute( + "UPDATE revisions SET request_id=%s WHERE request_number=%s", + (rowid, self.number), + ) diff --git a/lib/user.py b/lib/user.py new file mode 100644 index 0000000..68fe415 --- /dev/null +++ b/lib/user.py @@ -0,0 +1,50 @@ +FAKE_ACCOUNTS = ('unknown', 'buildservice-autocommit', 'autobuild', '_service') + + +class User: + def parse(self, xml, userid): + self.userid = userid + self.realname = xml.find("realname").text + self.email = xml.find("email").text + if self.email is None: + self.email = "" + + return self + + def __str__(self): + return f"User {self.userid}: {self.realname} {self.email}" + + def __repr__(self): + return f"[{self.__str__()}]" + + def import_into_db(self, db): + with db.cursor() as cur: + cur.execute( + """INSERT INTO users (userid, realname, email) + VALUES (%s,%s,%s)""", + ( + self.userid, + self.realname, + self.email, + ), + ) + + @staticmethod + def lookup(db, userid): + with db.cursor() as cur: + cur.execute("SELECT * FROM users where userid=%s", (userid,)) + row = cur.fetchone() + if not row: + return None + return row + + @staticmethod + def missing_users(db): + with db.cursor() as cur: + cur.execute( + """SELECT DISTINCT revisions.userid + FROM revisions LEFT JOIN users ON revisions.userid = users.userid + WHERE users.userid IS NULL AND revisions.userid NOT IN {}""".format(FAKE_ACCOUNTS) + ) + missing_users = [row[0] for row in cur.fetchall()] + return missing_users