diff --git a/lib/db.py b/lib/db.py index 097e675..7958c50 100644 --- a/lib/db.py +++ b/lib/db.py @@ -22,20 +22,19 @@ class DB: def schema_version(self): # create a cursor - cur = self.conn.cursor() + with self.conn.cursor() as cur: - # execute a statement - try: - cur.execute("SELECT MAX(version) from scheme") - except psycopg2.errors.UndefinedTable as error: - cur.close() - self.close() - self.connect() - return 0 + # execute a statement + try: + cur.execute("SELECT MAX(version) from scheme") + except psycopg2.errors.UndefinedTable as error: + cur.close() + self.close() + self.connect() + return 0 - db_version = cur.fetchone() + db_version = cur.fetchone() - cur.close() return db_version[0] def close(self): @@ -170,15 +169,13 @@ class DB: if (schema_version + 1) not in schemes: return try: - cur = self.conn.cursor() - # create table one by one - for version, commands in schemes.items(): - if version <= schema_version: - continue - for command in commands: - cur.execute(command) - # close communication with the PostgreSQL database server - cur.close() + with self.conn.cursor() as cur: + # create table one by one + for version, commands in schemes.items(): + if version <= schema_version: + continue + for command in commands: + cur.execute(command) # commit the changes self.conn.commit() except (Exception, psycopg2.DatabaseError) as error: diff --git a/lib/db_revision.py b/lib/db_revision.py index b283652..dbe874d 100644 --- a/lib/db_revision.py +++ b/lib/db_revision.py @@ -23,122 +23,115 @@ class DBRevision: return f"[{self.__str__()}]" def links_to(self, db, project, package): - db.cursor().execute( - "INSERT INTO links (revision_id, project, package) VALUES (%s,%s,%s)", - (self.dbid, project, package), - ) + with db.cursor() as cur: + cur.execute( + "INSERT INTO links (revision_id, project, package) VALUES (%s,%s,%s)", + (self.dbid, project, package), + ) @classmethod def import_obs_rev(cls, db, revision): - cur = db.cursor() - cur.execute( - """INSERT INTO revisions (project, package, rev, unexpanded_srcmd5, commit_time, userid, comment, request_number) - VALUES(%s, %s, %s, %s, %s, %s, %s, %s)""", - ( - revision.project, - revision.package, - revision.rev, - revision.unexpanded_srcmd5, - revision.time, - revision.userid, - revision.comment, - revision.request_number, - ), - ) - cur.close() + with db.cursor() as cur: + cur.execute( + """INSERT INTO revisions (project, package, rev, unexpanded_srcmd5, commit_time, userid, comment, request_number) + VALUES(%s, %s, %s, %s, %s, %s, %s, %s)""", + ( + revision.project, + revision.package, + revision.rev, + revision.unexpanded_srcmd5, + revision.time, + revision.userid, + revision.comment, + revision.request_number, + ), + ) return cls.fetch_revision(db, revision.project, revision.package, revision.rev) - @classmethod - def fetch_revision(cls, db, project, package, rev): - cur = db.cursor() - cur.execute( - "SELECT * FROM revisions where project=%s and package=%s and rev=%s", - (project, package, str(rev)), - ) - row = cur.fetchone() - cur.close() + @staticmethod + def fetch_revision(db, project, package, rev): + with db.cursor() as cur: + cur.execute( + "SELECT * FROM revisions where project=%s and package=%s and rev=%s", + (project, package, str(rev)), + ) + row = cur.fetchone() return DBRevision(row) - @classmethod - def latest_revision(cls, db, project, package): - cur = db.cursor() - cur.execute( - "SELECT MAX(rev) FROM revisions where project=%s and package=%s", - (project, package), - ) - max = cur.fetchone()[0] - cur.close() + @staticmethod + def latest_revision(db, project, package): + with db.cursor() as cur: + cur.execute( + "SELECT MAX(rev) FROM revisions where project=%s and package=%s", + (project, package), + ) + max = cur.fetchone()[0] if max: return DBRevision.fetch_revision(db, project, package, int(max)) return None - @classmethod - def all_revisions(cls, db, project, package): - cur = db.cursor() - cur.execute( - "SELECT * FROM revisions where project=%s and package=%s", - (project, package), - ) - ret = [] - for row in cur.fetchall(): - ret.append(DBRevision(row)) - cur.close() + @staticmethod + def all_revisions(db, project, package): + with db.cursor() as cur: + cur.execute( + "SELECT * FROM revisions where project=%s and package=%s", + (project, package), + ) + ret = [] + for row in cur.fetchall(): + ret.append(DBRevision(row)) return ret def linked_rev(self, db): - cur = db.cursor() - cur.execute( - "SELECT project,package FROM links where revision_id=%s", (self.dbid,) - ) - row = cur.fetchone() - if not row: - cur.close() - return None - project, package = row - cur.execute( - "SELECT * FROM revisions where project=%s and package=%s and commit_time <= %s ORDER BY commit_time DESC LIMIT 1", - (project, package, self.commit_time), - ) - revisions = [DBRevision(row) for row in cur.fetchall()] - cur.close() + with db.cursor() as cur: + cur.execute( + "SELECT project,package FROM links where revision_id=%s", (self.dbid,) + ) + row = cur.fetchone() + if not row: + return None + project, package = row + cur.execute( + "SELECT * FROM revisions where project=%s and package=%s and commit_time <= %s ORDER BY commit_time DESC LIMIT 1", + (project, package, self.commit_time), + ) + revisions = [DBRevision(row) for row in cur.fetchall()] if revisions: return revisions[0] return None def set_broken(self, db): - cur = db.cursor() - cur.execute("UPDATE revisions SET broken=TRUE where id=%s", (self.dbid,)) - cur.close() + with db.cursor() as cur: + cur.execute("UPDATE revisions SET broken=TRUE where id=%s", (self.dbid,)) + def import_dir_list(self, db, xml): - cur = db.cursor() - cur.execute( - "UPDATE revisions SET expanded_srcmd5=%s where id=%s", - (xml.get("srcmd5"), self.dbid), - ) - for entry in xml.findall("entry"): + with db.cursor() as cur: cur.execute( - """INSERT INTO files (name, md5, size, mtime, revision_id) - VALUES (%s,%s,%s,%s,%s)""", - ( - entry.get("name"), - entry.get("md5"), - entry.get("size"), - entry.get("mtime"), - self.dbid, - ), + "UPDATE revisions SET expanded_srcmd5=%s where id=%s", + (xml.get("srcmd5"), self.dbid), ) - cur.close() + for entry in xml.findall("entry"): + cur.execute( + """INSERT INTO files (name, md5, size, mtime, revision_id) + VALUES (%s,%s,%s,%s,%s)""", + ( + entry.get("name"), + entry.get("md5"), + entry.get("size"), + entry.get("mtime"), + self.dbid, + ), + ) - @classmethod - def requests_to_fetch(self, db, project, package): - cur = db.cursor() - cur.execute( - """SELECT request_number FROM revisions revs left join requests - reqs on reqs.number=revs.request_number where reqs.id is null and - revs.request_number is not null and project=%s and package=%s;""", - (project, package), - ) - ret = [row[0] for row in cur.fetchall()] - cur.close() + @staticmethod + def requests_to_fetch(db, project, package): + with db.cursor() as cur: + cur.execute( + """SELECT request_number FROM revisions revs left join requests + reqs on reqs.number=revs.request_number where reqs.id is null and + revs.request_number is not null and project=%s and package=%s;""", + (project, package), + ) + ret = [row[0] for row in cur.fetchall()] return ret diff --git a/lib/importer.py b/lib/importer.py index 7a5a17c..67eb34d 100644 --- a/lib/importer.py +++ b/lib/importer.py @@ -11,7 +11,7 @@ from lib.obs_revision import OBSRevision from lib.proxy_sha256 import ProxySHA256, md5, sha256 from lib.request import Request import xml.etree.ElementTree as ET -from lib.users import User +from lib.user import User def _files_hash(hash_alg, dirpath): @@ -151,47 +151,39 @@ class Importer: for project, _, api_url in self.projects: self.obs.change_url(api_url) self.update_db_package(db, project, self.package) - cur = db.cursor() - cur.execute( - "SELECT DISTINCT l.project, l.package from links l join revisions r on r.id=l.revision_id WHERE r.project=%s AND r.package=%s", - (project, self.package), - ) - for row in cur.fetchall(): - (lproject, lpackage) = row - self.update_db_package(db, lproject, lpackage) - - missing_users = User.get_batch(db) - print(missing_users) - for userid in missing_users: - missing_user = self.obs.user(userid) - if missing_user is not None: - missing_user.import_into_db(db) - else: - logging.info("No missing users") - - for rev in DBRevision.all_revisions(db, project, self.package): - # TODO move into SELECT - if rev.broken or rev.expanded_srcmd5: - continue - linked_rev = rev.linked_rev(db) - if linked_rev: - linked_rev = linked_rev.unexpanded_srcmd5 - list = self.obs.list( - project, self.package, rev.unexpanded_srcmd5, linked_rev + with db.cursor() as cur: + cur.execute( + "SELECT DISTINCT l.project, l.package from links l join revisions r on r.id=l.revision_id WHERE r.project=%s AND r.package=%s", + (project, self.package), ) - if list: - rev.import_dir_list(db, list) - else: - rev.set_broken(db) + for row in cur.fetchall(): + (lproject, lpackage) = row + self.update_db_package(db, lproject, lpackage) - # fake_accounts = ["unknown","buildservice-autocommit", "autobuild", "_service"] - # if User.user_lookup(db, rev.userid) is None and rev.userid not in fake_accounts: - # self.obs.users(rev.userid).import_into_db(db) + missing_users = User.missing_users(db) + for userid in missing_users: + missing_user = self.obs.user(userid) + if missing_user: + missing_user.import_into_db(db) - for number in DBRevision.requests_to_fetch(db, project, self.package): - self.obs.request(number).import_into_db(db) + for rev in DBRevision.all_revisions(db, project, self.package): + # TODO move into SELECT + if rev.broken or rev.expanded_srcmd5: + continue + linked_rev = rev.linked_rev(db) + if linked_rev: + linked_rev = linked_rev.unexpanded_srcmd5 + list = self.obs.list( + project, self.package, rev.unexpanded_srcmd5, linked_rev + ) + if list: + rev.import_dir_list(db, list) + else: + rev.set_broken(db) + + for number in DBRevision.requests_to_fetch(db, project, self.package): + self.obs.request(number).import_into_db(db) - cur.close() db.conn.commit() def import_all_revisions(self, gc): diff --git a/lib/obs.py b/lib/obs.py index 661defe..51e72e3 100644 --- a/lib/obs.py +++ b/lib/obs.py @@ -8,7 +8,7 @@ from urllib.error import HTTPError import osc.core from lib.request import Request -from lib.users import User +from lib.user import User # Add a retry wrapper for some of the HTTP actions. diff --git a/lib/request.py b/lib/request.py index 33c62c1..f5ee787 100644 --- a/lib/request.py +++ b/lib/request.py @@ -34,24 +34,23 @@ class Request: return f"[{self.__str__()}]" def import_into_db(self, db): - cur = db.cursor() - cur.execute( - """INSERT INTO requests (number, creator, type, state, - source_project, source_package, source_rev) - VALUES (%s,%s,%s,%s,%s,%s,%s) RETURNING id""", - ( - self.number, - self.creator, - self.type_, - self.state, - self.source_project, - self.source_package, - self.source_rev, - ), - ) - rowid = cur.fetchone()[0] - cur.execute( - "UPDATE revisions SET request_id=%s WHERE request_number=%s", - (rowid, self.number), - ) - cur.close() + with db.cursor() as cur: + cur.execute( + """INSERT INTO requests (number, creator, type, state, + source_project, source_package, source_rev) + VALUES (%s,%s,%s,%s,%s,%s,%s) RETURNING id""", + ( + self.number, + self.creator, + self.type_, + self.state, + self.source_project, + self.source_package, + self.source_rev, + ), + ) + rowid = cur.fetchone()[0] + cur.execute( + "UPDATE revisions SET request_id=%s WHERE request_number=%s", + (rowid, self.number), + ) diff --git a/lib/user.py b/lib/user.py new file mode 100644 index 0000000..68fe415 --- /dev/null +++ b/lib/user.py @@ -0,0 +1,50 @@ +FAKE_ACCOUNTS = ('unknown', 'buildservice-autocommit', 'autobuild', '_service') + + +class User: + def parse(self, xml, userid): + self.userid = userid + self.realname = xml.find("realname").text + self.email = xml.find("email").text + if self.email is None: + self.email = "" + + return self + + def __str__(self): + return f"User {self.userid}: {self.realname} {self.email}" + + def __repr__(self): + return f"[{self.__str__()}]" + + def import_into_db(self, db): + with db.cursor() as cur: + cur.execute( + """INSERT INTO users (userid, realname, email) + VALUES (%s,%s,%s)""", + ( + self.userid, + self.realname, + self.email, + ), + ) + + @staticmethod + def lookup(db, userid): + with db.cursor() as cur: + cur.execute("SELECT * FROM users where userid=%s", (userid,)) + row = cur.fetchone() + if not row: + return None + return row + + @staticmethod + def missing_users(db): + with db.cursor() as cur: + cur.execute( + """SELECT DISTINCT revisions.userid + FROM revisions LEFT JOIN users ON revisions.userid = users.userid + WHERE users.userid IS NULL AND revisions.userid NOT IN {}""".format(FAKE_ACCOUNTS) + ) + missing_users = [row[0] for row in cur.fetchall()] + return missing_users diff --git a/lib/users.py b/lib/users.py deleted file mode 100644 index f26b34f..0000000 --- a/lib/users.py +++ /dev/null @@ -1,55 +0,0 @@ -FAKE_ACCOUNTS = ('unknown','buildservice-autocommit', 'autobuild', '_service') - -class User: - def parse(self, xml, userid): - self.userid = userid - self.realname = xml.find("realname").text - self.email = xml.find("email").text - if self.email == None: - self.email = "" - - return self - - def __str__(self): - return f"User {self.userid}: {self.realname} {self.email}" - - def __repr__(self): - return f"[{self.__str__()}]" - - def import_into_db(self, db): - cur = db.cursor() - cur.execute( - """INSERT INTO users (userid, realname, email) - VALUES (%s,%s,%s) RETURNING id""", - ( - self.userid, - self.realname, - self.email, - ), - ) - cur.close() - - def lookup(db, userid): - cur= db.cursor() - cur.execute("SELECT * FROM users where userid=%s", (userid,)) - row = cur.fetchone() - if not row: - cur.close() - return None - return row - - @classmethod - def get_batch(self, db): - cur = db.cursor() - cur.execute( - """SELECT DISTINCT revisions.userid - FROM revisions LEFT JOIN users ON revisions.userid = users.userid - WHERE users.userid IS NULL AND revisions.userid NOT IN - ('unknown','buildservice-autocommit', 'autobuild', '_service')""" - ) - missing_users = [row[0] for row in cur.fetchall()] - if not missing_users: - cur.close() - return [] - return missing_users - \ No newline at end of file