add a user database #3

Merged
Ghost merged 6 commits from user-db into main 2022-10-26 14:05:48 +02:00
6 changed files with 217 additions and 146 deletions

View File

@ -22,7 +22,7 @@ class DB:
def schema_version(self): def schema_version(self):
# create a cursor # create a cursor
cur = self.conn.cursor() with self.conn.cursor() as cur:
# execute a statement # execute a statement
try: try:
@ -35,7 +35,6 @@ class DB:
db_version = cur.fetchone() db_version = cur.fetchone()
cur.close()
return db_version[0] return db_version[0]
def close(self): def close(self):
@ -154,20 +153,29 @@ class DB:
"create index request_number_idx2 on requests(number)", "create index request_number_idx2 on requests(number)",
"UPDATE scheme SET version=12", "UPDATE scheme SET version=12",
) )
schemes[13] = (
"""
CREATE TABLE users (
id SERIAL PRIMARY KEY,
userid VARCHAR(255) NOT NULL,
email VARCHAR(255) NOT NULL,
realname VARCHAR(255) NOT NULL
)
""",
"UPDATE scheme SET version=13",
)
schema_version = self.schema_version() schema_version = self.schema_version()
if (schema_version + 1) not in schemes: if (schema_version + 1) not in schemes:
return return
try: try:
cur = self.conn.cursor() with self.conn.cursor() as cur:
# create table one by one # create table one by one
for version, commands in schemes.items(): for version, commands in schemes.items():
if version <= schema_version: if version <= schema_version:
continue continue
for command in commands: for command in commands:
cur.execute(command) cur.execute(command)
# close communication with the PostgreSQL database server
cur.close()
# commit the changes # commit the changes
self.conn.commit() self.conn.commit()
except (Exception, psycopg2.DatabaseError) as error: except (Exception, psycopg2.DatabaseError) as error:

View File

@ -23,14 +23,15 @@ class DBRevision:
return f"[{self.__str__()}]" return f"[{self.__str__()}]"
def links_to(self, db, project, package): def links_to(self, db, project, package):
db.cursor().execute( with db.cursor() as cur:
cur.execute(
"INSERT INTO links (revision_id, project, package) VALUES (%s,%s,%s)", "INSERT INTO links (revision_id, project, package) VALUES (%s,%s,%s)",
(self.dbid, project, package), (self.dbid, project, package),
) )
@classmethod @classmethod
def import_obs_rev(cls, db, revision): def import_obs_rev(cls, db, revision):
cur = db.cursor() with db.cursor() as cur:
cur.execute( cur.execute(
"""INSERT INTO revisions (project, package, rev, unexpanded_srcmd5, commit_time, userid, comment, request_number) """INSERT INTO revisions (project, package, rev, unexpanded_srcmd5, commit_time, userid, comment, request_number)
VALUES(%s, %s, %s, %s, %s, %s, %s, %s)""", VALUES(%s, %s, %s, %s, %s, %s, %s, %s)""",
@ -45,36 +46,33 @@ class DBRevision:
revision.request_number, revision.request_number,
), ),
) )
cur.close()
return cls.fetch_revision(db, revision.project, revision.package, revision.rev) return cls.fetch_revision(db, revision.project, revision.package, revision.rev)
@classmethod @staticmethod
def fetch_revision(cls, db, project, package, rev): def fetch_revision(db, project, package, rev):
cur = db.cursor() with db.cursor() as cur:
cur.execute( cur.execute(
"SELECT * FROM revisions where project=%s and package=%s and rev=%s", "SELECT * FROM revisions where project=%s and package=%s and rev=%s",
(project, package, str(rev)), (project, package, str(rev)),
) )
row = cur.fetchone() row = cur.fetchone()
cur.close()
return DBRevision(row) return DBRevision(row)
@classmethod @staticmethod
def latest_revision(cls, db, project, package): def latest_revision(db, project, package):
cur = db.cursor() with db.cursor() as cur:
cur.execute( cur.execute(
"SELECT MAX(rev) FROM revisions where project=%s and package=%s", "SELECT MAX(rev) FROM revisions where project=%s and package=%s",
(project, package), (project, package),
) )
max = cur.fetchone()[0] max = cur.fetchone()[0]
cur.close()
if max: if max:
return DBRevision.fetch_revision(db, project, package, int(max)) return DBRevision.fetch_revision(db, project, package, int(max))
return None return None
@classmethod @staticmethod
def all_revisions(cls, db, project, package): def all_revisions(db, project, package):
cur = db.cursor() with db.cursor() as cur:
cur.execute( cur.execute(
"SELECT * FROM revisions where project=%s and package=%s", "SELECT * FROM revisions where project=%s and package=%s",
(project, package), (project, package),
@ -82,17 +80,15 @@ class DBRevision:
ret = [] ret = []
for row in cur.fetchall(): for row in cur.fetchall():
ret.append(DBRevision(row)) ret.append(DBRevision(row))
cur.close()
return ret return ret
def linked_rev(self, db): def linked_rev(self, db):
cur = db.cursor() with db.cursor() as cur:
cur.execute( cur.execute(
"SELECT project,package FROM links where revision_id=%s", (self.dbid,) "SELECT project,package FROM links where revision_id=%s", (self.dbid,)
) )
row = cur.fetchone() row = cur.fetchone()
if not row: if not row:
cur.close()
return None return None
project, package = row project, package = row
cur.execute( cur.execute(
@ -100,18 +96,17 @@ class DBRevision:
(project, package, self.commit_time), (project, package, self.commit_time),
) )
revisions = [DBRevision(row) for row in cur.fetchall()] revisions = [DBRevision(row) for row in cur.fetchall()]
cur.close()
if revisions: if revisions:
return revisions[0] return revisions[0]
return None return None
def set_broken(self, db): def set_broken(self, db):
cur = db.cursor() with db.cursor() as cur:
cur.execute("UPDATE revisions SET broken=TRUE where id=%s", (self.dbid,)) cur.execute("UPDATE revisions SET broken=TRUE where id=%s", (self.dbid,))
cur.close()
def import_dir_list(self, db, xml): def import_dir_list(self, db, xml):
cur = db.cursor() with db.cursor() as cur:
cur.execute( cur.execute(
"UPDATE revisions SET expanded_srcmd5=%s where id=%s", "UPDATE revisions SET expanded_srcmd5=%s where id=%s",
(xml.get("srcmd5"), self.dbid), (xml.get("srcmd5"), self.dbid),
@ -128,11 +123,10 @@ class DBRevision:
self.dbid, self.dbid,
), ),
) )
cur.close()
@classmethod @staticmethod
def requests_to_fetch(self, db, project, package): def requests_to_fetch(db, project, package):
cur = db.cursor() with db.cursor() as cur:
cur.execute( cur.execute(
"""SELECT request_number FROM revisions revs left join requests """SELECT request_number FROM revisions revs left join requests
reqs on reqs.number=revs.request_number where reqs.id is null and reqs on reqs.number=revs.request_number where reqs.id is null and
@ -140,5 +134,4 @@ class DBRevision:
(project, package), (project, package),
) )
ret = [row[0] for row in cur.fetchall()] ret = [row[0] for row in cur.fetchall()]
cur.close()
return ret return ret

View File

@ -10,6 +10,8 @@ from lib.obs import OBS
from lib.obs_revision import OBSRevision from lib.obs_revision import OBSRevision
from lib.proxy_sha256 import ProxySHA256, md5, sha256 from lib.proxy_sha256 import ProxySHA256, md5, sha256
from lib.request import Request from lib.request import Request
import xml.etree.ElementTree as ET
from lib.user import User
Ghost marked this conversation as resolved Outdated
Outdated
Review

Please rename that to lib.user

Please rename that to lib.user
def _files_hash(hash_alg, dirpath): def _files_hash(hash_alg, dirpath):
@ -149,7 +151,7 @@ class Importer:
for project, _, api_url in self.projects: for project, _, api_url in self.projects:
self.obs.change_url(api_url) self.obs.change_url(api_url)
self.update_db_package(db, project, self.package) self.update_db_package(db, project, self.package)
cur = db.cursor() with db.cursor() as cur:
cur.execute( cur.execute(
"SELECT DISTINCT l.project, l.package from links l join revisions r on r.id=l.revision_id WHERE r.project=%s AND r.package=%s", "SELECT DISTINCT l.project, l.package from links l join revisions r on r.id=l.revision_id WHERE r.project=%s AND r.package=%s",
(project, self.package), (project, self.package),
@ -158,6 +160,12 @@ class Importer:
(lproject, lpackage) = row (lproject, lpackage) = row
self.update_db_package(db, lproject, lpackage) self.update_db_package(db, lproject, lpackage)
missing_users = User.missing_users(db)
Ghost marked this conversation as resolved Outdated

I think everything from here on can be moved out of the with block (at least, I do not see any further use of cur).

I think everything from here on can be moved out of the `with` block (at least, I do not see any further use of `cur`).
for userid in missing_users:
Ghost marked this conversation as resolved Outdated
Outdated
Review

A leftover print statement.

A leftover print statement.
missing_user = self.obs.user(userid)
if missing_user:
missing_user.import_into_db(db)
for rev in DBRevision.all_revisions(db, project, self.package): for rev in DBRevision.all_revisions(db, project, self.package):
# TODO move into SELECT # TODO move into SELECT
Ghost marked this conversation as resolved Outdated
Outdated
Review

That else is strange. If there are no missing users, I would expect get_batch to return an empty list, so you would never even enter the for loop's body.

That else is strange. If there are no missing users, I would expect get_batch to return an empty list, so you would never even enter the for loop's body.

Indeed.

IMHO avoid is None, == []. Use the truth value (if missing_user:)

Indeed. IMHO avoid `is None`, `== []`. Use the truth value (`if missing_user:`)
if rev.broken or rev.expanded_srcmd5: if rev.broken or rev.expanded_srcmd5:
@ -176,7 +184,6 @@ class Importer:
for number in DBRevision.requests_to_fetch(db, project, self.package): for number in DBRevision.requests_to_fetch(db, project, self.package):
self.obs.request(number).import_into_db(db) self.obs.request(number).import_into_db(db)
cur.close()
db.conn.commit() db.conn.commit()
Ghost marked this conversation as resolved Outdated
Outdated
Review

Dead code?

Dead code?
def import_all_revisions(self, gc): def import_all_revisions(self, gc):

View File

@ -8,6 +8,7 @@ from urllib.error import HTTPError
import osc.core import osc.core
from lib.request import Request from lib.request import Request
from lib.user import User
# Add a retry wrapper for some of the HTTP actions. # Add a retry wrapper for some of the HTTP actions.
@ -84,6 +85,14 @@ class OBS:
return None return None
return root return root
def _user(self, userid, **params):
try:
root = self._xml(f"/person/{userid}", **params)
except HTTPError:
logging.error(f"UserID {userid} not found")
return None
return root
def _link(self, project, package, rev): def _link(self, project, package, rev):
try: try:
root = self._xml(f"source/{project}/{package}/_link", rev=rev) root = self._xml(f"source/{project}/{package}/_link", rev=rev)
@ -122,6 +131,11 @@ class OBS:
if root is not None: if root is not None:
return Request().parse(root) return Request().parse(root)
def user(self, userid):
    """Return a parsed ``User`` for *userid*, or ``None`` if it cannot be fetched.

    Args:
        userid: Login name to look up.

    Returns:
        The object produced by ``User().parse(root, userid)``, or ``None``
        when ``self._user`` yields no document.
    """
    root = self._user(userid)
    if root is None:
        # Nothing to parse; make the "no user" result explicit.
        return None
    return User().parse(root, userid)
aplanas marked this conversation as resolved
Review

It is OK how you write it, but I would expect root and userid as part of the __init__ constructor (give initial state).

It is OK how you write it, but I would expect `root` and `userid` as part of the `__init__` constructor (give initial state).
Review

Not root, as it is OBS-specific, and we also want to be able to create Users from the DB.

Not `root`, as it is OBS-specific, and we also want to be able to create Users from the DB.
def files(self, project, package, revision): def files(self, project, package, revision):
root = self._xml(f"source/{project}/{package}", rev=revision, expand=1) root = self._xml(f"source/{project}/{package}", rev=revision, expand=1)
return [ return [

View File

@ -34,7 +34,7 @@ class Request:
return f"[{self.__str__()}]" return f"[{self.__str__()}]"
def import_into_db(self, db): def import_into_db(self, db):
cur = db.cursor() with db.cursor() as cur:
cur.execute( cur.execute(
"""INSERT INTO requests (number, creator, type, state, """INSERT INTO requests (number, creator, type, state,
source_project, source_package, source_rev) source_project, source_package, source_rev)
@ -54,4 +54,3 @@ class Request:
"UPDATE revisions SET request_id=%s WHERE request_number=%s", "UPDATE revisions SET request_id=%s WHERE request_number=%s",
(rowid, self.number), (rowid, self.number),
) )
cur.close()

50
lib/user.py Normal file
View File

@ -0,0 +1,50 @@
# User ids that missing_users() never reports as missing.
FAKE_ACCOUNTS = ('unknown', 'buildservice-autocommit', 'autobuild', '_service')


class User:
    """A user record with ``userid``, ``realname`` and ``email`` attributes.

    Instances are filled by :meth:`parse` and written to the ``users`` table
    by :meth:`import_into_db`.
    """

    def parse(self, xml, userid):
        """Fill this user from an XML element and return ``self``.

        Args:
            xml: Element with ``realname`` and ``email`` children; it must
                provide ``findtext()`` (presumably an
                ``xml.etree.ElementTree.Element`` -- confirm against caller).
            userid: Login name to store on the instance.

        Returns:
            This instance, so calls can be chained (``User().parse(...)``).
        """
        self.userid = userid
        # findtext() gives None for an absent element; an element without
        # text may yield None or "". Normalise both to "" so that
        # import_into_db() never binds None for either value.
        self.realname = xml.findtext("realname") or ""
        self.email = xml.findtext("email") or ""
        return self

    def __str__(self):
        return f"User {self.userid}: {self.realname} {self.email}"

    def __repr__(self):
        return f"[{self.__str__()}]"

    def import_into_db(self, db):
        """Insert this user as a new row of the ``users`` table.

        Args:
            db: Object whose ``cursor()`` returns a context-managed cursor.
        """
        with db.cursor() as cur:
            cur.execute(
                """INSERT INTO users (userid, realname, email)
                VALUES (%s,%s,%s)""",
                (
                    self.userid,
                    self.realname,
                    self.email,
                ),
            )

    @staticmethod
    def lookup(db, userid):
        """Return the ``users`` row for *userid*, or ``None`` if there is none.

        Args:
            db: Object whose ``cursor()`` returns a context-managed cursor.
            userid: Login name to search for.

        Returns:
            The first matching row as returned by the cursor, or ``None``.
        """
        with db.cursor() as cur:
            cur.execute("SELECT * FROM users where userid=%s", (userid,))
            return cur.fetchone() or None

    @staticmethod
    def missing_users(db):
        """List user ids used in ``revisions`` that have no ``users`` row.

        Ids listed in ``FAKE_ACCOUNTS`` are excluded from the result.

        Args:
            db: Object whose ``cursor()`` returns a context-managed cursor.

        Returns:
            A list of distinct user ids.
        """
        with db.cursor() as cur:
            # Bind the exclusion list as a parameter instead of formatting
            # the tuple's Python repr into the SQL text. "<> ALL(array)" is
            # the array form of "NOT IN (...)" and stays valid for one or
            # zero entries.
            cur.execute(
                """SELECT DISTINCT revisions.userid
                FROM revisions LEFT JOIN users ON revisions.userid = users.userid
                WHERE users.userid IS NULL AND revisions.userid <> ALL(%s)""",
                (list(FAKE_ACCOUNTS),),
            )
            return [row[0] for row in cur.fetchall()]