add a user database #3

Merged
Ghost merged 6 commits from user-db into main 2022-10-26 14:05:48 +02:00
6 changed files with 217 additions and 146 deletions

View File

@ -22,7 +22,7 @@ class DB:
def schema_version(self):
# create a cursor
cur = self.conn.cursor()
with self.conn.cursor() as cur:
# execute a statement
try:
@ -35,7 +35,6 @@ class DB:
db_version = cur.fetchone()
cur.close()
return db_version[0]
def close(self):
@ -154,20 +153,29 @@ class DB:
"create index request_number_idx2 on requests(number)",
"UPDATE scheme SET version=12",
)
schemes[13] = (
"""
CREATE TABLE users (
id SERIAL PRIMARY KEY,
userid VARCHAR(255) NOT NULL,
email VARCHAR(255) NOT NULL,
realname VARCHAR(255) NOT NULL
)
""",
"UPDATE scheme SET version=13",
)
schema_version = self.schema_version()
if (schema_version + 1) not in schemes:
return
try:
cur = self.conn.cursor()
with self.conn.cursor() as cur:
# create table one by one
for version, commands in schemes.items():
if version <= schema_version:
continue
for command in commands:
cur.execute(command)
# close communication with the PostgreSQL database server
cur.close()
# commit the changes
self.conn.commit()
except (Exception, psycopg2.DatabaseError) as error:

View File

@ -23,14 +23,15 @@ class DBRevision:
return f"[{self.__str__()}]"
def links_to(self, db, project, package):
db.cursor().execute(
with db.cursor() as cur:
cur.execute(
"INSERT INTO links (revision_id, project, package) VALUES (%s,%s,%s)",
(self.dbid, project, package),
)
@classmethod
def import_obs_rev(cls, db, revision):
cur = db.cursor()
with db.cursor() as cur:
cur.execute(
"""INSERT INTO revisions (project, package, rev, unexpanded_srcmd5, commit_time, userid, comment, request_number)
VALUES(%s, %s, %s, %s, %s, %s, %s, %s)""",
@ -45,36 +46,33 @@ class DBRevision:
revision.request_number,
),
)
cur.close()
return cls.fetch_revision(db, revision.project, revision.package, revision.rev)
@classmethod
def fetch_revision(cls, db, project, package, rev):
cur = db.cursor()
@staticmethod
def fetch_revision(db, project, package, rev):
with db.cursor() as cur:
cur.execute(
"SELECT * FROM revisions where project=%s and package=%s and rev=%s",
(project, package, str(rev)),
)
row = cur.fetchone()
cur.close()
return DBRevision(row)
@classmethod
def latest_revision(cls, db, project, package):
cur = db.cursor()
@staticmethod
def latest_revision(db, project, package):
with db.cursor() as cur:
cur.execute(
"SELECT MAX(rev) FROM revisions where project=%s and package=%s",
(project, package),
)
max = cur.fetchone()[0]
cur.close()
if max:
return DBRevision.fetch_revision(db, project, package, int(max))
return None
@classmethod
def all_revisions(cls, db, project, package):
cur = db.cursor()
@staticmethod
def all_revisions(db, project, package):
with db.cursor() as cur:
cur.execute(
"SELECT * FROM revisions where project=%s and package=%s",
(project, package),
@ -82,17 +80,15 @@ class DBRevision:
ret = []
for row in cur.fetchall():
ret.append(DBRevision(row))
cur.close()
return ret
def linked_rev(self, db):
cur = db.cursor()
with db.cursor() as cur:
cur.execute(
"SELECT project,package FROM links where revision_id=%s", (self.dbid,)
)
row = cur.fetchone()
if not row:
cur.close()
return None
project, package = row
cur.execute(
@ -100,18 +96,17 @@ class DBRevision:
(project, package, self.commit_time),
)
revisions = [DBRevision(row) for row in cur.fetchall()]
cur.close()
if revisions:
return revisions[0]
return None
def set_broken(self, db):
cur = db.cursor()
with db.cursor() as cur:
cur.execute("UPDATE revisions SET broken=TRUE where id=%s", (self.dbid,))
cur.close()
def import_dir_list(self, db, xml):
cur = db.cursor()
with db.cursor() as cur:
cur.execute(
"UPDATE revisions SET expanded_srcmd5=%s where id=%s",
(xml.get("srcmd5"), self.dbid),
@ -128,11 +123,10 @@ class DBRevision:
self.dbid,
),
)
cur.close()
@classmethod
def requests_to_fetch(self, db, project, package):
cur = db.cursor()
@staticmethod
def requests_to_fetch(db, project, package):
with db.cursor() as cur:
cur.execute(
"""SELECT request_number FROM revisions revs left join requests
reqs on reqs.number=revs.request_number where reqs.id is null and
@ -140,5 +134,4 @@ class DBRevision:
(project, package),
)
ret = [row[0] for row in cur.fetchall()]
cur.close()
return ret

View File

@ -10,6 +10,8 @@ from lib.obs import OBS
from lib.obs_revision import OBSRevision
from lib.proxy_sha256 import ProxySHA256, md5, sha256
from lib.request import Request
import xml.etree.ElementTree as ET
from lib.user import User
Ghost marked this conversation as resolved Outdated
Outdated
Review

Please rename that to lib.user

Please rename that to lib.user
def _files_hash(hash_alg, dirpath):
@ -149,7 +151,7 @@ class Importer:
for project, _, api_url in self.projects:
self.obs.change_url(api_url)
self.update_db_package(db, project, self.package)
cur = db.cursor()
with db.cursor() as cur:
cur.execute(
"SELECT DISTINCT l.project, l.package from links l join revisions r on r.id=l.revision_id WHERE r.project=%s AND r.package=%s",
(project, self.package),
@ -158,6 +160,12 @@ class Importer:
(lproject, lpackage) = row
self.update_db_package(db, lproject, lpackage)
missing_users = User.missing_users(db)
Ghost marked this conversation as resolved Outdated

I think that from here on you can move everything out of the with context (at least I do not see any access to cur)

I think that from here on you can move everything out of the `with` context (at least I do not see any access to `cur`)
for userid in missing_users:
Ghost marked this conversation as resolved Outdated
Outdated
Review

print left

print left
missing_user = self.obs.user(userid)
if missing_user:
missing_user.import_into_db(db)
for rev in DBRevision.all_revisions(db, project, self.package):
# TODO move into SELECT
Ghost marked this conversation as resolved Outdated
Outdated
Review

That else is strange. If there are no missing users, I would expect get_batch to return an empty list, so you never even enter the for loop's body

That else is strange. If there are no missing users, I would expect get_batch to return an empty list, so you never even enter the for loop's body

Indeed.

IMHO avoid is None, == []. Use the truth value (if missing_user:)

Indeed. IMHO avoid `is None`, `== []`. Use the truth value (`if missing_user:`)
if rev.broken or rev.expanded_srcmd5:
@ -176,7 +184,6 @@ class Importer:
for number in DBRevision.requests_to_fetch(db, project, self.package):
self.obs.request(number).import_into_db(db)
cur.close()
db.conn.commit()
Ghost marked this conversation as resolved Outdated
Outdated
Review

Dead code?

Dead code?
def import_all_revisions(self, gc):

View File

@ -8,6 +8,7 @@ from urllib.error import HTTPError
import osc.core
from lib.request import Request
from lib.user import User
# Add a retry wrapper for some of the HTTP actions.
@ -84,6 +85,14 @@ class OBS:
return None
return root
def _user(self, userid, **params):
try:
root = self._xml(f"/person/{userid}", **params)
except HTTPError:
logging.error(f"UserID {userid} not found")
return None
return root
def _link(self, project, package, rev):
try:
root = self._xml(f"source/{project}/{package}/_link", rev=rev)
@ -122,6 +131,11 @@ class OBS:
if root is not None:
return Request().parse(root)
def user(self, userid):
    """Return a ``User`` parsed from the person document of *userid*.

    Returns ``None`` when ``self._user`` yields no document.
    """
    person = self._user(userid)
    if person is None:
        return None
    return User().parse(person, userid)
aplanas marked this conversation as resolved
Review

It is OK how you write it, but I would expect root and userid as part of the __init__ constructor (give initial state).

It is OK how you write it, but I would expect `root` and `userid` as part of the `__init__` constructor (give initial state).
Review

Not root, as it is OBS-specific, while we also want to have Users coming from the DB

Not `root`, as it is OBS-specific, while we also want to have Users coming from the DB
def files(self, project, package, revision):
root = self._xml(f"source/{project}/{package}", rev=revision, expand=1)
return [

View File

@ -34,7 +34,7 @@ class Request:
return f"[{self.__str__()}]"
def import_into_db(self, db):
cur = db.cursor()
with db.cursor() as cur:
cur.execute(
"""INSERT INTO requests (number, creator, type, state,
source_project, source_package, source_rev)
@ -54,4 +54,3 @@ class Request:
"UPDATE revisions SET request_id=%s WHERE request_number=%s",
(rowid, self.number),
)
cur.close()

50
lib/user.py Normal file
View File

@ -0,0 +1,50 @@
# User ids that are excluded from the "missing users" query below.
FAKE_ACCOUNTS = ("unknown", "buildservice-autocommit", "autobuild", "_service")


class User:
    """A user record with ``userid``, ``realname`` and ``email``."""

    def __init__(self, userid=None, realname="", email=""):
        # Give every instance a defined state so __str__/__repr__ work even
        # before parse() has been called.
        self.userid = userid
        self.realname = realname
        self.email = email

    def parse(self, xml, userid):
        """Fill this instance from a person XML element and return ``self``.

        ``xml`` must offer ``findtext`` (as ``xml.etree.ElementTree.Element``
        does). A missing or empty ``<realname>`` / ``<email>`` element is
        stored as ``""`` rather than ``None``, because both columns of the
        ``users`` table are declared NOT NULL.
        """
        self.userid = userid
        self.realname = xml.findtext("realname") or ""
        self.email = xml.findtext("email") or ""
        return self

    def __str__(self):
        return f"User {self.userid}: {self.realname} {self.email}"

    def __repr__(self):
        return f"[{self.__str__()}]"

    def import_into_db(self, db):
        """Insert this user into the ``users`` table.

        Only the INSERT is executed here; this method does not commit.
        """
        with db.cursor() as cur:
            cur.execute(
                """INSERT INTO users (userid, realname, email)
                VALUES (%s,%s,%s)""",
                (
                    self.userid,
                    self.realname,
                    self.email,
                ),
            )

    @staticmethod
    def lookup(db, userid):
        """Return the ``users`` row for *userid*, or ``None`` if there is none."""
        with db.cursor() as cur:
            cur.execute("SELECT * FROM users where userid=%s", (userid,))
            row = cur.fetchone()
        return row if row else None

    @staticmethod
    def missing_users(db):
        """Return the userids found in ``revisions`` but absent from ``users``.

        Ids listed in ``FAKE_ACCOUNTS`` are excluded. They are passed as bound
        parameters (one ``%s`` placeholder each) instead of being formatted
        into the SQL text, so quoting is left to the database driver and a
        one-element or empty ``FAKE_ACCOUNTS`` still yields valid SQL.
        """
        query = """SELECT DISTINCT revisions.userid
                FROM revisions LEFT JOIN users ON revisions.userid = users.userid
                WHERE users.userid IS NULL"""
        excluded = tuple(FAKE_ACCOUNTS)
        if excluded:
            placeholders = ", ".join(["%s"] * len(excluded))
            query += f" AND revisions.userid NOT IN ({placeholders})"
        with db.cursor() as cur:
            cur.execute(query, excluded)
            return [row[0] for row in cur.fetchall()]