add a user database #3

Merged
Ghost merged 6 commits from user-db into main 2022-10-26 14:05:48 +02:00
6 changed files with 217 additions and 146 deletions

View File

@ -22,20 +22,19 @@ class DB:
def schema_version(self):
# create a cursor
cur = self.conn.cursor()
with self.conn.cursor() as cur:
# execute a statement
try:
cur.execute("SELECT MAX(version) from scheme")
except psycopg2.errors.UndefinedTable as error:
cur.close()
self.close()
self.connect()
return 0
# execute a statement
try:
cur.execute("SELECT MAX(version) from scheme")
except psycopg2.errors.UndefinedTable as error:
cur.close()
self.close()
self.connect()
return 0
db_version = cur.fetchone()
db_version = cur.fetchone()
cur.close()
return db_version[0]
def close(self):
@ -154,20 +153,29 @@ class DB:
"create index request_number_idx2 on requests(number)",
"UPDATE scheme SET version=12",
)
schemes[13] = (
"""
CREATE TABLE users (
id SERIAL PRIMARY KEY,
userid VARCHAR(255) NOT NULL,
email VARCHAR(255) NOT NULL,
realname VARCHAR(255) NOT NULL
)
""",
"UPDATE scheme SET version=13",
)
schema_version = self.schema_version()
if (schema_version + 1) not in schemes:
return
try:
cur = self.conn.cursor()
# create table one by one
for version, commands in schemes.items():
if version <= schema_version:
continue
for command in commands:
cur.execute(command)
# close communication with the PostgreSQL database server
cur.close()
with self.conn.cursor() as cur:
# create table one by one
for version, commands in schemes.items():
if version <= schema_version:
continue
for command in commands:
cur.execute(command)
# commit the changes
self.conn.commit()
except (Exception, psycopg2.DatabaseError) as error:

View File

@ -23,122 +23,115 @@ class DBRevision:
return f"[{self.__str__()}]"
def links_to(self, db, project, package):
db.cursor().execute(
"INSERT INTO links (revision_id, project, package) VALUES (%s,%s,%s)",
(self.dbid, project, package),
)
with db.cursor() as cur:
cur.execute(
"INSERT INTO links (revision_id, project, package) VALUES (%s,%s,%s)",
(self.dbid, project, package),
)
@classmethod
def import_obs_rev(cls, db, revision):
cur = db.cursor()
cur.execute(
"""INSERT INTO revisions (project, package, rev, unexpanded_srcmd5, commit_time, userid, comment, request_number)
VALUES(%s, %s, %s, %s, %s, %s, %s, %s)""",
(
revision.project,
revision.package,
revision.rev,
revision.unexpanded_srcmd5,
revision.time,
revision.userid,
revision.comment,
revision.request_number,
),
)
cur.close()
with db.cursor() as cur:
cur.execute(
"""INSERT INTO revisions (project, package, rev, unexpanded_srcmd5, commit_time, userid, comment, request_number)
VALUES(%s, %s, %s, %s, %s, %s, %s, %s)""",
(
revision.project,
revision.package,
revision.rev,
revision.unexpanded_srcmd5,
revision.time,
revision.userid,
revision.comment,
revision.request_number,
),
)
return cls.fetch_revision(db, revision.project, revision.package, revision.rev)
@classmethod
def fetch_revision(cls, db, project, package, rev):
cur = db.cursor()
cur.execute(
"SELECT * FROM revisions where project=%s and package=%s and rev=%s",
(project, package, str(rev)),
)
row = cur.fetchone()
cur.close()
@staticmethod
def fetch_revision(db, project, package, rev):
with db.cursor() as cur:
cur.execute(
"SELECT * FROM revisions where project=%s and package=%s and rev=%s",
(project, package, str(rev)),
)
row = cur.fetchone()
return DBRevision(row)
@classmethod
def latest_revision(cls, db, project, package):
cur = db.cursor()
cur.execute(
"SELECT MAX(rev) FROM revisions where project=%s and package=%s",
(project, package),
)
max = cur.fetchone()[0]
cur.close()
@staticmethod
def latest_revision(db, project, package):
with db.cursor() as cur:
cur.execute(
"SELECT MAX(rev) FROM revisions where project=%s and package=%s",
(project, package),
)
max = cur.fetchone()[0]
if max:
return DBRevision.fetch_revision(db, project, package, int(max))
return None
@classmethod
def all_revisions(cls, db, project, package):
cur = db.cursor()
cur.execute(
"SELECT * FROM revisions where project=%s and package=%s",
(project, package),
)
ret = []
for row in cur.fetchall():
ret.append(DBRevision(row))
cur.close()
@staticmethod
def all_revisions(db, project, package):
with db.cursor() as cur:
cur.execute(
"SELECT * FROM revisions where project=%s and package=%s",
(project, package),
)
ret = []
for row in cur.fetchall():
ret.append(DBRevision(row))
return ret
def linked_rev(self, db):
cur = db.cursor()
cur.execute(
"SELECT project,package FROM links where revision_id=%s", (self.dbid,)
)
row = cur.fetchone()
if not row:
cur.close()
return None
project, package = row
cur.execute(
"SELECT * FROM revisions where project=%s and package=%s and commit_time <= %s ORDER BY commit_time DESC LIMIT 1",
(project, package, self.commit_time),
)
revisions = [DBRevision(row) for row in cur.fetchall()]
cur.close()
with db.cursor() as cur:
cur.execute(
"SELECT project,package FROM links where revision_id=%s", (self.dbid,)
)
row = cur.fetchone()
if not row:
return None
project, package = row
cur.execute(
"SELECT * FROM revisions where project=%s and package=%s and commit_time <= %s ORDER BY commit_time DESC LIMIT 1",
(project, package, self.commit_time),
)
revisions = [DBRevision(row) for row in cur.fetchall()]
if revisions:
return revisions[0]
return None
def set_broken(self, db):
cur = db.cursor()
cur.execute("UPDATE revisions SET broken=TRUE where id=%s", (self.dbid,))
cur.close()
with db.cursor() as cur:
cur.execute("UPDATE revisions SET broken=TRUE where id=%s", (self.dbid,))
def import_dir_list(self, db, xml):
cur = db.cursor()
cur.execute(
"UPDATE revisions SET expanded_srcmd5=%s where id=%s",
(xml.get("srcmd5"), self.dbid),
)
for entry in xml.findall("entry"):
with db.cursor() as cur:
cur.execute(
"""INSERT INTO files (name, md5, size, mtime, revision_id)
VALUES (%s,%s,%s,%s,%s)""",
(
entry.get("name"),
entry.get("md5"),
entry.get("size"),
entry.get("mtime"),
self.dbid,
),
"UPDATE revisions SET expanded_srcmd5=%s where id=%s",
(xml.get("srcmd5"), self.dbid),
)
cur.close()
for entry in xml.findall("entry"):
cur.execute(
"""INSERT INTO files (name, md5, size, mtime, revision_id)
VALUES (%s,%s,%s,%s,%s)""",
(
entry.get("name"),
entry.get("md5"),
entry.get("size"),
entry.get("mtime"),
self.dbid,
),
)
@classmethod
def requests_to_fetch(self, db, project, package):
cur = db.cursor()
cur.execute(
"""SELECT request_number FROM revisions revs left join requests
reqs on reqs.number=revs.request_number where reqs.id is null and
revs.request_number is not null and project=%s and package=%s;""",
(project, package),
)
ret = [row[0] for row in cur.fetchall()]
cur.close()
@staticmethod
def requests_to_fetch(db, project, package):
with db.cursor() as cur:
cur.execute(
"""SELECT request_number FROM revisions revs left join requests
reqs on reqs.number=revs.request_number where reqs.id is null and
revs.request_number is not null and project=%s and package=%s;""",
(project, package),
)
ret = [row[0] for row in cur.fetchall()]
return ret

View File

@ -10,6 +10,8 @@ from lib.obs import OBS
from lib.obs_revision import OBSRevision
from lib.proxy_sha256 import ProxySHA256, md5, sha256
from lib.request import Request
import xml.etree.ElementTree as ET
from lib.user import User
Ghost marked this conversation as resolved Outdated
Outdated
Review

Please rename that to lib.user

Please rename that to lib.user
def _files_hash(hash_alg, dirpath):
@ -149,14 +151,20 @@ class Importer:
for project, _, api_url in self.projects:
self.obs.change_url(api_url)
self.update_db_package(db, project, self.package)
cur = db.cursor()
cur.execute(
"SELECT DISTINCT l.project, l.package from links l join revisions r on r.id=l.revision_id WHERE r.project=%s AND r.package=%s",
(project, self.package),
)
for row in cur.fetchall():
(lproject, lpackage) = row
self.update_db_package(db, lproject, lpackage)
with db.cursor() as cur:
cur.execute(
"SELECT DISTINCT l.project, l.package from links l join revisions r on r.id=l.revision_id WHERE r.project=%s AND r.package=%s",
(project, self.package),
)
for row in cur.fetchall():
(lproject, lpackage) = row
self.update_db_package(db, lproject, lpackage)
missing_users = User.missing_users(db)
Ghost marked this conversation as resolved Outdated

I think that from here you can remove all from the with context (at least I do not see any access to cur)

I think that from here you can remove all from the `with` context (at least I do not see any access to `cur`)
for userid in missing_users:
Ghost marked this conversation as resolved Outdated
Outdated
Review

print left

print left
missing_user = self.obs.user(userid)
if missing_user:
missing_user.import_into_db(db)
for rev in DBRevision.all_revisions(db, project, self.package):
# TODO move into SELECT
Ghost marked this conversation as resolved Outdated
Outdated
Review

That else is strange. If there are no missing users, I would expect get_batch returns an empty list and you don't even end up in the for loop's body

That else is strange. If there are no missing users, I would expect get_batch returns an empty list and you don't even end up in the for loop's body

Indeed.

IMHO avoid is None, == []. Use the truth value (if missing_user:)

Indeed. IMHO avoid `is None`, `== []`. Use the truth value (`if missing_user:`)
@ -173,10 +181,9 @@ class Importer:
else:
rev.set_broken(db)
for number in DBRevision.requests_to_fetch(db, project, self.package):
self.obs.request(number).import_into_db(db)
for number in DBRevision.requests_to_fetch(db, project, self.package):
self.obs.request(number).import_into_db(db)
cur.close()
db.conn.commit()
Ghost marked this conversation as resolved Outdated
Outdated
Review

Dead code?

Dead code?
def import_all_revisions(self, gc):

View File

@ -8,6 +8,7 @@ from urllib.error import HTTPError
import osc.core
from lib.request import Request
from lib.user import User
# Add a retry wrapper for some of the HTTP actions.
@ -84,6 +85,14 @@ class OBS:
return None
return root
def _user(self, userid, **params):
try:
root = self._xml(f"/person/{userid}", **params)
except HTTPError:
logging.error(f"UserID {userid} not found")
return None
return root
def _link(self, project, package, rev):
try:
root = self._xml(f"source/{project}/{package}/_link", rev=rev)
@ -122,6 +131,11 @@ class OBS:
if root is not None:
return Request().parse(root)
def user(self, userid):
root = self._user(userid)
if root is not None:
return User().parse(root, userid)
aplanas marked this conversation as resolved
Review

It is OK how you write it, but I would expect root and userid as part of the __init__ constructor (give initial state).

It is OK how you write it, but I would expect `root` and `userid` as part of the `__init__` constructor (give initial state).
Review

root not as it's OBS specific while we want to have Users also coming from the DB

root not as it's OBS specific while we want to have Users also coming from the DB
def files(self, project, package, revision):
root = self._xml(f"source/{project}/{package}", rev=revision, expand=1)
return [

View File

@ -34,24 +34,23 @@ class Request:
return f"[{self.__str__()}]"
def import_into_db(self, db):
cur = db.cursor()
cur.execute(
"""INSERT INTO requests (number, creator, type, state,
source_project, source_package, source_rev)
VALUES (%s,%s,%s,%s,%s,%s,%s) RETURNING id""",
(
self.number,
self.creator,
self.type_,
self.state,
self.source_project,
self.source_package,
self.source_rev,
),
)
rowid = cur.fetchone()[0]
cur.execute(
"UPDATE revisions SET request_id=%s WHERE request_number=%s",
(rowid, self.number),
)
cur.close()
with db.cursor() as cur:
cur.execute(
"""INSERT INTO requests (number, creator, type, state,
source_project, source_package, source_rev)
VALUES (%s,%s,%s,%s,%s,%s,%s) RETURNING id""",
(
self.number,
self.creator,
self.type_,
self.state,
self.source_project,
self.source_package,
self.source_rev,
),
)
rowid = cur.fetchone()[0]
cur.execute(
"UPDATE revisions SET request_id=%s WHERE request_number=%s",
(rowid, self.number),
)

50
lib/user.py Normal file
View File

@ -0,0 +1,50 @@
FAKE_ACCOUNTS = ('unknown', 'buildservice-autocommit', 'autobuild', '_service')
class User:
def parse(self, xml, userid):
self.userid = userid
self.realname = xml.find("realname").text
self.email = xml.find("email").text
if self.email is None:
self.email = ""
return self
def __str__(self):
return f"User {self.userid}: {self.realname} {self.email}"
def __repr__(self):
return f"[{self.__str__()}]"
def import_into_db(self, db):
with db.cursor() as cur:
cur.execute(
"""INSERT INTO users (userid, realname, email)
VALUES (%s,%s,%s)""",
(
self.userid,
self.realname,
self.email,
),
)
@staticmethod
def lookup(db, userid):
with db.cursor() as cur:
cur.execute("SELECT * FROM users where userid=%s", (userid,))
row = cur.fetchone()
if not row:
return None
return row
@staticmethod
def missing_users(db):
with db.cursor() as cur:
cur.execute(
"""SELECT DISTINCT revisions.userid
FROM revisions LEFT JOIN users ON revisions.userid = users.userid
WHERE users.userid IS NULL AND revisions.userid NOT IN {}""".format(FAKE_ACCOUNTS)
)
missing_users = [row[0] for row in cur.fetchall()]
return missing_users