Merge pull request 'add a user database' (#3) from user-db into main

Reviewed-on: https://gitea.opensuse.org/importers/git-importer/pulls/3
This commit is contained in:
Nico Krapp 2022-10-26 14:05:47 +02:00
commit 87d9fcc131
6 changed files with 217 additions and 146 deletions

View File

@ -22,7 +22,7 @@ class DB:
def schema_version(self):
# create a cursor
cur = self.conn.cursor()
with self.conn.cursor() as cur:
# execute a statement
try:
@ -35,7 +35,6 @@ class DB:
db_version = cur.fetchone()
cur.close()
return db_version[0]
def close(self):
@ -154,20 +153,29 @@ class DB:
"create index request_number_idx2 on requests(number)",
"UPDATE scheme SET version=12",
)
schemes[13] = (
"""
CREATE TABLE users (
id SERIAL PRIMARY KEY,
userid VARCHAR(255) NOT NULL,
email VARCHAR(255) NOT NULL,
realname VARCHAR(255) NOT NULL
)
""",
"UPDATE scheme SET version=13",
)
schema_version = self.schema_version()
if (schema_version + 1) not in schemes:
return
try:
cur = self.conn.cursor()
with self.conn.cursor() as cur:
# create table one by one
for version, commands in schemes.items():
if version <= schema_version:
continue
for command in commands:
cur.execute(command)
# close communication with the PostgreSQL database server
cur.close()
# commit the changes
self.conn.commit()
except (Exception, psycopg2.DatabaseError) as error:

View File

@ -23,14 +23,15 @@ class DBRevision:
return f"[{self.__str__()}]"
def links_to(self, db, project, package):
db.cursor().execute(
with db.cursor() as cur:
cur.execute(
"INSERT INTO links (revision_id, project, package) VALUES (%s,%s,%s)",
(self.dbid, project, package),
)
@classmethod
def import_obs_rev(cls, db, revision):
cur = db.cursor()
with db.cursor() as cur:
cur.execute(
"""INSERT INTO revisions (project, package, rev, unexpanded_srcmd5, commit_time, userid, comment, request_number)
VALUES(%s, %s, %s, %s, %s, %s, %s, %s)""",
@ -45,36 +46,33 @@ class DBRevision:
revision.request_number,
),
)
cur.close()
return cls.fetch_revision(db, revision.project, revision.package, revision.rev)
@classmethod
def fetch_revision(cls, db, project, package, rev):
cur = db.cursor()
@staticmethod
def fetch_revision(db, project, package, rev):
with db.cursor() as cur:
cur.execute(
"SELECT * FROM revisions where project=%s and package=%s and rev=%s",
(project, package, str(rev)),
)
row = cur.fetchone()
cur.close()
return DBRevision(row)
@classmethod
def latest_revision(cls, db, project, package):
cur = db.cursor()
@staticmethod
def latest_revision(db, project, package):
with db.cursor() as cur:
cur.execute(
"SELECT MAX(rev) FROM revisions where project=%s and package=%s",
(project, package),
)
max = cur.fetchone()[0]
cur.close()
if max:
return DBRevision.fetch_revision(db, project, package, int(max))
return None
@classmethod
def all_revisions(cls, db, project, package):
cur = db.cursor()
@staticmethod
def all_revisions(db, project, package):
with db.cursor() as cur:
cur.execute(
"SELECT * FROM revisions where project=%s and package=%s",
(project, package),
@ -82,17 +80,15 @@ class DBRevision:
ret = []
for row in cur.fetchall():
ret.append(DBRevision(row))
cur.close()
return ret
def linked_rev(self, db):
cur = db.cursor()
with db.cursor() as cur:
cur.execute(
"SELECT project,package FROM links where revision_id=%s", (self.dbid,)
)
row = cur.fetchone()
if not row:
cur.close()
return None
project, package = row
cur.execute(
@ -100,18 +96,17 @@ class DBRevision:
(project, package, self.commit_time),
)
revisions = [DBRevision(row) for row in cur.fetchall()]
cur.close()
if revisions:
return revisions[0]
return None
def set_broken(self, db):
cur = db.cursor()
with db.cursor() as cur:
cur.execute("UPDATE revisions SET broken=TRUE where id=%s", (self.dbid,))
cur.close()
def import_dir_list(self, db, xml):
cur = db.cursor()
with db.cursor() as cur:
cur.execute(
"UPDATE revisions SET expanded_srcmd5=%s where id=%s",
(xml.get("srcmd5"), self.dbid),
@ -128,11 +123,10 @@ class DBRevision:
self.dbid,
),
)
cur.close()
@classmethod
def requests_to_fetch(self, db, project, package):
cur = db.cursor()
@staticmethod
def requests_to_fetch(db, project, package):
with db.cursor() as cur:
cur.execute(
"""SELECT request_number FROM revisions revs left join requests
reqs on reqs.number=revs.request_number where reqs.id is null and
@ -140,5 +134,4 @@ class DBRevision:
(project, package),
)
ret = [row[0] for row in cur.fetchall()]
cur.close()
return ret

View File

@ -10,6 +10,8 @@ from lib.obs import OBS
from lib.obs_revision import OBSRevision
from lib.proxy_sha256 import ProxySHA256, md5, sha256
from lib.request import Request
import xml.etree.ElementTree as ET
from lib.user import User
def _files_hash(hash_alg, dirpath):
@ -149,7 +151,7 @@ class Importer:
for project, _, api_url in self.projects:
self.obs.change_url(api_url)
self.update_db_package(db, project, self.package)
cur = db.cursor()
with db.cursor() as cur:
cur.execute(
"SELECT DISTINCT l.project, l.package from links l join revisions r on r.id=l.revision_id WHERE r.project=%s AND r.package=%s",
(project, self.package),
@ -158,6 +160,12 @@ class Importer:
(lproject, lpackage) = row
self.update_db_package(db, lproject, lpackage)
missing_users = User.missing_users(db)
for userid in missing_users:
missing_user = self.obs.user(userid)
if missing_user:
missing_user.import_into_db(db)
for rev in DBRevision.all_revisions(db, project, self.package):
# TODO move into SELECT
if rev.broken or rev.expanded_srcmd5:
@ -176,7 +184,6 @@ class Importer:
for number in DBRevision.requests_to_fetch(db, project, self.package):
self.obs.request(number).import_into_db(db)
cur.close()
db.conn.commit()
def import_all_revisions(self, gc):

View File

@ -8,6 +8,7 @@ from urllib.error import HTTPError
import osc.core
from lib.request import Request
from lib.user import User
# Add a retry wrapper for some of the HTTP actions.
@ -84,6 +85,14 @@ class OBS:
return None
return root
def _user(self, userid, **params):
    """Fetch the ``/person/<userid>`` document via ``self._xml``.

    Extra keyword arguments are forwarded unchanged to ``self._xml``.
    Returns whatever ``self._xml`` returns, or ``None`` (after logging an
    error) when the request raises ``HTTPError``.
    """
    try:
        return self._xml(f"/person/{userid}", **params)
    except HTTPError:
        logging.error(f"UserID {userid} not found")
    return None
def _link(self, project, package, rev):
try:
root = self._xml(f"source/{project}/{package}/_link", rev=rev)
@ -122,6 +131,11 @@ class OBS:
if root is not None:
return Request().parse(root)
def user(self, userid):
    """Return a parsed ``User`` for ``userid``, or ``None`` if it cannot be fetched.

    The raw document comes from ``self._user``; when that yields ``None``
    nothing is parsed and ``None`` is returned.
    """
    person = self._user(userid)
    if person is None:
        return None
    return User().parse(person, userid)
def files(self, project, package, revision):
root = self._xml(f"source/{project}/{package}", rev=revision, expand=1)
return [

View File

@ -34,7 +34,7 @@ class Request:
return f"[{self.__str__()}]"
def import_into_db(self, db):
cur = db.cursor()
with db.cursor() as cur:
cur.execute(
"""INSERT INTO requests (number, creator, type, state,
source_project, source_package, source_rev)
@ -54,4 +54,3 @@ class Request:
"UPDATE revisions SET request_id=%s WHERE request_number=%s",
(rowid, self.number),
)
cur.close()

50
lib/user.py Normal file
View File

@ -0,0 +1,50 @@
# Account names that appear as revision authors but are excluded from the
# missing-user query, so no lookup is attempted for them.
FAKE_ACCOUNTS = ("unknown", "buildservice-autocommit", "autobuild", "_service")


class User:
    """A user record (userid, realname, email) mirrored into the ``users`` table."""

    def parse(self, xml, userid):
        """Fill this instance from an XML element and return ``self``.

        ``xml`` must provide ``findtext`` (as ``xml.etree.ElementTree``
        elements do). A missing or empty ``realname``/``email`` element
        becomes ``""`` so that the NOT NULL columns of ``users`` can
        always be written.
        """
        self.userid = userid
        # findtext returns None for an absent element and "" for an empty
        # one; normalise both to the empty string.
        self.realname = xml.findtext("realname") or ""
        self.email = xml.findtext("email") or ""
        return self

    def __str__(self):
        return f"User {self.userid}: {self.realname} {self.email}"

    def __repr__(self):
        return f"[{self.__str__()}]"

    def import_into_db(self, db):
        """Insert this user as a new row of the ``users`` table."""
        with db.cursor() as cur:
            cur.execute(
                """INSERT INTO users (userid, realname, email)
                VALUES (%s,%s,%s)""",
                (
                    self.userid,
                    self.realname,
                    self.email,
                ),
            )

    @staticmethod
    def lookup(db, userid):
        """Return the ``users`` row for ``userid``, or ``None`` if there is none."""
        with db.cursor() as cur:
            cur.execute("SELECT * FROM users where userid=%s", (userid,))
            row = cur.fetchone()
        return row if row else None

    @staticmethod
    def missing_users(db):
        """Return userids referenced by ``revisions`` that have no ``users`` row.

        Names listed in ``FAKE_ACCOUNTS`` are excluded. The list is passed
        as a bound parameter (the driver expands a tuple for ``IN``)
        instead of being formatted into the SQL text.
        """
        with db.cursor() as cur:
            cur.execute(
                """SELECT DISTINCT revisions.userid
                FROM revisions LEFT JOIN users ON revisions.userid = users.userid
                WHERE users.userid IS NULL AND revisions.userid NOT IN %s""",
                (FAKE_ACCOUNTS,),
            )
            return [row[0] for row in cur.fetchall()]