From f14c609db28996c065e49424528d22d2ba61f481 Mon Sep 17 00:00:00 2001 From: nkrapp Date: Wed, 26 Oct 2022 09:30:02 +0200 Subject: [PATCH] Add batch import --- lib/importer.py | 18 ++++++++++++++---- lib/obs.py | 10 +++++----- lib/users.py | 21 +++++++++++++++++++-- 3 files changed, 38 insertions(+), 11 deletions(-) diff --git a/lib/importer.py b/lib/importer.py index 5552d6b..7a5a17c 100644 --- a/lib/importer.py +++ b/lib/importer.py @@ -10,7 +10,8 @@ from lib.obs import OBS from lib.obs_revision import OBSRevision from lib.proxy_sha256 import ProxySHA256, md5, sha256 from lib.request import Request -from lib.users import Users +import xml.etree.ElementTree as ET +from lib.users import User def _files_hash(hash_alg, dirpath): @@ -159,6 +160,15 @@ class Importer: (lproject, lpackage) = row self.update_db_package(db, lproject, lpackage) + missing_users = User.get_batch(db) + print(missing_users) + for userid in missing_users: + missing_user = self.obs.user(userid) + if missing_user is not None: + missing_user.import_into_db(db) + else: + logging.info("No missing users") + for rev in DBRevision.all_revisions(db, project, self.package): # TODO move into SELECT if rev.broken or rev.expanded_srcmd5: @@ -174,9 +184,9 @@ class Importer: else: rev.set_broken(db) - fake_accounts = ["unknown","buildservice-autocommit", "autobuild", "_service"] - if Users.user_lookup(db, rev.userid) is None and rev.userid not in fake_accounts: - self.obs.users(rev.userid).import_into_db(db) + # fake_accounts = ["unknown","buildservice-autocommit", "autobuild", "_service"] + # if User.user_lookup(db, rev.userid) is None and rev.userid not in fake_accounts: + # self.obs.users(rev.userid).import_into_db(db) for number in DBRevision.requests_to_fetch(db, project, self.package): self.obs.request(number).import_into_db(db) diff --git a/lib/obs.py b/lib/obs.py index 750ac87..661defe 100644 --- a/lib/obs.py +++ b/lib/obs.py @@ -8,7 +8,7 @@ from urllib.error import HTTPError import osc.core from lib.request import Request -from lib.users import Users +from lib.users import User # Add a retry wrapper for some of the HTTP actions. @@ -85,7 +85,7 @@ class OBS: return None return root - def _users(self, userid, **params): + def _user(self, userid, **params): try: root = self._xml(f"/person/{userid}", **params) except HTTPError: @@ -131,10 +131,10 @@ class OBS: if root is not None: return Request().parse(root) - def users(self, userid): - root = self._users(userid) + def user(self, userid): + root = self._user(userid) if root is not None: - return Users().parse(root, userid) + return User().parse(root, userid) def files(self, project, package, revision): root = self._xml(f"source/{project}/{package}", rev=revision, expand=1) diff --git a/lib/users.py b/lib/users.py index 8862603..f26b34f 100644 --- a/lib/users.py +++ b/lib/users.py @@ -1,4 +1,6 @@ -class Users: +FAKE_ACCOUNTS = ('unknown','buildservice-autocommit', 'autobuild', '_service') + +class User: def parse(self, xml, userid): self.userid = userid self.realname = xml.find("realname").text @@ -27,7 +29,7 @@ class Users: ) cur.close() - def user_lookup(db, userid): + def lookup(db, userid): cur= db.cursor() cur.execute("SELECT * FROM users where userid=%s", (userid,)) row = cur.fetchone() @@ -35,4 +37,19 @@ class Users: cur.close() return None return row + + @classmethod + def get_batch(self, db): + cur = db.cursor() + cur.execute( + """SELECT DISTINCT revisions.userid + FROM revisions LEFT JOIN users ON revisions.userid = users.userid + WHERE users.userid IS NULL AND revisions.userid NOT IN + ('unknown','buildservice-autocommit', 'autobuild', '_service')""" + ) + missing_users = [row[0] for row in cur.fetchall()] + if not missing_users: + cur.close() + return [] + return missing_users \ No newline at end of file