From 7861a7e9b0d134a42185754e4f18079d6d7e8c21 Mon Sep 17 00:00:00 2001
From: Stephan Kulow
Date: Wed, 9 Nov 2022 08:32:18 +0100
Subject: [PATCH] Fix LFS register (it needs json not data)

Refactored the LFS Oid handling into a class of its own and added a way
to recheck all LFS handles (or re-register)
---
Reviewer notes (this section is ignored by git am):
 - LFSOid.add() now calls set_from_dbid(); the submitted patch called
   from_db(), which is not defined anywhere in this patch.
 - LFSOid.__init__() now initialises self.sha256 instead of self.sha, to
   match the attribute every other method reads and writes.
 - Both fixes are line-count neutral, so hunk headers and the diffstat are
   unchanged. The post-image blob id in the lib/lfs_oid.py index line no
   longer corresponds to the patched content.
 - This patch was re-expanded from a whitespace-collapsed copy. Blank
   context lines and the indentation inside the triple-quoted SQL strings
   are reconstructed; verify against the target tree (or apply with
   --ignore-whitespace) before relying on it.

 lib/git_exporter.py |   3 ++
 lib/lfs_oid.py      | 129 +++++++++++++++++++++++++++++++++++++++++++-
 lib/proxy_sha256.py |  58 ++------------------
 3 files changed, 134 insertions(+), 56 deletions(-)

diff --git a/lib/git_exporter.py b/lib/git_exporter.py
index e8dd8d6..1df2cc3 100644
--- a/lib/git_exporter.py
+++ b/lib/git_exporter.py
@@ -6,6 +6,7 @@ import yaml
 from lib.binary import is_binary_or_large
 from lib.db import DB
 from lib.git import Git
+from lib.lfs_oid import LFSOid
 from lib.obs import OBS
 from lib.proxy_sha256 import ProxySHA256
 from lib.tree_builder import TreeBuilder
@@ -60,6 +61,8 @@ class GitExporter:
         return left_to_commit
 
     def export_as_git(self):
+        if os.getenv("CHECK_ALL_LFS"):
+            LFSOid.check_all(self.db, self.package)
         tree = TreeBuilder(self.db).build(self.project, self.package)
         flats = tree.as_flat_list()
 
diff --git a/lib/lfs_oid.py b/lib/lfs_oid.py
index e01d254..37a82ae 100644
--- a/lib/lfs_oid.py
+++ b/lib/lfs_oid.py
@@ -1,13 +1,138 @@
+from __future__ import annotations
+
 import logging
+import os
 import sys
 
+import requests
+
 from lib.db import DB
 
 
 # no need for this class yet, so just leave the migration code here
 class LFSOid:
-    def __init__(self) -> None:
-        pass
+    def __init__(self, db: DB) -> None:
+        self.db = db
+        self.dbid = None
+        self.project = None
+        self.package = None
+        self.filename = None
+        self.revision = None
+        self.sha256 = None
+        self.size = None
+        self.mimetype = None
+        self.file_md5 = None
+
+    @staticmethod
+    def check_all(db, package):
+        with db.cursor() as cur:
+            cur.execute(
+                "SELECT lfs_oid_id FROM lfs_oid_in_package WHERE package=%s ORDER BY lfs_oid_id DESC limit 10 ",
+                (package,),
+            )
+            for row in cur.fetchall():
+                oid = LFSOid(db).set_from_dbid(row[0])
+                if not oid.check():
+                    oid.register()
+
+    def add(
+        self,
+        project: str,
+        package: str,
+        filename: str,
+        revision: str,
+        sha256: str,
+        size: int,
+        mimetype: str,
+        file_md5: str,
+    ) -> None:
+        with self.db.cursor() as cur:
+            # we UPDATE here so the return functions. conflicts are likely as we look for filename/md5 but conflict on sha256
+            cur.execute(
+                """INSERT INTO lfs_oids (project,package,filename,rev,sha256,size,mimetype,file_md5)
+                   VALUES (%s,%s,%s,%s,%s,%s,%s,%s)
+                   ON CONFLICT (sha256,size) DO UPDATE SET mimetype=EXCLUDED.mimetype
+                   RETURNING id""",
+                (
+                    project,
+                    package,
+                    filename,
+                    revision,
+                    sha256,
+                    size,
+                    mimetype,
+                    file_md5,
+                ),
+            )
+            row = cur.fetchone()
+            lfs_oid_id = row[0]
+            cur.execute(
+                """INSERT INTO lfs_oid_in_package (package,filename,lfs_oid_id)
+                   VALUES (%s,%s,%s)""",
+                (package, filename, lfs_oid_id),
+            )
+            if mimetype.startswith("text/"):
+                cur.execute(
+                    "INSERT INTO text_files (package,filename) VALUES (%s,%s)",
+                    (package, filename),
+                )
+        self.db.conn.commit()
+        self.set_from_dbid(lfs_oid_id)
+        if not self.check():
+            self.register()
+
+    def check(self):
+        url = f"http://gitea.opensuse.org:9999/check/{self.sha256}/{self.size}"
+        response = requests.get(
+            url,
+            timeout=10,
+        )
+        return response.status_code == 200
+
+    def set_from_dbid(self, dbid: int) -> LFSOid:
+        with self.db.cursor() as cur:
+            cur.execute("SELECT * from lfs_oids where id=%s", (dbid,))
+            row = cur.fetchone()
+            self.set_from_row(row)
+        assert self.dbid == dbid
+        return self
+
+    def set_from_row(self, row: list) -> LFSOid:
+        (
+            self.dbid,
+            self.project,
+            self.package,
+            self.filename,
+            self.revision,
+            self.sha256,
+            self.size,
+            self.mimetype,
+            self.file_md5,
+        ) = row
+        return self
+
+    def register(self):
+        if not os.getenv("GITEA_REGISTER_SECRET"):
+            logging.info("Not registering LFS due to missing secret")
+            return
+
+        data = {
+            "secret": os.getenv("GITEA_REGISTER_SECRET"),
+            "project": self.project,
+            "package": self.package,
+            "filename": self.filename,
+            "rev": self.revision,
+            "sha256": self.sha256,
+            "size": self.size,
+        }
+
+        url = "http://gitea.opensuse.org:9999/register"
+        response = requests.post(
+            url,
+            json=data,
+            timeout=10,
+        )
+        logging.info(f"Register LFS returned {response.status_code}")
 
 
 if __name__ == "__main__":
diff --git a/lib/proxy_sha256.py b/lib/proxy_sha256.py
index be5da7d..5b15ad7 100644
--- a/lib/proxy_sha256.py
+++ b/lib/proxy_sha256.py
@@ -1,6 +1,5 @@
 import hashlib
 import logging
-import os
 
 try:
     import magic
@@ -11,6 +10,7 @@ except:
 import requests
 
 from lib.db import DB
+from lib.lfs_oid import LFSOid
 from lib.obs import OBS
 
 
@@ -60,59 +60,9 @@ class ProxySHA256:
         if not mimetype:
             mimetype = self.mime.from_buffer(buffer)
         fin.close()
-        sha = sha.hexdigest()
-        with self.db.cursor() as cur:
-            # we UPDATE here so the return functions. conflicts are likely as we look for filename/md5 but conflict on sha256
-            cur.execute(
-                """INSERT INTO lfs_oids (project,package,filename,rev,sha256,size,mimetype,file_md5)
-                   VALUES (%s,%s,%s,%s,%s,%s,%s,%s)
-                   ON CONFLICT (sha256,size) DO UPDATE SET mimetype=EXCLUDED.mimetype
-                   RETURNING id""",
-                (
-                    project,
-                    package,
-                    name,
-                    revision,
-                    sha,
-                    size,
-                    mimetype,
-                    file_md5,
-                ),
-            )
-            row = cur.fetchone()
-            lfs_oid_id = row[0]
-            cur.execute(
-                """INSERT INTO lfs_oid_in_package (package,filename,lfs_oid_id)
-                   VALUES (%s,%s,%s)""",
-                (package, name, lfs_oid_id),
-            )
-            if mimetype.startswith("text/"):
-                cur.execute(
-                    "INSERT INTO text_files (package,filename) VALUES (%s,%s)",
-                    (package, name),
-                )
-        self.db.conn.commit()
-
-        if os.getenv("GITEA_REGISTER_SECRET"):
-            data = {
-                "secret": os.getenv("GITEA_REGISTER_SECRET"),
-                "project": project,
-                "package": package,
-                "filename": name,
-                "rev": revision,
-                "sha256": sha,
-                "size": size,
-            }
-
-            url = "http://gitea.opensuse.org:9999/register"
-            response = requests.post(
-                url,
-                data=data,
-                timeout=10,
-            )
-            logging.debug(f"Registered {response.status_code}")
-        else:
-            logging.info("Not registering LFS due to missing secret")
+        LFSOid(self.db).add(
+            project, package, name, revision, sha.hexdigest(), size, mimetype, file_md5
+        )
 
         # reset
         self.hashes = None