import hashlib
import logging
import os

try:
    import magic
except ImportError:
    # Two different distributions provide a module called "magic"; only
    # python-magic offers the Magic(mime=True) API used below.
    print("Install python3-python-magic, not python3-magic")
    raise

import requests

from lib.db import DB
from lib.obs import OBS


class ProxySHA256:
    """Look up and record SHA-256 checksums of package files for LFS.

    Known checksums are read from the ``lfs_oids`` / ``lfs_oid_in_package``
    tables; unknown files are downloaded through ``obs``, hashed, stored in
    the database and optionally registered with a Gitea endpoint.

    NOTE(review): the ``hashes`` and ``texts`` caches are filled for whatever
    package is passed on the first lookup and are not keyed by package, so an
    instance presumably serves a single package at a time -- confirm against
    the callers.
    """

    def __init__(self, obs: OBS, db: DB):
        """Store the collaborators and start with empty (unloaded) caches.

        Args:
            obs: handle used to download file contents (``_download``).
            db: database wrapper providing ``cursor()`` and ``conn``.
        """
        self.obs = obs
        self.db = db
        # Maps "<file_md5>-<filename>" to (sha256, size); None = not loaded.
        self.hashes = None
        # Set of file names recorded as text files; None = not loaded.
        self.texts = None
        # magic.Magic(mime=True) instance, created on first use in put().
        self.mime = None

    def get(self, package, name, file_md5):
        """Return the cached ``(sha256, size)`` for a file, or ``None``.

        The hash cache is loaded from the database for ``package`` if it has
        not been loaded yet.
        """
        if self.hashes is None:
            self.load_hashes(package)
        return self.hashes.get(f"{file_md5}-{name}")

    def load_hashes(self, package):
        """Fill ``self.hashes`` with every LFS entry recorded for ``package``."""
        with self.db.cursor() as cur:
            cur.execute(
                """SELECT lfs_oids.file_md5,lop.filename,lfs_oids.sha256,lfs_oids.size
                   FROM lfs_oid_in_package lop
                   JOIN lfs_oids ON lfs_oids.id=lop.lfs_oid_id
                   WHERE lop.package=%s""",
                (package,),
            )
            self.hashes = {
                f"{file_md5}-{filename}": (sha256, size)
                for file_md5, filename, sha256, size in cur.fetchall()
            }

    def put(self, project, package, name, revision, file_md5, size):
        """Download a file, record its SHA-256 and MIME type, and register it.

        The file is streamed from ``self.obs`` in 10000-byte chunks while the
        SHA-256 is computed; the MIME type is guessed from the first chunk.
        The result is upserted into ``lfs_oids``, linked to the package in
        ``lfs_oid_in_package`` and, for ``text/*`` MIME types, noted in
        ``text_files``. If the ``GITEA_REGISTER_SECRET`` environment variable
        is set, the object is also POSTed to the Gitea register endpoint.

        Returns:
            Whatever ``get(package, name, file_md5)`` yields after the caches
            have been reset, i.e. the freshly stored ``(sha256, size)``.
        """
        if not self.mime:
            self.mime = magic.Magic(mime=True)

        mimetype = None
        logging.debug(f"Add LFS for {project}/{package}/{name}")
        fin = self.obs._download(project, package, name, revision)
        digest = hashlib.sha256()
        try:
            while True:
                buffer = fin.read(10000)
                if not buffer:
                    break
                digest.update(buffer)
                # only guess from the first 10K
                if not mimetype:
                    mimetype = self.mime.from_buffer(buffer)
        finally:
            # Release the download stream even if reading or sniffing fails.
            fin.close()
        sha = digest.hexdigest()

        with self.db.cursor() as cur:
            # Use DO UPDATE rather than DO NOTHING so that RETURNING always
            # yields the row id. Conflicts are likely: lookups are keyed by
            # filename/md5, but the uniqueness constraint is on (sha256, size).
            cur.execute(
                """INSERT INTO lfs_oids (project,package,filename,rev,sha256,size,mimetype,file_md5)
                   VALUES (%s,%s,%s,%s,%s,%s,%s,%s)
                   ON CONFLICT (sha256,size) DO UPDATE SET mimetype=EXCLUDED.mimetype
                   RETURNING id""",
                (
                    project,
                    package,
                    name,
                    revision,
                    sha,
                    size,
                    mimetype,
                    file_md5,
                ),
            )
            lfs_oid_id = cur.fetchone()[0]
            cur.execute(
                """INSERT INTO lfs_oid_in_package (package,filename,lfs_oid_id)
                   VALUES (%s,%s,%s)""",
                (package, name, lfs_oid_id),
            )
            # mimetype stays None for an empty download (no chunk was read);
            # such a file is simply not recorded as text.
            if mimetype and mimetype.startswith("text/"):
                cur.execute(
                    "INSERT INTO text_files (package,filename) VALUES (%s,%s)",
                    (package, name),
                )
        self.db.conn.commit()

        secret = os.getenv("GITEA_REGISTER_SECRET")
        if secret:
            data = {
                "secret": secret,
                "project": project,
                "package": package,
                "filename": name,
                "rev": revision,
                "sha256": sha,
                "size": size,
            }

            url = "http://gitea.opensuse.org:9999/register"
            response = requests.post(
                url,
                data=data,
                timeout=10,
            )
            # The status code is only logged; a failed registration does not
            # abort the import.
            logging.debug(f"Registered {response.status_code}")
        else:
            logging.info("Not registering LFS due to missing secret")

        # reset the caches so the next lookup re-reads the database
        self.hashes = None
        self.texts = None
        return self.get(package, name, file_md5)

    def is_text(self, package, filename):
        """Return True if ``filename`` is recorded as a text file.

        The text-file cache is loaded for ``package`` on first use.
        """
        if self.texts is None:
            self.load_texts(package)
        return filename in self.texts

    def load_texts(self, package):
        """Fill ``self.texts`` with the text-file names stored for ``package``."""
        with self.db.cursor() as cur:
            cur.execute("SELECT filename from text_files where package=%s", (package,))
            self.texts = {row[0] for row in cur.fetchall()}

    def get_or_put(self, project, package, name, revision, file_md5, size):
        """Return the SHA-256 of a file, importing the file first if unknown.

        Raises:
            AssertionError: if the size stored in the database differs from
                ``size``.
        """
        result = self.get(package, name, file_md5)
        if not result:
            result = self.put(project, package, name, revision, file_md5, size)

        sha256, db_size = result
        assert db_size == size, (
            f"size mismatch for {package}/{name}: database has {db_size}, expected {size}"
        )

        return sha256