2022-10-17 15:17:25 +02:00
|
|
|
import hashlib
|
2022-10-17 19:54:47 +02:00
|
|
|
import logging
|
2022-11-07 19:40:19 +01:00
|
|
|
import os
|
2022-10-17 19:54:47 +02:00
|
|
|
|
2022-11-07 19:40:19 +01:00
|
|
|
try:
|
|
|
|
import magic
|
|
|
|
except:
|
|
|
|
print("Install python3-python-magic, not python3-magic")
|
|
|
|
raise
|
2022-10-17 15:17:25 +02:00
|
|
|
|
2022-11-07 19:40:19 +01:00
|
|
|
import requests
|
2022-10-17 15:17:25 +02:00
|
|
|
|
2022-11-07 19:40:19 +01:00
|
|
|
from lib.db import DB
|
|
|
|
from lib.obs import OBS
|
2022-10-17 15:17:25 +02:00
|
|
|
|
|
|
|
|
|
|
|
class ProxySHA256:
    """Look up and register SHA-256 digests for package files.

    Digests live in the ``lfs_oids`` table and are linked to packages through
    ``lfs_oid_in_package``.  Files whose guessed MIME type starts with
    ``text/`` are additionally recorded in ``text_files``.

    Two per-instance caches are kept (``hashes`` and ``texts``).  They are
    filled on first use for whatever package is asked for at that moment and
    are NOT keyed by package afterwards: until :meth:`put` resets them, later
    calls reuse the cached data regardless of their ``package`` argument.
    """

    # Bytes read per chunk while hashing a download.  The MIME type is guessed
    # from the first chunk of this size.
    _CHUNK_SIZE = 10000

    def __init__(self, obs: "OBS", db: "DB"):
        """Store the OBS client and DB handle; all caches start out empty."""
        self.obs = obs
        self.db = db
        # "<file_md5>-<filename>" -> (sha256, size); None means "not loaded".
        self.hashes = None
        # Set of filenames listed in text_files; None means "not loaded".
        self.texts = None
        # magic.Magic(mime=True) instance, created lazily by put().
        self.mime = None

    def get(self, package, name, file_md5):
        """Return the cached ``(sha256, size)`` for a file, or ``None``.

        The hash cache is loaded for ``package`` if it has not been loaded
        yet; the lookup key combines ``file_md5`` and ``name``.
        """
        if self.hashes is None:
            self.load_hashes(package)
        return self.hashes.get(f"{file_md5}-{name}")

    def load_hashes(self, package):
        """(Re)build ``self.hashes`` from the database rows of ``package``."""
        with self.db.cursor() as cur:
            cur.execute(
                """SELECT lfs_oids.file_md5,lop.filename,lfs_oids.sha256,lfs_oids.size
                   FROM lfs_oid_in_package lop
                   JOIN lfs_oids ON lfs_oids.id=lop.lfs_oid_id
                   WHERE lop.package=%s""",
                (package,),
            )
            self.hashes = {
                f"{row[0]}-{row[1]}": (row[2], row[3]) for row in cur.fetchall()
            }

    def put(self, project, package, name, revision, file_md5, size):
        """Download a file, hash it, store the digest and return it.

        The file is fetched through ``self.obs._download``, hashed with
        SHA-256 and its MIME type guessed.  The result is written to the
        database and, when the ``GITEA_REGISTER_SECRET`` environment variable
        is set, announced to the registration endpoint.  Both caches are
        reset afterwards.

        ``size`` is stored as given; it is not compared against the number of
        bytes actually downloaded.

        Returns whatever :meth:`get` yields for the file after the caches
        have been reset, i.e. the ``(sha256, size)`` pair read back from the
        database.
        """
        if not self.mime:
            self.mime = magic.Magic(mime=True)

        logging.debug(f"Add LFS for {project}/{package}/{name}")
        fin = self.obs._download(project, package, name, revision)
        try:
            sha, mimetype = self._digest(fin)
        finally:
            # Release the download even when hashing/sniffing fails.
            fin.close()

        self._store(project, package, name, revision, file_md5, size, sha, mimetype)
        self._register(project, package, name, revision, sha, size)

        # reset
        self.hashes = None
        self.texts = None
        return self.get(package, name, file_md5)

    def _digest(self, fin):
        """Read ``fin`` to the end; return ``(sha256 hexdigest, mimetype)``.

        ``mimetype`` is ``None`` when the stream yields no data at all.
        """
        digest = hashlib.sha256()
        mimetype = None
        while True:
            buffer = fin.read(self._CHUNK_SIZE)
            if not buffer:
                break
            digest.update(buffer)
            # Guess from the first chunk only; a later chunk is consulted
            # solely when the previous guess came back empty.
            if not mimetype:
                mimetype = self.mime.from_buffer(buffer)
        return digest.hexdigest(), mimetype

    def _store(self, project, package, name, revision, file_md5, size, sha, mimetype):
        """Insert the digest and its package link, then commit."""
        with self.db.cursor() as cur:
            # Callers look files up by filename/md5 while the table conflicts
            # on (sha256, size), so conflicts are likely.  DO UPDATE (rather
            # than DO NOTHING) is used so that RETURNING still yields the id
            # of the already existing row.
            cur.execute(
                """INSERT INTO lfs_oids (project,package,filename,rev,sha256,size,mimetype,file_md5)
                   VALUES (%s,%s,%s,%s,%s,%s,%s,%s)
                   ON CONFLICT (sha256,size) DO UPDATE SET mimetype=EXCLUDED.mimetype
                   RETURNING id""",
                (
                    project,
                    package,
                    name,
                    revision,
                    sha,
                    size,
                    mimetype,
                    file_md5,
                ),
            )
            lfs_oid_id = cur.fetchone()[0]
            cur.execute(
                """INSERT INTO lfs_oid_in_package (package,filename,lfs_oid_id)
                   VALUES (%s,%s,%s)""",
                (package, name, lfs_oid_id),
            )
            # mimetype is None for an empty download; that is not a text file.
            if mimetype and mimetype.startswith("text/"):
                cur.execute(
                    "INSERT INTO text_files (package,filename) VALUES (%s,%s)",
                    (package, name),
                )
        self.db.conn.commit()

    def _register(self, project, package, name, revision, sha, size):
        """POST the new digest to the registration endpoint if a secret is set."""
        secret = os.getenv("GITEA_REGISTER_SECRET")
        if not secret:
            logging.info("Not registering LFS due to missing secret")
            return

        data = {
            "secret": secret,
            "project": project,
            "package": package,
            "filename": name,
            "rev": revision,
            "sha256": sha,
            "size": size,
        }
        # NOTE(review): the secret travels over plain http here - confirm that
        # this endpoint is only reachable on a trusted network.
        url = "http://gitea.opensuse.org:9999/register"
        response = requests.post(
            url,
            data=data,
            timeout=10,
        )
        # The status code is only logged; a failed registration is not an error.
        logging.debug(f"Registered {response.status_code}")

    def is_text(self, package, filename):
        """Return True if ``filename`` is in the cached ``text_files`` set.

        The set is loaded for ``package`` if it has not been loaded yet.
        """
        if self.texts is None:
            self.load_texts(package)
        return filename in self.texts

    def load_texts(self, package):
        """(Re)build ``self.texts`` from the ``text_files`` rows of ``package``."""
        with self.db.cursor() as cur:
            cur.execute("SELECT filename from text_files where package=%s", (package,))
            # Assign only after the query succeeded so that a failure does not
            # leave an empty set cached as if the package had no text files.
            self.texts = {row[0] for row in cur.fetchall()}

    def get_or_put(self, project, package, name, revision, file_md5, size):
        """Return the SHA-256 of a file, downloading and storing it if unknown.

        Raises:
            AssertionError: the size recorded in the database differs from
                ``size``.
        """
        result = self.get(package, name, file_md5)
        if not result:
            result = self.put(project, package, name, revision, file_md5, size)

        sha256, db_size = result
        if db_size != size:
            # Raised explicitly (instead of a bare ``assert``) so the check is
            # not stripped when Python runs with -O; the exception type is
            # the same as before.
            raise AssertionError(
                f"size mismatch for {package}/{name}: database has {db_size}, expected {size}"
            )
        return sha256
|