Fix LFS register (it needs json not data)

Refactored the LFS OID handling into a class of its own and
added a way to recheck all LFS handles (re-registering those that fail the check)
This commit is contained in:
Stephan Kulow 2022-11-09 08:32:18 +01:00
parent f5b29886ae
commit 7861a7e9b0
3 changed files with 134 additions and 56 deletions

View File

@ -6,6 +6,7 @@ import yaml
from lib.binary import is_binary_or_large
from lib.db import DB
from lib.git import Git
from lib.lfs_oid import LFSOid
from lib.obs import OBS
from lib.proxy_sha256 import ProxySHA256
from lib.tree_builder import TreeBuilder
@ -60,6 +61,8 @@ class GitExporter:
return left_to_commit
def export_as_git(self):
if os.getenv("CHECK_ALL_LFS"):
LFSOid.check_all(self.db, self.package)
tree = TreeBuilder(self.db).build(self.project, self.package)
flats = tree.as_flat_list()

View File

@ -1,13 +1,138 @@
from __future__ import annotations
import logging
import os
import sys
import requests
from lib.db import DB
# no need for this class yet, so just leave the migration code here
class LFSOid:
    """A single ``lfs_oids`` database row and its registration state.

    An instance is bound to a ``DB`` handle and starts out empty; it is
    populated either by :meth:`add` (insert a new OID) or by
    :meth:`set_from_dbid` / :meth:`set_from_row` (load an existing one).
    :meth:`check` and :meth:`register` talk to the HTTP service at
    ``_SERVICE_URL``.
    """

    # Base URL of the service exposing the /check and /register endpoints.
    _SERVICE_URL = "http://gitea.opensuse.org:9999"

    def __init__(self, db: DB) -> None:
        """Create an empty, unloaded OID bound to *db*."""
        self.db = db
        self.dbid = None
        self.project = None
        self.package = None
        self.filename = None
        self.revision = None
        # Named ``sha256`` (not ``sha``) to match what set_from_row(),
        # check() and register() read and write.
        self.sha256 = None
        self.size = None
        self.mimetype = None
        self.file_md5 = None

    @staticmethod
    def check_all(db: DB, package: str) -> None:
        """Re-check the most recent OIDs of *package* and register missing ones.

        Only the 10 newest ``lfs_oid_in_package`` entries (highest
        ``lfs_oid_id``) are looked at, as fixed by the query below.
        """
        with db.cursor() as cur:
            cur.execute(
                "SELECT lfs_oid_id FROM lfs_oid_in_package WHERE package=%s ORDER BY lfs_oid_id DESC limit 10 ",
                (package,),
            )
            for row in cur.fetchall():
                oid = LFSOid(db).set_from_dbid(row[0])
                if not oid.check():
                    oid.register()

    def add(
        self,
        project: str,
        package: str,
        filename: str,
        revision: str,
        sha256: str,
        size: int,
        mimetype: str,
        file_md5: str,
    ) -> None:
        """Store a new OID, link it to its package and make sure it is registered.

        Inserts (or, on a ``(sha256, size)`` conflict, updates) the
        ``lfs_oids`` row, records it in ``lfs_oid_in_package``, notes
        ``text/*`` files in ``text_files``, commits, reloads this object
        from the stored row and registers it if :meth:`check` fails.
        """
        with self.db.cursor() as cur:
            # DO UPDATE (rather than DO NOTHING) so that RETURNING id yields
            # a row even on conflict. Conflicts are likely: lookups go by
            # filename/md5 while the unique constraint is on sha256/size.
            cur.execute(
                """INSERT INTO lfs_oids (project,package,filename,rev,sha256,size,mimetype,file_md5)
                        VALUES (%s,%s,%s,%s,%s,%s,%s,%s)
                        ON CONFLICT (sha256,size) DO UPDATE SET mimetype=EXCLUDED.mimetype
                        RETURNING id""",
                (
                    project,
                    package,
                    filename,
                    revision,
                    sha256,
                    size,
                    mimetype,
                    file_md5,
                ),
            )
            lfs_oid_id = cur.fetchone()[0]
            cur.execute(
                """INSERT INTO lfs_oid_in_package (package,filename,lfs_oid_id)
                        VALUES (%s,%s,%s)""",
                (package, filename, lfs_oid_id),
            )
            if mimetype.startswith("text/"):
                cur.execute(
                    "INSERT INTO text_files (package,filename) VALUES (%s,%s)",
                    (package, filename),
                )
        self.db.conn.commit()
        # Load the stored row: on conflict it may differ from the arguments.
        # (The original called a non-existent ``from_db`` here.)
        self.set_from_dbid(lfs_oid_id)
        if not self.check():
            self.register()

    def check(self) -> bool:
        """Return True if the service answers 200 for this sha256/size pair."""
        url = f"{self._SERVICE_URL}/check/{self.sha256}/{self.size}"
        response = requests.get(
            url,
            timeout=10,
        )
        return response.status_code == 200

    def set_from_dbid(self, dbid: int) -> LFSOid:
        """Load the ``lfs_oids`` row with id *dbid* into this object.

        Returns ``self`` so calls can be chained.

        Raises:
            LookupError: if no row with that id exists.
        """
        with self.db.cursor() as cur:
            # Explicit column list: set_from_row() unpacks by position, so
            # the order must not depend on the table's physical layout.
            cur.execute(
                "SELECT id,project,package,filename,rev,sha256,size,mimetype,file_md5"
                " from lfs_oids where id=%s",
                (dbid,),
            )
            row = cur.fetchone()
        if row is None:
            raise LookupError(f"No lfs_oids row with id {dbid}")
        self.set_from_row(row)
        assert self.dbid == dbid
        return self

    def set_from_row(self, row: list) -> LFSOid:
        """Populate this object from a 9-column ``lfs_oids`` row.

        Expected order: id, project, package, filename, rev, sha256, size,
        mimetype, file_md5. Returns ``self``.
        """
        (
            self.dbid,
            self.project,
            self.package,
            self.filename,
            self.revision,
            self.sha256,
            self.size,
            self.mimetype,
            self.file_md5,
        ) = row
        return self

    def register(self) -> None:
        """POST this OID to the service's ``/register`` endpoint as JSON.

        Does nothing (apart from logging) when the environment variable
        ``GITEA_REGISTER_SECRET`` is unset or empty.
        """
        secret = os.getenv("GITEA_REGISTER_SECRET")
        if not secret:
            logging.info("Not registering LFS due to missing secret")
            return

        data = {
            "secret": secret,
            "project": self.project,
            "package": self.package,
            "filename": self.filename,
            "rev": self.revision,
            "sha256": self.sha256,
            "size": self.size,
        }

        url = f"{self._SERVICE_URL}/register"
        # The endpoint needs a JSON body, hence json= rather than data=.
        response = requests.post(
            url,
            json=data,
            timeout=10,
        )
        logging.info(f"Register LFS returned {response.status_code}")
if __name__ == "__main__":

View File

@ -1,6 +1,5 @@
import hashlib
import logging
import os
try:
import magic
@ -11,6 +10,7 @@ except:
import requests
from lib.db import DB
from lib.lfs_oid import LFSOid
from lib.obs import OBS
@ -60,59 +60,9 @@ class ProxySHA256:
if not mimetype:
mimetype = self.mime.from_buffer(buffer)
fin.close()
sha = sha.hexdigest()
with self.db.cursor() as cur:
# we UPDATE here so the return functions. conflicts are likely as we look for filename/md5 but conflict on sha256
cur.execute(
"""INSERT INTO lfs_oids (project,package,filename,rev,sha256,size,mimetype,file_md5)
VALUES (%s,%s,%s,%s,%s,%s,%s,%s)
ON CONFLICT (sha256,size) DO UPDATE SET mimetype=EXCLUDED.mimetype
RETURNING id""",
(
project,
package,
name,
revision,
sha,
size,
mimetype,
file_md5,
),
)
row = cur.fetchone()
lfs_oid_id = row[0]
cur.execute(
"""INSERT INTO lfs_oid_in_package (package,filename,lfs_oid_id)
VALUES (%s,%s,%s)""",
(package, name, lfs_oid_id),
)
if mimetype.startswith("text/"):
cur.execute(
"INSERT INTO text_files (package,filename) VALUES (%s,%s)",
(package, name),
)
self.db.conn.commit()
if os.getenv("GITEA_REGISTER_SECRET"):
data = {
"secret": os.getenv("GITEA_REGISTER_SECRET"),
"project": project,
"package": package,
"filename": name,
"rev": revision,
"sha256": sha,
"size": size,
}
url = "http://gitea.opensuse.org:9999/register"
response = requests.post(
url,
data=data,
timeout=10,
)
logging.debug(f"Registered {response.status_code}")
else:
logging.info("Not registering LFS due to missing secret")
LFSOid(self.db).add(
project, package, name, revision, sha.hexdigest(), size, mimetype, file_md5
)
# reset
self.hashes = None