2022-11-09 08:32:18 +01:00
|
|
|
from __future__ import annotations
|
|
|
|
|
2022-11-07 19:40:19 +01:00
|
|
|
import logging
|
2022-11-09 08:32:18 +01:00
|
|
|
import os
|
2022-11-07 19:40:19 +01:00
|
|
|
import sys
|
|
|
|
|
2022-11-09 08:32:18 +01:00
|
|
|
import requests
|
|
|
|
|
2022-11-11 16:22:18 +01:00
|
|
|
from lib.binary import is_text_mimetype
|
2022-11-07 19:40:19 +01:00
|
|
|
from lib.db import DB
|
|
|
|
|
|
|
|
|
|
|
|
# no need for this class yet, so just leave the migration code here
|
|
|
|
class LFSOid:
    """One row of the ``lfs_oids`` table plus the HTTP calls made for it.

    An instance starts empty (all fields ``None``) and is populated either by
    :meth:`add` (insert, then reload) or by :meth:`set_from_dbid` /
    :meth:`set_from_row`.
    """

    # Base URL of the HTTP service queried by check() and register().
    SERVICE_URL = "http://localhost:9999"

    def __init__(self, db: "DB") -> None:
        """Create an empty object bound to the database handle *db*."""
        self.db = db
        self.dbid = None
        self.project = None
        self.package = None
        self.filename = None
        self.revision = None
        # Every other method uses ``sha256``; initialise that name so that
        # check()/register() on a fresh instance do not hit AttributeError.
        self.sha256 = None
        self.size = None
        self.mimetype = None
        self.file_md5 = None

    @staticmethod
    def check_all(db: "DB", package: str) -> None:
        """Re-check the 10 highest ``lfs_oid_id`` entries of *package*.

        Every entry for which :meth:`check` returns ``False`` is passed to
        :meth:`register`.
        """
        with db.cursor() as cur:
            cur.execute(
                "SELECT lfs_oid_id FROM lfs_oid_in_package WHERE package=%s ORDER BY lfs_oid_id DESC limit 10 ",
                (package,),
            )
            for row in cur.fetchall():
                oid = LFSOid(db).set_from_dbid(row[0])
                if not oid.check():
                    oid.register()

    def add(
        self,
        project: str,
        package: str,
        filename: str,
        revision: str,
        sha256: str,
        size: int,
        mimetype: str,
        file_md5: str,
    ) -> None:
        """Store a new LFS object and make sure it is registered.

        Inserts into ``lfs_oids`` (updating the mimetype on a
        ``(sha256,size)`` conflict), links the object to *package*/*filename*
        in ``lfs_oid_in_package``, records the file in ``text_files`` when
        ``is_text_mimetype(mimetype)`` is true, and commits. Afterwards the
        instance is reloaded from the database and registered if
        :meth:`check` does not succeed.
        """
        with self.db.cursor() as cur:
            # DO UPDATE (instead of DO NOTHING) so that RETURNING id yields a
            # row even on conflict. Conflicts are likely: we look for
            # filename/md5 but conflict on sha256.
            cur.execute(
                """INSERT INTO lfs_oids (project,package,filename,rev,sha256,size,mimetype,file_md5)
                            VALUES (%s,%s,%s,%s,%s,%s,%s,%s)
                            ON CONFLICT (sha256,size) DO UPDATE SET mimetype=EXCLUDED.mimetype
                            RETURNING id""",
                (
                    project,
                    package,
                    filename,
                    revision,
                    sha256,
                    size,
                    mimetype,
                    file_md5,
                ),
            )
            row = cur.fetchone()
            lfs_oid_id = row[0]
            cur.execute(
                """INSERT INTO lfs_oid_in_package (package,filename,lfs_oid_id)
                            VALUES (%s,%s,%s)""",
                (package, filename, lfs_oid_id),
            )
            if is_text_mimetype(mimetype):
                cur.execute(
                    "INSERT INTO text_files (package,filename) VALUES (%s,%s)",
                    (package, filename),
                )
        self.db.conn.commit()
        self.set_from_dbid(lfs_oid_id)
        if not self.check():
            self.register()

    def check(self) -> bool:
        """Return ``True`` if the check endpoint answers HTTP 200 for this sha256/size."""
        url = f"{self.SERVICE_URL}/check/{self.sha256}/{self.size}"
        response = requests.get(
            url,
            timeout=10,
        )
        return response.status_code == 200

    def set_from_dbid(self, dbid: int) -> "LFSOid":
        """Load the ``lfs_oids`` row with id *dbid* into this instance.

        Returns ``self`` so calls can be chained.

        Raises:
            LookupError: if no row with that id exists.
        """
        with self.db.cursor() as cur:
            cur.execute("SELECT * from lfs_oids where id=%s", (dbid,))
            row = cur.fetchone()
        if row is None:
            # Previously this surfaced as an opaque TypeError from unpacking None.
            raise LookupError(f"no lfs_oids row with id {dbid}")
        self.set_from_row(row)
        assert self.dbid == dbid
        return self

    def set_from_row(self, row: list) -> "LFSOid":
        """Populate all fields from a 9-element ``lfs_oids`` row and return ``self``.

        The expected order is: id, project, package, filename, revision,
        sha256, size, mimetype, file_md5.
        """
        (
            self.dbid,
            self.project,
            self.package,
            self.filename,
            self.revision,
            self.sha256,
            self.size,
            self.mimetype,
            self.file_md5,
        ) = row
        return self

    def register(self) -> None:
        """POST this object's metadata to the register endpoint.

        Does nothing except log a message when the environment variable
        ``GITEA_REGISTER_SECRET`` is unset or empty. The response status is
        logged but not otherwise evaluated.
        """
        secret = os.getenv("GITEA_REGISTER_SECRET")
        if not secret:
            logging.info("Not registering LFS due to missing secret")
            return

        data = {
            "secret": secret,
            "project": self.project,
            "package": self.package,
            "filename": self.filename,
            "rev": self.revision,
            "sha256": self.sha256,
            "size": self.size,
        }

        url = f"{self.SERVICE_URL}/register"
        response = requests.post(
            url,
            json=data,
            timeout=10,
        )
        logging.info("Register LFS returned %s", response.status_code)
|
2022-11-07 19:40:19 +01:00
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    """
    Import the old data - it only makes sense on a DB with previously scanned revisions
    curl -s https://stephan.kulow.org/git_lfs.csv.xz | xz -cd | PYTHONPATH=$PWD /usr/bin/python3 lib/lfs_oid.py
    """
    db = DB()
    logging.basicConfig(level=logging.DEBUG)

    # One tab-separated record per stdin line; rows already present are skipped.
    insert_oid_sql = """INSERT INTO lfs_oids (project,package,filename,rev,sha256,size,mimetype,file_md5)
                VALUES (%s,%s,%s,%s,%s,%s,%s,%s) ON CONFLICT DO NOTHING"""

    # Statements executed once, in this order, after all records are loaded:
    # build a temporary oid<->revision mapping via the file md5, then derive
    # the text_files and lfs_oid_in_package entries from it.
    followup_statements = (
        """
            CREATE TEMPORARY TABLE lfs_oid_in_revision (
                revision_id INTEGER,
                lfs_oid_id INTEGER NOT NULL,
                name VARCHAR(255) NOT NULL
            )
            """,
        """INSERT INTO lfs_oid_in_revision (revision_id, lfs_oid_id, name)
                SELECT revision_id,lfs_oids.id,files.name FROM lfs_oids JOIN files ON files.md5=lfs_oids.file_md5""",
        """INSERT INTO text_files (package,filename)
                SELECT DISTINCT r.package, lfs_oid_in_revision.name FROM lfs_oids
                JOIN lfs_oid_in_revision on lfs_oid_in_revision.lfs_oid_id=lfs_oids.id
                JOIN revisions r ON r.id=lfs_oid_in_revision.revision_id
                WHERE lfs_oids.mimetype like 'text/%' ON CONFLICT DO NOTHING""",
        """INSERT INTO lfs_oid_in_package (lfs_oid_id, package, filename)
                SELECT DISTINCT lfs_oids.id,r.package, lfs_oid_in_revision.name FROM lfs_oids
                JOIN lfs_oid_in_revision on lfs_oid_in_revision.lfs_oid_id=lfs_oids.id
                JOIN revisions r ON r.id=lfs_oid_in_revision.revision_id""",
    )

    with db.cursor() as cur:
        for line in sys.stdin:
            # Exactly eight columns are expected; anything else raises ValueError.
            fields = line.strip().split("\t")
            project, package, filename, rev, sha256, size, mimetype, md5 = fields
            cur.execute(
                insert_oid_sql,
                (project, package, filename, rev, sha256, size, mimetype, md5),
            )

        for statement in followup_statements:
            cur.execute(statement)
    db.conn.commit()
|