import logging
import sys

from lib.db import DB


# no need for this class yet, so just leave the migration code here
class LFSOid:
    """Placeholder class; it currently holds no state and has no behavior."""

    def __init__(self) -> None:
        pass


def main() -> None:
    """
    Import the old data - it only makes sense on a DB with previously scanned revisions
    curl -s https://stephan.kulow.org/git_lfs.csv.xz | xz -cd | PYTHONPATH=$PWD /usr/bin/python3 lib/lfs_oid.py

    Reads tab-separated rows from stdin, each holding exactly eight columns:
    project, package, filename, rev, sha256, size, mimetype, md5.
    Whitespace-only lines are skipped.

    Raises:
        ValueError: if a non-blank line does not contain exactly eight
            tab-separated columns.
    """
    expected_columns = 8

    db = DB()
    logging.basicConfig(level=logging.DEBUG)

    with db.cursor() as cur:
        for lineno, raw in enumerate(sys.stdin, start=1):
            if not raw.strip():
                # Tolerate blank lines (e.g. a trailing newline in the stream).
                continue

            # Trim spaces and the line terminator only. A bare strip() would
            # also eat leading/trailing tabs, silently dropping an empty first
            # or last column and breaking the column count.
            fields = raw.strip(" \r\n").split("\t")
            if len(fields) != expected_columns:
                raise ValueError(
                    f"line {lineno}: expected {expected_columns} "
                    f"tab-separated columns, got {len(fields)}"
                )

            cur.execute(
                """INSERT INTO lfs_oids (project,package,filename,rev,sha256,size,mimetype,file_md5)
                VALUES (%s,%s,%s,%s,%s,%s,%s,%s) ON CONFLICT DO NOTHING""",
                tuple(fields),
            )

        # Scratch table linking every imported oid to the revisions whose
        # files carry the same md5.
        cur.execute(
            """
            CREATE TEMPORARY TABLE lfs_oid_in_revision (
                revision_id INTEGER,
                lfs_oid_id INTEGER NOT NULL,
                name VARCHAR(255) NOT NULL
            )
            """
        )
        cur.execute(
            """INSERT INTO lfs_oid_in_revision (revision_id, lfs_oid_id, name)
            SELECT revision_id,lfs_oids.id,files.name
            FROM lfs_oids JOIN files ON files.md5=lfs_oids.file_md5"""
        )

        # Record files with a text/* mimetype per package.
        cur.execute(
            """INSERT INTO text_files (package,filename)
            SELECT DISTINCT r.package, lfs_oid_in_revision.name
            FROM lfs_oids
            JOIN lfs_oid_in_revision on lfs_oid_in_revision.lfs_oid_id=lfs_oids.id
            JOIN revisions r ON r.id=lfs_oid_in_revision.revision_id
            WHERE lfs_oids.mimetype like 'text/%' ON CONFLICT DO NOTHING"""
        )

        # Map each oid to the (package, filename) pairs it appears under.
        cur.execute(
            """INSERT INTO lfs_oid_in_package (lfs_oid_id, package, filename)
            SELECT DISTINCT lfs_oids.id,r.package, lfs_oid_in_revision.name
            FROM lfs_oids
            JOIN lfs_oid_in_revision on lfs_oid_in_revision.lfs_oid_id=lfs_oids.id
            JOIN revisions r ON r.id=lfs_oid_in_revision.revision_id"""
        )

    db.conn.commit()


if __name__ == "__main__":
    main()