git-importer/lib/lfs_oid.py
Stephan Kulow 3e1fbaa1c3 Migrate the ProxySHA256 data into postgresql DB
The calculation of the sha256 and the mimetype is local due to that
2022-11-07 21:50:31 +01:00

68 lines
2.5 KiB
Python

import logging
import sys
from lib.db import DB
# no need for this class yet, so just leave the migration code here
class LFSOid:
def __init__(self) -> None:
pass
if __name__ == "__main__":
"""
Import the old data - it only makes sense on a DB with previously scanned revisions
curl -s https://stephan.kulow.org/git_lfs.csv.xz | xz -cd | PYTHONPATH=$PWD /usr/bin/python3 lib/lfs_oid.py
"""
db = DB()
logging.basicConfig(level=logging.DEBUG)
with db.cursor() as cur:
while True:
line = sys.stdin.readline()
if not line:
break
(
project,
package,
filename,
rev,
sha256,
size,
mimetype,
md5,
) = line.strip().split("\t")
cur.execute(
"""INSERT INTO lfs_oids (project,package,filename,rev,sha256,size,mimetype,file_md5)
VALUES (%s,%s,%s,%s,%s,%s,%s,%s) ON CONFLICT DO NOTHING""",
(project, package, filename, rev, sha256, size, mimetype, md5),
)
cur.execute(
"""
CREATE TEMPORARY TABLE lfs_oid_in_revision (
revision_id INTEGER,
lfs_oid_id INTEGER NOT NULL,
name VARCHAR(255) NOT NULL
)
"""
)
cur.execute(
"""INSERT INTO lfs_oid_in_revision (revision_id, lfs_oid_id, name)
SELECT revision_id,lfs_oids.id,files.name FROM lfs_oids JOIN files ON files.md5=lfs_oids.file_md5"""
)
cur.execute(
"""INSERT INTO text_files (package,filename)
SELECT DISTINCT r.package, lfs_oid_in_revision.name FROM lfs_oids
JOIN lfs_oid_in_revision on lfs_oid_in_revision.lfs_oid_id=lfs_oids.id
JOIN revisions r ON r.id=lfs_oid_in_revision.revision_id
WHERE lfs_oids.mimetype like 'text/%' ON CONFLICT DO NOTHING"""
)
cur.execute(
"""INSERT INTO lfs_oid_in_package (lfs_oid_id, package, filename)
SELECT DISTINCT lfs_oids.id,r.package, lfs_oid_in_revision.name FROM lfs_oids
JOIN lfs_oid_in_revision on lfs_oid_in_revision.lfs_oid_id=lfs_oids.id
JOIN revisions r ON r.id=lfs_oid_in_revision.revision_id"""
)
db.conn.commit()