git-importer/lib/db.py
Stephan Kulow 3e1fbaa1c3 Migrate the ProxySHA256 data into postgresql DB
The calculation of the sha256 and the mimetype is local due to that
2022-11-07 21:50:31 +01:00

288 lines
9.9 KiB
Python

import logging
import psycopg2
from psycopg2.extras import LoggingConnection
from lib.config import config
class DB:
def __init__(self, section="production"):
self.config_section = section
self.connect()
self.create_tables()
def connect(self):
try:
# read the connection parameters
params = config(section=self.config_section)
# connect to the PostgreSQL server
self.conn = psycopg2.connect(connection_factory=LoggingConnection, **params)
logger = logging.getLogger(__name__)
self.conn.initialize(logger)
except (Exception, psycopg2.DatabaseError) as error:
print(error)
raise error
def schema_version(self):
# create a cursor
with self.conn.cursor() as cur:
# execute a statement
try:
cur.execute("SELECT MAX(version) from scheme")
except psycopg2.errors.UndefinedTable as error:
cur.close()
self.close()
self.connect()
return 0
db_version = cur.fetchone()
return db_version[0]
def close(self):
if self.conn is not None:
self.conn.close()
self.conn = None
def create_tables(self):
"""Create the tables if not existing - assumes connected"""
schemes = dict()
schemes[1] = (
"""
CREATE TABLE scheme (
id SERIAL PRIMARY KEY,
version SMALLINT NOT NULL
)
""",
"INSERT INTO scheme (version) VALUES(1)",
"""DROP TABLE IF EXISTS revisions """,
""" CREATE TABLE revisions (
id SERIAL PRIMARY KEY,
project VARCHAR(255) NOT NULL,
package VARCHAR(255) NOT NULL,
rev INTEGER NOT NULL,
unexpanded_srcmd5 VARCHAR(255) NOT NULL,
commit_time timestamp NOT NULL,
userid VARCHAR(255) NOT NULL,
comment TEXT,
requestid INTEGER
)
""",
"""
CREATE UNIQUE INDEX ppr ON revisions (project, package, rev);
""",
)
schemes[2] = (
"""DROP TABLE IF EXISTS links""",
"""
CREATE TABLE links (
id SERIAL PRIMARY KEY,
revision_id INTEGER NOT NULL,
project VARCHAR(255) NOT NULL,
package VARCHAR(255) NOT NULL
)
""",
"UPDATE scheme SET version=2",
)
schemes[3] = (
"""
ALTER TABLE revisions ADD broken boolean NOT NULL DEFAULT(FALSE)
""",
"UPDATE scheme SET version=3",
)
schemes[4] = (
"""
ALTER TABLE revisions ADD expanded_srcmd5 VARCHAR(255)
""",
"UPDATE scheme SET version=4",
)
schemes[5] = (
"""DROP TABLE IF EXISTS files""",
"""
CREATE TABLE files (
id SERIAL PRIMARY KEY,
revision_id INTEGER NOT NULL,
name VARCHAR(255) NOT NULL,
md5 VARCHAR(255) NOT NULL,
size INTEGER NOT NULL,
mtime INTEGER NOT NULL
)
""",
"UPDATE scheme SET version=5",
)
schemes[6] = (
"""DROP TABLE IF EXISTS requests""",
"""
CREATE TABLE requests (
id SERIAL PRIMARY KEY,
number INTEGER NOT NULL,
revision_id INTEGER NOT NULL,
creator VARCHAR(255) NOT NULL,
type VARCHAR(10) NOT NULL,
state VARCHAR(10) NOT NULL,
source_package VARCHAR(255),
source_project VARCHAR(255)
)
""",
"UPDATE scheme SET version=6",
)
schemes[7] = (
"ALTER TABLE requests DROP COLUMN revision_id",
"UPDATE scheme SET version=7",
)
schemes[8] = (
"ALTER TABLE requests ADD COLUMN source_rev VARCHAR(40)",
"UPDATE scheme SET version=8",
)
schemes[9] = (
"ALTER TABLE revisions ADD COLUMN request_number INTEGER",
"UPDATE revisions SET request_number=requestid",
"ALTER TABLE revisions DROP COLUMN requestid",
"UPDATE scheme SET version=9",
)
schemes[10] = (
"ALTER TABLE revisions ADD COLUMN request_id INTEGER",
"""ALTER TABLE revisions
ADD CONSTRAINT request_id_foreign_key
FOREIGN KEY (request_id)
REFERENCES requests (id)""",
"UPDATE scheme SET version=10",
)
schemes[11] = (
"create index request_number_idx on revisions (request_number)",
"UPDATE scheme SET version=11",
)
schemes[12] = (
"create index request_number_idx2 on requests(number)",
"UPDATE scheme SET version=12",
)
schemes[13] = (
"DROP TABLE IF EXISTS users",
"""CREATE TABLE users (
id SERIAL PRIMARY KEY,
userid VARCHAR(255) NOT NULL,
email VARCHAR(255) NOT NULL,
realname VARCHAR(255) NOT NULL
)
""",
"UPDATE scheme SET version=13",
)
schemes[14] = (
"DROP TABLE IF EXISTS linked_revs",
"""
CREATE TABLE linked_revs (
id SERIAL PRIMARY KEY,
revision_id INTEGER NOT NULL,
linked_id INTEGER NOT NULL
)
""",
"UPDATE scheme SET version=14",
)
schemes[15] = (
"ALTER TABLE revisions ALTER COLUMN rev TYPE real USING rev::real",
"UPDATE scheme SET version=15",
)
schemes[16] = (
"""DROP TABLE IF EXISTS fake_revs""",
"""
CREATE TABLE fake_revs (
id SERIAL PRIMARY KEY,
revision_id INTEGER NOT NULL,
linked_id INTEGER NOT NULL
)
""",
"create index revs_linked on fake_revs (revision_id,linked_id)",
"UPDATE scheme SET version=16",
)
schemes[17] = (
"ALTER TABLE revisions ADD COLUMN files_hash VARCHAR(40)",
"UPDATE scheme SET version=17",
)
schemes[18] = (
"ALTER TABLE linked_revs ADD COLUMN considered BOOLEAN DEFAULT FALSE",
"UPDATE scheme SET version=18",
)
schemes[19] = (
"CREATE INDEX ON files(revision_id)",
"UPDATE scheme SET version=19",
)
schemes[20] = (
"CREATE INDEX ON linked_revs(linked_id)",
"CREATE INDEX ON linked_revs(considered)",
"UPDATE scheme SET version=20",
)
schemes[21] = (
"ALTER TABLE revisions ADD COLUMN api_url VARCHAR(40)",
"UPDATE revisions SET api_url='https://api.opensuse.org'",
"ALTER TABLE revisions ALTER COLUMN api_url SET NOT NULL",
"UPDATE scheme SET version=21",
)
schemes[22] = (
"""DROP TABLE IF EXISTS lfs_oids""",
"""
CREATE TABLE lfs_oids (
id SERIAL PRIMARY KEY,
project VARCHAR(255) NOT NULL,
package VARCHAR(255) NOT NULL,
filename VARCHAR(255) NOT NULL,
rev VARCHAR(40) NOT NULL,
sha256 VARCHAR(70) NOT NULL,
size INTEGER NOT NULL,
mimetype VARCHAR(255) NOT NULL,
file_md5 VARCHAR(40) NOT NULL
)
""",
"CREATE UNIQUE INDEX ON lfs_oids (sha256,size)",
"CREATE INDEX ON revisions(package)",
"""DROP TABLE IF EXISTS text_files""",
"""
CREATE TABLE text_files (
id SERIAL PRIMARY KEY,
package VARCHAR(255) NOT NULL,
filename VARCHAR(255) NOT NULL
)
""",
"CREATE UNIQUE INDEX ON text_files (package,filename)",
"""DROP TABLE IF EXISTS lfs_oid_in_package""",
"""
CREATE TABLE lfs_oid_in_package (
id SERIAL PRIMARY KEY,
lfs_oid_id INTEGER NOT NULL,
package VARCHAR(255) NOT NULL,
filename VARCHAR(255) NOT NULL
)
""",
"CREATE INDEX ON text_files(package)",
"CREATE INDEX ON lfs_oid_in_package(package)",
"UPDATE scheme SET version=22",
)
schema_version = self.schema_version()
if (schema_version + 1) not in schemes:
return
try:
with self.conn.cursor() as cur:
# create table one by one
for version, commands in schemes.items():
if version <= schema_version:
continue
for command in commands:
cur.execute(command)
# commit the changes
self.conn.commit()
except (Exception, psycopg2.DatabaseError) as error:
print(error)
self.close()
raise error
def cursor(self):
return self.conn.cursor()
if __name__ == "__main__":
db = DB()
db.create_tables()