21 Commits

Author SHA1 Message Date
eba3fece91 Add and remove literal files
pathspec in git has special characters that we should not trigger.
Assume every filespec as literal
2024-07-31 12:38:49 +02:00
Dirk Mueller
b74f36c81e Merge pull request 'Make sure we create devel branch, even if no diff to Factory' (#29) from adamm/git-importer:always_create_devel into use_git_for_sha256
Reviewed-on: importers/git-importer#29
2024-06-13 15:02:30 +02:00
Adam Majer
7b9b033adc Make sure we create devel branch, when no diff to Factory 2024-06-13 09:51:27 +02:00
Dirk Mueller
c44cb17e5d Merge pull request 'Ignore .osc directory' (#28) from adamm/git-importer:gitingore into use_git_for_sha256
Reviewed-on: importers/git-importer#28
Reviewed-by: Dirk Mueller <dirkmueller@noreply@src.opensuse.org>
2024-06-10 18:13:33 +02:00
Dirk Mueller
3384a6f2b1 Merge pull request 'Fix typo' (#27) from adamm/git-importer:typo into use_git_for_sha256
Reviewed-on: importers/git-importer#27
2024-06-10 18:10:40 +02:00
Adam Majer
1ec72ac80c Ignore .osc directory 2024-06-10 17:31:14 +02:00
Dirk Mueller
033b8f8cee Merge pull request 'New branch is empty' (#26) from adamm/git-importer:fix_inintial_branch into use_git_for_sha256
Reviewed-on: importers/git-importer#26
Reviewed-by: Dirk Mueller <dirkmueller@noreply@src.opensuse.org>
2024-06-10 17:03:48 +02:00
Adam Majer
17062df04e Fix typo 2024-06-10 15:44:21 +02:00
Adam Majer
aa73d97b35 New branch is empty
New branches must be born empty
2024-06-10 15:35:11 +02:00
Dirk Müller
574bc9aa10 Avoid guessing in switch 2024-05-17 20:07:16 +02:00
Dirk Müller
0414b33206 Fix testing for origin
The previous code path was untested and not working
2024-05-17 20:06:25 +02:00
Dirk Müller
b9670821a9 Only init the repository if it doesn't exist already
harmless, but avoids a scary warning
2024-05-17 20:05:54 +02:00
Dirk Müller
073550825c Fixups to improve the conversion process 2024-05-17 14:41:42 +02:00
Dirk Müller
5a353c98d3 Add tasks 2024-05-17 11:46:18 +02:00
Dirk Müller
1fc466d15b Add monitor for commits 2024-05-17 11:40:19 +02:00
Dirk Müller
39fde7744a Code cleanup 2024-05-16 15:47:45 +02:00
Dirk Müller
f5ffc83a69 Remove double quoting of url parameters
makeurl quotes by itself, so this was messing it up
2024-05-16 11:49:14 +02:00
Dirk Müller
d0ccf83684 Revert "Try to fetch the element as deleted if initial access failed"
The OBS api has been fixed to provide an automatic fallback via
https://github.com/openSUSE/open-build-service/pull/15655

This reverts commit c9e07e536f.
2024-05-16 11:49:14 +02:00
Dirk Müller
b0ffb01c59 cleanups 2024-05-16 11:49:14 +02:00
Dirk Müller
28d5c6e606 Switch to psycopg rather than psycopg2
It's a bit more modern and uses dedicated c bindings
2024-05-16 11:49:14 +02:00
Dirk Mueller
1e22c2895a Merge pull request 'Switch to sha-256 git repo and use git tools again' (#23) from adamm/git-importer:main into main
Reviewed-on: importers/git-importer#23
2024-05-16 11:48:36 +02:00
12 changed files with 177 additions and 160 deletions

View File

@@ -1,4 +1,4 @@
sudo zypper in python3-psycopg2 sudo zypper in python3-psycopg
sudo su - postgres sudo su - postgres
# `createdb -O <LOCAL_USER> imported_git` # `createdb -O <LOCAL_USER> imported_git`

View File

@@ -14,8 +14,6 @@ def config(filename="database.ini", section="production"):
for param in params: for param in params:
db[param[0]] = param[1] db[param[0]] = param[1]
else: else:
raise Exception( raise Exception(f"Section {section} not found in the {filename} file")
"Section {0} not found in the {1} file".format(section, filename)
)
return db return db

View File

@@ -1,7 +1,6 @@
import logging import logging
import psycopg2 import psycopg
from psycopg2.extras import LoggingConnection
from lib.config import config from lib.config import config
@@ -17,22 +16,20 @@ class DB:
# read the connection parameters # read the connection parameters
params = config(section=self.config_section) params = config(section=self.config_section)
# connect to the PostgreSQL server # connect to the PostgreSQL server
self.conn = psycopg2.connect(connection_factory=LoggingConnection, **params) self.conn = psycopg.connect(conninfo=f"dbname={params['database']}")
logger = logging.getLogger(__name__) logging.getLogger("psycopg.pool").setLevel(logging.INFO)
self.conn.initialize(logger)
except (Exception, psycopg2.DatabaseError) as error: except (Exception, psycopg.DatabaseError) as error:
print(error) print(error)
raise error raise error
def schema_version(self): def schema_version(self):
# create a cursor # create a cursor
with self.conn.cursor() as cur: with self.conn.cursor() as cur:
# execute a statement # execute a statement
try: try:
cur.execute("SELECT MAX(version) from scheme") cur.execute("SELECT MAX(version) from scheme")
except psycopg2.errors.UndefinedTable as error: except psycopg.errors.UndefinedTable:
cur.close() cur.close()
self.close() self.close()
self.connect() self.connect()
@@ -146,9 +143,9 @@ class DB:
) )
schemes[10] = ( schemes[10] = (
"ALTER TABLE revisions ADD COLUMN request_id INTEGER", "ALTER TABLE revisions ADD COLUMN request_id INTEGER",
"""ALTER TABLE revisions """ALTER TABLE revisions
ADD CONSTRAINT request_id_foreign_key ADD CONSTRAINT request_id_foreign_key
FOREIGN KEY (request_id) FOREIGN KEY (request_id)
REFERENCES requests (id)""", REFERENCES requests (id)""",
"UPDATE scheme SET version=10", "UPDATE scheme SET version=10",
) )
@@ -273,7 +270,7 @@ class DB:
cur.execute(command) cur.execute(command)
# commit the changes # commit the changes
self.conn.commit() self.conn.commit()
except (Exception, psycopg2.DatabaseError) as error: except (Exception, psycopg.DatabaseError) as error:
print(error) print(error)
self.close() self.close()
raise error raise error

View File

@@ -2,7 +2,6 @@ from __future__ import annotations
from hashlib import md5 from hashlib import md5
from pathlib import Path from pathlib import Path
from typing import Optional
from lib.db import DB from lib.db import DB
from lib.obs_revision import OBSRevision from lib.obs_revision import OBSRevision
@@ -206,7 +205,7 @@ class DBRevision:
): ):
continue continue
cur.execute( cur.execute(
"""INSERT INTO files (name, md5, size, mtime, revision_id) """INSERT INTO files (name, md5, size, mtime, revision_id)
VALUES (%s,%s,%s,%s,%s)""", VALUES (%s,%s,%s,%s,%s)""",
( (
entry.get("name"), entry.get("name"),
@@ -255,7 +254,7 @@ class DBRevision:
self._files.sort(key=lambda x: x["name"]) self._files.sort(key=lambda x: x["name"])
return self._files return self._files
def calc_delta(self, current_rev: Optional[DBRevision]): def calc_delta(self, current_rev: DBRevision | None):
"""Calculate the list of files to download and to delete. """Calculate the list of files to download and to delete.
Param current_rev is the revision that's currently checked out. Param current_rev is the revision that's currently checked out.
If it's None, the repository is empty. If it's None, the repository is empty.

View File

@@ -28,61 +28,69 @@ class Git:
self.path.mkdir(parents=True, exist_ok=True) self.path.mkdir(parents=True, exist_ok=True)
self.open() self.open()
def open(self): def git_run(self, args, **kwargs):
subprocess.run( """Run a git command"""
['git', 'init', '--object-format=sha256', '-b', 'factory'], if "env" in kwargs:
envs = kwargs["env"].copy()
del kwargs["env"]
else:
envs = os.environ.copy()
envs["GIT_LFS_SKIP_SMUDGE"] = "1"
envs["GIT_CONFIG_GLOBAL"] = "/dev/null"
return subprocess.run(
["git"] + args,
cwd=self.path, cwd=self.path,
check=True, check=True,
env=envs,
**kwargs,
) )
def open(self):
if not self.exists():
self.git_run(["init", "--object-format=sha256", "-b", "factory"])
def is_dirty(self): def is_dirty(self):
"""Check if there is something to commit""" """Check if there is something to commit"""
status_str = subprocess.run( status_str = self.git_run(
['git', 'status', '--porcelain=2'], ["status", "--porcelain=2"],
cwd=self.path,
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
check=True ).stdout.decode("utf-8")
).stdout.decode('utf-8') return len(list(filter(None, status_str.split("\n")))) > 0
return len(list(filter(None, status_str.split('\n')))) > 0
def branches(self): def branches(self):
br=subprocess.run( br = (
['git', 'for-each-ref', '--format=%(refname:short)', 'refs/heads/'], self.git_run(
cwd=self.path, ["for-each-ref", "--format=%(refname:short)", "refs/heads/"],
check=True, stdout=subprocess.PIPE,
stdout=subprocess.PIPE )
).stdout.decode('utf-8').split() .stdout.decode("utf-8")
.split()
)
if len(br) == 0: if len(br) == 0:
br.append('factory') # unborn branch? br.append("factory") # unborn branch?
return br return br
def branch(self, branch, commit='HEAD'): def branch(self, branch, commit="HEAD"):
commit = subprocess.run( commit = (
['git', 'rev-parse', '--verify', '--end-of-options', commit + '^{commit}'], self.git_run(
cwd=self.path, ["rev-parse", "--verify", "--end-of-options", commit + "^{commit}"],
check=True, stdout=subprocess.PIPE,
stdout=subprocess.PIPE )
).stdout.decode('utf-8').strip() .stdout.decode("utf-8")
return subprocess.run(['git', 'branch', branch, commit], check=True) .strip()
)
return self.git_run(["branch", branch, commit])
def checkout(self, branch): def checkout(self, branch):
"""Checkout into the branch HEAD""" """Checkout into the branch HEAD"""
new_branch = False new_branch = False
if branch not in self.branches(): if branch not in self.branches():
subprocess.run( self.git_run(["switch", "-q", "--orphan", branch])
['git', 'branch', '-q', branch, 'HEAD'],
cwd=self.path,
check=True
)
new_branch = True new_branch = True
else: else:
ref = f"refs/heads/{branch}" ref = f"refs/heads/{branch}"
if (self.path/'.git'/ref).exists(): if (self.path / ".git" / ref).exists():
subprocess.run( self.git_run(["switch", "--no-guess", "-q", branch])
['git', 'checkout', '-q', branch],
cwd=self.path,
check=True
)
return new_branch return new_branch
def commit( def commit(
@@ -106,87 +114,79 @@ class Git:
committer_time = committer_time if committer_time else user_time committer_time = committer_time if committer_time else user_time
if self.is_dirty(): if self.is_dirty():
subprocess.run( self.git_run(["add", "--all", "."])
["git", "add", "--all", "."],
cwd=self.path,
check=True,
)
tree_id = subprocess.run( tree_id = (
['git', 'write-tree'], self.git_run(["write-tree"], stdout=subprocess.PIPE)
cwd=self.path, .stdout.decode("utf-8")
check=True, .strip()
stdout=subprocess.PIPE )
).stdout.decode('utf-8').strip()
parent_array = [] parent_array = []
if isinstance(parents, list): if isinstance(parents, list):
for parent in filter(None, parents): for parent in filter(None, parents):
parent_array = parent_array + ['-p', parent] parent_array = parent_array + ["-p", parent]
elif isinstance(parents, str): elif isinstance(parents, str):
parents_array = ['-p', parents] parent_array = ["-p", parents]
commit_id = subprocess.run( commit_id = (
['git', 'commit-tree'] + parent_array + [tree_id], self.git_run(
cwd=self.path, ["commit-tree"] + parent_array + [tree_id],
env={ env={
"GIT_AUTHOR_NAME": user, "GIT_AUTHOR_NAME": user,
"GIT_AUTHOR_EMAIL": user_email, "GIT_AUTHOR_EMAIL": user_email,
"GIT_AUTHOR_DATE": f"{int(user_time.timestamp())} +0000", "GIT_AUTHOR_DATE": f"{int(user_time.timestamp())} +0000",
"GIT_COMMITTER_NAME": committer, "GIT_COMMITTER_NAME": committer,
"GIT_COMMITTER_EMAIL": committer_email, "GIT_COMMITTER_EMAIL": committer_email,
"GIT_COMMITTER_DATE": f"{int(committer_time.timestamp())} +0000", "GIT_COMMITTER_DATE": f"{int(committer_time.timestamp())} +0000",
}, },
input=message.encode('utf-8'), input=message.encode("utf-8"),
check=True, stdout=subprocess.PIPE,
stdout=subprocess.PIPE )
).stdout.decode('utf-8').rstrip() .stdout.decode("utf-8")
subprocess.run( .rstrip()
['git', 'reset', '--soft', commit_id],
cwd=self.path,
check=True,
) )
self.git_run(["reset", "--soft", commit_id])
return commit_id return commit_id
def branch_head(self, branch='HEAD'): def branch_head(self, branch="HEAD"):
return subprocess.run( return (
['git', 'rev-parse', '--verify', '--end-of-options', branch], self.git_run(
cwd=self.path, ["rev-parse", "--verify", "--end-of-options", branch],
check=True, stdout=subprocess.PIPE,
stdout=subprocess.PIPE )
).stdout.decode('utf-8').strip() .stdout.decode("utf-8")
.strip()
)
def set_branch_head(self, branch, commit): def set_branch_head(self, branch, commit):
return subprocess.run( return self.git_run(["update-ref", f"refs/heads/{branch}", commit])
['git', 'branch', '-f', branch, commit],
cwd=self.path,
check=True,
)
def gc(self): def gc(self):
logging.debug(f"Garbage recollect and repackage {self.path}") logging.debug(f"Garbage recollect and repackage {self.path}")
subprocess.run( self.git_run(
["git", "gc", "--auto"], ["gc", "--auto"],
cwd=self.path,
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT, stderr=subprocess.STDOUT,
) )
# def clean(self): # def clean(self):
# for path, _ in self.repo.status().items(): # for path, _ in self.repo.status().items():
# logging.debug(f"Cleaning {path}") # logging.debug(f"Cleaning {path}")
# try: # try:
# (self.path / path).unlink() # (self.path / path).unlink()
# self.repo.index.remove(path) # self.repo.index.remove(path)
# except Exception as e: # except Exception as e:
# logging.warning(f"Error removing file {path}: {e}") # logging.warning(f"Error removing file {path}: {e}")
def add(self, filename): def add(self, filename):
subprocess.run( self.git_run(["add", ":(literal)" + filename])
['git', 'add', filename],
cwd=self.path, def add_default_gitignore(self):
check=True, if not (self.path / ".gitignore").exists():
) with (self.path / ".gitignore").open("w") as f:
f.write(".osc\n")
self.add(".gitignore")
def add_default_lfs_gitattributes(self, force=False): def add_default_lfs_gitattributes(self, force=False):
if not (self.path / ".gitattributes").exists() or force: if not (self.path / ".gitattributes").exists() or force:
@@ -240,10 +240,8 @@ class Git:
return any(fnmatch.fnmatch(filename, line) for line in patterns) return any(fnmatch.fnmatch(filename, line) for line in patterns)
def remove(self, file: pathlib.Path): def remove(self, file: pathlib.Path):
subprocess.run( self.git_run(
['git', 'rm', '-q', '--ignore-unmatch', file.name], ["rm", "-q", "-f", "--ignore-unmatch", ":(literal)" + file.name],
cwd=self.path,
check=True,
) )
patterns = self.get_specific_lfs_gitattributes() patterns = self.get_specific_lfs_gitattributes()
if file.name in patterns: if file.name in patterns:
@@ -270,22 +268,22 @@ class Git:
if response.status_code not in (201, 409): if response.status_code not in (201, 409):
print(response.data) print(response.data)
url = f"gitea@src.opensuse.org:{org_name}/{repo_name}.git" url = f"gitea@src.opensuse.org:{org_name}/{repo_name}.git"
subprocess.run( self.git_run(
['git', 'remote', 'add', 'origin', url], ["remote", "add", "origin", url],
cwd=self.path,
check=True,
) )
def push(self, force=False): def push(self, force=False):
cmd = ['git', 'push']; if "origin" not in self.git_run(
if force: ["remote"],
cmd.append('-f') stdout=subprocess.PIPE,
cmd.append('origin') ).stdout.decode("utf-8"):
cmd.append('refs/heads/factory'); logging.warning("Not pushing to remote because no 'origin' configured")
cmd.append('refs/heads/devel'); return
subprocess.run(
cmd,
cwd=self.path,
check=True,
)
cmd = ["push"]
if force:
cmd.append("-f")
cmd.append("origin")
cmd.append("refs/heads/factory")
cmd.append("refs/heads/devel")
self.git_run(cmd)

View File

@@ -40,9 +40,9 @@ class GitExporter:
def check_repo_state(self, flats, branch_state): def check_repo_state(self, flats, branch_state):
state_data = dict() state_data = dict()
if os.path.exists(self.state_file): if os.path.exists(self.state_file):
with open(self.state_file, "r") as f: with open(self.state_file) as f:
state_data = yaml.safe_load(f) state_data = yaml.safe_load(f)
if type(state_data) != dict: if not isinstance(state_data, dict):
state_data = {} state_data = {}
left_to_commit = [] left_to_commit = []
for flat in reversed(flats): for flat in reversed(flats):
@@ -86,6 +86,11 @@ class GitExporter:
logging.debug(f"Committing {flat}") logging.debug(f"Committing {flat}")
self.commit_flat(flat, branch_state) self.commit_flat(flat, branch_state)
# make sure that we create devel branch
if not branch_state["devel"]:
logging.debug("force creating devel")
self.git.set_branch_head("devel", self.git.branch_head("factory"))
self.git.push(force=True) self.git.push(force=True)
def run_gc(self): def run_gc(self):
@@ -150,6 +155,7 @@ class GitExporter:
# create file if not existant # create file if not existant
self.git.add_default_lfs_gitattributes(force=False) self.git.add_default_lfs_gitattributes(force=False)
self.git.add_default_gitignore()
to_download, to_delete = flat.commit.calc_delta(branch_state[flat.branch]) to_download, to_delete = flat.commit.calc_delta(branch_state[flat.branch])
for file in to_delete: for file in to_delete:

View File

@@ -167,7 +167,7 @@ class Importer:
with self.db.cursor() as cur: with self.db.cursor() as cur:
cur.execute( cur.execute(
"SELECT * FROM revisions WHERE package=%s AND broken=FALSE AND expanded_srcmd5 IS NULL", "SELECT * FROM revisions WHERE package=%s AND broken=FALSE AND expanded_srcmd5 IS NULL",
(package, ) (package,),
) )
return [DBRevision(self.db, row) for row in cur.fetchall()] return [DBRevision(self.db, row) for row in cur.fetchall()]

View File

@@ -148,28 +148,12 @@ class OBS:
] ]
def _download(self, project, package, name, revision): def _download(self, project, package, name, revision):
# the object might be deleted but we can only pass deleted=1
# if it is actually deleted
deleted = 0
while deleted < 2:
url = osc.core.makeurl(
self.url,
["source", project, package, urllib.parse.quote(name)],
{"rev": revision, "expand": 1, "deleted": deleted if deleted else ()},
)
try:
osc.core.http_request("HEAD", url)
break
except Exception:
pass
deleted += 1
url = osc.core.makeurl( url = osc.core.makeurl(
self.url, self.url,
["source", project, package, urllib.parse.quote(name)], ["source", project, package, name],
{"rev": revision, "expand": 1, "deleted": 1 if deleted else ()}, {"rev": revision, "expand": 1},
) )
return osc.core.http_request("GET", url) return osc.core.http_GET(url)
def download( def download(
self, self,
@@ -181,7 +165,6 @@ class OBS:
cachedir: str, cachedir: str,
file_md5: str, file_md5: str,
) -> None: ) -> None:
cached_file = self._path_from_md5(name, cachedir, file_md5) cached_file = self._path_from_md5(name, cachedir, file_md5)
if not self.in_cache(name, cachedir, file_md5): if not self.in_cache(name, cachedir, file_md5):
with (dirpath / name).open("wb") as f: with (dirpath / name).open("wb") as f:

View File

@@ -41,7 +41,6 @@ class ProxySHA256:
} }
def put(self, project, package, name, revision, file_md5, size): def put(self, project, package, name, revision, file_md5, size):
if not self.mime: if not self.mime:
self.mime = magic.Magic(mime=True) self.mime = magic.Magic(mime=True)

View File

@@ -1,4 +1,3 @@
from typing import Dict
from xmlrpc.client import Boolean from xmlrpc.client import Boolean
from lib.db_revision import DBRevision from lib.db_revision import DBRevision
@@ -138,7 +137,7 @@ class TreeBuilder:
self.requests.add(node.revision.request_id) self.requests.add(node.revision.request_id)
class FindMergeWalker(AbstractWalker): class FindMergeWalker(AbstractWalker):
def __init__(self, builder: TreeBuilder, requests: Dict) -> None: def __init__(self, builder: TreeBuilder, requests: dict) -> None:
super().__init__() super().__init__()
self.source_revisions = dict() self.source_revisions = dict()
self.builder = builder self.builder = builder

37
opensuse-monitor.py Executable file
View File

@@ -0,0 +1,37 @@
#!/usr/bin/python3
import json
from pathlib import Path
import pika
import sys
MY_TASKS_DIR = Path(__file__).parent / "tasks"
connection = pika.BlockingConnection(pika.URLParameters("amqps://opensuse:opensuse@rabbit.opensuse.org"))
channel = connection.channel()
channel.exchange_declare(exchange='pubsub', exchange_type='topic', passive=True, durable=True)
result = channel.queue_declare("", exclusive=True)
queue_name = result.method.queue
channel.queue_bind(exchange='pubsub',
queue=queue_name,routing_key='#')
print(' [*] Waiting for logs. To exit press CTRL+C')
def callback(ch, method, properties, body):
if method.routing_key not in ("opensuse.obs.package.commit",):
return
body = json.loads(body)
if 'project' in body and 'package' in body and body['project'] == 'openSUSE:Factory':
if '/' in body['package']:
return
(MY_TASKS_DIR / body['package']).touch()
print(" [x] %r:%r" % (method.routing_key, body['package']))
channel.basic_consume(queue_name,
callback,
auto_ack=True)
channel.start_consuming()

1
tasks/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
*