13 Commits

Author SHA1 Message Date
3d1684f01b Don't calculate diffs ....
Instead of calculating diffs, just apply the revision to the branch
as-is. If things don't match, update them. Done.

Problems happens if there are devel project changes and histories
are no longer linear and the diffs are not calculated properly.
eg. trytond_stock_lot devel branch was imported incorrectly
2024-06-07 16:25:17 +02:00
574bc9aa10 Avoid guessing in switch 2024-05-17 20:07:16 +02:00
0414b33206 Fix testing for origin
The previous code path was untested and not working
2024-05-17 20:06:25 +02:00
b9670821a9 Only init the repository if it doesn't exist already
harmless, but avoids a scary warning
2024-05-17 20:05:54 +02:00
073550825c Fixups to improve the conversion process 2024-05-17 14:41:42 +02:00
5a353c98d3 Add tasks 2024-05-17 11:46:18 +02:00
1fc466d15b Add monitor for commits 2024-05-17 11:40:19 +02:00
39fde7744a Code cleanup 2024-05-16 15:47:45 +02:00
f5ffc83a69 Remove double quoting of url parameters
makeurl quotes by itself, so this was messing it up
2024-05-16 11:49:14 +02:00
d0ccf83684 Revert "Try to fetch the element as deleted if initial access failed"
The OBS api has been fixed to provide an automatic fallback via
https://github.com/openSUSE/open-build-service/pull/15655

This reverts commit c9e07e536f.
2024-05-16 11:49:14 +02:00
b0ffb01c59 cleanups 2024-05-16 11:49:14 +02:00
28d5c6e606 Switch to psycopg rather than psycopg2
It's a bit more modern and uses dedicated c bindings
2024-05-16 11:49:14 +02:00
1e22c2895a Merge pull request 'Switch to sha-256 git repo and use git tools again' (#23) from adamm/git-importer:main into main
Reviewed-on: importers/git-importer#23
2024-05-16 11:48:36 +02:00
12 changed files with 207 additions and 165 deletions

View File

@ -1,4 +1,4 @@
sudo zypper in python3-psycopg2
sudo zypper in python3-psycopg
sudo su - postgres
# `createdb -O <LOCAL_USER> imported_git`

View File

@ -14,8 +14,6 @@ def config(filename="database.ini", section="production"):
for param in params:
db[param[0]] = param[1]
else:
raise Exception(
"Section {0} not found in the {1} file".format(section, filename)
)
raise Exception(f"Section {section} not found in the {filename} file")
return db

View File

@ -1,7 +1,6 @@
import logging
import psycopg2
from psycopg2.extras import LoggingConnection
import psycopg
from lib.config import config
@ -17,22 +16,20 @@ class DB:
# read the connection parameters
params = config(section=self.config_section)
# connect to the PostgreSQL server
self.conn = psycopg2.connect(connection_factory=LoggingConnection, **params)
logger = logging.getLogger(__name__)
self.conn.initialize(logger)
self.conn = psycopg.connect(conninfo=f"dbname={params['database']}")
logging.getLogger("psycopg.pool").setLevel(logging.INFO)
except (Exception, psycopg2.DatabaseError) as error:
except (Exception, psycopg.DatabaseError) as error:
print(error)
raise error
def schema_version(self):
# create a cursor
with self.conn.cursor() as cur:
# execute a statement
try:
cur.execute("SELECT MAX(version) from scheme")
except psycopg2.errors.UndefinedTable as error:
except psycopg.errors.UndefinedTable:
cur.close()
self.close()
self.connect()
@ -146,9 +143,9 @@ class DB:
)
schemes[10] = (
"ALTER TABLE revisions ADD COLUMN request_id INTEGER",
"""ALTER TABLE revisions
"""ALTER TABLE revisions
ADD CONSTRAINT request_id_foreign_key
FOREIGN KEY (request_id)
FOREIGN KEY (request_id)
REFERENCES requests (id)""",
"UPDATE scheme SET version=10",
)
@ -273,7 +270,7 @@ class DB:
cur.execute(command)
# commit the changes
self.conn.commit()
except (Exception, psycopg2.DatabaseError) as error:
except (Exception, psycopg.DatabaseError) as error:
print(error)
self.close()
raise error

View File

@ -2,7 +2,6 @@ from __future__ import annotations
from hashlib import md5
from pathlib import Path
from typing import Optional
from lib.db import DB
from lib.obs_revision import OBSRevision
@ -206,7 +205,7 @@ class DBRevision:
):
continue
cur.execute(
"""INSERT INTO files (name, md5, size, mtime, revision_id)
"""INSERT INTO files (name, md5, size, mtime, revision_id)
VALUES (%s,%s,%s,%s,%s)""",
(
entry.get("name"),
@ -255,7 +254,7 @@ class DBRevision:
self._files.sort(key=lambda x: x["name"])
return self._files
def calc_delta(self, current_rev: Optional[DBRevision]):
def calc_delta(self, current_rev: DBRevision | None):
"""Calculate the list of files to download and to delete.
Param current_rev is the revision that's currently checked out.
If it's None, the repository is empty.

View File

@ -28,61 +28,69 @@ class Git:
self.path.mkdir(parents=True, exist_ok=True)
self.open()
def open(self):
subprocess.run(
['git', 'init', '--object-format=sha256', '-b', 'factory'],
def git_run(self, args, **kwargs):
"""Run a git command"""
if "env" in kwargs:
envs = kwargs["env"].copy()
del kwargs["env"]
else:
envs = os.environ.copy()
envs["GIT_LFS_SKIP_SMUDGE"] = "1"
envs["GIT_CONFIG_GLOBAL"] = "/dev/null"
return subprocess.run(
["git"] + args,
cwd=self.path,
check=True,
env=envs,
**kwargs,
)
def open(self):
if not self.exists():
self.git_run(["init", "--object-format=sha256", "-b", "factory"])
def is_dirty(self):
"""Check if there is something to commit"""
status_str = subprocess.run(
['git', 'status', '--porcelain=2'],
cwd=self.path,
status_str = self.git_run(
["status", "--porcelain=2"],
stdout=subprocess.PIPE,
check=True
).stdout.decode('utf-8')
return len(list(filter(None, status_str.split('\n')))) > 0
).stdout.decode("utf-8")
return len(list(filter(None, status_str.split("\n")))) > 0
def branches(self):
br=subprocess.run(
['git', 'for-each-ref', '--format=%(refname:short)', 'refs/heads/'],
cwd=self.path,
check=True,
stdout=subprocess.PIPE
).stdout.decode('utf-8').split()
br = (
self.git_run(
["for-each-ref", "--format=%(refname:short)", "refs/heads/"],
stdout=subprocess.PIPE,
)
.stdout.decode("utf-8")
.split()
)
if len(br) == 0:
br.append('factory') # unborn branch?
br.append("factory") # unborn branch?
return br
def branch(self, branch, commit='HEAD'):
commit = subprocess.run(
['git', 'rev-parse', '--verify', '--end-of-options', commit + '^{commit}'],
cwd=self.path,
check=True,
stdout=subprocess.PIPE
).stdout.decode('utf-8').strip()
return subprocess.run(['git', 'branch', branch, commit], check=True)
def branch(self, branch, commit="HEAD"):
commit = (
self.git_run(
["rev-parse", "--verify", "--end-of-options", commit + "^{commit}"],
stdout=subprocess.PIPE,
)
.stdout.decode("utf-8")
.strip()
)
return self.git_run(["branch", branch, commit])
def checkout(self, branch):
"""Checkout into the branch HEAD"""
new_branch = False
if branch not in self.branches():
subprocess.run(
['git', 'branch', '-q', branch, 'HEAD'],
cwd=self.path,
check=True
)
self.git_run(["branch", "-q", branch, "HEAD"])
new_branch = True
else:
ref = f"refs/heads/{branch}"
if (self.path/'.git'/ref).exists():
subprocess.run(
['git', 'checkout', '-q', branch],
cwd=self.path,
check=True
)
if (self.path / ".git" / ref).exists():
self.git_run(["switch", "--no-guess", "-q", branch])
return new_branch
def commit(
@ -106,87 +114,73 @@ class Git:
committer_time = committer_time if committer_time else user_time
if self.is_dirty():
subprocess.run(
["git", "add", "--all", "."],
cwd=self.path,
check=True,
)
self.git_run(["add", "--all", "."])
tree_id = subprocess.run(
['git', 'write-tree'],
cwd=self.path,
check=True,
stdout=subprocess.PIPE
).stdout.decode('utf-8').strip()
tree_id = (
self.git_run(["write-tree"], stdout=subprocess.PIPE)
.stdout.decode("utf-8")
.strip()
)
parent_array = []
if isinstance(parents, list):
for parent in filter(None, parents):
parent_array = parent_array + ['-p', parent]
parent_array = parent_array + ["-p", parent]
elif isinstance(parents, str):
parents_array = ['-p', parents]
parent_array = ["-p", parents]
commit_id = subprocess.run(
['git', 'commit-tree'] + parent_array + [tree_id],
cwd=self.path,
env={
"GIT_AUTHOR_NAME": user,
"GIT_AUTHOR_EMAIL": user_email,
"GIT_AUTHOR_DATE": f"{int(user_time.timestamp())} +0000",
"GIT_COMMITTER_NAME": committer,
"GIT_COMMITTER_EMAIL": committer_email,
"GIT_COMMITTER_DATE": f"{int(committer_time.timestamp())} +0000",
},
input=message.encode('utf-8'),
check=True,
stdout=subprocess.PIPE
).stdout.decode('utf-8').rstrip()
subprocess.run(
['git', 'reset', '--soft', commit_id],
cwd=self.path,
check=True,
commit_id = (
self.git_run(
["commit-tree"] + parent_array + [tree_id],
env={
"GIT_AUTHOR_NAME": user,
"GIT_AUTHOR_EMAIL": user_email,
"GIT_AUTHOR_DATE": f"{int(user_time.timestamp())} +0000",
"GIT_COMMITTER_NAME": committer,
"GIT_COMMITTER_EMAIL": committer_email,
"GIT_COMMITTER_DATE": f"{int(committer_time.timestamp())} +0000",
},
input=message.encode("utf-8"),
stdout=subprocess.PIPE,
)
.stdout.decode("utf-8")
.rstrip()
)
self.git_run(["reset", "--soft", commit_id])
return commit_id
def branch_head(self, branch='HEAD'):
return subprocess.run(
['git', 'rev-parse', '--verify', '--end-of-options', branch],
cwd=self.path,
check=True,
stdout=subprocess.PIPE
).stdout.decode('utf-8').strip()
def branch_head(self, branch="HEAD"):
return (
self.git_run(
["rev-parse", "--verify", "--end-of-options", branch],
stdout=subprocess.PIPE,
)
.stdout.decode("utf-8")
.strip()
)
def set_branch_head(self, branch, commit):
return subprocess.run(
['git', 'branch', '-f', branch, commit],
cwd=self.path,
check=True,
)
return self.git_run(["update-ref", f"refs/heads/{branch}", commit])
def gc(self):
logging.debug(f"Garbage recollect and repackage {self.path}")
subprocess.run(
["git", "gc", "--auto"],
cwd=self.path,
self.git_run(
["gc", "--auto"],
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
)
# def clean(self):
# for path, _ in self.repo.status().items():
# logging.debug(f"Cleaning {path}")
# try:
# (self.path / path).unlink()
# self.repo.index.remove(path)
# except Exception as e:
# logging.warning(f"Error removing file {path}: {e}")
# def clean(self):
# for path, _ in self.repo.status().items():
# logging.debug(f"Cleaning {path}")
# try:
# (self.path / path).unlink()
# self.repo.index.remove(path)
# except Exception as e:
# logging.warning(f"Error removing file {path}: {e}")
def add(self, filename):
subprocess.run(
['git', 'add', filename],
cwd=self.path,
check=True,
)
self.git_run(["add", filename])
def add_default_lfs_gitattributes(self, force=False):
if not (self.path / ".gitattributes").exists() or force:
@ -240,10 +234,8 @@ class Git:
return any(fnmatch.fnmatch(filename, line) for line in patterns)
def remove(self, file: pathlib.Path):
subprocess.run(
['git', 'rm', '-q', '--ignore-unmatch', file.name],
cwd=self.path,
check=True,
self.git_run(
["rm", "-q", "-f", "--ignore-unmatch", file.name],
)
patterns = self.get_specific_lfs_gitattributes()
if file.name in patterns:
@ -270,22 +262,22 @@ class Git:
if response.status_code not in (201, 409):
print(response.data)
url = f"gitea@src.opensuse.org:{org_name}/{repo_name}.git"
subprocess.run(
['git', 'remote', 'add', 'origin', url],
cwd=self.path,
check=True,
self.git_run(
["remote", "add", "origin", url],
)
def push(self, force=False):
cmd = ['git', 'push'];
if force:
cmd.append('-f')
cmd.append('origin')
cmd.append('refs/heads/factory');
cmd.append('refs/heads/devel');
subprocess.run(
cmd,
cwd=self.path,
check=True,
)
if "origin" not in self.git_run(
["remote"],
stdout=subprocess.PIPE,
).stdout.decode("utf-8"):
logger.warning("Not pushing to remote because no 'origin' configured")
return
cmd = ["push"]
if force:
cmd.append("-f")
cmd.append("origin")
cmd.append("refs/heads/factory")
cmd.append("refs/heads/devel")
self.git_run(cmd)

View File

@ -2,6 +2,8 @@ import logging
import os
import yaml
from hashlib import md5
from pathlib import Path
from lib.binary import is_binary_or_large
from lib.db import DB
@ -40,9 +42,9 @@ class GitExporter:
def check_repo_state(self, flats, branch_state):
state_data = dict()
if os.path.exists(self.state_file):
with open(self.state_file, "r") as f:
with open(self.state_file) as f:
state_data = yaml.safe_load(f)
if type(state_data) != dict:
if not isinstance(state_data, dict):
state_data = {}
left_to_commit = []
for flat in reversed(flats):
@ -133,6 +135,12 @@ class GitExporter:
return True
return flat.parent1 == branch_state[flat.branch]
def file_md5(self, file):
m = md5()
with open(file, 'rb') as f:
m.update(f.read())
return m.hexdigest()
def commit_flat(self, flat, branch_state):
parents = []
self.git.checkout(flat.branch)
@ -151,11 +159,40 @@ class GitExporter:
# create file if not existant
self.git.add_default_lfs_gitattributes(force=False)
to_download, to_delete = flat.commit.calc_delta(branch_state[flat.branch])
for file in to_delete:
self.git.remove(file)
for file, size, md5 in to_download:
self.commit_file(flat, file, size, md5)
new_files = flat.commit.files_list()
cur_files = os.listdir(self.git.path)
for cf in cur_files:
if cf[0] == '.':
continue
found = False
for nf in new_files:
if nf['name'] == cf:
found = True
break
if found:
# check if file is modified
file_path = self.git.path.joinpath(cf)
stat = file_path.stat()
if stat.st_size != nf['size'] or self.file_md5(file_path) != nf['md5']:
logging.debug(f"updating {file_path.name}")
self.commit_file(flat, Path(cf), nf['size'], nf['md5'])
else:
logging.debug(f"leaving {file_path.name}")
else:
# file not exist in new commit
self.git.remove(Path(cf))
# new files?
for file in new_files:
found = False
for cf in cur_files:
if file['name'] == cf:
found = True
break
if not found:
self.commit_file(flat, Path(file['name']), file['size'], file['md5'])
commit = self.git.commit(
flat.user.realname,

View File

@ -167,7 +167,7 @@ class Importer:
with self.db.cursor() as cur:
cur.execute(
"SELECT * FROM revisions WHERE package=%s AND broken=FALSE AND expanded_srcmd5 IS NULL",
(package, )
(package,),
)
return [DBRevision(self.db, row) for row in cur.fetchall()]

View File

@ -148,28 +148,12 @@ class OBS:
]
def _download(self, project, package, name, revision):
# the object might be deleted but we can only pass deleted=1
# if it is actually deleted
deleted = 0
while deleted < 2:
url = osc.core.makeurl(
self.url,
["source", project, package, urllib.parse.quote(name)],
{"rev": revision, "expand": 1, "deleted": deleted if deleted else ()},
)
try:
osc.core.http_request("HEAD", url)
break
except Exception:
pass
deleted += 1
url = osc.core.makeurl(
self.url,
["source", project, package, urllib.parse.quote(name)],
{"rev": revision, "expand": 1, "deleted": 1 if deleted else ()},
)
return osc.core.http_request("GET", url)
self.url,
["source", project, package, name],
{"rev": revision, "expand": 1},
)
return osc.core.http_GET(url)
def download(
self,
@ -181,7 +165,6 @@ class OBS:
cachedir: str,
file_md5: str,
) -> None:
cached_file = self._path_from_md5(name, cachedir, file_md5)
if not self.in_cache(name, cachedir, file_md5):
with (dirpath / name).open("wb") as f:

View File

@ -41,7 +41,6 @@ class ProxySHA256:
}
def put(self, project, package, name, revision, file_md5, size):
if not self.mime:
self.mime = magic.Magic(mime=True)

View File

@ -1,4 +1,3 @@
from typing import Dict
from xmlrpc.client import Boolean
from lib.db_revision import DBRevision
@ -138,7 +137,7 @@ class TreeBuilder:
self.requests.add(node.revision.request_id)
class FindMergeWalker(AbstractWalker):
def __init__(self, builder: TreeBuilder, requests: Dict) -> None:
def __init__(self, builder: TreeBuilder, requests: dict) -> None:
super().__init__()
self.source_revisions = dict()
self.builder = builder

37
opensuse-monitor.py Executable file
View File

@ -0,0 +1,37 @@
#!/usr/bin/python3
import json
from pathlib import Path
import pika
import sys
MY_TASKS_DIR = Path(__file__).parent / "tasks"
connection = pika.BlockingConnection(pika.URLParameters("amqps://opensuse:opensuse@rabbit.opensuse.org"))
channel = connection.channel()
channel.exchange_declare(exchange='pubsub', exchange_type='topic', passive=True, durable=True)
result = channel.queue_declare("", exclusive=True)
queue_name = result.method.queue
channel.queue_bind(exchange='pubsub',
queue=queue_name,routing_key='#')
print(' [*] Waiting for logs. To exit press CTRL+C')
def callback(ch, method, properties, body):
if method.routing_key not in ("opensuse.obs.package.commit",):
return
body = json.loads(body)
if 'project' in body and 'package' in body and body['project'] == 'openSUSE:Factory':
if '/' in body['package']:
return
(MY_TASKS_DIR / body['package']).touch()
print(" [x] %r:%r" % (method.routing_key, body['package']))
channel.basic_consume(queue_name,
callback,
auto_ack=True)
channel.start_consuming()

1
tasks/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
*