9 Commits

Author SHA1 Message Date
eba3fece91 Add and remove literal files
Git pathspecs give some characters a special meaning, which we must not trigger.
Treat every filespec as a literal path.
2024-07-31 12:38:49 +02:00
Dirk Mueller
b74f36c81e Merge pull request 'Make sure we create devel branch, even if no diff to Factory' (#29) from adamm/git-importer:always_create_devel into use_git_for_sha256
Reviewed-on: importers/git-importer#29
2024-06-13 15:02:30 +02:00
Adam Majer
7b9b033adc Make sure we create devel branch, when no diff to Factory 2024-06-13 09:51:27 +02:00
Dirk Mueller
c44cb17e5d Merge pull request 'Ignore .osc directory' (#28) from adamm/git-importer:gitingore into use_git_for_sha256
Reviewed-on: importers/git-importer#28
Reviewed-by: Dirk Mueller <dirkmueller@noreply@src.opensuse.org>
2024-06-10 18:13:33 +02:00
Dirk Mueller
3384a6f2b1 Merge pull request 'Fix typo' (#27) from adamm/git-importer:typo into use_git_for_sha256
Reviewed-on: importers/git-importer#27
2024-06-10 18:10:40 +02:00
Adam Majer
1ec72ac80c Ignore .osc directory 2024-06-10 17:31:14 +02:00
Dirk Mueller
033b8f8cee Merge pull request 'New branch is empty' (#26) from adamm/git-importer:fix_inintial_branch into use_git_for_sha256
Reviewed-on: importers/git-importer#26
Reviewed-by: Dirk Mueller <dirkmueller@noreply@src.opensuse.org>
2024-06-10 17:03:48 +02:00
Adam Majer
17062df04e Fix typo 2024-06-10 15:44:21 +02:00
Adam Majer
aa73d97b35 New branch is empty
New branches must be born empty
2024-06-10 15:35:11 +02:00
11 changed files with 66 additions and 269 deletions

View File

@@ -42,8 +42,8 @@ PROJECTS = [
]
def export_package(project, package, repodir, cachedir, gc):
exporter = GitExporter(URL_OBS, project, package, repodir, cachedir)
def export_package(package, repodir, cachedir, gc):
exporter = GitExporter(URL_OBS, "openSUSE:Factory", package, repodir, cachedir)
exporter.set_gc_interval(gc)
exporter.export_as_git()
@@ -51,12 +51,6 @@ def export_package(project, package, repodir, cachedir, gc):
def main():
parser = argparse.ArgumentParser(description="OBS history importer into git")
parser.add_argument("packages", help="OBS package names", nargs="*")
parser.add_argument(
"-p",
"--project",
default="openSUSE:Factory",
help="Project to import/export, default is openSUSE:Factory",
)
parser.add_argument(
"-r",
"--repodir",
@@ -116,13 +110,10 @@ def main():
if not args.cachedir:
args.cachedir = pathlib.Path("~/.cache/git-import/").expanduser()
importer = Importer(URL_OBS, args.project, args.packages)
importer = Importer(URL_OBS, "openSUSE:Factory", args.packages)
importer.import_into_db()
for package in args.packages:
if not importer.package_with_scmsync(args.project, package):
export_package(args.project, package, args.repodir, args.cachedir, args.gc)
else:
logging.debug(f"{args.project}/{package} has scmsync links - skipping export")
export_package(package, args.repodir, args.cachedir, args.gc)
if __name__ == "__main__":

View File

@@ -204,11 +204,6 @@ class DBRevision:
and self.package == "_project"
):
continue
# do not import _service:* files as those are created by OBS on source imports
if entry.get("name")[0:9] == "_service:":
continue
cur.execute(
"""INSERT INTO files (name, md5, size, mtime, revision_id)
VALUES (%s,%s,%s,%s,%s)""",

View File

@@ -20,7 +20,7 @@ class FlatTreeWalker(AbstractWalker):
def __init__(self, rebase_devel=False) -> None:
super().__init__()
self.flats:list[FlatNode] = []
self.flats = []
# the rebase_devel won't work as such as rebasing the branch needs an explicit action
self.rebase_devel = rebase_devel
# remember the last merge point so we can know the parent of it for the root of the sources

View File

@@ -48,7 +48,6 @@ class Git:
def open(self):
if not self.exists():
self.git_run(["init", "--object-format=sha256", "-b", "factory"])
self.git_run(["config", "lfs.allowincompletepush", "true"])
def is_dirty(self):
"""Check if there is something to commit"""
@@ -160,12 +159,6 @@ class Git:
.strip()
)
def branch_commit(self, branch="HEAD"):
    """Return the raw commit object of ``branch``, or ``""`` if it cannot be read.

    Runs ``git cat-file commit <branch>`` through ``self.git_run`` with
    stdout captured, decodes the output as UTF-8 and strips surrounding
    whitespace.

    Any ordinary failure (the git call raising, undecodable output, ...)
    yields an empty string so callers can simply iterate over
    ``.splitlines()``.  ``KeyboardInterrupt`` and ``SystemExit`` are NOT
    swallowed, so the process can still be interrupted during the call.
    """
    try:
        result = self.git_run(["cat-file", "commit", branch], stdout=subprocess.PIPE)
        return result.stdout.decode("utf-8").strip()
    except Exception:
        # best effort: a missing branch/commit is reported as "no commit text"
        return ""
def set_branch_head(self, branch, commit):
return self.git_run(["update-ref", f"refs/heads/{branch}", commit])
@@ -187,7 +180,7 @@ class Git:
# logging.warning(f"Error removing file {path}: {e}")
def add(self, filename):
self.git_run(["add", ":(literal)" + str(filename)])
self.git_run(["add", ":(literal)" + filename])
def add_default_gitignore(self):
if not (self.path / ".gitignore").exists():
@@ -257,7 +250,7 @@ class Git:
def add_gitea_remote(self, package):
repo_name = package.replace("+", "_")
org_name = "pool"
org_name = "rpm"
if not os.getenv("GITEA_TOKEN"):
logging.warning("Not adding a remote due to missing $GITEA_TOKEN")
@@ -266,7 +259,7 @@ class Git:
url = f"https://src.opensuse.org/api/v1/org/{org_name}/repos"
response = requests.post(
url,
data={"name": repo_name, "object_format_name": "sha256"},
data={"name": repo_name},
headers={"Authorization": f"token {os.getenv('GITEA_TOKEN')}"},
timeout=10,
)
@@ -290,5 +283,7 @@ class Git:
cmd = ["push"]
if force:
cmd.append("-f")
cmd += ["origin", "--all"]
cmd.append("origin")
cmd.append("refs/heads/factory")
cmd.append("refs/heads/devel")
self.git_run(cmd)

View File

@@ -1,12 +1,8 @@
import logging
import os
from urllib.parse import parse_qs
import psycopg
from urllib3.util import url
import yaml
from lib import db
from lib.binary import is_binary_or_large
from lib.db import DB
from lib.git import Git
@@ -16,12 +12,6 @@ from lib.proxy_sha256 import ProxySHA256
from lib.tree_builder import TreeBuilder
from lib.user import User
def is_number(s):
    """Return True if ``float(s)`` succeeds, False otherwise.

    Values that ``float()`` cannot handle at all (``None``, lists, ...)
    are reported as "not a number" instead of raising ``TypeError``.

    NOTE: anything ``float()`` accepts counts as a number, which includes
    strings such as ``"1.5"``, ``"1e5"``, ``"inf"`` and ``"nan"``.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        return False
    return True
class GitExporter:
def __init__(self, api_url, project, package, repodir, cachedir):
@@ -47,107 +37,42 @@ class GitExporter:
def set_gc_interval(self, gc):
self.gc_interval = gc
def reconstruct_state(self, flats):
    """Rebuild the branch -> revision-id state from the git history.

    Used when no state file is available: the ``OBS-URL: `` lines of the
    commits returned by ``self.git.branch_commit("factory")`` and
    ``self.git.branch_commit("devel")`` are parsed, and the ``rev`` query
    parameter of each matching URL is looked up in the ``revisions`` table.

    Args:
        flats: flat node list; handed to ``self.devel_rev`` to find the
            node that names the devel project.

    Returns:
        A dict that may hold the keys ``"factory"`` and ``"devel"`` mapped
        to ``revisions.id`` values.  It is returned early (possibly empty)
        as soon as a referenced revision cannot be found in the database.
        When the devel lookup fails with an error, ``"devel"`` falls back
        to the ``"factory"`` id if one is known.
    """
    state_data = dict()

    def devel_falls_back_to_factory():
        # .get(): "factory" may never have been set, which must not raise here
        if state_data.get("factory") is not None:
            state_data["devel"] = state_data["factory"]

    for rev in self._obs_url_revs("factory", "openSUSE:Factory"):
        try:
            row = self._find_revision_row("openSUSE:Factory", rev)
        except psycopg.Error as e:
            logging.error(e)
            self.db.conn.rollback()
            # a failed query has no result to fetch; treat it as "unknown revision"
            return state_data
        if not row:
            return state_data
        state_data["factory"] = row[0]

    try:
        logging.debug("devel reconstruct")
        d = self.devel_rev(flats)
        if d is not None:
            prj = d.commit.project
            for rev in self._obs_url_revs("devel", prj):
                logging.debug(f"finding id for ({prj, self.package, rev}")
                try:
                    row = self._find_revision_row(prj, rev)
                except psycopg.Error as e:
                    logging.error(e)
                    self.db.conn.rollback()
                    devel_falls_back_to_factory()
                    continue
                if not row:
                    logging.info(" ** cannot find revision for devel branch: %s", rev)
                    return state_data
                state_data["devel"] = row[0]
    except Exception:
        # best effort: any unexpected problem with the devel branch leaves
        # devel pointing at the factory state (if known)
        logging.debug("devel state reconstruction failed", exc_info=True)
        devel_falls_back_to_factory()
    return state_data


def _obs_url_revs(self, branch, project):
    """Yield the ``rev`` value of every matching ``OBS-URL: `` line of ``branch``.

    A line matches when its URL path is
    ``/package/show/<project>/<self.package>`` and its query string
    carries a ``rev`` parameter; all other lines are skipped.
    """
    prefix = "OBS-URL: "
    wanted_path = f"/package/show/{project}/{self.package}"
    for line in self.git.branch_commit(branch).splitlines():
        if not line.startswith(prefix):
            continue
        # slice the prefix off; str.strip(prefix) would remove a *set of
        # characters* from both ends instead of the prefix itself
        u = url.parse_url(line[len(prefix):].strip())
        # u.query is None for URLs without a query string
        if u.path != wanted_path or not u.query or "rev=" not in u.query:
            continue
        revs = parse_qs(u.query).get("rev")
        if not revs:
            continue
        yield revs[0]


def _find_revision_row(self, project, rev):
    """Return the ``(id,)`` row of ``rev`` for ``project``/``self.package``, or None.

    Numeric revisions (per ``is_number``) are matched against the ``rev``
    column, everything else against ``expanded_srcmd5``.  Database errors
    (``psycopg.Error``) propagate to the caller.
    """
    with self.db.cursor() as cur:
        if is_number(rev):
            cur.execute(
                "SELECT id FROM revisions WHERE project=%s AND package=%s AND rev=%s",
                (project, self.package, rev),
            )
        else:
            # exactly one parameter per placeholder
            cur.execute(
                "SELECT id FROM revisions WHERE project=%s AND package=%s AND expanded_srcmd5=%s",
                (project, self.package, rev),
            )
        return cur.fetchone()
def check_repo_state(self, flats, branch_state, branch):
def check_repo_state(self, flats, branch_state):
state_data = dict()
if os.path.exists(self.state_file):
with open(self.state_file) as f:
state_data = yaml.safe_load(f)
if not isinstance(state_data, dict):
state_data = {}
else:
state_data = self.reconstruct_state(flats)
logging.debug(f"state data: {state_data}")
left_to_commit = []
for flat in reversed(flats):
found_state = False
if flat.commit.dbid == state_data.get(branch):
branch_state[branch] = flat.commit
flat.commit.git_commit = self.git.branch_head(branch)
logging.debug(
f"Found {self.git.path}'s {branch} branch in state {flat}"
)
left_to_commit = []
found_state = True
for branch in ["factory", "devel"]:
if flat.commit.dbid == state_data.get(branch):
branch_state[branch] = flat.commit
flat.commit.git_commit = self.git.branch_head(branch)
logging.debug(
f"Found {self.git.path}'s {branch} branch in state {flat}"
)
left_to_commit = []
found_state = True
if not found_state:
left_to_commit.append(flat)
return left_to_commit
def devel_rev(self, tree):
    """Return the first node in ``tree`` whose ``branch`` is ``"devel"``.

    ``tree`` is iterated in order; ``None`` is returned when no node is on
    the devel branch (including when ``tree`` is empty).
    """
    return next((node for node in tree if node.branch == "devel"), None)
def export_as_git(self):
if os.getenv("CHECK_ALL_LFS"):
LFSOid.check_all(self.db, self.package)
tree = TreeBuilder(self.db).build(self.project, self.package)
added_commits = False
if tree == None: # eg. python-M2Crypto errors
return
flats = tree.as_flat_list()
branch_state = {"factory": None, "devel": None}
left_to_commit = self.check_repo_state(flats, branch_state, "factory")
logging.info(f"Commiting into {self.git.path} {len(left_to_commit)} into factory")
left_to_commit = self.check_repo_state(flats, branch_state)
if not left_to_commit:
return
logging.info(f"Commiting into {self.git.path}")
self.run_gc()
users = dict()
@@ -160,40 +85,13 @@ class GitExporter:
self.run_gc()
logging.debug(f"Committing {flat}")
self.commit_flat(flat, branch_state)
added_commits = True
# export the devel_tree head commits based on the devel branch
if self.project == "openSUSE:Factory":
devel_head = self.devel_rev(flats)
flat_devel = None
if devel_head is not None:
logging.debug(f"building devel revisions chain for {devel_head.commit.project} / {self.package}")
flat_devel = TreeBuilder(self.db).revisions_chain(devel_head.commit.project, self.package).as_flat_list()
for f in flat_devel:
f.branch = "devel"
if flat_devel is not None:
left_to_commit = self.check_repo_state(flat_devel, branch_state, "devel")
logging.debug(branch_state)
logging.debug(f"appending {len(left_to_commit)} items on top of devel")
for flat in left_to_commit:
if flat.commit.userid not in users:
users[flat.commit.userid] = User.find(self.db, flat.commit.userid)
flat.user = users[flat.commit.userid]
self.gc_cnt -= 1
if self.gc_cnt <= 0 and self.gc_interval:
self.run_gc()
logging.debug(f"Committing {flat}")
self.commit_flat(flat, branch_state)
added_commits = True
# make sure that we create devel branch
if not branch_state["devel"]:
logging.debug("force creating devel")
self.git.set_branch_head("devel", self.git.branch_head("factory"))
#if added_commits:
# self.git.push(force=True)
self.git.push(force=True)
def run_gc(self):
self.gc_cnt = self.gc_interval
@@ -205,10 +103,6 @@ class GitExporter:
return not self.proxy_sha256.is_text(package, filename)
def commit_file(self, flat, file, size, md5):
# don't export imported _service: files, if any
if file.name[0:9] == '_service:':
return
# have such files been detected as text mimetype before?
if self.is_lfs_file(flat.commit.package, file.name, size):
file_sha256 = self.proxy_sha256.get_or_put(

View File

@@ -26,24 +26,19 @@ class Importer:
# Import multiple Factory packages into the database
self.packages = packages
self.project = project
self.scmsync_cache = dict()
self.packages_with_scmsync = set()
self.db = DB()
self.obs = OBS(api_url)
assert not self.has_scmsync(project)
assert project == "openSUSE:Factory"
self.refreshed_packages = set()
self.gone_packages_set = None
def import_request(self, number):
self.obs.request(number).import_into_db(self.db)
def update_db_package(self, project, package):
root = self.obs._history(project, package)
if root is None:
if self.project == "openSUSE:Factory" and project == self.project:
exit(10)
return
latest = DBRevision.max_rev(self.db, project, package)
for r in root.findall("revision"):
@@ -218,10 +213,6 @@ class Importer:
return
logging.debug(f"Refresh {project}/{package}")
self.refreshed_packages.add(key)
if self.has_scmsync(project) or self.has_scmsync(key):
self.packages_with_scmsync.add((project, package))
logging.debug(f"{project}/{package} already in Git - skipping")
return
self.update_db_package(project, package)
self.fetch_all_linked_packages(project, package)
@@ -264,18 +255,3 @@ class Importer:
for line in f.readlines():
self.gone_packages_set.add(line.strip())
return key in self.gone_packages_set
def has_scmsync(self, key):
    """Tell whether the meta of ``key`` contains an ``scmsync`` element.

    The answer is memoised in ``self.scmsync_cache``; on a cache miss the
    meta is fetched with ``self.obs._meta(key)``.  A missing meta
    (``None``) counts as "no scmsync".
    """
    try:
        return self.scmsync_cache[key]
    except KeyError:
        pass

    meta = self.obs._meta(key)
    found = meta is not None and meta.find('scmsync') is not None
    self.scmsync_cache[key] = found
    return found
def package_with_scmsync(self, project, package):
    """Report whether ``(project, package)`` is in ``self.packages_with_scmsync``."""
    wanted = (project, package)
    return wanted in self.packages_with_scmsync

View File

@@ -73,11 +73,11 @@ class OBS:
logging.debug(f"GET {url}")
return ET.parse(osc.core.http_GET(url)).getroot()
def _meta(self, key, **params):
def _meta(self, project, package, **params):
try:
root = self._xml(f"source/{key}/_meta", **params)
root = self._xml(f"source/{project}/{package}/_meta", **params)
except HTTPError:
logging.error(f"Project/Package [{key} {params}] has no meta")
logging.error(f"Package [{project}/{package} {params}] has no meta")
return None
return root
@@ -118,13 +118,13 @@ class OBS:
return root
def exists(self, project, package):
root = self._meta(f"{project}/{package}")
root = self._meta(project, package)
if root is None:
return False
return root.get("project") == project
def devel_project(self, project, package):
root = self._meta(f"{project}/{package}")
root = self._meta(project, package)
devel = root.find("devel")
if devel is None:
return None
@@ -148,21 +148,12 @@ class OBS:
]
def _download(self, project, package, name, revision):
try:
url = osc.core.makeurl(
self.url,
["source", project, package, name],
{"rev": revision, "expand": 1},
)
return osc.core.http_GET(url)
except HTTPError as e:
if e.status == 404:
url = osc.core.makeurl(
self.url,
["source", project, package, name],
{"rev": revision, "expand": 1, "deleted": 1},
)
return osc.core.http_GET(url)
url = osc.core.makeurl(
self.url,
["source", project, package, name],
{"rev": revision, "expand": 1},
)
return osc.core.http_GET(url)
def download(
self,
@@ -198,7 +189,7 @@ class OBS:
try:
root = self._xml(f"source/{project}/{package}", **params)
except HTTPError as e:
if e.code == 400 or e.code == 404:
if e.code == 400:
logging.error(
f"Package [{project}/{package} {params}] can't be expanded: {e}"
)

View File

@@ -50,12 +50,12 @@ class ProxySHA256:
sha = hashlib.sha256()
while True:
buffer = fin.read(10000)
# only guess from the first 10K
if not mimetype:
mimetype = self.mime.from_buffer(buffer)
if not buffer:
break
sha.update(buffer)
# only guess from the first 10K
if not mimetype:
mimetype = self.mime.from_buffer(buffer)
fin.close()
LFSOid(self.db).add(
project, package, name, revision, sha.hexdigest(), size, mimetype, file_md5

View File

@@ -216,8 +216,6 @@ class TreeBuilder:
def build(self, project, package):
"""Create a Factory tree (returning the top)"""
factory_revisions = self.revisions_chain(project, package)
if factory_revisions == None:
return None
self.add_merge_points(factory_revisions)
# factory_revisions.print()
self.prune_loose_end(factory_revisions)

View File

@@ -2,60 +2,36 @@
import json
from pathlib import Path
import pika
import random
import time
import sys
MY_TASKS_DIR = Path(__file__).parent / "tasks"
connection = pika.BlockingConnection(pika.URLParameters("amqps://opensuse:opensuse@rabbit.opensuse.org"))
channel = connection.channel()
def listen_events():
connection = pika.BlockingConnection(
pika.URLParameters("amqps://opensuse:opensuse@rabbit.opensuse.org")
)
channel = connection.channel()
channel.exchange_declare(exchange='pubsub', exchange_type='topic', passive=True, durable=True)
channel.exchange_declare(
exchange="pubsub", exchange_type="topic", passive=True, durable=False
)
result = channel.queue_declare("", exclusive=True)
queue_name = result.method.queue
result = channel.queue_declare("", exclusive=True)
queue_name = result.method.queue
channel.queue_bind(exchange='pubsub',
queue=queue_name,routing_key='#')
channel.queue_bind(
exchange="pubsub", queue=queue_name, routing_key="opensuse.obs.package.commit"
)
print(' [*] Waiting for logs. To exit press CTRL+C')
print(" [*] Waiting for logs. To exit press CTRL+C")
def callback(ch, method, properties, body):
if method.routing_key not in ("opensuse.obs.package.commit",):
def callback(ch, method, properties, body):
if method.routing_key not in ("opensuse.obs.package.commit",):
return
body = json.loads(body)
if 'project' in body and 'package' in body and body['project'] == 'openSUSE:Factory':
if '/' in body['package']:
return
body = json.loads(body)
if (
"project" in body
and "package" in body
and body["project"] == "openSUSE:Factory"
):
# Strip multibuild flavors
package = body["package"].partition(':')[0]
if "/" in package:
return
(MY_TASKS_DIR / package).touch()
print(" [x] %r:%r" % (method.routing_key, body["package"]))
(MY_TASKS_DIR / body['package']).touch()
print(" [x] %r:%r" % (method.routing_key, body['package']))
channel.basic_consume(queue_name, callback, auto_ack=True)
channel.basic_consume(queue_name,
callback,
auto_ack=True)
channel.start_consuming()
def main():
while True:
try:
listen_events()
except (pika.exceptions.ConnectionClosed, pika.exceptions.AMQPHeartbeatTimeout):
time.sleep(random.randint(10, 100))
if __name__ == "__main__":
main()
channel.start_consuming()

View File

@@ -1,19 +0,0 @@
#!/bin/bash
#
# Task runner: imports every package that has a marker file in ./tasks,
# then blocks until a new marker file is created and starts over.
# Markers of failed imports are moved to ./failed-tasks.

# Bail out if the working directory is missing; otherwise credentials.sh and
# the tasks directory would be resolved relative to the wrong place.
cd /space/dmueller/git-importer || exit 1
source credentials.sh
while true; do
    for i in "$PWD"/tasks/*; do
        if test -f "$i"; then
            # quote everything: paths and names must survive spaces/globs
            pkg=$(basename -- "$i")
            echo "$(date): Importing $pkg"
            if ! python3 ./git-importer.py -c repos/.cache "$pkg"; then
                mkdir -p "$PWD/failed-tasks"
                mv -f -- "$i" "$PWD/failed-tasks"
            fi
            # no-op after a failed import (the marker was already moved)
            rm -f -- "$i"
        fi
    done
    inotifywait -q -e create "$PWD/tasks"
done