13 Commits
devel ... main

Author SHA1 Message Date
5a28f62fb9 reconstruct state data
If the state file is missing, we can reconstruct which parts were
exported based on revision ids

Also, packages could have branches in Git, but not be in Git. We
need to check (project,package) tuple for this and not just abort
based on the package name alone.
2025-08-09 18:09:35 +02:00
17888407df Include all the devel commits
If commits in devel are younger than factory, we should
probably export them too....
2025-08-09 18:09:35 +02:00
d5eb5c0db6 Don't export service files from overlay 2025-08-09 18:09:35 +02:00
5a6a55868f Look at deleted packages if they are not found 2025-08-09 18:09:35 +02:00
5d55a8c2fe Empty files should still have a mimetype assigned 2025-08-09 18:09:35 +02:00
32d1924a0d Ignore OBS service generated files
_service: files should not be imported. They are created
by OBS service and will be re-created by OBS during scm sync

Also, not allowed in Factory by policy anyway. So this affects
devel and home projects only
2025-08-09 18:09:35 +02:00
7684388193 re-enable LFS checks 2025-08-09 18:09:35 +02:00
4ef980d1c8 fix bug 2025-02-21 17:24:23 +01:00
fb80d0c105 Revert "Only stop importing when it isn't a jengelh repository"
This reverts commit 44b4d690db.

This breaks detection of scmsync projects. Jan can live with not synced
git for a few weeks
2025-02-21 13:24:40 +01:00
Dirk Müller
44b4d690db Only stop importing when it isn't a jengelh repository 2024-12-02 09:34:49 +01:00
Dirk Müller
a69e861614 Switch the operating organization on the "pool" 2024-12-02 09:33:52 +01:00
Dirk Müller
1da740bd8b Strip multibuild flavors from monitoring 2024-09-09 09:34:12 +02:00
Dirk Mueller
b3107ba3bf Merge pull request 'Stop importing/exporting scmsync packages/projects' (#32) from adamm/git-importer:option_for_non_factory into main
Reviewed-on: importers/git-importer#32
Reviewed-by: Dirk Mueller <dirkmueller@noreply@src.opensuse.org>
2024-09-03 12:40:00 +02:00
10 changed files with 168 additions and 36 deletions

View File

@@ -119,7 +119,7 @@ def main():
importer = Importer(URL_OBS, args.project, args.packages) importer = Importer(URL_OBS, args.project, args.packages)
importer.import_into_db() importer.import_into_db()
for package in args.packages: for package in args.packages:
if not importer.package_with_scmsync(package): if not importer.package_with_scmsync(args.project, package):
export_package(args.project, package, args.repodir, args.cachedir, args.gc) export_package(args.project, package, args.repodir, args.cachedir, args.gc)
else: else:
logging.debug(f"{args.project}/{package} has scmsync links - skipping export") logging.debug(f"{args.project}/{package} has scmsync links - skipping export")

View File

@@ -204,6 +204,11 @@ class DBRevision:
and self.package == "_project" and self.package == "_project"
): ):
continue continue
# do not import _service:* files as those are created by OBS on source imports
if entry.get("name")[0:9] == "_service:":
continue
cur.execute( cur.execute(
"""INSERT INTO files (name, md5, size, mtime, revision_id) """INSERT INTO files (name, md5, size, mtime, revision_id)
VALUES (%s,%s,%s,%s,%s)""", VALUES (%s,%s,%s,%s,%s)""",

View File

@@ -20,7 +20,7 @@ class FlatTreeWalker(AbstractWalker):
def __init__(self, rebase_devel=False) -> None: def __init__(self, rebase_devel=False) -> None:
super().__init__() super().__init__()
self.flats = [] self.flats:list[FlatNode] = []
# the rebase_devel won't work as such as rebasing the branch needs an explicit action # the rebase_devel won't work as such as rebasing the branch needs an explicit action
self.rebase_devel = rebase_devel self.rebase_devel = rebase_devel
# remember the last merge point so we can know the parent of it for the root of the sources # remember the last merge point so we can know the parent of it for the root of the sources

View File

@@ -160,6 +160,12 @@ class Git:
.strip() .strip()
) )
def branch_commit(self, branch="HEAD"):
try:
return (self.git_run(["cat-file", "commit", branch], stdout=subprocess.PIPE).stdout.decode("utf-8").strip())
except:
return ''
def set_branch_head(self, branch, commit): def set_branch_head(self, branch, commit):
return self.git_run(["update-ref", f"refs/heads/{branch}", commit]) return self.git_run(["update-ref", f"refs/heads/{branch}", commit])
@@ -251,7 +257,7 @@ class Git:
def add_gitea_remote(self, package): def add_gitea_remote(self, package):
repo_name = package.replace("+", "_") repo_name = package.replace("+", "_")
org_name = "rpm" org_name = "pool"
if not os.getenv("GITEA_TOKEN"): if not os.getenv("GITEA_TOKEN"):
logging.warning("Not adding a remote due to missing $GITEA_TOKEN") logging.warning("Not adding a remote due to missing $GITEA_TOKEN")

View File

@@ -1,8 +1,12 @@
import logging import logging
import os import os
from urllib.parse import parse_qs
import psycopg
from urllib3.util import url
import yaml import yaml
from lib import db
from lib.binary import is_binary_or_large from lib.binary import is_binary_or_large
from lib.db import DB from lib.db import DB
from lib.git import Git from lib.git import Git
@@ -12,6 +16,12 @@ from lib.proxy_sha256 import ProxySHA256
from lib.tree_builder import TreeBuilder from lib.tree_builder import TreeBuilder
from lib.user import User from lib.user import User
def is_number(s):
try:
float(s)
return True
except ValueError:
return False
class GitExporter: class GitExporter:
def __init__(self, api_url, project, package, repodir, cachedir): def __init__(self, api_url, project, package, repodir, cachedir):
@@ -37,17 +47,77 @@ class GitExporter:
def set_gc_interval(self, gc): def set_gc_interval(self, gc):
self.gc_interval = gc self.gc_interval = gc
def check_repo_state(self, flats, branch_state): def reconstruct_state(self, flats):
state_data = dict()
prefix = "OBS-URL: "
for line in self.git.branch_commit("factory").splitlines():
if line.startswith(prefix):
u = url.parse_url(line.strip(prefix))
if u.path != f"/package/show/openSUSE:Factory/{self.package}" or "rev=" not in u.query:
continue
v = parse_qs(u.query)
rev = v['rev'][0]
with self.db.cursor() as cur:
try:
if is_number(rev):
cur.execute("SELECT id FROM revisions WHERE project=%s AND package=%s AND rev=%s", ('openSUSE:Factory', self.package, rev,))
else:
cur.execute("SELECT id FROM revisions WHERE project=%s AND package=%s AND expanded_srcmd5=%s", ('openSUSE:Factory', self.package, rev, rev))
except psycopg.Error as e:
logging.error(e)
self.db.conn.rollback()
row = cur.fetchone()
if not row:
return state_data
state_data['factory'] = row[0]
try:
print("devel reconstruct")
d = self.devel_rev(flats)
if d is not None:
prj = d.commit.project
for line in self.git.branch_commit("devel").splitlines():
if line.startswith(prefix):
u = url.parse_url(line.strip(prefix))
if u.path != f"/package/show/{prj}/{self.package}" or u.query is None or "rev=" not in u.query:
continue
v = parse_qs(u.query)
rev = v['rev'][0]
try:
with self.db.cursor() as cur:
logging.debug(f"finding id for ({prj, self.package, rev}")
if is_number(rev):
cur.execute("SELECT id FROM revisions WHERE project=%s AND package=%s AND rev=%s", (prj, self.package, rev,))
else:
cur.execute("SELECT id FROM revisions WHERE project=%s AND package=%s AND expanded_srcmd5=%s", (prj, self.package, rev,))
row = cur.fetchone()
if not row:
logging.info(" ** cannot find revision for devel branch:", rev)
return state_data
state_data['devel'] = row[0]
except psycopg.Error as e:
logging.error(e)
self.db.conn.rollback()
if state_data['factory'] is not None:
state_data['devel'] = state_data['factory']
except:
if state_data['factory'] is not None:
state_data['devel'] = state_data['factory']
return state_data
def check_repo_state(self, flats, branch_state, branch):
state_data = dict() state_data = dict()
if os.path.exists(self.state_file): if os.path.exists(self.state_file):
with open(self.state_file) as f: with open(self.state_file) as f:
state_data = yaml.safe_load(f) state_data = yaml.safe_load(f)
if not isinstance(state_data, dict): if not isinstance(state_data, dict):
state_data = {} state_data = {}
else:
state_data = self.reconstruct_state(flats)
logging.debug(f"state data: {state_data}")
left_to_commit = [] left_to_commit = []
for flat in reversed(flats): for flat in reversed(flats):
found_state = False found_state = False
for branch in ["factory"]:
if flat.commit.dbid == state_data.get(branch): if flat.commit.dbid == state_data.get(branch):
branch_state[branch] = flat.commit branch_state[branch] = flat.commit
flat.commit.git_commit = self.git.branch_head(branch) flat.commit.git_commit = self.git.branch_head(branch)
@@ -60,19 +130,24 @@ class GitExporter:
left_to_commit.append(flat) left_to_commit.append(flat)
return left_to_commit return left_to_commit
def devel_rev(self, tree):
for flat in tree:
if flat.branch == "devel":
return flat
return None
def export_as_git(self): def export_as_git(self):
if os.getenv("CHECK_ALL_LFS"): if os.getenv("CHECK_ALL_LFS"):
LFSOid.check_all(self.db, self.package) LFSOid.check_all(self.db, self.package)
tree = TreeBuilder(self.db).build(self.project, self.package) tree = TreeBuilder(self.db).build(self.project, self.package)
flats = tree.as_flat_list() added_commits = False
branch_state = {"factory": None, "devel": None} if tree == None: # eg. python-M2Crypto errors
left_to_commit = self.check_repo_state(flats, branch_state)
if not left_to_commit:
return return
flats = tree.as_flat_list()
logging.info(f"Commiting into {self.git.path}") branch_state = {"factory": None, "devel": None}
left_to_commit = self.check_repo_state(flats, branch_state, "factory")
logging.info(f"Commiting into {self.git.path} {len(left_to_commit)} into factory")
self.run_gc() self.run_gc()
users = dict() users = dict()
@@ -85,13 +160,40 @@ class GitExporter:
self.run_gc() self.run_gc()
logging.debug(f"Committing {flat}") logging.debug(f"Committing {flat}")
self.commit_flat(flat, branch_state) self.commit_flat(flat, branch_state)
added_commits = True
# export the devel_tree head commits based on the devel branch
if self.project == "openSUSE:Factory":
devel_head = self.devel_rev(flats)
flat_devel = None
if devel_head is not None:
logging.debug(f"building devel revisions chain for {devel_head.commit.project} / {self.package}")
flat_devel = TreeBuilder(self.db).revisions_chain(devel_head.commit.project, self.package).as_flat_list()
for f in flat_devel:
f.branch = "devel"
if flat_devel is not None:
left_to_commit = self.check_repo_state(flat_devel, branch_state, "devel")
logging.debug(branch_state)
logging.debug(f"appending {len(left_to_commit)} items on top of devel")
for flat in left_to_commit:
if flat.commit.userid not in users:
users[flat.commit.userid] = User.find(self.db, flat.commit.userid)
flat.user = users[flat.commit.userid]
self.gc_cnt -= 1
if self.gc_cnt <= 0 and self.gc_interval:
self.run_gc()
logging.debug(f"Committing {flat}")
self.commit_flat(flat, branch_state)
added_commits = True
# make sure that we create devel branch # make sure that we create devel branch
if not branch_state["devel"]: if not branch_state["devel"]:
logging.debug("force creating devel") logging.debug("force creating devel")
self.git.set_branch_head("devel", self.git.branch_head("factory")) self.git.set_branch_head("devel", self.git.branch_head("factory"))
self.git.push(force=True) #if added_commits:
# self.git.push(force=True)
def run_gc(self): def run_gc(self):
self.gc_cnt = self.gc_interval self.gc_cnt = self.gc_interval
@@ -103,6 +205,10 @@ class GitExporter:
return not self.proxy_sha256.is_text(package, filename) return not self.proxy_sha256.is_text(package, filename)
def commit_file(self, flat, file, size, md5): def commit_file(self, flat, file, size, md5):
# don't export imported _service: files, if any
if file.name[0:9] == '_service:':
return
# have such files been detected as text mimetype before? # have such files been detected as text mimetype before?
if self.is_lfs_file(flat.commit.package, file.name, size): if self.is_lfs_file(flat.commit.package, file.name, size):
file_sha256 = self.proxy_sha256.get_or_put( file_sha256 = self.proxy_sha256.get_or_put(

View File

@@ -42,6 +42,8 @@ class Importer:
def update_db_package(self, project, package): def update_db_package(self, project, package):
root = self.obs._history(project, package) root = self.obs._history(project, package)
if root is None: if root is None:
if self.project == "openSUSE:Factory" and project == self.project:
exit(10)
return return
latest = DBRevision.max_rev(self.db, project, package) latest = DBRevision.max_rev(self.db, project, package)
for r in root.findall("revision"): for r in root.findall("revision"):
@@ -217,7 +219,7 @@ class Importer:
logging.debug(f"Refresh {project}/{package}") logging.debug(f"Refresh {project}/{package}")
self.refreshed_packages.add(key) self.refreshed_packages.add(key)
if self.has_scmsync(project) or self.has_scmsync(key): if self.has_scmsync(project) or self.has_scmsync(key):
self.packages_with_scmsync.add(package) self.packages_with_scmsync.add((project, package))
logging.debug(f"{project}/{package} already in Git - skipping") logging.debug(f"{project}/{package} already in Git - skipping")
return return
self.update_db_package(project, package) self.update_db_package(project, package)
@@ -274,6 +276,6 @@ class Importer:
self.scmsync_cache[key] = scmsync_exists self.scmsync_cache[key] = scmsync_exists
return scmsync_exists return scmsync_exists
def package_with_scmsync(self, package): def package_with_scmsync(self, project, package):
return package in self.packages_with_scmsync return (project, package) in self.packages_with_scmsync

View File

@@ -148,12 +148,21 @@ class OBS:
] ]
def _download(self, project, package, name, revision): def _download(self, project, package, name, revision):
try:
url = osc.core.makeurl( url = osc.core.makeurl(
self.url, self.url,
["source", project, package, name], ["source", project, package, name],
{"rev": revision, "expand": 1}, {"rev": revision, "expand": 1},
) )
return osc.core.http_GET(url) return osc.core.http_GET(url)
except HTTPError as e:
if e.status == 404:
url = osc.core.makeurl(
self.url,
["source", project, package, name],
{"rev": revision, "expand": 1, "deleted": 1},
)
return osc.core.http_GET(url)
def download( def download(
self, self,
@@ -189,7 +198,7 @@ class OBS:
try: try:
root = self._xml(f"source/{project}/{package}", **params) root = self._xml(f"source/{project}/{package}", **params)
except HTTPError as e: except HTTPError as e:
if e.code == 400: if e.code == 400 or e.code == 404:
logging.error( logging.error(
f"Package [{project}/{package} {params}] can't be expanded: {e}" f"Package [{project}/{package} {params}] can't be expanded: {e}"
) )

View File

@@ -50,12 +50,12 @@ class ProxySHA256:
sha = hashlib.sha256() sha = hashlib.sha256()
while True: while True:
buffer = fin.read(10000) buffer = fin.read(10000)
if not buffer:
break
sha.update(buffer)
# only guess from the first 10K # only guess from the first 10K
if not mimetype: if not mimetype:
mimetype = self.mime.from_buffer(buffer) mimetype = self.mime.from_buffer(buffer)
if not buffer:
break
sha.update(buffer)
fin.close() fin.close()
LFSOid(self.db).add( LFSOid(self.db).add(
project, package, name, revision, sha.hexdigest(), size, mimetype, file_md5 project, package, name, revision, sha.hexdigest(), size, mimetype, file_md5

View File

@@ -216,6 +216,8 @@ class TreeBuilder:
def build(self, project, package): def build(self, project, package):
"""Create a Factory tree (returning the top)""" """Create a Factory tree (returning the top)"""
factory_revisions = self.revisions_chain(project, package) factory_revisions = self.revisions_chain(project, package)
if factory_revisions == None:
return None
self.add_merge_points(factory_revisions) self.add_merge_points(factory_revisions)
# factory_revisions.print() # factory_revisions.print()
self.prune_loose_end(factory_revisions) self.prune_loose_end(factory_revisions)

View File

@@ -36,10 +36,12 @@ def listen_events():
and "package" in body and "package" in body
and body["project"] == "openSUSE:Factory" and body["project"] == "openSUSE:Factory"
): ):
if "/" in body["package"]: # Strip multibuild flavors
package = body["package"].partition(':')[0]
if "/" in package:
return return
(MY_TASKS_DIR / body["package"]).touch() (MY_TASKS_DIR / package).touch()
print(" [x] %r:%r" % (method.routing_key, body["package"])) print(" [x] %r:%r" % (method.routing_key, body["package"]))
channel.basic_consume(queue_name, callback, auto_ack=True) channel.basic_consume(queue_name, callback, auto_ack=True)