reconstruct state data

If the state file is missing, we can reconstruct which parts were
exported based on the revision ids.

Also, a package can have branches in Git without being in Git itself.
We need to check the (project, package) tuple for this and not just
abort based on the package name alone.
This commit is contained in:
2025-08-09 18:06:35 +02:00
parent 17888407df
commit 5a28f62fb9
5 changed files with 102 additions and 23 deletions

View File

@@ -119,7 +119,7 @@ def main():
importer = Importer(URL_OBS, args.project, args.packages) importer = Importer(URL_OBS, args.project, args.packages)
importer.import_into_db() importer.import_into_db()
for package in args.packages: for package in args.packages:
if not importer.package_with_scmsync(package): if not importer.package_with_scmsync(args.project, package):
export_package(args.project, package, args.repodir, args.cachedir, args.gc) export_package(args.project, package, args.repodir, args.cachedir, args.gc)
else: else:
logging.debug(f"{args.project}/{package} has scmsync links - skipping export") logging.debug(f"{args.project}/{package} has scmsync links - skipping export")

View File

@@ -20,7 +20,7 @@ class FlatTreeWalker(AbstractWalker):
def __init__(self, rebase_devel=False) -> None: def __init__(self, rebase_devel=False) -> None:
super().__init__() super().__init__()
self.flats = [] self.flats:list[FlatNode] = []
# the rebase_devel won't work as such as rebasing the branch needs an explicit action # the rebase_devel won't work as such as rebasing the branch needs an explicit action
self.rebase_devel = rebase_devel self.rebase_devel = rebase_devel
# remember the last merge point so we can know the parent of it for the root of the sources # remember the last merge point so we can know the parent of it for the root of the sources

View File

@@ -160,6 +160,12 @@ class Git:
.strip() .strip()
) )
def branch_commit(self, branch="HEAD"):
    """Return the raw commit object that *branch* points to, as text.

    Runs ``git cat-file commit <branch>`` through ``self.git_run`` and
    returns its captured stdout decoded as UTF-8 with surrounding
    whitespace stripped.

    Any ordinary failure (for example the branch does not exist, or the
    output is not valid UTF-8) yields an empty string, so callers can
    safely call ``.splitlines()`` on the result.
    """
    try:
        result = self.git_run(["cat-file", "commit", branch], stdout=subprocess.PIPE)
        return result.stdout.decode("utf-8").strip()
    except Exception:
        # Best effort: a missing branch is an expected situation here.
        # Deliberately not a bare ``except`` so that KeyboardInterrupt and
        # SystemExit still propagate.
        return ''
def set_branch_head(self, branch, commit): def set_branch_head(self, branch, commit):
return self.git_run(["update-ref", f"refs/heads/{branch}", commit]) return self.git_run(["update-ref", f"refs/heads/{branch}", commit])

View File

@@ -1,8 +1,12 @@
import logging import logging
import os import os
from urllib.parse import parse_qs
import psycopg
from urllib3.util import url
import yaml import yaml
from lib import db
from lib.binary import is_binary_or_large from lib.binary import is_binary_or_large
from lib.db import DB from lib.db import DB
from lib.git import Git from lib.git import Git
@@ -12,6 +16,12 @@ from lib.proxy_sha256 import ProxySHA256
from lib.tree_builder import TreeBuilder from lib.tree_builder import TreeBuilder
from lib.user import User from lib.user import User
def is_number(s):
    """Return True when *s* can be converted by ``float()``, else False.

    Used to tell a numeric revision apart from a source-md5 style
    identifier. Values that ``float()`` rejects with ``TypeError``
    (``None``, lists, ...) are reported as "not a number" instead of
    raising.

    NOTE(review): ``float()`` also accepts forms such as ``"1e5"``,
    ``"inf"`` and ``"nan"``; confirm that is acceptable for the revision
    strings passed in.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        return False
    return True
class GitExporter: class GitExporter:
def __init__(self, api_url, project, package, repodir, cachedir): def __init__(self, api_url, project, package, repodir, cachedir):
@@ -37,6 +47,63 @@ class GitExporter:
def set_gc_interval(self, gc): def set_gc_interval(self, gc):
self.gc_interval = gc self.gc_interval = gc
def reconstruct_state(self, flats):
    """Rebuild the exporter state from the heads of the git branches.

    The head commits of the ``factory`` and ``devel`` branches are
    scanned for ``OBS-URL: `` lines. For each URL that points at this
    package and carries a ``rev`` query parameter, the matching row id is
    looked up in the ``revisions`` table (by ``rev`` when the value is
    numeric, by ``expanded_srcmd5`` otherwise).

    Args:
        flats: passed to ``self.devel_rev`` to find the devel commit,
            whose ``commit.project`` names the devel project.

    Returns:
        A dict that may contain the keys ``'factory'`` and ``'devel'``,
        each mapping to a ``revisions.id``. The dict is returned as-is
        (possibly empty or partial) as soon as a referenced revision
        cannot be found. When the devel lookup fails with an error,
        ``'devel'`` falls back to the ``'factory'`` value if one is known.
    """
    state_data = dict()
    prefix = "OBS-URL: "
    factory_project = "openSUSE:Factory"

    def revisions_in_head(branch, project):
        """Yield the rev values of OBS-URL lines in *branch*'s head commit that point at *project*."""
        expected_path = f"/package/show/{project}/{self.package}"
        for line in self.git.branch_commit(branch).splitlines():
            if not line.startswith(prefix):
                continue
            # Slice the prefix off. str.strip(prefix) would treat the
            # prefix as a *set of characters* and could also eat
            # characters from the end of the URL.
            u = url.parse_url(line[len(prefix):].strip())
            # u.query is None for URLs without a query string.
            if u.path != expected_path or not u.query:
                continue
            revs = parse_qs(u.query).get("rev")
            if revs:
                yield revs[0]

    def lookup_revision_id(project, rev):
        """Return the revisions.id for *rev* in *project*, or None if there is no such row."""
        with self.db.cursor() as cur:
            if is_number(rev):
                cur.execute(
                    "SELECT id FROM revisions WHERE project=%s AND package=%s AND rev=%s",
                    (project, self.package, rev),
                )
            else:
                cur.execute(
                    "SELECT id FROM revisions WHERE project=%s AND package=%s AND expanded_srcmd5=%s",
                    (project, self.package, rev),
                )
            row = cur.fetchone()
        return row[0] if row else None

    def devel_falls_back_to_factory():
        """Point devel at the factory revision when one was reconstructed."""
        if state_data.get('factory') is not None:
            state_data['devel'] = state_data['factory']

    for rev in revisions_in_head("factory", factory_project):
        try:
            revision_id = lookup_revision_id(factory_project, rev)
        except psycopg.Error as e:
            logging.error(e)
            self.db.conn.rollback()
            # A failed query is handled like an unknown revision; there
            # is no result to fetch after the error.
            revision_id = None
        if revision_id is None:
            return state_data
        state_data['factory'] = revision_id

    try:
        logging.debug("devel reconstruct")
        d = self.devel_rev(flats)
        if d is not None:
            prj = d.commit.project
            for rev in revisions_in_head("devel", prj):
                logging.debug(f"finding id for ({prj}, {self.package}, {rev})")
                try:
                    revision_id = lookup_revision_id(prj, rev)
                except psycopg.Error as e:
                    logging.error(e)
                    self.db.conn.rollback()
                    devel_falls_back_to_factory()
                    continue
                if revision_id is None:
                    logging.info(f" ** cannot find revision for devel branch: {rev}")
                    return state_data
                state_data['devel'] = revision_id
    except Exception as e:
        # Best effort: any failure while inspecting the devel branch
        # degrades to "devel is where factory is".
        logging.debug(f"reconstructing devel state failed: {e}")
        devel_falls_back_to_factory()
    return state_data
def check_repo_state(self, flats, branch_state, branch): def check_repo_state(self, flats, branch_state, branch):
state_data = dict() state_data = dict()
if os.path.exists(self.state_file): if os.path.exists(self.state_file):
@@ -44,6 +111,10 @@ class GitExporter:
state_data = yaml.safe_load(f) state_data = yaml.safe_load(f)
if not isinstance(state_data, dict): if not isinstance(state_data, dict):
state_data = {} state_data = {}
else:
state_data = self.reconstruct_state(flats)
logging.debug(f"state data: {state_data}")
left_to_commit = [] left_to_commit = []
for flat in reversed(flats): for flat in reversed(flats):
found_state = False found_state = False
@@ -75,10 +146,8 @@ class GitExporter:
return return
flats = tree.as_flat_list() flats = tree.as_flat_list()
branch_state = {"factory": None, "devel": None} branch_state = {"factory": None, "devel": None}
self.check_repo_state(flats, branch_state, "devel") # used to set branch_state only
left_to_commit = self.check_repo_state(flats, branch_state, "factory") left_to_commit = self.check_repo_state(flats, branch_state, "factory")
logging.info(f"Commiting into {self.git.path} {len(left_to_commit)} into factory")
logging.info(f"Commiting into {self.git.path}")
self.run_gc() self.run_gc()
users = dict() users = dict()
@@ -98,31 +167,33 @@ class GitExporter:
devel_head = self.devel_rev(flats) devel_head = self.devel_rev(flats)
flat_devel = None flat_devel = None
if devel_head is not None: if devel_head is not None:
logging.debug(f"building devel revisions chain for {devel_head.commit.project} / {self.package}")
flat_devel = TreeBuilder(self.db).revisions_chain(devel_head.commit.project, self.package).as_flat_list() flat_devel = TreeBuilder(self.db).revisions_chain(devel_head.commit.project, self.package).as_flat_list()
for f in flat_devel: for f in flat_devel:
f.branch = "devel" f.branch = "devel"
left_to_commit = self.check_repo_state(flat_devel, branch_state, "devel") if flat_devel is not None:
print(branch_state) left_to_commit = self.check_repo_state(flat_devel, branch_state, "devel")
logging.debug(f"appending {len(left_to_commit)} items on top of devel") logging.debug(branch_state)
for flat in left_to_commit: logging.debug(f"appending {len(left_to_commit)} items on top of devel")
if flat.commit.userid not in users: for flat in left_to_commit:
users[flat.commit.userid] = User.find(self.db, flat.commit.userid) if flat.commit.userid not in users:
flat.user = users[flat.commit.userid] users[flat.commit.userid] = User.find(self.db, flat.commit.userid)
self.gc_cnt -= 1 flat.user = users[flat.commit.userid]
if self.gc_cnt <= 0 and self.gc_interval: self.gc_cnt -= 1
self.run_gc() if self.gc_cnt <= 0 and self.gc_interval:
logging.debug(f"Committing {flat}") self.run_gc()
self.commit_flat(flat, branch_state) logging.debug(f"Committing {flat}")
added_commits = True self.commit_flat(flat, branch_state)
added_commits = True
# make sure that we create devel branch # make sure that we create devel branch
if not branch_state["devel"]: if not branch_state["devel"]:
logging.debug("force creating devel") logging.debug("force creating devel")
self.git.set_branch_head("devel", self.git.branch_head("factory")) self.git.set_branch_head("devel", self.git.branch_head("factory"))
if added_commits: #if added_commits:
self.git.push(force=True) # self.git.push(force=True)
def run_gc(self): def run_gc(self):
self.gc_cnt = self.gc_interval self.gc_cnt = self.gc_interval

View File

@@ -42,6 +42,8 @@ class Importer:
def update_db_package(self, project, package): def update_db_package(self, project, package):
root = self.obs._history(project, package) root = self.obs._history(project, package)
if root is None: if root is None:
if self.project == "openSUSE:Factory" and project == self.project:
exit(10)
return return
latest = DBRevision.max_rev(self.db, project, package) latest = DBRevision.max_rev(self.db, project, package)
for r in root.findall("revision"): for r in root.findall("revision"):
@@ -217,7 +219,7 @@ class Importer:
logging.debug(f"Refresh {project}/{package}") logging.debug(f"Refresh {project}/{package}")
self.refreshed_packages.add(key) self.refreshed_packages.add(key)
if self.has_scmsync(project) or self.has_scmsync(key): if self.has_scmsync(project) or self.has_scmsync(key):
self.packages_with_scmsync.add(package) self.packages_with_scmsync.add((project, package))
logging.debug(f"{project}/{package} already in Git - skipping") logging.debug(f"{project}/{package} already in Git - skipping")
return return
self.update_db_package(project, package) self.update_db_package(project, package)
@@ -274,6 +276,6 @@ class Importer:
self.scmsync_cache[key] = scmsync_exists self.scmsync_cache[key] = scmsync_exists
return scmsync_exists return scmsync_exists
def package_with_scmsync(self, package): def package_with_scmsync(self, project, package):
return package in self.packages_with_scmsync return (project, package) in self.packages_with_scmsync