Files
git-importer/lib/git_exporter.py
2025-10-08 14:32:33 +02:00

272 lines
11 KiB
Python

import logging
import os
from urllib.parse import parse_qs
import psycopg
from urllib3.util import url
import yaml
from lib import db
from lib.binary import is_binary_or_large
from lib.db import DB
from lib.git import Git
from lib.lfs_oid import LFSOid
from lib.obs import OBS
from lib.proxy_sha256 import ProxySHA256
from lib.tree_builder import TreeBuilder
from lib.user import User
def is_number(s):
try:
float(s)
return True
except ValueError:
return False
class GitExporter:
def __init__(self, api_url, project, package, repodir, cachedir):
self.obs = OBS(api_url)
self.project = project
self.package = package
self.db = DB()
self.proxy_sha256 = ProxySHA256(self.obs, self.db)
self.git = Git(
repodir / package,
committer="Git OBS Bridge",
committer_email="obsbridge@suse.de",
)
if self.git.exists():
self.git.open()
else:
self.git.create()
self.git.add_gitea_remote(package)
self.gc_interval = 200
self.cachedir = cachedir
def set_gc_interval(self, gc):
self.gc_interval = gc
def reconstruct_state(self, flats):
state_data = dict()
prefix = "OBS-URL: "
for line in self.git.branch_commit("factory").splitlines():
if line.startswith(prefix):
u = url.parse_url(line.strip(prefix))
if u.path != f"/package/show/openSUSE:Factory/{self.package}" or "rev=" not in u.query:
continue
v = parse_qs(u.query)
rev = v['rev'][0]
with self.db.cursor() as cur:
try:
if is_number(rev):
cur.execute("SELECT id FROM revisions WHERE project=%s AND package=%s AND rev=%s", ('openSUSE:Factory', self.package, rev,))
else:
cur.execute("SELECT id FROM revisions WHERE project=%s AND package=%s AND expanded_srcmd5=%s", ('openSUSE:Factory', self.package, rev, rev))
except psycopg.Error as e:
logging.error(e)
self.db.conn.rollback()
row = cur.fetchone()
if not row:
return state_data
state_data['factory'] = row[0]
try:
d = self.devel_rev(flats)
if d is not None:
prj = d.commit.project
for line in self.git.branch_commit("devel").splitlines():
if line.startswith(prefix):
u = url.parse_url(line.strip(prefix))
if u.path != f"/package/show/{prj}/{self.package}" or u.query is None or "rev=" not in u.query:
continue
v = parse_qs(u.query)
rev = v['rev'][0]
try:
with self.db.cursor() as cur:
logging.debug(f"finding id for ({prj, self.package, rev}")
if is_number(rev):
cur.execute("SELECT id FROM revisions WHERE project=%s AND package=%s AND rev=%s", (prj, self.package, rev,))
else:
cur.execute("SELECT id FROM revisions WHERE project=%s AND package=%s AND expanded_srcmd5=%s", (prj, self.package, rev,))
row = cur.fetchone()
if not row:
logging.info(" ** cannot find revision for devel branch:", rev)
return state_data
state_data['devel'] = row[0]
except psycopg.Error as e:
logging.error(e)
self.db.conn.rollback()
if state_data['factory'] is not None:
state_data['devel'] = state_data['factory']
except:
if state_data['factory'] is not None:
state_data['devel'] = state_data['factory']
return state_data
def check_repo_state(self, flats, branch_state, branch):
state_data = self.reconstruct_state(flats)
logging.debug(f"state data: {state_data}")
left_to_commit = []
for flat in reversed(flats):
found_state = False
if flat.commit.dbid == state_data.get(branch):
branch_state[branch] = flat.commit
flat.commit.git_commit = self.git.branch_head(branch)
logging.debug(
f"Found {self.git.path}'s {branch} branch in state {flat}"
)
left_to_commit = []
found_state = True
if not found_state:
left_to_commit.append(flat)
return left_to_commit
def devel_rev(self, tree):
for flat in tree:
if flat.branch == "devel":
return flat
return None
def export_as_git(self):
if os.getenv("CHECK_ALL_LFS"):
LFSOid.check_all(self.db, self.package)
tree = TreeBuilder(self.db).build(self.project, self.package)
added_commits = False
if tree == None: # eg. python-M2Crypto errors
return
flats = tree.as_flat_list()
branch_state = {"factory": None, "devel": None}
left_to_commit = self.check_repo_state(flats, branch_state, "factory")
logging.info(f"Commiting into {self.git.path} {len(left_to_commit)} into factory")
self.run_gc()
users = dict()
for flat in left_to_commit:
if flat.commit.userid not in users:
users[flat.commit.userid] = User.find(self.db, flat.commit.userid)
flat.user = users[flat.commit.userid]
self.gc_cnt -= 1
if self.gc_cnt <= 0 and self.gc_interval:
self.run_gc()
logging.debug(f"Committing {flat}")
self.commit_flat(flat, branch_state)
added_commits = True
# export the devel_tree head commits based on the devel branch
if self.project == "openSUSE:Factory":
devel_head = self.devel_rev(flats)
flat_devel = None
if devel_head is not None:
logging.debug(f"building devel revisions chain for {devel_head.commit.project} / {self.package}")
flat_devel = TreeBuilder(self.db).revisions_chain(devel_head.commit.project, self.package).as_flat_list()
for f in flat_devel:
f.branch = "devel"
if flat_devel is not None:
left_to_commit = self.check_repo_state(flat_devel, branch_state, "devel")
logging.debug(branch_state)
logging.debug(f"appending {len(left_to_commit)} items on top of devel")
for flat in left_to_commit:
if flat.commit.userid not in users:
users[flat.commit.userid] = User.find(self.db, flat.commit.userid)
flat.user = users[flat.commit.userid]
self.gc_cnt -= 1
if self.gc_cnt <= 0 and self.gc_interval:
self.run_gc()
logging.debug(f"Committing {flat}")
self.commit_flat(flat, branch_state)
added_commits = True
# make sure that we create devel branch
# if not branch_state["devel"]:
# logging.debug("force creating devel")
# self.git.set_branch_head("devel", self.git.branch_head("factory"))
if added_commits:
self.git.push(force=True)
def run_gc(self):
self.gc_cnt = self.gc_interval
self.git.gc()
def is_lfs_file(self, package, filename, size):
if not is_binary_or_large(filename, size):
return False
return not self.proxy_sha256.is_text(package, filename)
def commit_file(self, flat, file, size, md5):
# don't export imported _service: files, if any
if file.name[0:9] == '_service:':
return
# have such files been detected as text mimetype before?
if self.is_lfs_file(flat.commit.package, file.name, size):
file_sha256 = self.proxy_sha256.get_or_put(
flat.commit.project,
flat.commit.package,
file.name,
flat.commit.expanded_srcmd5,
md5,
size,
)
# as it's newly registered, it might be a text file now, so double check
if not self.proxy_sha256.is_text(flat.commit.package, file.name):
self.git.add_lfs(file.name, file_sha256, size)
return
self.commit_non_lfs_file(flat, file, md5)
def commit_non_lfs_file(self, flat, file, md5):
self.obs.change_url(flat.commit.api_url)
self.obs.download(
flat.commit.project,
flat.commit.package,
file.name,
flat.commit.expanded_srcmd5,
self.git.path,
self.cachedir,
file_md5=md5,
)
self.git.add(file)
def branch_fits_parent1(self, flat, branch_state):
if branch_state[flat.branch] is None:
# everything fits nothing
return True
return flat.parent1 == branch_state[flat.branch]
def commit_flat(self, flat, branch_state):
parents = []
self.git.checkout(flat.branch)
if flat.parent1:
if not self.branch_fits_parent1(flat, branch_state):
logging.debug(f"Reset {flat.branch} onto {flat.parent1.short_string()}")
assert flat.parent1.git_commit
self.git.set_branch_head(flat.branch, flat.parent1.git_commit)
self.git.checkout(flat.branch)
parents.append(flat.parent1.git_commit)
if flat.parent2:
assert flat.parent2.git_commit
parents.append(flat.parent2.git_commit)
# create file if not existant
self.git.add_default_lfs_gitattributes(force=False)
self.git.add_default_gitignore()
to_download, to_delete = flat.commit.calc_delta(branch_state[flat.branch])
for file in to_delete:
self.git.remove(file)
for file, size, md5 in to_download:
self.commit_file(flat, file, size, md5)
commit = self.git.commit(
flat.user.realname,
flat.user.email,
flat.commit.commit_time,
flat.commit.git_commit_message(),
parents=parents,
)
flat.commit.git_commit = commit
branch_state[flat.branch] = flat.commit