We can reconstruct the state based on log messages. Otherwise, it's incorrect as the trimming function removes exported parts anyway and then we have missing commits
273 lines
11 KiB
Python
273 lines
11 KiB
Python
import logging
|
|
import os
|
|
from urllib.parse import parse_qs
|
|
|
|
import psycopg
|
|
from urllib3.util import url
|
|
import yaml
|
|
|
|
from lib import db
|
|
from lib.binary import is_binary_or_large
|
|
from lib.db import DB
|
|
from lib.git import Git
|
|
from lib.lfs_oid import LFSOid
|
|
from lib.obs import OBS
|
|
from lib.proxy_sha256 import ProxySHA256
|
|
from lib.tree_builder import TreeBuilder
|
|
from lib.user import User
|
|
|
|
def is_number(s):
    """Tell whether *s* can be converted by ``float()``.

    Callers in this file use it to decide whether an OBS ``rev`` value is a
    revision number or an srcmd5 hash.

    Returns:
        True when ``float(s)`` succeeds, False when it raises ValueError.
    """
    try:
        float(s)
    except ValueError:
        return False
    return True
|
|
|
|
class GitExporter:
|
|
def __init__(self, api_url, project, package, repodir, cachedir):
    """Set up the OBS client, the database handle and the git repository.

    Args:
        api_url: handed to ``OBS`` to create the API client.
        project: OBS project name, stored as ``self.project``.
        package: package name, stored as ``self.package``; also the name of
            the repository directory below *repodir*.
        repodir: base directory of the git repositories; joined with
            *package* using ``/`` (presumably a ``pathlib.Path`` -- confirm
            against the caller).
        cachedir: stored as ``self.cachedir`` and passed to
            ``self.obs.download`` when files are fetched.
    """
    self.obs = OBS(api_url)
    self.project = project
    self.package = package
    self.db = DB()
    self.proxy_sha256 = ProxySHA256(self.obs, self.db)
    self.git = Git(
        repodir / package,
        committer="Git OBS Bridge",
        committer_email="obsbridge@suse.de",
    )
    # reuse an existing repository, otherwise create a new one
    if self.git.exists():
        self.git.open()
    else:
        self.git.create()
        # NOTE(review): the original indentation was lost; the remote is
        # assumed to be added only for freshly created repositories -- confirm.
        self.git.add_gitea_remote(package)
    # run_gc() reloads the commit countdown from this value
    self.gc_interval = 200
    self.cachedir = cachedir
|
|
|
|
def set_gc_interval(self, gc):
    """Replace the gc interval.

    Args:
        gc: new value for ``self.gc_interval``; ``run_gc`` copies it into the
            commit countdown, and a falsy value switches off the periodic gc
            check in ``export_as_git``.
    """
    self.gc_interval = gc
|
|
|
|
def reconstruct_state(self, flats):
    """Rebuild the export state from the ``OBS-URL: `` lines of the branches.

    The text returned by ``self.git.branch_commit(branch)`` (presumably the
    commit at the branch head -- confirm against ``Git``) is scanned for
    ``OBS-URL: `` lines. For every URL that points at the expected
    ``/package/show/<project>/<package>`` path, the ``rev`` query parameter
    is looked up in the ``revisions`` table and the id found is recorded.

    Args:
        flats: flat commit list; only used to find the devel entry (through
            ``self.devel_rev``) whose ``commit.project`` names the devel
            project.

    Returns:
        dict: may hold the keys ``'factory'`` and ``'devel'`` mapping to
        ``revisions.id`` values. A key is missing when the state of that
        branch could not be determined. When the devel lookup fails with an
        error, ``'devel'`` falls back to the ``'factory'`` value if known.
    """
    state_data = dict()
    prefix = "OBS-URL: "
    factory_project = "openSUSE:Factory"

    def obs_revisions(branch, project):
        """Yield the ``rev`` value of each OBS-URL line of *branch* pointing at *project*."""
        expected_path = f"/package/show/{project}/{self.package}"
        for line in self.git.branch_commit(branch).splitlines():
            if not line.startswith(prefix):
                continue
            # str.strip(prefix) would remove a *set of characters* from both
            # ends and could eat the tail of the URL, so slice the prefix off
            u = url.parse_url(line[len(prefix):].strip())
            # the query is None when the URL has no query string at all
            if u.path != expected_path or not u.query:
                continue
            revs = parse_qs(u.query).get("rev")
            if revs:
                yield revs[0]

    def find_revision(project, rev):
        """Return the ``(id,)`` row of the revision *rev* in *project*, or None."""
        # numeric values are revision numbers, everything else is an srcmd5
        if is_number(rev):
            query = "SELECT id FROM revisions WHERE project=%s AND package=%s AND rev=%s"
        else:
            query = "SELECT id FROM revisions WHERE project=%s AND package=%s AND expanded_srcmd5=%s"
        with self.db.cursor() as cur:
            # exactly one parameter per placeholder
            cur.execute(query, (project, self.package, rev))
            return cur.fetchone()

    def fall_back_to_factory():
        """Use the factory state for devel when the factory state is known."""
        if state_data.get("factory") is not None:
            state_data["devel"] = state_data["factory"]

    for rev in obs_revisions("factory", factory_project):
        try:
            row = find_revision(factory_project, rev)
        except psycopg.Error as e:
            logging.error(e)
            self.db.conn.rollback()
            # there is no result to fetch after a failed query: treat the
            # revision as unknown instead of failing in fetchone()
            return state_data
        if not row:
            return state_data
        state_data["factory"] = row[0]

    try:
        # this was a bare print(); the rest of the file reports via logging
        logging.debug("devel reconstruct")
        d = self.devel_rev(flats)
        if d is not None:
            prj = d.commit.project
            for rev in obs_revisions("devel", prj):
                logging.debug("finding id for (%s, %s, %s)", prj, self.package, rev)
                try:
                    row = find_revision(prj, rev)
                except psycopg.Error as e:
                    logging.error(e)
                    self.db.conn.rollback()
                    fall_back_to_factory()
                    continue
                if not row:
                    logging.info(" ** cannot find revision for devel branch: %s", rev)
                    return state_data
                state_data["devel"] = row[0]
    except Exception:
        # best effort: any other failure falls back to the factory state,
        # but it is logged instead of being swallowed silently
        logging.exception("cannot reconstruct devel state of %s", self.package)
        fall_back_to_factory()
    return state_data
|
|
|
|
def check_repo_state(self, flats, branch_state, branch):
    """Work out which entries of *flats* still have to be committed to *branch*.

    *flats* is walked in reverse order. When an entry's ``commit.dbid``
    equals the id that ``reconstruct_state`` reports for *branch*, that
    commit becomes ``branch_state[branch]``, its ``git_commit`` is set to
    the current head of the git branch, and everything collected before it
    is dropped.

    Args:
        flats: flat commit list to compare against the repository.
        branch_state: dict updated in place with the commit found for *branch*.
        branch: branch name used as key into the state and *branch_state*.

    Returns:
        list: the entries visited after the last match, in visiting order
        (all entries, reversed, when nothing matched).
    """
    state_data = self.reconstruct_state(flats)

    logging.debug(f"state data: {state_data}")
    known_dbid = state_data.get(branch)
    pending = []
    for flat in reversed(flats):
        if flat.commit.dbid == known_dbid:
            branch_state[branch] = flat.commit
            flat.commit.git_commit = self.git.branch_head(branch)
            logging.debug(
                f"Found {self.git.path}'s {branch} branch in state {flat}"
            )
            # everything seen so far is already in the repository
            pending = []
        else:
            pending.append(flat)
    return pending
|
|
|
|
def devel_rev(self, tree):
    """Return the first entry of *tree* whose ``branch`` is ``"devel"``.

    Returns:
        The matching entry, or None when *tree* has no devel entry.
    """
    return next((flat for flat in tree if flat.branch == "devel"), None)
|
|
|
|
def export_as_git(self):
    """Export the package history into the ``factory`` and ``devel`` branches.

    Steps:
      1. When the ``CHECK_ALL_LFS`` environment variable is set, run
         ``LFSOid.check_all`` for the package.
      2. Build the revision tree; return without doing anything when
         ``TreeBuilder.build`` yields None.
      3. Commit every flat that ``check_repo_state`` reports as missing on
         ``factory``.
      4. For ``openSUSE:Factory`` with a devel entry in the tree, build the
         revision chain of the devel project, mark each entry as ``devel``
         and commit what is missing there.
      5. When no devel state is known afterwards, point ``devel`` at the
         head of ``factory``.

    Nothing is pushed.
    """
    if os.getenv("CHECK_ALL_LFS"):
        LFSOid.check_all(self.db, self.package)
    tree = TreeBuilder(self.db).build(self.project, self.package)

    if tree is None:  # eg. python-M2Crypto errors
        return
    flats = tree.as_flat_list()
    branch_state = {"factory": None, "devel": None}
    users = dict()

    def commit_all(pending):
        """Commit each flat of *pending* in order, with a periodic gc."""
        for flat in pending:
            userid = flat.commit.userid
            # every user is looked up only once per export
            if userid not in users:
                users[userid] = User.find(self.db, userid)
            flat.user = users[userid]
            self.gc_cnt -= 1
            # a falsy gc_interval disables the periodic gc
            if self.gc_cnt <= 0 and self.gc_interval:
                self.run_gc()
            logging.debug(f"Committing {flat}")
            self.commit_flat(flat, branch_state)

    left_to_commit = self.check_repo_state(flats, branch_state, "factory")
    logging.info(f"Commiting into {self.git.path} {len(left_to_commit)} into factory")
    # also initialises the gc countdown used by commit_all()
    self.run_gc()
    commit_all(left_to_commit)

    # export the devel_tree head commits based on the devel branch
    if self.project == "openSUSE:Factory":
        devel_head = self.devel_rev(flats)
        flat_devel = None
        if devel_head is not None:
            logging.debug(f"building devel revisions chain for {devel_head.commit.project} / {self.package}")
            flat_devel = TreeBuilder(self.db).revisions_chain(devel_head.commit.project, self.package).as_flat_list()
            for f in flat_devel:
                f.branch = "devel"

        if flat_devel is not None:
            left_to_commit = self.check_repo_state(flat_devel, branch_state, "devel")
            logging.debug(branch_state)
            logging.debug(f"appending {len(left_to_commit)} items on top of devel")
            commit_all(left_to_commit)

    # make sure that we create devel branch
    # NOTE(review): the original indentation was lost; this step is assumed
    # to run for every project, not only for openSUSE:Factory -- confirm.
    if not branch_state["devel"]:
        logging.debug("force creating devel")
        self.git.set_branch_head("devel", self.git.branch_head("factory"))

    # NOTE: pushing is disabled here; the previous code force-pushed
    # (self.git.push(force=True)) whenever a commit had been added.
|
|
|
|
def run_gc(self):
    """Reload the commit countdown from ``self.gc_interval`` and run ``self.git.gc()``."""
    # the countdown is reset first, then the repository is collected
    self.gc_cnt = self.gc_interval
    self.git.gc()
|
|
|
|
def is_lfs_file(self, package, filename, size):
    """Decide whether a file is to be stored through LFS.

    Args:
        package: package the file belongs to.
        filename: name of the file.
        size: size handed to ``is_binary_or_large`` together with *filename*.

    Returns:
        False when ``is_binary_or_large(filename, size)`` is falsy; otherwise
        the negation of ``self.proxy_sha256.is_text(package, filename)``.
    """
    if is_binary_or_large(filename, size):
        # binary/large candidates still count as non-LFS when known as text
        return not self.proxy_sha256.is_text(package, filename)
    return False
|
|
|
|
def commit_file(self, flat, file, size, md5):
    """Add one file of a commit to the git work tree, via LFS when appropriate.

    Args:
        flat: entry being committed; its ``commit`` supplies project,
            package and expanded srcmd5.
        file: file object; only its ``name`` is read here.
        size: file size, passed on to the LFS helpers.
        md5: md5 of the file, passed on to the sha256 proxy and the download.
    """
    # don't export imported _service: files, if any
    if file.name[:9] == '_service:':
        return

    commit = flat.commit
    # have such files been detected as text mimetype before?
    if not self.is_lfs_file(commit.package, file.name, size):
        self.commit_non_lfs_file(flat, file, md5)
        return

    file_sha256 = self.proxy_sha256.get_or_put(
        commit.project,
        commit.package,
        file.name,
        commit.expanded_srcmd5,
        md5,
        size,
    )
    # as it's newly registered, it might be a text file now, so double check
    if self.proxy_sha256.is_text(commit.package, file.name):
        self.commit_non_lfs_file(flat, file, md5)
        return
    self.git.add_lfs(file.name, file_sha256, size)
|
|
|
|
def commit_non_lfs_file(self, flat, file, md5):
    """Download a file from OBS into the git work tree and add it to git.

    Args:
        flat: entry being committed; its ``commit`` supplies the API URL,
            project, package and expanded srcmd5 for the download.
        file: file object; ``file.name`` is downloaded and *file* is added.
        md5: passed to the download as ``file_md5``.
    """
    commit = flat.commit
    # the OBS client is switched to the API the commit came from
    self.obs.change_url(commit.api_url)
    self.obs.download(
        commit.project,
        commit.package,
        file.name,
        commit.expanded_srcmd5,
        self.git.path,
        self.cachedir,
        file_md5=md5,
    )
    self.git.add(file)
|
|
|
|
def branch_fits_parent1(self, flat, branch_state):
    """Tell whether the branch currently sits on the first parent of *flat*.

    Args:
        flat: entry to commit; ``flat.branch`` selects the branch and
            ``flat.parent1`` is the commit it expects below itself.
        branch_state: dict mapping branch names to the last known commit.

    Returns:
        True when no state is recorded for the branch, otherwise the result
        of comparing ``flat.parent1`` with the recorded commit.
    """
    current = branch_state[flat.branch]
    # everything fits nothing
    return current is None or flat.parent1 == current
|
|
|
|
def commit_flat(self, flat, branch_state):
    """Create the git commit for *flat* on its branch.

    The branch is checked out (and reset onto the first parent when it does
    not sit there), the default LFS attributes and gitignore are added, the
    file delta against the recorded branch state is applied, and the result
    is committed with the parents' git commits.

    Args:
        flat: entry to commit; needs ``branch``, ``parent1``, ``parent2``,
            ``commit`` and ``user``.
        branch_state: dict of branch name -> last commit; the entry for
            ``flat.branch`` is replaced by ``flat.commit`` at the end.
    """
    parent_commits = []
    self.git.checkout(flat.branch)

    if flat.parent1:
        if not self.branch_fits_parent1(flat, branch_state):
            logging.debug(f"Reset {flat.branch} onto {flat.parent1.short_string()}")
            assert flat.parent1.git_commit
            self.git.set_branch_head(flat.branch, flat.parent1.git_commit)
            self.git.checkout(flat.branch)
            # NOTE(review): branch_state is not updated after this reset, so
            # the delta below is still computed against the previous branch
            # state -- confirm this is intended.
        parent_commits.append(flat.parent1.git_commit)
    if flat.parent2:
        # a second parent turns this into a merge commit
        assert flat.parent2.git_commit
        parent_commits.append(flat.parent2.git_commit)

    # create file if not existent
    self.git.add_default_lfs_gitattributes(force=False)
    self.git.add_default_gitignore()

    delta = flat.commit.calc_delta(branch_state[flat.branch])
    to_download, to_delete = delta
    for stale in to_delete:
        self.git.remove(stale)
    for entry, size, md5 in to_download:
        self.commit_file(flat, entry, size, md5)

    author = flat.user
    new_commit = self.git.commit(
        author.realname,
        author.email,
        flat.commit.commit_time,
        flat.commit.git_commit_message(),
        parents=parent_commits,
    )
    flat.commit.git_commit = new_commit
    branch_state[flat.branch] = flat.commit
|