git-importer/lib/git_exporter.py

176 lines
6.0 KiB
Python
Raw Permalink Normal View History

import logging
import os
import yaml
from lib.binary import is_binary_or_large
from lib.db import DB
from lib.git import Git
from lib.lfs_oid import LFSOid
from lib.obs import OBS
2022-11-06 09:49:52 +01:00
from lib.proxy_sha256 import ProxySHA256
from lib.tree_builder import TreeBuilder
2022-11-06 09:49:52 +01:00
from lib.user import User
class GitExporter:
    """Replay the flattened OBS history of one package into a git repository.

    The exporter remembers, per branch, the database id of the last commit
    it wrote (in a YAML file inside the repository's ``.git`` directory) so
    that a later run only has to commit what is still missing.
    """

    # Branches whose export progress is tracked in the state file.
    _BRANCHES = ("factory", "devel")

    def __init__(self, api_url, project, package, repodir, cachedir):
        """Open the git repository for *package*, creating it if needed.

        Args:
            api_url: passed to ``OBS``.
            project: project name handed to the tree builder on export.
            package: package name; also the repository directory name
                below *repodir*.
            repodir: parent directory of the repository. It is combined
                with *package* via ``/`` (presumably a ``pathlib.Path`` --
                confirm against the caller).
            cachedir: passed through to ``OBS.download``.
        """
        self.obs = OBS(api_url)
        self.project = project
        self.package = package
        self.db = DB()
        self.proxy_sha256 = ProxySHA256(self.obs, self.db)
        self.git = Git(
            repodir / package,
            committer="Git OBS Bridge",
            committer_email="obsbridge@suse.de",
        )
        if self.git.exists():
            self.git.open()
        else:
            self.git.create()
            self.git.add_gitea_remote(package)
        self.state_file = os.path.join(self.git.path, ".git", "_flat_state.yaml")
        self.gc_interval = 200
        # Defined here so the counter exists even before the first run_gc().
        self.gc_cnt = self.gc_interval
        self.cachedir = cachedir

    def set_gc_interval(self, gc):
        """Set how many commits are written between two ``git gc`` runs.

        A falsy value disables the periodic runs inside the commit loop.
        """
        self.gc_interval = gc

    def check_repo_state(self, flats, branch_state):
        """Work out which flat commits still have to be written to git.

        Args:
            flats: flat commit list; it is walked in reverse order.
            branch_state: dict keyed by branch name. For every branch whose
                recorded dbid matches a flat, the entry is set to that
                flat's commit (mutated in place).

        Returns:
            The flats, in reversed order, that come after the last flat
            matching a recorded branch state. All flats (reversed) if
            nothing matches.
        """
        state_data = {}
        if os.path.exists(self.state_file):
            with open(self.state_file) as f:
                state_data = yaml.safe_load(f)
            # An empty or otherwise unusable file is treated as "no state".
            if not isinstance(state_data, dict):
                state_data = {}

        left_to_commit = []
        for flat in reversed(flats):
            found_state = False
            for branch in self._BRANCHES:
                if flat.commit.dbid == state_data.get(branch):
                    branch_state[branch] = flat.commit
                    flat.commit.git_commit = self.git.branch_head(branch)
                    logging.debug(
                        f"Found {self.git.path}'s {branch} branch in state {flat}"
                    )
                    # Everything collected up to here is already in git.
                    left_to_commit = []
                    found_state = True
            if not found_state:
                left_to_commit.append(flat)
        return left_to_commit

    def export_as_git(self):
        """Commit all not yet exported flats and force-push the result.

        Returns early, without pushing, when there is nothing to commit.
        """
        if os.getenv("CHECK_ALL_LFS"):
            LFSOid.check_all(self.db, self.package)
        tree = TreeBuilder(self.db).build(self.project, self.package)
        flats = tree.as_flat_list()

        branch_state = dict.fromkeys(self._BRANCHES)
        left_to_commit = self.check_repo_state(flats, branch_state)
        if not left_to_commit:
            return

        logging.info(f"Commiting into {self.git.path}")
        self.run_gc()
        users = {}  # userid -> User, so each user is looked up only once

        for flat in left_to_commit:
            if flat.commit.userid not in users:
                users[flat.commit.userid] = User.find(self.db, flat.commit.userid)
            flat.user = users[flat.commit.userid]
            self.gc_cnt -= 1
            if self.gc_cnt <= 0 and self.gc_interval:
                self.run_gc()
            logging.debug(f"Committing {flat}")
            self.commit_flat(flat, branch_state)

        self.git.push(force=True)

    def run_gc(self):
        """Reset the commit countdown and run ``gc`` on the repository."""
        self.gc_cnt = self.gc_interval
        self.git.gc()

    def is_lfs_file(self, package, filename, size):
        """Return True if the file should be stored via LFS.

        That is the case when ``is_binary_or_large`` flags it and the
        sha256 proxy does not report it as text.
        """
        if not is_binary_or_large(filename, size):
            return False
        return not self.proxy_sha256.is_text(package, filename)

    def commit_file(self, flat, file, size, md5):
        """Stage one file of *flat*, either as LFS pointer or as plain file."""
        # have such files been detected as text mimetype before?
        if self.is_lfs_file(flat.commit.package, file.name, size):
            file_sha256 = self.proxy_sha256.get_or_put(
                flat.commit.project,
                flat.commit.package,
                file.name,
                flat.commit.expanded_srcmd5,
                md5,
                size,
            )
            # as it's newly registered, it might be a text file now, so double check
            if not self.proxy_sha256.is_text(flat.commit.package, file.name):
                self.git.add_lfs(file.name, file_sha256, size)
                return
        self.commit_non_lfs_file(flat, file, md5)

    def commit_non_lfs_file(self, flat, file, md5):
        """Download *file* into the working tree and stage it."""
        self.obs.change_url(flat.commit.api_url)
        self.obs.download(
            flat.commit.project,
            flat.commit.package,
            file.name,
            flat.commit.expanded_srcmd5,
            self.git.path,
            self.cachedir,
            file_md5=md5,
        )
        self.git.add(file)

    def branch_fits_parent1(self, flat, branch_state):
        """Return True if the flat's branch currently sits on its first parent.

        A branch without recorded state fits any parent.
        """
        if branch_state[flat.branch] is None:
            # everything fits nothing
            return True
        return flat.parent1 == branch_state[flat.branch]

    def commit_flat(self, flat, branch_state):
        """Create the git commit for *flat* and record it as branch state.

        Args:
            flat: the flat commit to write; ``flat.user`` must be set.
            branch_state: dict keyed by branch name; the entry for
                ``flat.branch`` is replaced by ``flat.commit``.
        """
        parents = []
        self.git.checkout(flat.branch)

        if flat.parent1:
            if not self.branch_fits_parent1(flat, branch_state):
                logging.debug(
                    f"Reset {flat.branch} onto {flat.parent1.short_string()}"
                )
                assert flat.parent1.git_commit
                self.git.set_branch_head(flat.branch, flat.parent1.git_commit)
                self.git.checkout(flat.branch)
            parents.append(flat.parent1.git_commit)
            if flat.parent2:
                assert flat.parent2.git_commit
                parents.append(flat.parent2.git_commit)

        # create file if not existent
        self.git.add_default_lfs_gitattributes(force=False)

        to_download, to_delete = flat.commit.calc_delta(branch_state[flat.branch])
        for file in to_delete:
            self.git.remove(file)
        for file, size, md5 in to_download:
            self.commit_file(flat, file, size, md5)

        flat.commit.git_commit = self.git.commit(
            flat.user.realname,
            flat.user.email,
            flat.commit.commit_time,
            flat.commit.git_commit_message(),
            parents=parents,
        )
        branch_state[flat.branch] = flat.commit
        self._write_state(branch_state)

    def _write_state(self, branch_state):
        """Persist the dbid of the last exported commit of every branch."""
        data = {
            branch: branch_state[branch].dbid
            for branch in self._BRANCHES
            if branch_state[branch]
        }
        # Write next to the real file and move it into place afterwards, so
        # an interrupted run cannot leave a truncated state file behind.
        tmp_file = f"{self.state_file}.tmp"
        with open(tmp_file, "w") as f:
            yaml.dump(data, f)
        os.replace(tmp_file, self.state_file)