Split GitExporter out of Importer class
This commit is contained in:
167
lib/git_exporter.py
Normal file
167
lib/git_exporter.py
Normal file
@@ -0,0 +1,167 @@
|
||||
import logging
|
||||
import os
|
||||
from lib.binary import is_binary_or_large
|
||||
import yaml
|
||||
from lib.db import DB
|
||||
from lib.git import Git
|
||||
from lib.obs import OBS
|
||||
from lib.proxy_sha256 import ProxySHA256, md5
|
||||
from lib.tree_builder import TreeBuilder
|
||||
|
||||
class GitExporter:
    """Export the OBS history of a single package into a git repository.

    The exporter builds the revision tree of ``project``/``package`` from
    the database, flattens it, and replays every not-yet-exported entry as
    a git commit.  The database id of the last exported commit of each
    branch ("factory" and "devel") is persisted in a YAML state file inside
    the repository's ``.git`` directory so later runs can resume.
    """

    def __init__(self, api_url, project, package, repodir):
        """Create the OBS client, the sha256 proxy and the git repository.

        Args:
            api_url: URL the OBS client is switched to via ``change_url``.
            project: OBS project name of the package to export.
            package: OBS package name to export.
            repodir: location passed to ``Git`` for the target repository.
        """
        self.obs = OBS()
        self.project = project
        self.package = package
        # TODO: Store the api url in the revision
        self.obs.change_url(api_url)
        self.proxy_sha256 = ProxySHA256(self.obs, enabled=True)
        self.git = Git(
            repodir,
            committer="Git OBS Bridge",
            committer_email="obsbridge@suse.de",
        ).create()
        # Resume information: maps branch name -> dbid of last exported commit.
        self.state_file = os.path.join(self.git.path, ".git", "_flat_state.yaml")
        # Number of commits between two explicit ``git gc`` runs.
        self.gc_interval = 200

    def download(self, revision):
        """Bring the git working tree in line with the files of ``revision``.

        Files that are binary or large (and were not previously detected as
        text) are registered through ``git.add_lfs`` using the sha256
        obtained from the proxy; all other files are downloaded when their
        (name, size, md5) triple is not already present in the working
        tree.  Files present in git but absent from OBS are removed.

        Args:
            revision: object providing ``project``, ``package`` and
                ``srcmd5`` attributes identifying the OBS revision.

        Raises:
            Exception: if the MD5 of a downloaded file does not match the
                MD5 reported by OBS.
        """
        obs_files = self.obs.files(revision.project, revision.package, revision.srcmd5)
        git_files = {
            (f.name, f.stat().st_size, md5(f))
            for f in self.git.path.iterdir()
            # Exact name comparison on purpose: `in (".gitattributes")` is a
            # substring test against a plain string and would also hide
            # files named e.g. "git" or "attributes".
            if f.is_file() and f.name != ".gitattributes"
        }

        # Overwrite ".gitattributes" with the default LFS attributes
        self.git.add_default_lfs_gitattributes(force=True)

        # Download each file in OBS if it is not a binary (or large)
        # file
        for (name, size, file_md5) in obs_files:
            # this file creates easily 100k commits and is just useless data :(
            # unfortunately it's stored in the same meta package as the project config
            if revision.package == "_project" and name == "_staging_workflow":
                continue
            # have such files been detected as text mimetype before?
            is_text = self.proxy_sha256.is_text(name)
            if not is_text and is_binary_or_large(name, size):
                file_sha256 = self.proxy_sha256.get_or_put(
                    revision.project,
                    revision.package,
                    name,
                    revision.srcmd5,
                    file_md5,
                    size,
                )
                self.git.add_lfs(name, file_sha256["sha256"], size)
            else:
                if (name, size, file_md5) not in git_files:
                    logging.debug(f"Download {name}")
                    self.obs.download(
                        revision.project,
                        revision.package,
                        name,
                        revision.srcmd5,
                        self.git.path,
                    )
                    # Validate the MD5 of the downloaded file
                    if md5(self.git.path / name) != file_md5:
                        raise Exception(f"Download error in {name}")
                self.git.add(name)

        # Remove extra files
        obs_names = {n for (n, _, _) in obs_files}
        git_names = {n for (n, _, _) in git_files}
        for name in git_names - obs_names:
            logging.debug(f"Remove {name}")
            self.git.remove(name)

    def set_gc_interval(self, gc):
        """Set how many commits are created between two ``git gc`` runs.

        A falsy value disables the periodic gc inside the commit loop of
        ``export_as_git``.
        """
        self.gc_interval = gc

    def export_as_git(self):
        """Replay all not-yet-exported flat commits into the repository.

        The flat list is walked in reverse order.  Whenever an entry
        matches the dbid stored in the state file for a branch, that branch
        is considered exported up to this entry and everything collected
        so far is discarded; entries seen afterwards are committed.
        """
        db = DB()
        tree = TreeBuilder(db).build(self.project, self.package)
        flats = tree.as_flat_list()

        branch_state = {"factory": None, "devel": None}
        state_data = {}
        if os.path.exists(self.state_file):
            with open(self.state_file, "r") as f:
                state_data = yaml.safe_load(f)
                # An empty or malformed state file must not break the lookup
                # below, which relies on ``dict.get``.
                if not isinstance(state_data, dict):
                    state_data = {}
        left_to_commit = []
        for flat in reversed(flats):
            found_state = False
            for branch in ["factory", "devel"]:
                if flat.commit.dbid == state_data.get(branch):
                    branch_state[branch] = flat.commit
                    flat.commit.git_commit = self.git.branch_head(branch)
                    logging.debug(
                        f"Found {self.git.path}'s {branch} branch in state {flat}"
                    )
                    # Everything queued before a known state is already in git.
                    left_to_commit = []
                    found_state = True
            if not found_state:
                left_to_commit.append(flat)

        gc_cnt = self.gc_interval
        if left_to_commit:
            self.git.gc()
        for flat in left_to_commit:
            gc_cnt -= 1
            if gc_cnt <= 0 and self.gc_interval:
                self.git.gc()
                gc_cnt = self.gc_interval
            logging.debug(f"Committing {flat}")
            self.commit_flat(db, flat, branch_state)

    def limit_download(self, file):
        """Return True when ``file`` ends with ".spec" or ".changes".

        ``commit_flat`` only downloads or removes files for which this
        returns True.
        """
        return file.endswith((".spec", ".changes"))

    def commit_flat(self, db, flat, branch_state):
        """Create the git commit for one flat entry and persist the state.

        Args:
            db: database handle forwarded to ``calc_delta``.
            flat: flat-list entry providing ``branch``, ``commit``,
                ``parent1`` and ``parent2``.
            branch_state: dict mapping "factory"/"devel" to the last commit
                exported on that branch (or None); updated in place.
        """
        parents = []
        self.git.checkout(flat.branch)
        if flat.parent1:
            parents.append(flat.parent1.git_commit)
        if flat.parent2:
            parents.append(flat.parent2.git_commit)
        to_download, to_delete = flat.commit.calc_delta(db, branch_state[flat.branch])
        for file in to_delete:
            if self.limit_download(file):
                self.git.remove(file)
        for file in to_download:
            if not self.limit_download(file):
                continue
            self.obs.download(
                flat.commit.project,
                flat.commit.package,
                file,
                flat.commit.expanded_srcmd5,
                self.git.path,
            )
            self.git.add(file)

        commit = self.git.commit(
            f"OBS User {flat.commit.userid}",
            "null@suse.de",
            flat.commit.commit_time,
            # TODO: Normalize better the commit message
            f"{flat.commit.comment}\n\n{flat.commit}",
            allow_empty=True,
            parents=parents,
        )
        flat.commit.git_commit = commit
        branch_state[flat.branch] = flat.commit

        # Build the payload first so the state file is only opened (and
        # thereby truncated) once the data to write is complete.
        data = {}
        for branch in ["factory", "devel"]:
            exported = branch_state[branch]
            if exported:
                data[branch] = exported.dbid
        with open(self.state_file, "w") as f:
            yaml.dump(data, f)
|
Reference in New Issue
Block a user