mirror of
https://github.com/openSUSE/osc.git
synced 2025-09-10 15:08:43 +02:00
96 lines
3.2 KiB
Python
96 lines
3.2 KiB
Python
import os
|
|
import shutil
|
|
import subprocess
|
|
from typing import Iterator
|
|
|
|
from . import git
|
|
|
|
|
|
# Revision passed to `git diff` in place of a branch when one side of the comparison has no archive
# (see TarDiff.diff_archives(), which pairs it with the "/dev/null" prefix).
# NOTE(review): despite the name, this value appears to be the well-known SHA-1 id of git's empty *tree* object
# rather than a commit - confirm with `git hash-object -t tree /dev/null`.
GIT_EMPTY_COMMIT = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"
|
|
|
|
|
|
class TarDiff:
    """
    Store expanded archives as branches of a scratch git repository and diff them.

    Every archive is extracted into its own orphan branch named ``<basename>-<checksum>``.
    After each import the repository is switched back to the ``empty`` branch.
    """

    def __init__(self, path):
        """
        Initialize the scratch git repository.

        :param path: Location of the repository; it is passed to ``git.Git`` and the resulting
                     absolute path is created if it doesn't exist.
        """
        self.git = git.Git(path)
        os.makedirs(self.path, exist_ok=True)
        self.git.init(initial_branch="empty", quiet=True, mute_stderr=True)
        # the git repo is switched to this branch by default to hide the files from disk
        self.git.commit("empty branch", allow_empty=True)

    @property
    def path(self):
        """Absolute path to the scratch git repository."""
        return self.git.abspath

    def _get_branch_name(self, filename: str, checksum: str) -> str:
        """Return the branch name for an archive: base name of ``filename`` and ``checksum`` joined with a dash."""
        filename = os.path.basename(filename)
        return f"{filename}-{checksum}"

    def _clear_worktree(self) -> None:
        """Remove everything from the repository directory except the ".git" directory."""
        for fn in os.listdir(self.path):
            if fn == ".git":
                continue
            entry = os.path.join(self.path, fn)
            # shutil.rmtree() refuses regular files and symlinks, those must be unlinked instead
            if os.path.isdir(entry) and not os.path.islink(entry):
                shutil.rmtree(entry)
            else:
                os.unlink(entry)

    def _extract_archive(self, data: Iterator[bytes]) -> None:
        """
        Feed ``data`` to bsdtar running in the repository directory.

        :raises subprocess.CalledProcessError: If bsdtar exits with a non-zero return code.
        """
        # We use bsdtar, because tar cannot determine compression from stdin automatically.
        # Stripping the first path component works fine for regular source archives with %{name}/%{version}/ prefix
        # but it may need an improvement for other archives.
        cmd = ["bsdtar", "xf", "-", "--strip-components=1"]
        proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, cwd=self.path)
        assert proc.stdin is not None
        try:
            for chunk in data:
                proc.stdin.write(chunk)
        except BrokenPipeError:
            # bsdtar stopped reading before we ran out of data; its return code decides whether that is an error
            pass
        except BaseException:
            # the data source failed; don't leave bsdtar waiting for more input
            proc.kill()
            raise
        finally:
            # closes stdin (tolerating a broken pipe) and reaps the process
            proc.communicate()

        # an explicit check instead of `assert`, which is stripped when running with `python -O`
        if proc.returncode != 0:
            raise subprocess.CalledProcessError(proc.returncode, cmd)

    def add_archive(self, filename: str, checksum: str, data: Iterator[bytes]) -> str:
        """
        Create a branch with expanded archive.
        The easiest way of obtaining the `checksum` is via running `git lfs ls-files --long`.

        :param filename: Name of the archive; only the base name is used.
        :param checksum: Checksum of the archive; it becomes part of the branch name.
        :param data: Chunks of the archive contents.
        :return: Name of the branch holding the expanded archive.
        :raises subprocess.CalledProcessError: If the archive cannot be extracted.
        """
        # make sure we don't use the path anywhere
        filename = os.path.basename(filename)

        branch = self._get_branch_name(filename, checksum)

        # detect if a branch exists and is not empty; do nothing if it's the case
        if self.git.branch_exists(branch) and self.git.commit_count(branch) > 0:
            return branch

        # create an empty branch
        self.git.switch(branch, orphan=True)
        # TODO: mute stdout

        # remove any existing contents but ".git" directory
        self._clear_worktree()

        # extract archive
        self._extract_archive(data)

        # add files and commit
        self.git.add(["--all"])
        self.git.commit(msg=f"import {filename} with checksum {checksum}")

        self.git.switch("empty")

        # TODO: git gc?

        return branch

    def _get_diff_side(self, filename, checksum):
        """
        Return a ``(prefix, revision)`` pair for one side of a diff.

        A falsy ``filename`` stands for a missing archive and yields ``("/dev/null", GIT_EMPTY_COMMIT)``.
        Otherwise the pair consists of the base name of ``filename`` and the full ref of the archive's branch.
        """
        if not filename:
            return "/dev/null", GIT_EMPTY_COMMIT
        filename = os.path.basename(filename)
        branch = self._get_branch_name(filename, checksum)
        return filename, f"refs/heads/{branch}"

    def diff_archives(self, src_filename, src_checksum, dst_filename, dst_checksum) -> Iterator[bytes]:
        """
        Yield the output of ``self.git.diff()`` between two archives imported via ``add_archive()``.

        Pass a falsy ``src_filename`` or ``dst_filename`` to compare against nothing
        (the corresponding side is then labeled "/dev/null").
        """
        src_filename, src_branch = self._get_diff_side(src_filename, src_checksum)
        dst_filename, dst_branch = self._get_diff_side(dst_filename, dst_checksum)
        yield from self.git.diff(src_branch, dst_branch, src_prefix=src_filename, dst_prefix=dst_filename)
|