From 13f49f59c9e5bd0c835259ac9781046aa015a85f Mon Sep 17 00:00:00 2001 From: Stephan Kulow Date: Mon, 17 Oct 2022 14:55:30 +0200 Subject: [PATCH] Split out Git class --- git-importer.py | 257 ----------------------------------------------- git.py | 258 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 258 insertions(+), 257 deletions(-) create mode 100644 git.py diff --git a/git-importer.py b/git-importer.py index 8d9966d..4df6654 100755 --- a/git-importer.py +++ b/git-importer.py @@ -2,7 +2,6 @@ import argparse import errno -import fnmatch import functools import hashlib import itertools @@ -10,7 +9,6 @@ import logging import pathlib import re import shutil -import subprocess import sys import time import urllib.parse @@ -18,7 +16,6 @@ import xml.etree.ElementTree as ET from urllib.error import HTTPError import osc.core -import pygit2 import requests from request import Request @@ -212,260 +209,6 @@ def _files_hash(hash_alg, dirpath): files_md5 = functools.partial(_files_hash, md5) files_sha256 = functools.partial(_files_hash, sha256) - -class Git: - """Local git repository""" - - def __init__(self, path, committer=None, committer_email=None): - self.path = pathlib.Path(path) - self.committer = committer - self.committer_email = committer_email - - self.repo = None - - def is_open(self): - return self.repo is not None - - # TODO: Extend it to packages and files - def exists(self): - """Check if the path is a valid git repository""" - return (self.path / ".git").exists() - - def create(self): - """Create a local git repository""" - self.path.mkdir(parents=True, exist_ok=True) - # Convert the path to string, to avoid some limitations in - # older pygit2 - self.repo = pygit2.init_repository(str(self.path)) - return self - - def is_dirty(self): - """Check if there is something to commit""" - assert self.is_open() - - return self.repo.status() - - def branches(self): - return list(self.repo.branches) - - def branch(self, branch, commit=None): - if not commit: - commit = self.repo.head - else: - commit = self.repo.get(commit) - self.repo.branches.local.create(branch, commit) - - def checkout(self, branch): - """Checkout into the branch HEAD""" - new_branch = False - ref = f"refs/heads/{branch}" - if branch not in self.branches(): - self.repo.references["HEAD"].set_target(ref) - new_branch = True - else: - self.repo.checkout(ref) - return new_branch - - def commit( - self, - user, - user_email, - user_time, - message, - parents=None, - committer=None, - committer_email=None, - committer_time=None, - allow_empty=False, - ): - """Add all the files and create a new commit in the current HEAD""" - assert allow_empty or self.is_dirty() - - if not committer: - committer = self.committer if self.committer else self.user - committer_email = ( - self.committer_email if self.committer_email else self.user_email - ) - committer_time = committer_time if committer_time else user_time - - try: - self.repo.index.add_all() - except pygit2.GitError as e: - if not allow_empty: - raise e - - self.repo.index.write() - author = pygit2.Signature(user, user_email, int(user_time.timestamp())) - committer = pygit2.Signature( - committer, committer_email, int(committer_time.timestamp()) - ) - if not parents: - try: - parents = [self.repo.head.target] - except pygit2.GitError as e: - parents = [] - if not allow_empty: - raise e - - tree = self.repo.index.write_tree() - return self.repo.create_commit( - "HEAD", author, committer, message, tree, parents - ) - - def merge( - self, - user, - user_email, - user_time, - message, - commit, - committer=None, - committer_email=None, - committer_time=None, - clean_on_conflict=True, - merged=False, - allow_empty=False, - ): - new_branch = False - - if not merged: - try: - self.repo.merge(commit) - except KeyError: - # If it is the first commit, we will have a missing - # "HEAD", but the files will be there. We can proceed - # to the commit directly. - new_branch = True - - if not merged and self.repo.index.conflicts: - for conflict in self.repo.index.conflicts: - conflict = [c for c in conflict if c] - if conflict: - logging.info(f"CONFLICT {conflict[0].path}") - - if clean_on_conflict: - self.clean() - # Now I miss Rust enums - return "CONFLICT" - - # Some merges are empty in OBS (no changes, not sure - # why), for now we signal them - if not allow_empty and not self.is_dirty(): - # I really really do miss Rust enums - return "EMPTY" - - if new_branch: - parents = [commit] - else: - parents = [ - self.repo.head.target, - commit, - ] - commit = self.commit( - user, - user_email, - user_time, - message, - parents, - committer, - committer_email, - committer_time, - allow_empty=allow_empty, - ) - - return commit - - def merge_abort(self): - self.repo.state_cleanup() - - def last_commit(self): - try: - return self.repo.head.target - except: - return None - - def gc(self): - logging.info(f"Garbage recollec and repackage {self.path}") - subprocess.run( - ["git", "gc", "--auto"], - cwd=self.path, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - ) - - def clean(self): - for path, _ in self.repo.status().items(): - logging.debug(f"Cleaning {path}") - try: - (self.path / path).unlink() - self.repo.index.remove(path) - except Exception as e: - logging.warning(f"Error removing file {path}: {e}") - - def add(self, filename): - self.repo.index.add(filename) - - def add_default_lfs_gitattributes(self, force=False): - if not (self.path / ".gitattributes").exists() or force: - with (self.path / ".gitattributes").open("w") as f: - content = ["## Default LFS"] - content += [f"*{b} {LFS_SUFFIX}" for b in sorted(BINARY)] - f.write("\n".join(content)) - f.write("\n") - self.add(".gitattributes") - - def add_specific_lfs_gitattributes(self, binaries): - self.add_default_lfs_gitattributes(force=True) - if binaries: - with (self.path / ".gitattributes").open("a") as f: - content = ["## Specific LFS patterns"] - content += [f"{b} {LFS_SUFFIX}" for b in sorted(binaries)] - f.write("\n".join(content)) - f.write("\n") - self.add(".gitattributes") - - def get_specific_lfs_gitattributes(self): - with (self.path / ".gitattributes").open() as f: - patterns = [ - line.split()[0] - for line in f - if line.strip() and not line.startswith("#") - ] - binary = {f"*{b}" for b in BINARY} - return [p for p in patterns if p not in binary] - - def add_lfs(self, filename, sha256, size): - with (self.path / filename).open("w") as f: - f.write("version https://git-lfs.github.com/spec/v1\n") - f.write(f"oid sha256:{sha256}\n") - f.write(f"size {size}\n") - self.add(filename) - - if not self.is_lfs_tracked(filename): - logging.debug(f"Add specific LFS file {filename}") - specific_patterns = self.get_specific_lfs_gitattributes() - specific_patterns.append(filename) - self.add_specific_lfs_gitattributes(specific_patterns) - - def is_lfs_tracked(self, filename): - with (self.path / ".gitattributes").open() as f: - patterns = ( - line.split()[0] - for line in f - if line.strip() and not line.startswith("#") - ) - return any(fnmatch.fnmatch(filename, line) for line in patterns) - - def remove(self, filename): - self.repo.index.remove(filename) - (self.path / filename).unlink() - - patterns = self.get_specific_lfs_gitattributes() - if filename in patterns: - patterns.remove(filename) - self.add_specific_lfs_gitattributes(patterns) - - class OBS: def __init__(self, url=None): if url: diff --git a/git.py b/git.py new file mode 100644 index 0000000..3a21738 --- /dev/null +++ b/git.py @@ -0,0 +1,258 @@ +import pathlib +import pygit2 +import logging +import subprocess +import fnmatch + + +class Git: + """Local git repository""" + + def __init__(self, path, committer=None, committer_email=None): + self.path = pathlib.Path(path) + self.committer = committer + self.committer_email = committer_email + + self.repo = None + + def is_open(self): + return self.repo is not None + + # TODO: Extend it to packages and files + def exists(self): + """Check if the path is a valid git repository""" + return (self.path / ".git").exists() + + def create(self): + """Create a local git repository""" + self.path.mkdir(parents=True, exist_ok=True) + # Convert the path to string, to avoid some limitations in + # older pygit2 + self.repo = pygit2.init_repository(str(self.path)) + return self + + def is_dirty(self): + """Check if there is something to commit""" + assert self.is_open() + + return self.repo.status() + + def branches(self): + return list(self.repo.branches) + + def branch(self, branch, commit=None): + if not commit: + commit = self.repo.head + else: + commit = self.repo.get(commit) + self.repo.branches.local.create(branch, commit) + + def checkout(self, branch): + """Checkout into the branch HEAD""" + new_branch = False + ref = f"refs/heads/{branch}" + if branch not in self.branches(): + self.repo.references["HEAD"].set_target(ref) + new_branch = True + else: + self.repo.checkout(ref) + return new_branch + + def commit( + self, + user, + user_email, + user_time, + message, + parents=None, + committer=None, + committer_email=None, + committer_time=None, + allow_empty=False, + ): + """Add all the files and create a new commit in the current HEAD""" + assert allow_empty or self.is_dirty() + + if not committer: + committer = self.committer if self.committer else self.user + committer_email = ( + self.committer_email if self.committer_email else self.user_email + ) + committer_time = committer_time if committer_time else user_time + + try: + self.repo.index.add_all() + except pygit2.GitError as e: + if not allow_empty: + raise e + + self.repo.index.write() + author = pygit2.Signature(user, user_email, int(user_time.timestamp())) + committer = pygit2.Signature( + committer, committer_email, int(committer_time.timestamp()) + ) + if not parents: + try: + parents = [self.repo.head.target] + except pygit2.GitError as e: + parents = [] + if not allow_empty: + raise e + + tree = self.repo.index.write_tree() + return self.repo.create_commit( + "HEAD", author, committer, message, tree, parents + ) + + def merge( + self, + user, + user_email, + user_time, + message, + commit, + committer=None, + committer_email=None, + committer_time=None, + clean_on_conflict=True, + merged=False, + allow_empty=False, + ): + new_branch = False + + if not merged: + try: + self.repo.merge(commit) + except KeyError: + # If it is the first commit, we will have a missing + # "HEAD", but the files will be there. We can proceed + # to the commit directly. + new_branch = True + + if not merged and self.repo.index.conflicts: + for conflict in self.repo.index.conflicts: + conflict = [c for c in conflict if c] + if conflict: + logging.info(f"CONFLICT {conflict[0].path}") + + if clean_on_conflict: + self.clean() + # Now I miss Rust enums + return "CONFLICT" + + # Some merges are empty in OBS (no changes, not sure + # why), for now we signal them + if not allow_empty and not self.is_dirty(): + # I really really do miss Rust enums + return "EMPTY" + + if new_branch: + parents = [commit] + else: + parents = [ + self.repo.head.target, + commit, + ] + commit = self.commit( + user, + user_email, + user_time, + message, + parents, + committer, + committer_email, + committer_time, + allow_empty=allow_empty, + ) + + return commit + + def merge_abort(self): + self.repo.state_cleanup() + + def last_commit(self): + try: + return self.repo.head.target + except: + return None + + def gc(self): + logging.info(f"Garbage recollec and repackage {self.path}") + subprocess.run( + ["git", "gc", "--auto"], + cwd=self.path, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + ) + + def clean(self): + for path, _ in self.repo.status().items(): + logging.debug(f"Cleaning {path}") + try: + (self.path / path).unlink() + self.repo.index.remove(path) + except Exception as e: + logging.warning(f"Error removing file {path}: {e}") + + def add(self, filename): + self.repo.index.add(filename) + + def add_default_lfs_gitattributes(self, force=False): + if not (self.path / ".gitattributes").exists() or force: + with (self.path / ".gitattributes").open("w") as f: + content = ["## Default LFS"] + content += [f"*{b} {LFS_SUFFIX}" for b in sorted(BINARY)] + f.write("\n".join(content)) + f.write("\n") + self.add(".gitattributes") + + def add_specific_lfs_gitattributes(self, binaries): + self.add_default_lfs_gitattributes(force=True) + if binaries: + with (self.path / ".gitattributes").open("a") as f: + content = ["## Specific LFS patterns"] + content += [f"{b} {LFS_SUFFIX}" for b in sorted(binaries)] + f.write("\n".join(content)) + f.write("\n") + self.add(".gitattributes") + + def get_specific_lfs_gitattributes(self): + with (self.path / ".gitattributes").open() as f: + patterns = [ + line.split()[0] + for line in f + if line.strip() and not line.startswith("#") + ] + binary = {f"*{b}" for b in BINARY} + return [p for p in patterns if p not in binary] + + def add_lfs(self, filename, sha256, size): + with (self.path / filename).open("w") as f: + f.write("version https://git-lfs.github.com/spec/v1\n") + f.write(f"oid sha256:{sha256}\n") + f.write(f"size {size}\n") + self.add(filename) + + if not self.is_lfs_tracked(filename): + logging.debug(f"Add specific LFS file {filename}") + specific_patterns = self.get_specific_lfs_gitattributes() + specific_patterns.append(filename) + self.add_specific_lfs_gitattributes(specific_patterns) + + def is_lfs_tracked(self, filename): + with (self.path / ".gitattributes").open() as f: + patterns = ( + line.split()[0] + for line in f + if line.strip() and not line.startswith("#") + ) + return any(fnmatch.fnmatch(filename, line) for line in patterns) + + def remove(self, filename): + self.repo.index.remove(filename) + (self.path / filename).unlink() + + patterns = self.get_specific_lfs_gitattributes() + if filename in patterns: + patterns.remove(filename) + self.add_specific_lfs_gitattributes(patterns)