autogits/bots-common/git_utils.go
Adam Majer 86df1921e0 devel-importer: handle history rewrite
Imports can have history rewritten because email addresses
can change in OBS and are not recorded as in git commits. This
can be handled via comparing Tree objects and rebasing
new changes ontop.
2024-09-18 17:17:24 +02:00

712 lines
15 KiB
Go

package common
/*
* This file is part of Autogits.
*
* Copyright © 2024 SUSE LLC
*
* Autogits is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 2 of the License, or (at your option) any later
* version.
*
* Autogits is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with
* Autogits. If not, see <https://www.gnu.org/licenses/>.
*/
import (
"fmt"
"io"
"log"
"net/url"
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
)
// GitHandler runs git commands below a private temporary directory.
type GitHandler struct {
	// DebugLogger enables logging of executed commands and their output.
	DebugLogger bool

	// GitPath is the root directory; per-command working directories are
	// joined onto it.
	GitPath string
	// GitCommiter is passed to git as both author and committer name.
	GitCommiter string
	// GitEmail is the email address supplied to CreateGitHandler.
	GitEmail string
}

// CreateGitHandler creates a fresh temporary directory (using name as the
// os.MkdirTemp pattern), restricts it to the owner, and returns a handler
// rooted there. git_author and email are stored in the handler.
//
// The caller is responsible for calling Close to remove the directory.
func CreateGitHandler(git_author, email, name string) (*GitHandler, error) {
	gitPath, err := os.MkdirTemp("", name)
	if err != nil {
		return nil, fmt.Errorf("Cannot create temp dir: %w", err)
	}

	if err = os.Chmod(gitPath, 0700); err != nil {
		// No handler will own this directory, so do not leave it behind.
		os.RemoveAll(gitPath)
		return nil, fmt.Errorf("Cannot fix permissions of temp dir: %w", err)
	}

	return &GitHandler{
		GitPath:     gitPath,
		GitCommiter: git_author,
		GitEmail:    email,
	}, nil
}
//func (h *GitHandler) ProcessBranchList() []string {
// if h.HasError() {
// return make([]string, 0)
// }
//
// trackedBranches, err := os.ReadFile(path.Join(h.GitPath, DefaultGitPrj, TrackedBranchesFile))
// if err != nil {
// if errors.Is(err, os.ErrNotExist) {
// trackedBranches = []byte("factory")
// } else {
// h.LogError("file error reading '%s' file in repo", TrackedBranchesFile)
// h.Error = err
// return make([]string, 0)
// }
// }
//
// return strings.Split(string(trackedBranches), "\n")
//}
// GitReference pairs a branch name with an object id.
type GitReference struct {
	Branch string
	Id     string
}

// GitReferences is a list of GitReference values without duplicates.
type GitReferences struct {
	refs []GitReference
}

// addReference records the (id, branch) pair unless an identical pair is
// already present.
func (refs *GitReferences) addReference(id, branch string) {
	candidate := GitReference{Branch: branch, Id: id}
	for i := range refs.refs {
		if refs.refs[i] == candidate {
			return
		}
	}
	refs.refs = append(refs.refs, candidate)
}
// CloneDevel clones urlString into outName (relative to gitDir), naming the
// remote "devel". A URL fragment, if present, selects the branch to check out
// and is stripped from the URL handed to git.
//
// It returns an error when the URL cannot be parsed or the clone fails.
func (e *GitHandler) CloneDevel(gitDir, outName, urlString string) error {
	remote, err := url.Parse(urlString)
	// The error must be checked before remote is touched: on failure
	// url.Parse returns a nil *URL.
	if err != nil {
		return fmt.Errorf("error parsing SSH URL. %w", err)
	}

	branch := remote.Fragment
	remote.Fragment = ""

	params := []string{"clone", "-o", "devel"}
	if len(branch) > 0 {
		params = append(params, "-b", branch)
	}
	params = append(params, remote.String(), outName)

	out, err := e.GitExecWithOutput(gitDir, params...)
	if err != nil {
		return fmt.Errorf("error cloning %s.\n%s\nerr: %w", urlString, out, err)
	}
	return nil
}
// GitBranchHead returns the id printed by `git rev-list -1 branchName` run in
// gitDir, with surrounding whitespace removed.
//
// On failure the underlying git error is wrapped so callers can inspect it.
func (e *GitHandler) GitBranchHead(gitDir, branchName string) (string, error) {
	id, err := e.GitExecWithOutput(gitDir, "rev-list", "-1", branchName)
	if err != nil {
		return "", fmt.Errorf("Can't find default remote branch: %s: %w", branchName, err)
	}
	return strings.TrimSpace(id), nil
}
// Close removes the handler's directory tree. GitPath is cleared only when
// the removal succeeded; otherwise the removal error is returned and GitPath
// is left untouched.
func (e *GitHandler) Close() error {
	err := os.RemoveAll(e.GitPath)
	if err == nil {
		e.GitPath = ""
	}
	return err
}
// writeFunc adapts a plain function to the Write, UnmarshalText and Close
// method sets, forwarding every call to the function itself.
type writeFunc func(data []byte) (int, error)

// Write invokes the wrapped function with data.
func (f writeFunc) Write(data []byte) (int, error) {
	n, err := f(data)
	return n, err
}

// UnmarshalText passes text to the wrapped function and reports only its error.
func (h writeFunc) UnmarshalText(text []byte) error {
	if _, err := h(text); err != nil {
		return err
	}
	return nil
}

// Close calls the wrapped function with a nil slice and reports only its error.
func (h writeFunc) Close() error {
	if _, err := h(nil); err != nil {
		return err
	}
	return nil
}
// GitExecWithOutputOrPanic runs git like GitExecWithOutput and returns its
// output. If the command fails it logs the parameters and error and panics.
func (e *GitHandler) GitExecWithOutputOrPanic(cwd string, params ...string) string {
	output, execErr := e.GitExecWithOutput(cwd, params...)
	if execErr == nil {
		return output
	}

	log.Panicln("git command failed:", params, "err:", execErr)
	return output
}
// GitExecOrPanic runs git like GitExec. If the command fails it logs the
// parameters and error and panics.
func (e *GitHandler) GitExecOrPanic(cwd string, params ...string) {
	execErr := e.GitExec(cwd, params...)
	if execErr == nil {
		return
	}
	log.Panicln("git command failed:", params, "err:", execErr)
}
// GitExec runs git like GitExecWithOutput but discards the output, reporting
// only whether the command succeeded.
func (e *GitHandler) GitExec(cwd string, params ...string) error {
	if _, err := e.GitExecWithOutput(cwd, params...); err != nil {
		return err
	}
	return nil
}
// GitExecWithOutput runs /usr/bin/git with params in the directory cwd
// (relative to GitPath), using a fixed environment and no stdin.
//
// It returns the combined stdout and stderr of the command. On failure the
// returned string is empty and the error carries the arguments, the output
// and the underlying error. Commands and output are logged when DebugLogger
// is set.
func (e *GitHandler) GitExecWithOutput(cwd string, params ...string) (string, error) {
	git := exec.Command("/usr/bin/git", params...)
	git.Dir = filepath.Join(e.GitPath, cwd)
	git.Stdin = nil
	git.Env = []string{
		"GIT_CEILING_DIRECTORIES=" + e.GitPath,
		"GIT_CONFIG_GLOBAL=/dev/null",
		"GIT_AUTHOR_NAME=" + e.GitCommiter,
		"GIT_COMMITTER_NAME=" + e.GitCommiter,
		"EMAIL=not@exist@src.opensuse.org",
		"GIT_LFS_SKIP_SMUDGE=1",
		"GIT_SSH_COMMAND=/usr/bin/ssh -o StrictHostKeyChecking=yes",
	}

	if e.DebugLogger {
		log.Printf("git execute: %#v\n", git.Args)
	}
	output, runErr := git.CombinedOutput()
	if e.DebugLogger {
		log.Println(string(output))
	}

	if runErr == nil {
		return string(output), nil
	}

	if e.DebugLogger {
		log.Printf(" *** error: %v\n", runErr)
	}
	return "", fmt.Errorf("error executing: git %#v \n%s\n err: %w", git.Args, output, runErr)
}
// ChanIO exposes a byte channel through Write and Read methods so it can be
// plugged in where a writer or reader is expected.
type ChanIO struct {
	ch chan byte
}

// Write sends every byte of p into the channel, blocking while the channel
// is full. It always reports len(p) bytes written and a nil error.
func (c *ChanIO) Write(p []byte) (int, error) {
	for _, b := range p {
		c.ch <- b
	}
	return len(p), nil
}

// Read blocks until at least one byte is available, then copies as many
// further bytes as are already buffered in the channel without blocking.
// It returns io.EOF once the channel is closed and drained.
//
// A zero-length data slice returns (0, nil) without consuming anything.
func (c *ChanIO) Read(data []byte) (idx int, err error) {
	if len(data) == 0 {
		// Nothing can be stored; indexing data[0] would panic.
		return 0, nil
	}

	var ok bool

	data[idx], ok = <-c.ch
	if !ok {
		err = io.EOF
		return
	}
	idx++

	for len(c.ch) > 0 && idx < len(data) {
		data[idx], ok = <-c.ch
		if !ok {
			err = io.EOF
			return
		}
		idx++
	}

	return
}
// gitMsg is the header that precedes an object's payload: the object hash,
// its type name and the payload size in bytes.
type gitMsg struct {
	hash     string
	itemType string
	size     int
}

// commit holds the parts of a commit object that are kept after parsing:
// the tree hash and the commit message.
type commit struct {
	Tree string
	Msg  string
}

// tree_entry is one entry of a tree object.
type tree_entry struct {
	name string
	mode int    // file mode, parsed from its octal representation
	hash string // hex-encoded object hash
	size int    // number of bytes the entry occupied in the tree object
}

// tree is the list of entries of a tree object.
type tree struct {
	items []tree_entry
}

// isSubmodule reports whether the type bits of the mode are 0160000.
func (t *tree_entry) isSubmodule() bool {
	const typeMask, submoduleType = 0170000, 0160000
	return t.mode&typeMask == submoduleType
}

// isTree reports whether the type bits of the mode are 0040000.
func (t *tree_entry) isTree() bool {
	const typeMask, treeType = 0170000, 0040000
	return t.mode&typeMask == treeType
}

// isBlob reports whether the entry is neither a tree nor a submodule.
func (t *tree_entry) isBlob() bool {
	switch t.mode & 0170000 {
	case 0040000, 0160000:
		return false
	}
	return true
}
// parseGitMsg reads an object header of the form
//
//	<hex hash> SP <type> SP <decimal size> NUL
//
// or, for an unknown object, "<hex hash> SP missing NUL", from data.
//
// It returns the parsed header. A "missing" object yields a gitMsg with
// itemType "missing" together with a non-nil error. Any unexpected character,
// an unknown type, or a hash/type token longer than the supported maximum
// results in an error.
func parseGitMsg(data <-chan byte) (gitMsg, error) {
	const (
		maxHashLen = 64
		maxTypeLen = 16
	)

	id := make([]byte, 0, maxHashLen)
	for c := <-data; c != ' '; c = <-data {
		isHex := (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f')
		if !isHex {
			return gitMsg{}, fmt.Errorf("Invalid character during object hash parse '%c' at %d", c, len(id))
		}
		// Refuse over-long input instead of writing past the buffer.
		if len(id) == maxHashLen {
			return gitMsg{}, fmt.Errorf("Object hash longer than %d characters", maxHashLen)
		}
		id = append(id, c)
	}

	msgType := make([]byte, 0, maxTypeLen)
	var c byte
	for c = <-data; c != ' ' && c != '\x00'; c = <-data {
		if c < 'a' || c > 'z' {
			return gitMsg{}, fmt.Errorf("Invalid character during object type parse '%c' at %d", c, len(msgType))
		}
		if len(msgType) == maxTypeLen {
			return gitMsg{}, fmt.Errorf("Object type longer than %d characters", maxTypeLen)
		}
		msgType = append(msgType, c)
	}

	switch string(msgType) {
	case "commit", "tree", "blob":
		// A size field follows.
	case "missing":
		// "missing" carries no size; it must be terminated directly by NUL.
		if c != '\x00' {
			return gitMsg{}, fmt.Errorf("Missing format weird")
		}
		return gitMsg{
			hash:     string(id),
			itemType: "missing",
			size:     0,
		}, fmt.Errorf("Object not found: '%s'", string(id))
	default:
		return gitMsg{}, fmt.Errorf("Invalid object type: '%s'", string(msgType))
	}

	size := 0
	for c = <-data; c != '\x00'; c = <-data {
		if c < '0' || c > '9' {
			return gitMsg{}, fmt.Errorf("Invalid character during object size parse: '%c'", c)
		}
		size = size*10 + int(c-'0')
	}

	return gitMsg{
		hash:     string(id),
		itemType: string(msgType),
		size:     size,
	}, nil
}
// parseGitCommitHdr reads one "<name> SP <value> LF" line from data and
// returns {name, value}. If the very first byte is a newline (the blank line
// that ends the header section) both strings are empty. The returned error
// is always nil.
func parseGitCommitHdr(data <-chan byte) ([2]string, error) {
	var parsed [2]string

	first := <-data
	if first == '\n' {
		return parsed, nil
	}

	name := make([]byte, 0, 60)
	for b := first; b != ' '; b = <-data {
		name = append(name, b)
	}

	value := make([]byte, 0, 1000)
	for {
		b := <-data
		if b == '\n' {
			break
		}
		value = append(value, b)
	}

	parsed[0], parsed[1] = string(name), string(value)
	return parsed, nil
}
// parseGitCommitMsg reads bytes from data up to (and consuming) the next NUL
// and returns them as the commit message. l is the expected message length;
// if the number of bytes read differs, an error reporting the difference is
// returned instead.
func parseGitCommitMsg(data <-chan byte, l int) (string, error) {
	body := make([]byte, 0, l)
	for {
		b := <-data
		if b == '\x00' {
			break
		}
		body = append(body, b)
	}

	if remaining := l - len(body); remaining != 0 {
		return "", fmt.Errorf("Unexpected data in the git commit msg: l=%d", remaining)
	}
	return string(body), nil
}
// parseGitCommit reads one commit object (object header, commit header lines,
// blank line, message, NUL) from data.
//
// Only the "tree" header is retained; all other header lines are skipped.
// An error is returned when the object is not a commit, when any part fails
// to parse, or when the header lines are longer than the declared object size.
//
// NOTE(review): a header line consisting of a single space followed by a
// newline parses to two empty strings and is therefore indistinguishable from
// the end-of-headers marker below — confirm whether such lines can occur.
func parseGitCommit(data <-chan byte) (commit, error) {
	obj, err := parseGitMsg(data)
	if err != nil {
		return commit{}, err
	}
	if obj.itemType != "commit" {
		return commit{}, fmt.Errorf("expected commit but parsed %s", obj.itemType)
	}

	var c commit
	remaining := obj.size
	for {
		hdr, err := parseGitCommitHdr(data)
		if err != nil {
			// Propagate the failure; returning a nil error here would hand
			// the caller an empty commit that looks valid.
			return commit{}, err
		}

		if len(hdr[0])+len(hdr[1]) == 0 { // hdr end marker
			break
		}

		if hdr[0] == "tree" {
			c.Tree = hdr[1]
		}

		// +2 for the separating space and the trailing newline.
		remaining -= len(hdr[0]) + len(hdr[1]) + 2
	}
	remaining-- // the newline of the end marker

	if remaining < 0 {
		// A negative length would panic when sizing the message buffer.
		return commit{}, fmt.Errorf("commit headers exceed declared object size by %d bytes", -remaining)
	}

	c.Msg, err = parseGitCommitMsg(data, remaining)
	if err != nil {
		return commit{}, err
	}
	return c, nil
}
// parseTreeEntry reads one tree entry from data: an octal mode terminated by
// a space, a name terminated by NUL, and hashLen raw hash bytes.
//
// The returned entry carries the hash hex-encoded and, in size, the total
// number of bytes consumed. The returned error is always nil.
func parseTreeEntry(data <-chan byte, hashLen int) (tree_entry, error) {
	var entry tree_entry
	consumed := 0

	for {
		b := <-data
		if b == ' ' {
			break
		}
		entry.mode = entry.mode*8 + int(b-'0')
		consumed++
	}
	consumed++ // the space after the mode

	name := make([]byte, 0, 128)
	for b := <-data; b != '\x00'; b = <-data {
		name = append(name, b)
	}
	entry.name = string(name)
	consumed += len(name) + 1 // name plus its NUL terminator

	const hexDigits = "0123456789abcdef"
	hash := make([]byte, hashLen*2)
	for i := 0; i < hashLen; i++ {
		b := <-data
		hash[2*i] = hexDigits[b>>4]
		hash[2*i+1] = hexDigits[b&0x0F]
	}
	entry.hash = string(hash)

	entry.size = consumed + hashLen
	return entry, nil
}
// parseGitTree reads one tree object (object header, entries, NUL) from data
// and returns its entries.
//
// The binary hash length of each entry is taken to be half the length of the
// hex hash in the object header. An error is returned when the object is not
// a tree, when an entry fails to parse, when the terminating NUL is absent,
// or when the entries do not add up to the declared size.
func parseGitTree(data <-chan byte) (tree, error) {
	hdr, err := parseGitMsg(data)
	if err != nil {
		return tree{}, err
	}
	if hdr.itemType != "tree" {
		return tree{}, fmt.Errorf("expected tree but parsed %s", hdr.itemType)
	}
	if len(hdr.hash) == 0 {
		// Guards the division below and a zero-length entry hash.
		return tree{}, fmt.Errorf("Empty object hash in git tree header")
	}

	hashLen := len(hdr.hash) / 2

	// An entry is longer than its hash, so this bounds the entry count.
	t := tree{items: make([]tree_entry, 0, hdr.size/len(hdr.hash))}
	parsedLen := 0
	for parsedLen < hdr.size {
		entry, err := parseTreeEntry(data, hashLen)
		if err != nil {
			// Propagate the failure rather than returning an empty tree
			// with a nil error.
			return tree{}, err
		}

		t.items = append(t.items, entry)
		parsedLen += entry.size
	}

	if c := <-data; c != '\x00' { // object terminator
		return tree{}, fmt.Errorf("Unexpected character during git tree data read")
	}

	if parsedLen != hdr.size {
		return tree{}, fmt.Errorf("Invalid size of git tree data")
	}
	return t, nil
}
// parseGitBlob reads an object header followed by as many payload bytes as
// the header declares, then expects a terminating NUL.
//
// On a header error it returns an empty slice and that error. If the
// terminator is wrong, the payload read so far is returned with an error.
func parseGitBlob(data <-chan byte) ([]byte, error) {
	obj, err := parseGitMsg(data)
	if err != nil {
		return []byte{}, err
	}

	content := make([]byte, obj.size)
	for i := range content {
		content[i] = <-data
	}

	if terminator := <-data; terminator != '\x00' {
		return content, fmt.Errorf("invalid byte read in parseGitBlob")
	}
	return content, nil
}
// GitParseCommits looks up every id in commitIDs through a single
// `git cat-file --batch -Z` process run in cwd (relative to GitPath) and
// returns the parsed commits in request order.
//
// Parsing stops at the first commit that cannot be parsed. The returned error
// is the git process error if there is one, otherwise the parse error.
func (e *GitHandler) GitParseCommits(cwd string, commitIDs []string) (parsedCommits []commit, err error) {
	data_in, data_out := ChanIO{make(chan byte, 256)}, ChanIO{make(chan byte, 70)}
	parsedCommits = make([]commit, 0, len(commitIDs))

	// parseErr is owned by the goroutine until done is closed. It is kept
	// separate from err so that the result of cmd.Run cannot overwrite it.
	var parseErr error
	done := make(chan struct{})

	go func() {
		defer close(done)
		// Closing git's stdin lets the process finish.
		defer close(data_out.ch)

		for _, id := range commitIDs {
			data_out.Write([]byte(id))
			data_out.ch <- '\x00'
			c, perr := parseGitCommit(data_in.ch)
			if perr != nil {
				parseErr = fmt.Errorf("Error parsing git commit: %w", perr)
				return
			}

			parsedCommits = append(parsedCommits, c)
		}
	}()

	cmd := exec.Command("/usr/bin/git", "cat-file", "--batch", "-Z")
	cmd.Env = []string{
		"GIT_CEILING_DIRECTORIES=" + e.GitPath,
		"GIT_CONFIG_GLOBAL=/dev/null",
	}
	cmd.Dir = filepath.Join(e.GitPath, cwd)
	cmd.Stdout = &data_in
	cmd.Stdin = &data_out
	cmd.Stderr = writeFunc(func(data []byte) (int, error) {
		if e.DebugLogger {
			// Print, not Printf: git output is not a format string.
			log.Print(string(data))
		}
		return len(data), nil
	})
	if e.DebugLogger {
		log.Printf("command run: %v\n", cmd.Args)
	}

	err = cmd.Run()
	<-done

	if err == nil {
		err = parseErr
	}
	return parsedCommits, err
}
// GitCatFile returns the content of the blob named filename in the top-level
// tree of commitId, using a single `git cat-file --batch -Z` process run in
// cwd (relative to GitPath).
//
// An error is returned when git fails, when the commit, tree or blob cannot
// be parsed, or when no blob with that name exists in the top-level tree.
//
// TODO: support sub-trees
func (e *GitHandler) GitCatFile(cwd, commitId, filename string) (data []byte, err error) {
	data_in, data_out := ChanIO{make(chan byte, 256)}, ChanIO{make(chan byte, 70)}

	// parseErr and content are owned by the goroutine until done is closed.
	// They are kept separate from the named results so that neither variable
	// shadowing nor the result of cmd.Run can lose them.
	var (
		parseErr error
		content  []byte
	)
	done := make(chan struct{})

	go func() {
		defer close(done)
		// Closing git's stdin lets the process finish.
		defer close(data_out.ch)

		data_out.Write([]byte(commitId))
		data_out.ch <- '\x00'
		c, perr := parseGitCommit(data_in.ch)
		if perr != nil {
			parseErr = fmt.Errorf("Error parsing git commit: %w", perr)
			return
		}

		data_out.Write([]byte(c.Tree))
		data_out.ch <- '\x00'
		tree, perr := parseGitTree(data_in.ch)
		if perr != nil {
			parseErr = fmt.Errorf("Error parsing git tree: %w", perr)
			return
		}

		for _, te := range tree.items {
			if te.isBlob() && te.name == filename {
				data_out.Write([]byte(te.hash))
				data_out.ch <- '\x00'
				blob, perr := parseGitBlob(data_in.ch)
				if perr != nil {
					parseErr = fmt.Errorf("Error parsing git blob: %w", perr)
					return
				}
				content = blob
				return
			}
		}

		parseErr = fmt.Errorf("file not found: '%s'", filename)
	}()

	cmd := exec.Command("/usr/bin/git", "cat-file", "--batch", "-Z")
	cmd.Env = []string{
		"GIT_CEILING_DIRECTORIES=" + e.GitPath,
		"GIT_CONFIG_GLOBAL=/dev/null",
	}
	cmd.Dir = filepath.Join(e.GitPath, cwd)
	cmd.Stdout = &data_in
	cmd.Stdin = &data_out
	cmd.Stderr = writeFunc(func(data []byte) (int, error) {
		if e.DebugLogger {
			// Print, not Printf: git output is not a format string.
			log.Print(string(data))
		}
		return len(data), nil
	})
	if e.DebugLogger {
		log.Printf("command run: %v\n", cmd.Args)
	}

	err = cmd.Run()
	<-done

	if err == nil {
		err = parseErr
	}
	if err != nil {
		return nil, err
	}
	return content, nil
}
// GitSubmoduleList returns a (filename) -> (hash) map of all submodule
// entries in the top-level tree of commitId, using a single
// `git cat-file --batch -Z` process run in cwd (relative to GitPath).
//
// The returned map is never nil. The error is the git process error if there
// is one, otherwise the error from parsing the commit or tree.
//
// TODO: recursive? map different orgs, not just assume '.' for path
func (e *GitHandler) GitSubmoduleList(cwd, commitId string) (submoduleList map[string]string, err error) {
	submoduleList = make(map[string]string)
	data_in, data_out := ChanIO{make(chan byte, 256)}, ChanIO{make(chan byte, 70)}

	// parseErr is owned by the goroutine until done is closed. It is kept
	// separate from err so that the result of cmd.Run cannot overwrite it.
	var parseErr error
	done := make(chan struct{})

	go func() {
		defer close(done)
		// Closing git's stdin lets the process finish.
		defer close(data_out.ch)

		data_out.Write([]byte(commitId))
		data_out.ch <- '\x00'
		c, perr := parseGitCommit(data_in.ch)
		if perr != nil {
			parseErr = fmt.Errorf("Error parsing git commit. Err: %w", perr)
			return
		}

		data_out.Write([]byte(c.Tree))
		data_out.ch <- '\x00'
		tree, perr := parseGitTree(data_in.ch)
		if perr != nil {
			parseErr = fmt.Errorf("Error parsing git tree: %w", perr)
			return
		}

		for _, te := range tree.items {
			if te.isSubmodule() {
				submoduleList[te.name] = te.hash
			}
		}
	}()

	cmd := exec.Command("/usr/bin/git", "cat-file", "--batch", "-Z")
	cmd.Env = []string{
		"GIT_CEILING_DIRECTORIES=" + e.GitPath,
		"GIT_CONFIG_GLOBAL=/dev/null",
	}
	cmd.Dir = filepath.Join(e.GitPath, cwd)
	cmd.Stdout = &data_in
	cmd.Stdin = &data_out
	cmd.Stderr = writeFunc(func(data []byte) (int, error) {
		if e.DebugLogger {
			log.Println(string(data))
		}
		return len(data), nil
	})
	if e.DebugLogger {
		log.Printf("command run: %v\n", cmd.Args)
	}

	err = cmd.Run()
	<-done

	if err == nil {
		err = parseErr
	}
	return submoduleList, err
}
// GitSubmoduleCommitId looks up the submodule entry named packageName in the
// top-level tree of commitId and returns the hash it points to, using a
// single `git cat-file --batch -Z` process run in cwd (relative to GitPath).
//
// valid is true only when a non-empty hash was found and it has the same
// length as commitId. Parse failures and git failures are logged and reported
// as ("", false) or (hash, false) rather than by panicking.
func (e *GitHandler) GitSubmoduleCommitId(cwd, packageName, commitId string) (subCommitId string, valid bool) {
	defer func() {
		// Covers panics raised on this goroutine only.
		if recover() != nil {
			subCommitId = ""
			valid = false
		}
	}()

	data_in, data_out := ChanIO{make(chan byte, 256)}, ChanIO{make(chan byte, 70)}
	var wg sync.WaitGroup

	wg.Add(1)

	if e.DebugLogger {
		log.Printf("getting commit id '%s' from git at '%s' with packageName: %s\n", commitId, cwd, packageName)
	}

	go func() {
		defer wg.Done()
		// Closing git's stdin lets the process finish.
		defer close(data_out.ch)

		data_out.Write([]byte(commitId))
		data_out.ch <- '\x00'
		c, err := parseGitCommit(data_in.ch)
		if err != nil {
			// A panic here could not be recovered by the caller's deferred
			// recover and would terminate the whole program.
			log.Printf("Error parsing git commit: %v\n", err)
			return
		}

		data_out.Write([]byte(c.Tree))
		data_out.ch <- '\x00'
		tree, err := parseGitTree(data_in.ch)
		if err != nil {
			log.Printf("Error parsing git tree: %v\n", err)
			return
		}

		for _, te := range tree.items {
			if te.name == packageName && te.isSubmodule() {
				subCommitId = te.hash
				return
			}
		}
	}()

	cmd := exec.Command("/usr/bin/git", "cat-file", "--batch", "-Z")
	cmd.Env = []string{
		"GIT_CEILING_DIRECTORIES=" + e.GitPath,
		"GIT_CONFIG_GLOBAL=/dev/null",
	}
	cmd.Dir = filepath.Join(e.GitPath, cwd)
	cmd.Stdout = &data_in
	cmd.Stdin = &data_out
	cmd.Stderr = writeFunc(func(data []byte) (int, error) {
		log.Println(string(data))
		return len(data), nil
	})
	if e.DebugLogger {
		log.Printf("command run: %v\n", cmd.Args)
	}
	if err := cmd.Run(); err != nil {
		log.Printf("Error running command %v, err: %v", cmd.Args, err)
	}

	wg.Wait()

	// An empty result must never count as valid, even for an empty commitId.
	valid = len(subCommitId) > 0 && len(subCommitId) == len(commitId)
	return subCommitId, valid
}