autogits/bots-common/git_utils.go

680 lines
14 KiB
Go
Raw Normal View History

2024-07-07 21:08:41 +02:00
package common
import (
"fmt"
2024-07-14 23:56:37 +02:00
"io"
2024-07-07 21:08:41 +02:00
"os"
"os/exec"
"path"
"path/filepath"
"strings"
2024-07-14 23:56:37 +02:00
"sync"
2024-07-07 21:08:41 +02:00
)
//func (h *RequestHandler) ProcessBranchList() []string {
// if h.HasError() {
// return make([]string, 0)
// }
//
// trackedBranches, err := os.ReadFile(path.Join(h.GitPath, DefaultGitPrj, TrackedBranchesFile))
// if err != nil {
// if errors.Is(err, os.ErrNotExist) {
// trackedBranches = []byte("factory")
// } else {
// h.LogError("file error reading '%s' file in repo", TrackedBranchesFile)
// h.Error = err
// return make([]string, 0)
// }
// }
//
// return strings.Split(string(trackedBranches), "\n")
//}
// GitReference pairs a branch name with the object id recorded for it.
type GitReference struct {
	Branch string
	Id     string
}

// GitReferences accumulates GitReference values without duplicates, in the
// order in which they were first added.
type GitReferences struct {
	refs []GitReference
}

// addReference stores the (id, branch) pair unless an identical pair is
// already present.
func (refs *GitReferences) addReference(id, branch string) {
	candidate := GitReference{Branch: branch, Id: id}
	for i := range refs.refs {
		if refs.refs[i] == candidate {
			return
		}
	}
	refs.refs = append(refs.refs, candidate)
}
// processRefs reads the "packed-refs" file inside gitDir and returns one
// GitReference for every entry below refs/remotes/origin/, with that prefix
// stripped from the branch name.
//
// The file is rejected when it is a directory or larger than 10000 bytes.
// Comment lines ('#') and peeled-tag lines ('^<id>', which follow annotated
// tags) are skipped; any other line that is not exactly "<id> <ref>" is
// reported as an error.
func processRefs(gitDir string) ([]GitReference, error) {
	packedRefsPath := path.Join(gitDir, "packed-refs")
	stat, err := os.Stat(packedRefsPath)
	if err != nil {
		return nil, err
	}
	if stat.Size() > 10000 || stat.IsDir() {
		return nil, fmt.Errorf("Funny business with 'packed-refs' in '%s'", gitDir)
	}

	data, err := os.ReadFile(packedRefsPath)
	if err != nil {
		return nil, err
	}

	const remoteRefPrefix = "refs/remotes/origin/"

	var references GitReferences
	for _, line := range strings.Split(string(data), "\n") {
		if len(line) < 1 || line[0] == '#' || line[0] == '^' {
			continue
		}

		splitLine := strings.Split(line, " ")
		if len(splitLine) != 2 {
			return nil, fmt.Errorf("Unexpected packaged-refs entry '%#v' in '%s'", splitLine, packedRefsPath)
		}
		id, ref := splitLine[0], splitLine[1]

		// HasPrefix is safe for refs shorter than the prefix (tags, local
		// heads), where slicing the ref directly would panic.
		if !strings.HasPrefix(ref, remoteRefPrefix) {
			continue
		}
		references.addReference(id, ref[len(remoteRefPrefix):])
	}

	return references.refs, nil
}
// findGitDir resolves the git directory belonging to the work tree at p.
//
// When p/.git is a directory, that directory is returned. When it is a file,
// its first line must have the form "gitdir: <path>"; a relative <path> is
// resolved against p, an absolute one is returned as is (cleaned). Only the
// first ':' separates the key from the value, so the target path itself may
// contain colons.
func findGitDir(p string) (string, error) {
	gitFile := path.Join(p, ".git")
	stat, err := os.Stat(gitFile)
	if err != nil {
		return "", err
	}
	if stat.IsDir() {
		return gitFile, nil
	}

	data, err := os.ReadFile(gitFile)
	if err != nil {
		return "", err
	}

	// Only the first line is significant.
	line, _, _ := strings.Cut(string(data), "\n")
	key, value, found := strings.Cut(line, ":")
	if !found {
		return "", fmt.Errorf("Unknown format of .git file: '%s'\n", line)
	}
	if key != "gitdir" {
		return "", fmt.Errorf("Unknown header of .git file: '%s'\n", key)
	}

	gitDir := strings.TrimSpace(value)
	if path.IsAbs(gitDir) {
		// Joining an absolute target under p would produce a bogus path.
		return path.Clean(gitDir), nil
	}
	return path.Join(p, gitDir), nil
}
// GitBranchHead returns the object id recorded in packed-refs for the
// origin remote branch branchName of the repository at gitDir (relative to
// e.GitPath).
//
// If the handler already carries an error, that error is returned unchanged.
// Any failure (git directory not found, packed-refs unreadable, branch
// missing) is logged, stored in e.Error and returned.
func (e *RequestHandler) GitBranchHead(gitDir, branchName string) (string, error) {
	if e.HasError() {
		return "", e.Error
	}

	gitPath, err := findGitDir(path.Join(e.GitPath, gitDir))
	if err != nil {
		e.LogError("Error identifying gitdir in `%s`: %#v", gitDir, err)
		e.Error = err
		// Without a git directory there is nothing to look refs up in.
		return "", e.Error
	}

	refs, err := processRefs(gitPath)
	if err != nil {
		e.LogError("Error finding branches (%s): %#v", branchName, err)
		e.Error = err
		return "", e.Error
	}

	for _, ref := range refs {
		if ref.Branch == branchName {
			return ref.Id, nil
		}
	}

	e.Error = fmt.Errorf("Can't find default remote branch: %s", branchName)
	e.LogError("%s", e.Error.Error())
	return "", e.Error
}
// ExecStream is the set of operations used to run git commands one after
// another. GitExec returns an ExecStream itself, so calls can be chained.
// RequestHandler in this file implements it.
type ExecStream interface {
// Close ends the stream. The RequestHandler implementation removes its
// GitPath directory.
Close()
// HasError reports whether an error has been recorded on the stream.
HasError() bool
// GitExec runs git with the given parameters in cwd and returns the stream
// for further calls.
GitExec(cwd string, param ...string) ExecStream
}
// Close deletes the directory tree at e.GitPath and clears GitPath. The
// result of the removal (nil on success) is stored in e.Error. Calling Close
// when GitPath is already empty does nothing.
func (e *RequestHandler) Close() {
	if e.GitPath != "" {
		e.Error = os.RemoveAll(e.GitPath)
		e.GitPath = ""
	}
}
// HasError reports whether an error is currently stored in e.Error.
func (e *RequestHandler) HasError() bool {
return e.Error != nil
}
// writeFunc lets a plain function be used wherever a value with a Write
// method is expected, e.g. as the stdout or stderr of a command.
type writeFunc func(data []byte) (int, error)

// Write hands data to the wrapped function and returns its result.
func (w writeFunc) Write(data []byte) (int, error) {
	return w(data)
}

// UnmarshalText passes text to the wrapped function and reports only its
// error.
func (w writeFunc) UnmarshalText(text []byte) error {
	if _, err := w(text); err != nil {
		return err
	}
	return nil
}

// Close invokes the wrapped function with a nil slice and reports only its
// error.
func (w writeFunc) Close() error {
	if _, err := w(nil); err != nil {
		return err
	}
	return nil
}
2024-07-07 21:08:41 +02:00
// GitExec runs /usr/bin/git with params inside e.GitPath/cwd and returns the
// handler so further calls can be chained.
//
// Nothing is executed when the handler already carries an error. The command
// gets a fixed, minimal environment; its stdout is forwarded to the handler's
// logger and its stderr to the logger's error output. The result of running
// the command is stored in e.Error.
func (e *RequestHandler) GitExec(cwd string, params ...string) ExecStream {
	if e.Error != nil {
		return e
	}

	logStdout := writeFunc(func(out []byte) (int, error) {
		e.Logger.Log("%s", out)
		return len(out), nil
	})
	logStderr := writeFunc(func(out []byte) (int, error) {
		e.Logger.LogError("%s", out)
		return len(out), nil
	})

	cmd := exec.Command("/usr/bin/git", params...)
	cmd.Dir = filepath.Join(e.GitPath, cwd)
	cmd.Env = []string{
		"GIT_CEILING_DIRECTORIES=" + e.GitPath,
		"GIT_CONFIG_GLOBAL=/dev/null",
		"GIT_AUTHOR_NAME=" + e.GitCommiter,
		"EMAIL=not@exist@src.opensuse.org",
		"GIT_LFS_SKIP_SMUDGE=1",
		"GIT_SSH_COMMAND=/usr/bin/ssh -o StrictHostKeyChecking=yes",
	}
	cmd.Stdin = nil
	cmd.Stdout = logStdout
	cmd.Stderr = logStderr

	e.Log("git execute: %#v", cmd.Args)
	e.Error = cmd.Run()

	return e
}
2024-07-11 16:45:49 +02:00
2024-07-14 23:56:37 +02:00
// ChanIO exposes a byte channel through Read and Write methods so it can be
// plugged into APIs that expect an io.Reader or io.Writer.
type ChanIO struct {
	ch chan byte
}

// Write sends every byte of p on the channel, blocking while the channel is
// full. It always reports len(p) bytes written and a nil error.
func (c *ChanIO) Write(p []byte) (int, error) {
	for _, b := range p {
		c.ch <- b
	}
	return len(p), nil
}

// Read blocks until at least one byte is available, then copies whatever
// else is already buffered in the channel without blocking again, up to
// len(data) bytes. It returns io.EOF once the channel is closed and drained.
// A zero-length data returns 0, nil without touching the channel.
func (c *ChanIO) Read(data []byte) (idx int, err error) {
	if len(data) == 0 {
		return 0, nil
	}

	b, ok := <-c.ch
	if !ok {
		return 0, io.EOF
	}
	data[0] = b
	idx = 1

	for len(c.ch) > 0 && idx < len(data) {
		b, ok = <-c.ch
		if !ok {
			return idx, io.EOF
		}
		data[idx] = b
		idx++
	}

	return idx, nil
}
// gitMsg holds the fields of one object header as produced by parseGitMsg.
type gitMsg struct {
// hash is the object id as lowercase hex characters.
hash string
// itemType is "commit", "tree", "blob" or "missing".
itemType string
// size is the decimal size field of the header; 0 for "missing".
size int
}
// commit carries the parts of a commit object that parseGitCommit extracts.
type commit struct {
// Tree is the value of the commit's "tree" header.
Tree string
// Msg is the commit message that follows the headers.
Msg string
}
// tree_entry is one record of a git tree object as read by parseTreeEntry.
type tree_entry struct {
	name string // entry name
	mode int    // mode, parsed from its octal text form
	hash string // object id as lowercase hex

	size int // number of bytes the record occupied in the tree object
}

// tree is the list of entries of one git tree object.
type tree struct {
	items []tree_entry
}

// isSubmodule reports whether the type bits of the mode are 0160000.
func (t *tree_entry) isSubmodule() bool {
	return t.mode&0o170000 == 0o160000
}

// isTree reports whether the type bits of the mode are 0040000.
func (t *tree_entry) isTree() bool {
	return t.mode&0o170000 == 0o040000
}

// isBlob reports whether the entry is neither a tree nor a submodule.
func (t *tree_entry) isBlob() bool {
	return !(t.isTree() || t.isSubmodule())
}
2024-07-14 23:56:37 +02:00
func parseGitMsg(data <-chan byte) (gitMsg, error) {
var id []byte = make([]byte, 64)
var msgType []byte = make([]byte, 16)
var size int
pos := 0
for c := <-data; c != ' '; c = <-data {
if (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') {
id[pos] = c
pos++
} else {
2024-07-15 21:22:58 +02:00
return gitMsg{}, fmt.Errorf("Invalid character during object hash parse '%c' at %d", c, pos)
2024-07-14 23:56:37 +02:00
}
}
id = id[:pos]
pos = 0
2024-07-29 15:28:03 +02:00
var c byte
for c = <-data; c != ' ' && c != '\x00'; c = <-data {
2024-07-14 23:56:37 +02:00
if c >= 'a' && c <= 'z' {
msgType[pos] = c
pos++
} else {
2024-07-15 21:14:09 +02:00
return gitMsg{}, fmt.Errorf("Invalid character during object type parse '%c' at %d", c, pos)
2024-07-14 23:56:37 +02:00
}
}
msgType = msgType[:pos]
switch string(msgType) {
2024-07-29 15:28:03 +02:00
case "commit", "tree", "blob":
2024-07-14 23:56:37 +02:00
break
2024-07-29 15:28:03 +02:00
case "missing":
if c != '\x00' {
return gitMsg{}, fmt.Errorf("Missing format weird")
}
return gitMsg{
hash: string(id[:]),
itemType: "missing",
size: 0,
}, fmt.Errorf("Object not found: '%s'", string(id))
2024-07-14 23:56:37 +02:00
default:
return gitMsg{}, fmt.Errorf("Invalid object type: '%s'", string(msgType))
}
2024-07-29 15:28:03 +02:00
for c = <-data; c != '\000'; c = <-data {
2024-07-14 23:56:37 +02:00
if c >= '0' && c <= '9' {
size = size*10 + (int(c) - '0')
} else {
2024-07-15 19:19:34 +02:00
return gitMsg{}, fmt.Errorf("Invalid character during object size parse: '%c'", c)
2024-07-14 23:56:37 +02:00
}
}
return gitMsg{
hash: string(id[:]),
itemType: string(msgType),
size: size,
}, nil
}
// parseGitCommitHdr reads one header line of a commit object from data and
// returns it as {name, value}: the name is everything up to the first space,
// the value everything after it up to the newline. A line consisting of just
// a newline marks the end of the headers and yields two empty strings.
//
// An error is returned when data is closed before the line is complete;
// without that check a closed channel would deliver zero bytes forever.
func parseGitCommitHdr(data <-chan byte) ([2]string, error) {
	errTruncated := fmt.Errorf("unexpected end of data while parsing git commit header")

	c, ok := <-data
	if !ok {
		return [2]string{}, errTruncated
	}
	if c == '\n' { // end of header marker
		return [2]string{"", ""}, nil
	}

	hdr := make([]byte, 0, 60)
	for c != ' ' {
		hdr = append(hdr, c)
		if c, ok = <-data; !ok {
			return [2]string{}, errTruncated
		}
	}

	val := make([]byte, 0, 1000)
	for {
		if c, ok = <-data; !ok {
			return [2]string{}, errTruncated
		}
		if c == '\n' {
			break
		}
		val = append(val, c)
	}

	return [2]string{string(hdr), string(val)}, nil
}
2024-07-14 23:56:37 +02:00
// parseGitCommitMsg reads bytes from data up to (and consuming) the next NUL
// byte and returns them as the commit message. l is the number of message
// bytes the caller expects; when the number actually read differs, an error
// reporting the difference is returned. A negative l is reported through
// that same error rather than causing a panic.
func parseGitCommitMsg(data <-chan byte, l int) (string, error) {
	// l is only a capacity hint here; it may be negative when the caller's
	// header accounting does not match the object size.
	capacity := l
	if capacity < 0 {
		capacity = 0
	}

	msg := make([]byte, 0, capacity)
	for c := <-data; c != '\x00'; c = <-data {
		msg = append(msg, c)
	}

	if remaining := l - len(msg); remaining != 0 {
		return "", fmt.Errorf("Unexpected data in the git commit msg: l=%d", remaining)
	}
	return string(msg), nil
}
// parseGitCommit reads a complete commit object (object header, commit
// headers, message) from data. Of the commit headers only "tree" is kept.
//
// The object size from the header is used to work out how many bytes the
// message must have: every header line accounts for its name, value, the
// separating space and the newline, and the empty line ending the headers
// for one more byte.
//
// NOTE(review): a header line whose name and value are both empty is taken
// as the end of the headers. A continuation line holding only a single space
// would look the same — confirm whether such lines can occur in the commits
// handled here (e.g. inside multi-line signature headers).
func parseGitCommit(data <-chan byte) (commit, error) {
	hdr, err := parseGitMsg(data)
	if err != nil {
		return commit{}, err
	}
	if hdr.itemType != "commit" {
		return commit{}, fmt.Errorf("expected commit but parsed %s", hdr.itemType)
	}

	var c commit
	remaining := hdr.size
	for {
		field, err := parseGitCommitHdr(data)
		if err != nil {
			return commit{}, err
		}
		if len(field[0])+len(field[1]) == 0 { // hdr end marker
			break
		}
		if field[0] == "tree" {
			c.Tree = field[1]
		}
		remaining -= len(field[0]) + len(field[1]) + 2
	}
	remaining-- // the newline that terminated the headers

	c.Msg, err = parseGitCommitMsg(data, remaining)
	if err != nil {
		return commit{}, err
	}
	return c, nil
}
2024-07-15 19:19:34 +02:00
func parseTreeEntry(data <-chan byte, hashLen int) (tree_entry, error) {
2024-07-14 23:56:37 +02:00
var e tree_entry
2024-07-15 19:19:34 +02:00
for c := <-data; c != ' '; c = <-data {
e.mode = e.mode*8 + int(c-'0')
e.size++
}
e.size++
name := make([]byte, 0, 128)
for c := <-data; c != '\x00'; c = <-data {
name = append(name, c)
e.size++
2024-07-14 23:59:48 +02:00
}
2024-07-15 19:19:34 +02:00
e.size++
e.name = string(name)
const hexBinToAscii = "0123456789abcdef"
hash := make([]byte, 0, hashLen*2)
for range hashLen {
c := <-data
hash = append(hash, hexBinToAscii[((c&0xF0)>>4)], hexBinToAscii[c&0xF])
2024-07-14 23:59:48 +02:00
}
2024-07-15 19:19:34 +02:00
e.hash = string(hash)
e.size += hashLen
2024-07-14 23:59:48 +02:00
2024-07-14 23:56:37 +02:00
return e, nil
}
func parseGitTree(data <-chan byte) (tree, error) {
2024-07-15 19:19:34 +02:00
hdr, err := parseGitMsg(data)
if err != nil {
return tree{}, err
}
// max capacity to length of hash
t := tree{items: make([]tree_entry, 0, hdr.size/len(hdr.hash))}
2024-07-29 15:28:03 +02:00
parsedLen := 0
for parsedLen < hdr.size {
2024-07-15 19:19:34 +02:00
entry, err := parseTreeEntry(data, len(hdr.hash)/2)
if err != nil {
return tree{}, nil
}
t.items = append(t.items, entry)
parsedLen += entry.size
}
2024-07-29 15:28:03 +02:00
c := <-data // \0 read
if c != '\x00' {
return t, fmt.Errorf("Unexpected character during git tree data read")
}
if parsedLen != hdr.size {
return t, fmt.Errorf("Invalid size of git tree data")
}
2024-07-15 19:19:34 +02:00
return t, nil
2024-07-14 23:56:37 +02:00
}
2024-07-29 15:28:03 +02:00
// parseGitBlob reads a complete blob object (object header, content and the
// trailing NUL) from data and returns the content.
//
// An error is returned when the object is not a blob, when data is closed
// before the announced number of bytes has arrived, or when the content is
// not followed by a NUL byte.
func parseGitBlob(data <-chan byte) ([]byte, error) {
	hdr, err := parseGitMsg(data)
	if err != nil {
		return []byte{}, err
	}
	if hdr.itemType != "blob" {
		return []byte{}, fmt.Errorf("expected blob but parsed %s", hdr.itemType)
	}

	d := make([]byte, hdr.size)
	for i := range d {
		b, ok := <-data
		if !ok {
			// A closed channel would otherwise pad the blob with zeros.
			return []byte{}, fmt.Errorf("unexpected end of data in parseGitBlob after %d of %d bytes", i, hdr.size)
		}
		d[i] = b
	}

	if eob, ok := <-data; !ok || eob != '\x00' {
		return d, fmt.Errorf("invalid byte read in parseGitBlob")
	}
	return d, nil
}
// TODO: support sub-trees
//
// GitCatFile returns the content of the blob named filename in the top-level
// tree of commit commitId, read through "git cat-file --batch -Z" running in
// e.GitPath/cwd.
//
// Nothing is run when the handler already carries an error. A goroutine
// drives the conversation with git: it sends an object id, parses the reply
// and derives the next id from it. After git has exited, e.Error is set to
// the command's error if there is one, otherwise to the first parse or
// lookup error (nil on success).
func (e *RequestHandler) GitCatFile(cwd, commitId, filename string) []byte {
	if e.HasError() {
		return nil
	}

	var (
		wg       sync.WaitGroup
		data     []byte
		parseErr error
	)
	dataIn, dataOut := ChanIO{make(chan byte, 256)}, ChanIO{make(chan byte, 70)}

	wg.Add(1)
	go func() {
		// Deferred calls run bottom-up: close git's stdin, swallow whatever
		// git still prints so its output is never blocked, then signal.
		defer wg.Done()
		defer func() {
			for range dataIn.ch {
			}
		}()
		defer close(dataOut.ch)

		dataOut.Write([]byte(commitId))
		dataOut.ch <- '\x00'
		c, err := parseGitCommit(dataIn.ch)
		if err != nil {
			parseErr = err
			e.LogError("Error parsing git commit: %v", err)
			return
		}

		dataOut.Write([]byte(c.Tree))
		dataOut.ch <- '\x00'
		root, err := parseGitTree(dataIn.ch)
		if err != nil {
			parseErr = err
			e.LogError("Error parsing git tree: %v", err)
			return
		}

		for _, te := range root.items {
			if te.isBlob() && te.name == filename {
				dataOut.Write([]byte(te.hash))
				dataOut.ch <- '\x00'
				data, err = parseGitBlob(dataIn.ch)
				if err != nil {
					parseErr = err
					e.LogError("Error reading blob data: %v", err)
				}
				return
			}
		}

		parseErr = fmt.Errorf("file not found: '%s'", filename)
		e.LogPlainError(parseErr)
	}()

	cmd := exec.Command("/usr/bin/git", "cat-file", "--batch", "-Z")
	cmd.Env = []string{
		"GIT_CEILING_DIRECTORIES=" + e.GitPath,
		"GIT_CONFIG_GLOBAL=/dev/null",
	}
	cmd.Dir = filepath.Join(e.GitPath, cwd)
	cmd.Stdout = &dataIn
	cmd.Stdin = &dataOut
	cmd.Stderr = writeFunc(func(data []byte) (int, error) {
		e.Logger.LogError("%s", data)
		return len(data), nil
	})
	e.Log("command run: %v", cmd.Args)
	runErr := cmd.Run()

	// Run has returned, so nothing writes to dataIn any more. Closing it
	// releases the goroutine even when git never produced the expected data
	// (for instance because it could not be started).
	close(dataIn.ch)
	wg.Wait()

	// Only now is it safe to publish the outcome: the goroutine no longer
	// runs and a successful Run must not hide a parse error.
	if runErr != nil {
		e.Error = runErr
	} else {
		e.Error = parseErr
	}
	return data
}
2024-07-26 16:53:09 +02:00
func (e *RequestHandler) GitSubmoduleList(cwd, commitId string) map[string]string {
var done sync.Mutex
submoduleList := make(map[string]string)
2024-07-31 16:52:02 +02:00
if e.HasError() {
return submoduleList
}
2024-07-26 16:53:09 +02:00
done.Lock()
data_in, data_out := ChanIO{make(chan byte, 256)}, ChanIO{make(chan byte, 70)}
go func() {
defer done.Unlock()
defer close(data_out.ch)
data_out.Write([]byte(commitId))
data_out.ch <- '\x00'
c, err := parseGitCommit(data_in.ch)
if err != nil {
e.Error = err
e.LogError("Error parsing git commit: %v", err)
return
}
data_out.Write([]byte(c.Tree))
data_out.ch <- '\x00'
tree, err := parseGitTree(data_in.ch)
if err != nil {
e.Error = err
e.LogError("Error parsing git tree: %v", err)
return
}
for _, te := range tree.items {
if te.isSubmodule() {
submoduleList[te.name] = te.hash
}
}
}()
cmd := exec.Command("/usr/bin/git", "cat-file", "--batch", "-Z")
cmd.Env = []string{
"GIT_CEILING_DIRECTORIES=" + e.GitPath,
"GIT_CONFIG_GLOBAL=/dev/null",
}
cmd.Dir = filepath.Join(e.GitPath, cwd)
cmd.Stdout = &data_in
cmd.Stdin = &data_out
cmd.Stderr = writeFunc(func(data []byte) (int, error) {
e.Logger.LogError("%s", data)
return len(data), nil
})
e.Log("command run: %v", cmd.Args)
e.Error = cmd.Run()
done.Lock()
return submoduleList
}
2024-07-15 19:19:34 +02:00
func (e *RequestHandler) GitSubmoduleCommitId(cwd, packageName, commitId string) (string, bool) {
2024-07-11 16:45:49 +02:00
if e.Error != nil {
return "", false
}
2024-07-14 23:56:37 +02:00
data_in, data_out := ChanIO{make(chan byte, 256)}, ChanIO{make(chan byte, 70)}
2024-07-15 19:19:34 +02:00
var subCommitId string
2024-07-14 23:56:37 +02:00
var foundLock sync.Mutex
foundLock.Lock()
2024-07-15 21:16:01 +02:00
e.Log("getting commit id '%s' from git at '%s' with packageName: %s", commitId, cwd, packageName)
2024-07-14 23:56:37 +02:00
go func() {
defer foundLock.Unlock()
defer close(data_out.ch)
2024-07-15 19:19:34 +02:00
data_out.Write([]byte(commitId))
data_out.ch <- '\x00'
2024-07-14 23:56:37 +02:00
c, err := parseGitCommit(data_in.ch)
if err != nil {
e.Error = err
e.LogError("Error parsing git commit: %v", err)
return
}
data_out.Write([]byte(c.Tree))
2024-07-15 19:19:34 +02:00
data_out.ch <- '\x00'
2024-07-14 23:56:37 +02:00
tree, err := parseGitTree(data_in.ch)
if err != nil {
e.Error = err
e.LogError("Error parsing git tree: %v", err)
return
}
for _, te := range tree.items {
2024-07-15 19:19:34 +02:00
if te.name == packageName && te.isSubmodule() {
subCommitId = te.hash
2024-07-14 23:56:37 +02:00
return
}
}
}()
cmd := exec.Command("/usr/bin/git", "cat-file", "--batch", "-Z")
2024-07-15 21:20:33 +02:00
cmd.Env = []string{
"GIT_CEILING_DIRECTORIES=" + e.GitPath,
"GIT_CONFIG_GLOBAL=/dev/null",
}
2024-07-11 16:45:49 +02:00
cmd.Dir = filepath.Join(e.GitPath, cwd)
2024-07-14 23:56:37 +02:00
cmd.Stdout = &data_in
cmd.Stdin = &data_out
2024-07-15 21:20:33 +02:00
cmd.Stderr = writeFunc(func(data []byte) (int, error) {
e.Logger.LogError("%s", data)
return len(data), nil
})
2024-07-16 07:14:12 +02:00
e.Log("command run: %v", cmd.Args)
e.Error = cmd.Run()
2024-07-11 16:45:49 +02:00
2024-07-14 23:56:37 +02:00
foundLock.Lock()
2024-07-15 19:19:34 +02:00
return subCommitId, len(subCommitId) == len(commitId)
2024-07-11 16:45:49 +02:00
}