Compare commits

..

3 Commits

Author SHA256 Message Date
572e33111b workflow-pr: fix race conditions in event processing
All checks were successful
go-generate-check / go-generate-check (pull_request) Successful in 12s
Integration tests / t (pull_request) Successful in 11m31s
Three targeted fixes for the reviewer add/remove race condition:

1. Per-PR serialization lock (prLocks sync.Map)
   Both the RabbitMQ event loop and the ConsistencyCheckProcess goroutine
   converge on ProcesPullRequest with no mutual exclusion, allowing them
   to race on AssignReviewers/UnrequestReview for the same PR.  A
   buffered-channel-per-PR-key mutex serialises all processing for a
   given PR regardless of which goroutine triggers it.

2. Self-triggered pull_request_sync filter
   The bot's own pushes to prjgit branches cause Gitea to fire a
   pull_request_sync event back, which would re-run AssignReviewers on
   already-settled reviewer state, producing spurious add/remove cycles.
   Events where Sender.Username matches the bot's own account are now
   dropped early in ProcessFunc.

3. Requeue retry moved to background goroutine
   The updatePrjGitError_requeue path was sleeping 5 s inside the event
   loop goroutine (blocking all further event processing) then calling
   ProcessFunc recursively.  It now schedules the retry in a goroutine;
   the per-PR lock prevents the retry from racing with events that arrive
   during the sleep window.  The recursive counter is removed as it only
   guarded this path.
2026-03-10 16:24:05 +01:00
b04755c667 Merge branch 'main' into refactoring-make
Some checks failed
Integration tests / t (pull_request) Failing after 11m36s
Integration tests / t (push) Failing after 11m46s
2026-03-10 12:30:20 +01:00
1fc0be5f60 make: refactor build target into per-module dependencies
Some checks failed
Integration tests / t (pull_request) Has been cancelled
- Replace shell loop in build with explicit module targets
- Add .PHONY for build and module targets
- Keep go build -C <module> -buildmode=pie behavior unchanged
- Enable parallel builds via make -jN build
2026-03-06 11:33:32 +01:00
5 changed files with 54 additions and 36 deletions

View File

@@ -1,4 +1,8 @@
MODULES := devel-importer utils/hujson utils/maintainer-update gitea-events-rabbitmq-publisher gitea_status_proxy group-review obs-forward-bot obs-staging-bot obs-status-service workflow-direct workflow-pr
build:
for m in $(MODULES); do go build -C $$m -buildmode=pie || exit 1 ; done
.PHONY: build $(MODULES)
build: $(MODULES)
$(MODULES):
go build -C $@ -buildmode=pie

View File

@@ -1,7 +0,0 @@
#!BuildTag: openbuildservice/gwf-test-basecontainer
#!UseOBSRepositories
FROM registry.suse.com/bci/bci-base:15.7
RUN zypper -n install binutils gawk git git-core git-lfs jq make openssh openssh-clients sqlite3 vim which
RUN zypper -n install autogits-gitea-events-rabbitmq-publisher autogits-obs-staging-bot autogits-workflow-pr gitea

View File

@@ -1,5 +0,0 @@
#!BuildTag: openbuildservice/gwf-client
#!UseOBSRepositories
FROM opensuse/tumbleweed
RUN zypper -n install podman podman-compose vim make python3-pytest python3-requests python3-pytest-dependency python3-pytest-httpserver

View File

@@ -8,6 +8,7 @@ import (
"runtime/debug"
"slices"
"strings"
"sync"
"time"
"github.com/opentracing/opentracing-go/log"
@@ -628,9 +629,29 @@ func (pr *PRProcessor) Process(req *models.PullRequest) error {
return err
}
// prLocks serialises concurrent processing of the same PR.
// Both the RabbitMQ event loop and the consistency-checker goroutine call
// ProcesPullRequest; without this lock they can race on reviewer add/remove.
// Key format: "org/repo#num"
var prLocks sync.Map // map[string]chan struct{}
func prLockKey(pr *models.PullRequest) string {
return fmt.Sprintf("%s/%s#%d", pr.Base.Repo.Owner.UserName, pr.Base.Repo.Name, pr.Index)
}
func acquirePRLock(key string) chan struct{} {
v, _ := prLocks.LoadOrStore(key, make(chan struct{}, 1))
ch := v.(chan struct{})
ch <- struct{}{}
return ch
}
func releasePRLock(ch chan struct{}) {
<-ch
}
type RequestProcessor struct {
configuredRepos map[string][]*common.AutogitConfig
recursive int
}
func (w *RequestProcessor) Process(pr *models.PullRequest) error {
@@ -647,6 +668,9 @@ func ProcesPullRequest(pr *models.PullRequest, configs []*common.AutogitConfig)
return nil
}
lock := acquirePRLock(prLockKey(pr))
defer releasePRLock(lock)
PRProcessor, err := AllocatePRProcessor(pr, configs)
if err != nil {
log.Error(err)
@@ -663,17 +687,23 @@ func (w *RequestProcessor) ProcessFunc(request *common.Request) (err error) {
common.LogInfo("panic cought --- recovered")
common.LogError(string(debug.Stack()))
}
w.recursive--
}()
w.recursive++
if w.recursive > 3 {
common.LogError("Recursion limit reached... something is wrong with this PR?")
return nil
}
var pr *models.PullRequest
if req, ok := request.Data.(*common.PullRequestWebhookEvent); ok {
// Skip pull_request_sync events triggered by the bot's own pushes to
// prjgit branches. Those would re-run AssignReviewers immediately
// after the bot itself just set them, producing spurious add/remove
// cycles. Human-triggered syncs have a different sender and are still
// processed normally.
if request.Type == common.RequestType_PRSync && CurrentUser != nil &&
req.Sender.Username == CurrentUser.UserName {
common.LogDebug("Skipping self-triggered pull_request_sync from", req.Sender.Username,
"on", req.Pull_Request.Base.Repo.Owner.Username+"/"+req.Pull_Request.Base.Repo.Name,
"#", req.Pull_Request.Number)
return nil
}
pr, err = Gitea.GetPullRequest(req.Pull_Request.Base.Repo.Owner.Username, req.Pull_Request.Base.Repo.Name, req.Pull_Request.Number)
if err != nil {
common.LogError("Cannot find PR for issue:", req.Pull_Request.Base.Repo.Owner.Username, req.Pull_Request.Base.Repo.Name, req.Pull_Request.Number)
@@ -710,8 +740,16 @@ func (w *RequestProcessor) ProcessFunc(request *common.Request) (err error) {
common.LogError("*** Cannot find config for org:", pr.Base.Repo.Owner.UserName)
}
if err = ProcesPullRequest(pr, configs); err == updatePrjGitError_requeue {
time.Sleep(time.Second * 5)
return w.ProcessFunc(request)
// Retry after a delay in a background goroutine so the event loop is
// not blocked while we wait. The per-PR lock inside ProcesPullRequest
// ensures no other processing races with the retry.
go func() {
time.Sleep(time.Second * 5)
if err := ProcesPullRequest(pr, configs); err != nil {
common.LogError("requeue retry failed:", err)
}
}()
return nil
}
return err
}

View File

@@ -989,18 +989,6 @@ func TestProcessFunc(t *testing.T) {
}
})
t.Run("Recursion limit", func(t *testing.T) {
reqProc.recursive = 3
err := reqProc.ProcessFunc(&common.Request{})
if err != nil {
t.Errorf("Expected nil error on recursion limit, got %v", err)
}
if reqProc.recursive != 3 {
t.Errorf("Expected recursive to be 3, got %d", reqProc.recursive)
}
reqProc.recursive = 0 // Reset
})
t.Run("Invalid data format", func(t *testing.T) {
err := reqProc.ProcessFunc(&common.Request{Data: nil})
if err == nil || !strings.Contains(err.Error(), "Invalid data format") {