runc/0003-bsc1221050-seccomp-patchbpf-always-include-native-ar.patch
Aleksa Sarai 25576708db Accepting request 1159948 from home:cyphar:docker
- Add upstream patch <https://github.com/opencontainers/runc/pull/4219> to
  properly fix -ENOSYS stub on ppc64le. bsc#1192051 bsc#1221050
  + 0001-bsc1221050-libct-seccomp-patchbpf-rm-duplicated-code.patch
  + 0002-bsc1221050-seccomp-patchbpf-rename-nativeArch-linuxA.patch
  + 0003-bsc1221050-seccomp-patchbpf-always-include-native-ar.patch

OBS-URL: https://build.opensuse.org/request/show/1159948
OBS-URL: https://build.opensuse.org/package/show/Virtualization:containers/runc?expand=0&rev=156
2024-03-21 03:51:32 +00:00

163 lines
6.7 KiB
Diff

From d8014991cb2555c17a8828d664a4b7b3924b1e9b Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <cyphar@cyphar.com>
Date: Wed, 13 Mar 2024 16:12:51 +1100
Subject: [PATCH 3/3] bsc1221050: seccomp: patchbpf: always include native
architecture in stub
(This is a backport of 376417ba7646f05ddb1efa8fe30e2a3b53cf673b.)
It turns out that on ppc64le (at least), Docker doesn't include any
architectures in the list of allowed architectures. libseccomp
interprets this as "just include the default architecture" but patchbpf
would return a no-op ENOSYS stub, which would lead to the exact issues
that commit 7a8d7162f9d7 ("seccomp: prepend -ENOSYS stub to all
filters") fixed for other architectures.
So, just always include the running architecture in the list. There's
no real downside.
SUSE-Bugs: 1192051 1221050
Ref: https://bugzilla.suse.com/show_bug.cgi?id=1192051#c6
Reported-by: Fabian Vogt <fvogt@suse.com>
Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
---
libcontainer/seccomp/patchbpf/enosys_linux.go | 22 +++++++--
.../seccomp/patchbpf/enosys_linux_test.go | 47 +++++++++++++++++--
2 files changed, 61 insertions(+), 8 deletions(-)
diff --git a/libcontainer/seccomp/patchbpf/enosys_linux.go b/libcontainer/seccomp/patchbpf/enosys_linux.go
index 1b67fda85c64..d459ba8792ca 100644
--- a/libcontainer/seccomp/patchbpf/enosys_linux.go
+++ b/libcontainer/seccomp/patchbpf/enosys_linux.go
@@ -224,16 +224,30 @@ type lastSyscallMap map[linuxAuditArch]map[libseccomp.ScmpArch]libseccomp.ScmpSy
// representation, but SCMP_ARCH_X32 means we have to track cases where the
// same architecture has different largest syscalls based on the mode.
func findLastSyscalls(config *configs.Seccomp) (lastSyscallMap, error) {
- lastSyscalls := make(lastSyscallMap)
- // Only loop over architectures which are present in the filter. Any other
- // architectures will get the libseccomp bad architecture action anyway.
+ scmpArchs := make(map[libseccomp.ScmpArch]struct{})
for _, ociArch := range config.Architectures {
arch, err := libseccomp.GetArchFromString(ociArch)
if err != nil {
return nil, fmt.Errorf("unable to validate seccomp architecture: %w", err)
}
+ scmpArchs[arch] = struct{}{}
+ }
+ // On architectures like ppc64le, Docker inexplicably doesn't include the
+ // native architecture in the architecture list, which results in no
+ // architectures being present in the list at all (rendering the ENOSYS
+ // stub a no-op). So, always include the native architecture.
+ if nativeScmpArch, err := libseccomp.GetNativeArch(); err != nil {
+ return nil, fmt.Errorf("unable to get native arch: %w", err)
+ } else if _, ok := scmpArchs[nativeScmpArch]; !ok {
+ logrus.Debugf("seccomp: adding implied native architecture %v to config set", nativeScmpArch)
+ scmpArchs[nativeScmpArch] = struct{}{}
+ }
+ logrus.Debugf("seccomp: configured architecture set: %s", scmpArchs)
- // Figure out native architecture representation of the architecture.
+ // Only loop over architectures which are present in the filter. Any other
+ // architectures will get the libseccomp bad architecture action anyway.
+ lastSyscalls := make(lastSyscallMap)
+ for arch := range scmpArchs {
auditArch, err := scmpArchToAuditArch(arch)
if err != nil {
return nil, fmt.Errorf("cannot map architecture %v to AUDIT_ARCH_ constant: %w", arch, err)
diff --git a/libcontainer/seccomp/patchbpf/enosys_linux_test.go b/libcontainer/seccomp/patchbpf/enosys_linux_test.go
index bdfeff68adb3..3d442e1daa66 100644
--- a/libcontainer/seccomp/patchbpf/enosys_linux_test.go
+++ b/libcontainer/seccomp/patchbpf/enosys_linux_test.go
@@ -12,6 +12,7 @@ import (
"github.com/opencontainers/runc/libcontainer/configs"
libseccomp "github.com/seccomp/libseccomp-golang"
+ "github.com/sirupsen/logrus"
"golang.org/x/net/bpf"
)
@@ -105,6 +106,18 @@ var testArches = []string{
"ppc64le",
"s390",
"s390x",
+ // Dummy value to indicate a configuration with no architecture specified.
+ "native",
+}
+
+var nativeArch string
+
+func init() {
+ scmpNativeArch, err := libseccomp.GetNativeArch()
+ if err != nil {
+ logrus.Panicf("get native arch: %v", err)
+ }
+ nativeArch = scmpNativeArch.String()
}
func testEnosysStub(t *testing.T, defaultAction configs.Action, arches []string) {
@@ -155,6 +168,9 @@ func testEnosysStub(t *testing.T, defaultAction configs.Action, arches []string)
expected uint32
}
+ if arch == "native" {
+ arch = nativeArch
+ }
scmpArch, err := libseccomp.GetArchFromString(arch)
if err != nil {
t.Fatalf("unknown libseccomp architecture %q: %v", arch, err)
@@ -228,8 +244,15 @@ func testEnosysStub(t *testing.T, defaultAction configs.Action, arches []string)
// Test syscalls in the explicit list.
for _, test := range syscallTests {
- // Override the expected value in the two special cases.
- if !archSet[arch] || isAllowAction(defaultAction) {
+ // Override the expected value in the two special cases:
+ // 1. If the default action is allow, the filter won't have
+ // the stub prepended so we expect a fallthrough.
+ // 2. If the executing architecture is not in the architecture
+ // set, then the architecture is not handled by the stub --
+ // *except* in the case of the native architecture (which
+ // is always included in the stub).
+ if isAllowAction(defaultAction) ||
+ (!archSet[arch] && arch != nativeArch) {
test.expected = retFallthrough
}
@@ -263,7 +286,14 @@ var testActions = map[string]configs.Action{
func TestEnosysStub_SingleArch(t *testing.T) {
for _, arch := range testArches {
- arches := []string{arch}
+ var arches []string
+ // "native" indicates a blank architecture field for seccomp, to test
+ // the case where the running architecture was not included in the
+ // architecture list. Docker doesn't always set the architecture for some
+ // reason (namely for ppc64le).
+ if arch != "native" {
+ arches = append(arches, arch)
+ }
t.Run("arch="+arch, func(t *testing.T) {
for name, action := range testActions {
t.Run("action="+name, func(t *testing.T) {
@@ -277,7 +307,16 @@ func TestEnosysStub_SingleArch(t *testing.T) {
func TestEnosysStub_MultiArch(t *testing.T) {
for end := 0; end < len(testArches); end++ {
for start := 0; start < end; start++ {
- arches := testArches[start:end]
+ var arches []string
+ for _, arch := range testArches[start:end] {
+ // "native" indicates a blank architecture field for seccomp, to test
+ // the case where the running architecture was not included in the
+ // architecture list. Docker doesn't always set the architecture for some
+ // reason (namely for ppc64le).
+ if arch != "native" {
+ arches = append(arches, arch)
+ }
+ }
if len(arches) <= 1 {
continue
}
--
2.44.0