Sync from SUSE:SLFO:Main runc revision 2133c931be9a58b516e6632c4b6eabdc
This commit is contained in:
parent
863c656407
commit
c92af6636d
@ -0,0 +1,44 @@
|
||||
From 22eb87a32dc1c685425b685e96e8472b9ac1b5ca Mon Sep 17 00:00:00 2001
|
||||
From: Kir Kolyshkin <kolyshkin@gmail.com>
|
||||
Date: Fri, 14 Oct 2022 18:37:00 -0700
|
||||
Subject: [PATCH 1/4] bsc1221050: libct/seccomp/patchbpf: rm duplicated code
|
||||
|
||||
(This is a cherry-pick of 2cd05e44b662fb79c46d5ebfd6c71e9ebc98d40c.)
|
||||
|
||||
In findLastSyscalls, we convert libseccomp.ArchNative to the real
|
||||
libseccomp architecture, but archToNative already does that, so
|
||||
this code is redundant.
|
||||
|
||||
Remove the redundant code, and move its comment to archToNative.
|
||||
|
||||
Fixes: 7a8d7162f
|
||||
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
|
||||
Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
|
||||
---
|
||||
libcontainer/seccomp/patchbpf/enosys_linux.go | 10 ----------
|
||||
1 file changed, 10 deletions(-)
|
||||
|
||||
diff --git a/libcontainer/seccomp/patchbpf/enosys_linux.go b/libcontainer/seccomp/patchbpf/enosys_linux.go
|
||||
index efe6dca58b21..c9c1d4ccb685 100644
|
||||
--- a/libcontainer/seccomp/patchbpf/enosys_linux.go
|
||||
+++ b/libcontainer/seccomp/patchbpf/enosys_linux.go
|
||||
@@ -233,16 +233,6 @@ func findLastSyscalls(config *configs.Seccomp) (lastSyscallMap, error) {
|
||||
return nil, fmt.Errorf("unable to validate seccomp architecture: %w", err)
|
||||
}
|
||||
|
||||
- // Map native architecture to a real architecture value to avoid
|
||||
- // doubling-up the lastSyscall mapping.
|
||||
- if arch == libseccomp.ArchNative {
|
||||
- nativeArch, err := libseccomp.GetNativeArch()
|
||||
- if err != nil {
|
||||
- return nil, fmt.Errorf("unable to get native architecture: %w", err)
|
||||
- }
|
||||
- arch = nativeArch
|
||||
- }
|
||||
-
|
||||
// Figure out native architecture representation of the architecture.
|
||||
nativeArch, err := archToNative(arch)
|
||||
if err != nil {
|
||||
--
|
||||
2.46.0
|
||||
|
289
0002-bsc1221050-seccomp-patchbpf-rename-nativeArch-linuxA.patch
Normal file
289
0002-bsc1221050-seccomp-patchbpf-rename-nativeArch-linuxA.patch
Normal file
@ -0,0 +1,289 @@
|
||||
From 558c5ecf487a40001ba854cfcbd5c94223167501 Mon Sep 17 00:00:00 2001
|
||||
From: Aleksa Sarai <cyphar@cyphar.com>
|
||||
Date: Wed, 13 Mar 2024 13:40:16 +1100
|
||||
Subject: [PATCH 2/4] bsc1221050: seccomp: patchbpf: rename nativeArch ->
|
||||
linuxAuditArch
|
||||
|
||||
(This is a backport of 6167f5ffc3e3fd53e6a41a2effa592a4873ad046.)
|
||||
|
||||
Calling the Linux AUDIT_* architecture constants "native" leads to
|
||||
confusing code when we are getting the actual native architecture of the
|
||||
running system.
|
||||
|
||||
Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
|
||||
---
|
||||
libcontainer/seccomp/patchbpf/enosys_linux.go | 81 ++++++++++---------
|
||||
.../seccomp/patchbpf/enosys_linux_test.go | 16 ++--
|
||||
2 files changed, 49 insertions(+), 48 deletions(-)
|
||||
|
||||
diff --git a/libcontainer/seccomp/patchbpf/enosys_linux.go b/libcontainer/seccomp/patchbpf/enosys_linux.go
|
||||
index c9c1d4ccb685..1b67fda85c64 100644
|
||||
--- a/libcontainer/seccomp/patchbpf/enosys_linux.go
|
||||
+++ b/libcontainer/seccomp/patchbpf/enosys_linux.go
|
||||
@@ -164,11 +164,11 @@ func disassembleFilter(filter *libseccomp.ScmpFilter) ([]bpf.Instruction, error)
|
||||
return program, nil
|
||||
}
|
||||
|
||||
-type nativeArch uint32
|
||||
+type linuxAuditArch uint32
|
||||
|
||||
-const invalidArch nativeArch = 0
|
||||
+const invalidArch linuxAuditArch = 0
|
||||
|
||||
-func archToNative(arch libseccomp.ScmpArch) (nativeArch, error) {
|
||||
+func scmpArchToAuditArch(arch libseccomp.ScmpArch) (linuxAuditArch, error) {
|
||||
switch arch {
|
||||
case libseccomp.ArchNative:
|
||||
// Convert to actual native architecture.
|
||||
@@ -176,48 +176,48 @@ func archToNative(arch libseccomp.ScmpArch) (nativeArch, error) {
|
||||
if err != nil {
|
||||
return invalidArch, fmt.Errorf("unable to get native arch: %w", err)
|
||||
}
|
||||
- return archToNative(arch)
|
||||
+ return scmpArchToAuditArch(arch)
|
||||
case libseccomp.ArchX86:
|
||||
- return nativeArch(C.C_AUDIT_ARCH_I386), nil
|
||||
+ return linuxAuditArch(C.C_AUDIT_ARCH_I386), nil
|
||||
case libseccomp.ArchAMD64, libseccomp.ArchX32:
|
||||
// NOTE: x32 is treated like x86_64 except all x32 syscalls have the
|
||||
// 30th bit of the syscall number set to indicate that it's not a
|
||||
// normal x86_64 syscall.
|
||||
- return nativeArch(C.C_AUDIT_ARCH_X86_64), nil
|
||||
+ return linuxAuditArch(C.C_AUDIT_ARCH_X86_64), nil
|
||||
case libseccomp.ArchARM:
|
||||
- return nativeArch(C.C_AUDIT_ARCH_ARM), nil
|
||||
+ return linuxAuditArch(C.C_AUDIT_ARCH_ARM), nil
|
||||
case libseccomp.ArchARM64:
|
||||
- return nativeArch(C.C_AUDIT_ARCH_AARCH64), nil
|
||||
+ return linuxAuditArch(C.C_AUDIT_ARCH_AARCH64), nil
|
||||
case libseccomp.ArchMIPS:
|
||||
- return nativeArch(C.C_AUDIT_ARCH_MIPS), nil
|
||||
+ return linuxAuditArch(C.C_AUDIT_ARCH_MIPS), nil
|
||||
case libseccomp.ArchMIPS64:
|
||||
- return nativeArch(C.C_AUDIT_ARCH_MIPS64), nil
|
||||
+ return linuxAuditArch(C.C_AUDIT_ARCH_MIPS64), nil
|
||||
case libseccomp.ArchMIPS64N32:
|
||||
- return nativeArch(C.C_AUDIT_ARCH_MIPS64N32), nil
|
||||
+ return linuxAuditArch(C.C_AUDIT_ARCH_MIPS64N32), nil
|
||||
case libseccomp.ArchMIPSEL:
|
||||
- return nativeArch(C.C_AUDIT_ARCH_MIPSEL), nil
|
||||
+ return linuxAuditArch(C.C_AUDIT_ARCH_MIPSEL), nil
|
||||
case libseccomp.ArchMIPSEL64:
|
||||
- return nativeArch(C.C_AUDIT_ARCH_MIPSEL64), nil
|
||||
+ return linuxAuditArch(C.C_AUDIT_ARCH_MIPSEL64), nil
|
||||
case libseccomp.ArchMIPSEL64N32:
|
||||
- return nativeArch(C.C_AUDIT_ARCH_MIPSEL64N32), nil
|
||||
+ return linuxAuditArch(C.C_AUDIT_ARCH_MIPSEL64N32), nil
|
||||
case libseccomp.ArchPPC:
|
||||
- return nativeArch(C.C_AUDIT_ARCH_PPC), nil
|
||||
+ return linuxAuditArch(C.C_AUDIT_ARCH_PPC), nil
|
||||
case libseccomp.ArchPPC64:
|
||||
- return nativeArch(C.C_AUDIT_ARCH_PPC64), nil
|
||||
+ return linuxAuditArch(C.C_AUDIT_ARCH_PPC64), nil
|
||||
case libseccomp.ArchPPC64LE:
|
||||
- return nativeArch(C.C_AUDIT_ARCH_PPC64LE), nil
|
||||
+ return linuxAuditArch(C.C_AUDIT_ARCH_PPC64LE), nil
|
||||
case libseccomp.ArchS390:
|
||||
- return nativeArch(C.C_AUDIT_ARCH_S390), nil
|
||||
+ return linuxAuditArch(C.C_AUDIT_ARCH_S390), nil
|
||||
case libseccomp.ArchS390X:
|
||||
- return nativeArch(C.C_AUDIT_ARCH_S390X), nil
|
||||
+ return linuxAuditArch(C.C_AUDIT_ARCH_S390X), nil
|
||||
case libseccomp.ArchRISCV64:
|
||||
- return nativeArch(C.C_AUDIT_ARCH_RISCV64), nil
|
||||
+ return linuxAuditArch(C.C_AUDIT_ARCH_RISCV64), nil
|
||||
default:
|
||||
return invalidArch, fmt.Errorf("unknown architecture: %v", arch)
|
||||
}
|
||||
}
|
||||
|
||||
-type lastSyscallMap map[nativeArch]map[libseccomp.ScmpArch]libseccomp.ScmpSyscall
|
||||
+type lastSyscallMap map[linuxAuditArch]map[libseccomp.ScmpArch]libseccomp.ScmpSyscall
|
||||
|
||||
// Figure out largest syscall number referenced in the filter for each
|
||||
// architecture. We will be generating code based on the native architecture
|
||||
@@ -234,17 +234,17 @@ func findLastSyscalls(config *configs.Seccomp) (lastSyscallMap, error) {
|
||||
}
|
||||
|
||||
// Figure out native architecture representation of the architecture.
|
||||
- nativeArch, err := archToNative(arch)
|
||||
+ auditArch, err := scmpArchToAuditArch(arch)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot map architecture %v to AUDIT_ARCH_ constant: %w", arch, err)
|
||||
}
|
||||
|
||||
- if _, ok := lastSyscalls[nativeArch]; !ok {
|
||||
- lastSyscalls[nativeArch] = map[libseccomp.ScmpArch]libseccomp.ScmpSyscall{}
|
||||
+ if _, ok := lastSyscalls[auditArch]; !ok {
|
||||
+ lastSyscalls[auditArch] = map[libseccomp.ScmpArch]libseccomp.ScmpSyscall{}
|
||||
}
|
||||
- if _, ok := lastSyscalls[nativeArch][arch]; ok {
|
||||
+ if _, ok := lastSyscalls[auditArch][arch]; ok {
|
||||
// Because of ArchNative we may hit the same entry multiple times.
|
||||
- // Just skip it if we've seen this (nativeArch, ScmpArch)
|
||||
+ // Just skip it if we've seen this (linuxAuditArch, ScmpArch)
|
||||
// combination before.
|
||||
continue
|
||||
}
|
||||
@@ -262,10 +262,11 @@ func findLastSyscalls(config *configs.Seccomp) (lastSyscallMap, error) {
|
||||
}
|
||||
}
|
||||
if largestSyscall != 0 {
|
||||
- lastSyscalls[nativeArch][arch] = largestSyscall
|
||||
+ logrus.Debugf("seccomp: largest syscall number for arch %v is %v", arch, largestSyscall)
|
||||
+ lastSyscalls[auditArch][arch] = largestSyscall
|
||||
} else {
|
||||
- logrus.Warnf("could not find any syscalls for arch %s", ociArch)
|
||||
- delete(lastSyscalls[nativeArch], arch)
|
||||
+ logrus.Warnf("could not find any syscalls for arch %v", arch)
|
||||
+ delete(lastSyscalls[auditArch], arch)
|
||||
}
|
||||
}
|
||||
return lastSyscalls, nil
|
||||
@@ -283,10 +284,10 @@ func findLastSyscalls(config *configs.Seccomp) (lastSyscallMap, error) {
|
||||
// close_range(2) which were added out-of-order in the syscall table between
|
||||
// kernel releases.
|
||||
func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error) {
|
||||
- // A jump-table for each nativeArch used to generate the initial
|
||||
+ // A jump-table for each linuxAuditArch used to generate the initial
|
||||
// conditional jumps -- measured from the *END* of the program so they
|
||||
// remain valid after prepending to the tail.
|
||||
- archJumpTable := map[nativeArch]uint32{}
|
||||
+ archJumpTable := map[linuxAuditArch]uint32{}
|
||||
|
||||
// Generate our own -ENOSYS rules for each architecture. They have to be
|
||||
// generated in reverse (prepended to the tail of the program) because the
|
||||
@@ -299,7 +300,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
|
||||
}
|
||||
|
||||
// Generate the syscall -ENOSYS rules.
|
||||
- for nativeArch, maxSyscalls := range lastSyscalls {
|
||||
+ for auditArch, maxSyscalls := range lastSyscalls {
|
||||
// The number of instructions from the tail of this section which need
|
||||
// to be jumped in order to reach the -ENOSYS return. If the section
|
||||
// does not jump, it will fall through to the actual filter.
|
||||
@@ -380,7 +381,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
|
||||
|
||||
// If we're on x86 we need to add a check for x32 and if we're in
|
||||
// the wrong mode we jump over the section.
|
||||
- if uint32(nativeArch) == uint32(C.C_AUDIT_ARCH_X86_64) {
|
||||
+ if uint32(auditArch) == uint32(C.C_AUDIT_ARCH_X86_64) {
|
||||
// Generate a prefix to check the mode.
|
||||
switch scmpArch {
|
||||
case libseccomp.ArchAMD64:
|
||||
@@ -409,8 +410,8 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
|
||||
section = append(section, sectionTail...)
|
||||
case 2:
|
||||
// x32 and x86_64 are a unique case, we can't handle any others.
|
||||
- if uint32(nativeArch) != uint32(C.C_AUDIT_ARCH_X86_64) {
|
||||
- return nil, fmt.Errorf("unknown architecture overlap on native arch %#x", nativeArch)
|
||||
+ if uint32(auditArch) != uint32(C.C_AUDIT_ARCH_X86_64) {
|
||||
+ return nil, fmt.Errorf("unknown architecture overlap on native arch %#x", auditArch)
|
||||
}
|
||||
|
||||
x32sysno, ok := maxSyscalls[libseccomp.ArchX32]
|
||||
@@ -487,7 +488,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
|
||||
programTail = append(section, programTail...)
|
||||
|
||||
// Update jump table.
|
||||
- archJumpTable[nativeArch] = uint32(len(programTail))
|
||||
+ archJumpTable[auditArch] = uint32(len(programTail))
|
||||
}
|
||||
|
||||
// Add a dummy "jump to filter" for any architecture we might miss below.
|
||||
@@ -507,9 +508,9 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
|
||||
// architectures based on how large the jumps are going to be, or
|
||||
// re-sort the candidate architectures each time to make sure that we
|
||||
// pick the largest jump which is going to be smaller than 255.
|
||||
- for nativeArch := range lastSyscalls {
|
||||
+ for auditArch := range lastSyscalls {
|
||||
// We jump forwards but the jump table is calculated from the *END*.
|
||||
- jump := uint32(len(programTail)) - archJumpTable[nativeArch]
|
||||
+ jump := uint32(len(programTail)) - archJumpTable[auditArch]
|
||||
|
||||
// Same routine as above -- this is a basic jeq check, complicated
|
||||
// slightly if it turns out that we need to do a long jump.
|
||||
@@ -518,7 +519,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
|
||||
// jeq [arch],[jump]
|
||||
bpf.JumpIf{
|
||||
Cond: bpf.JumpEqual,
|
||||
- Val: uint32(nativeArch),
|
||||
+ Val: uint32(auditArch),
|
||||
SkipTrue: uint8(jump),
|
||||
},
|
||||
}, programTail...)
|
||||
@@ -527,7 +528,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
|
||||
// jne [arch],1
|
||||
bpf.JumpIf{
|
||||
Cond: bpf.JumpNotEqual,
|
||||
- Val: uint32(nativeArch),
|
||||
+ Val: uint32(auditArch),
|
||||
SkipTrue: 1,
|
||||
},
|
||||
// ja [jump]
|
||||
diff --git a/libcontainer/seccomp/patchbpf/enosys_linux_test.go b/libcontainer/seccomp/patchbpf/enosys_linux_test.go
|
||||
index e2d363a43bd3..bdfeff68adb3 100644
|
||||
--- a/libcontainer/seccomp/patchbpf/enosys_linux_test.go
|
||||
+++ b/libcontainer/seccomp/patchbpf/enosys_linux_test.go
|
||||
@@ -23,7 +23,7 @@ type seccompData struct {
|
||||
}
|
||||
|
||||
// mockSyscallPayload creates a fake seccomp_data struct with the given data.
|
||||
-func mockSyscallPayload(t *testing.T, sysno libseccomp.ScmpSyscall, arch nativeArch, args ...uint64) []byte {
|
||||
+func mockSyscallPayload(t *testing.T, sysno libseccomp.ScmpSyscall, arch linuxAuditArch, args ...uint64) []byte {
|
||||
var buf bytes.Buffer
|
||||
|
||||
data := seccompData{
|
||||
@@ -150,8 +150,8 @@ func testEnosysStub(t *testing.T, defaultAction configs.Action, arches []string)
|
||||
|
||||
for _, arch := range testArches {
|
||||
type syscallTest struct {
|
||||
- syscall string
|
||||
sysno libseccomp.ScmpSyscall
|
||||
+ syscall string
|
||||
expected uint32
|
||||
}
|
||||
|
||||
@@ -160,7 +160,7 @@ func testEnosysStub(t *testing.T, defaultAction configs.Action, arches []string)
|
||||
t.Fatalf("unknown libseccomp architecture %q: %v", arch, err)
|
||||
}
|
||||
|
||||
- nativeArch, err := archToNative(scmpArch)
|
||||
+ auditArch, err := scmpArchToAuditArch(scmpArch)
|
||||
if err != nil {
|
||||
t.Fatalf("unknown audit architecture %q: %v", arch, err)
|
||||
}
|
||||
@@ -179,9 +179,9 @@ func testEnosysStub(t *testing.T, defaultAction configs.Action, arches []string)
|
||||
t.Fatalf("unknown syscall %q on arch %q: %v", syscall, arch, err)
|
||||
}
|
||||
syscallTests = append(syscallTests, syscallTest{
|
||||
- syscall,
|
||||
- sysno,
|
||||
- expected,
|
||||
+ sysno: sysno,
|
||||
+ syscall: syscall,
|
||||
+ expected: expected,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -233,7 +233,7 @@ func testEnosysStub(t *testing.T, defaultAction configs.Action, arches []string)
|
||||
test.expected = retFallthrough
|
||||
}
|
||||
|
||||
- payload := mockSyscallPayload(t, test.sysno, nativeArch, 0x1337, 0xF00BA5)
|
||||
+ payload := mockSyscallPayload(t, test.sysno, auditArch, 0x1337, 0xF00BA5)
|
||||
// NOTE: golang.org/x/net/bpf returns int here rather
|
||||
// than uint32.
|
||||
rawRet, err := filter.Run(payload)
|
||||
@@ -247,7 +247,7 @@ func testEnosysStub(t *testing.T, defaultAction configs.Action, arches []string)
|
||||
t.Logf(" [%4.1d] %s", idx, insn)
|
||||
}
|
||||
t.Logf("payload: %#v", payload)
|
||||
- t.Errorf("filter %s(%d) %q(%d): got %#x, want %#x", arch, nativeArch, test.syscall, test.sysno, ret, test.expected)
|
||||
+ t.Errorf("filter %s(%d) %q(%d): got %#x, want %#x", arch, auditArch, test.syscall, test.sysno, ret, test.expected)
|
||||
}
|
||||
}
|
||||
}
|
||||
--
|
||||
2.46.0
|
||||
|
162
0003-bsc1221050-seccomp-patchbpf-always-include-native-ar.patch
Normal file
162
0003-bsc1221050-seccomp-patchbpf-always-include-native-ar.patch
Normal file
@ -0,0 +1,162 @@
|
||||
From a1e9b2e4015a6b548a0d3e004bf27dd2e3f2cf35 Mon Sep 17 00:00:00 2001
|
||||
From: Aleksa Sarai <cyphar@cyphar.com>
|
||||
Date: Wed, 13 Mar 2024 16:12:51 +1100
|
||||
Subject: [PATCH 3/4] bsc1221050: seccomp: patchbpf: always include native
|
||||
architecture in stub
|
||||
|
||||
(This is a backport of 376417ba7646f05ddb1efa8fe30e2a3b53cf673b.)
|
||||
|
||||
It turns out that on ppc64le (at least), Docker doesn't include any
|
||||
architectures in the list of allowed architectures. libseccomp
|
||||
interprets this as "just include the default architecture" but patchbpf
|
||||
would return a no-op ENOSYS stub, which would lead to the exact issues
|
||||
that commit 7a8d7162f9d7 ("seccomp: prepend -ENOSYS stub to all
|
||||
filters") fixed for other architectures.
|
||||
|
||||
So, just always include the running architecture in the list. There's
|
||||
no real downside.
|
||||
|
||||
SUSE-Bugs: 1192051 1221050
|
||||
Ref: https://bugzilla.suse.com/show_bug.cgi?id=1192051#c6
|
||||
Reported-by: Fabian Vogt <fvogt@suse.com>
|
||||
Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
|
||||
---
|
||||
libcontainer/seccomp/patchbpf/enosys_linux.go | 22 +++++++--
|
||||
.../seccomp/patchbpf/enosys_linux_test.go | 47 +++++++++++++++++--
|
||||
2 files changed, 61 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/libcontainer/seccomp/patchbpf/enosys_linux.go b/libcontainer/seccomp/patchbpf/enosys_linux.go
|
||||
index 1b67fda85c64..d459ba8792ca 100644
|
||||
--- a/libcontainer/seccomp/patchbpf/enosys_linux.go
|
||||
+++ b/libcontainer/seccomp/patchbpf/enosys_linux.go
|
||||
@@ -224,16 +224,30 @@ type lastSyscallMap map[linuxAuditArch]map[libseccomp.ScmpArch]libseccomp.ScmpSy
|
||||
// representation, but SCMP_ARCH_X32 means we have to track cases where the
|
||||
// same architecture has different largest syscalls based on the mode.
|
||||
func findLastSyscalls(config *configs.Seccomp) (lastSyscallMap, error) {
|
||||
- lastSyscalls := make(lastSyscallMap)
|
||||
- // Only loop over architectures which are present in the filter. Any other
|
||||
- // architectures will get the libseccomp bad architecture action anyway.
|
||||
+ scmpArchs := make(map[libseccomp.ScmpArch]struct{})
|
||||
for _, ociArch := range config.Architectures {
|
||||
arch, err := libseccomp.GetArchFromString(ociArch)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to validate seccomp architecture: %w", err)
|
||||
}
|
||||
+ scmpArchs[arch] = struct{}{}
|
||||
+ }
|
||||
+ // On architectures like ppc64le, Docker inexplicably doesn't include the
|
||||
+ // native architecture in the architecture list which results in no
|
||||
+ // architectures being present in the list at all (rendering the ENOSYS
|
||||
+ // stub a no-op). So, always include the native architecture.
|
||||
+ if nativeScmpArch, err := libseccomp.GetNativeArch(); err != nil {
|
||||
+ return nil, fmt.Errorf("unable to get native arch: %w", err)
|
||||
+ } else if _, ok := scmpArchs[nativeScmpArch]; !ok {
|
||||
+ logrus.Debugf("seccomp: adding implied native architecture %v to config set", nativeScmpArch)
|
||||
+ scmpArchs[nativeScmpArch] = struct{}{}
|
||||
+ }
|
||||
+ logrus.Debugf("seccomp: configured architecture set: %s", scmpArchs)
|
||||
|
||||
- // Figure out native architecture representation of the architecture.
|
||||
+ // Only loop over architectures which are present in the filter. Any other
|
||||
+ // architectures will get the libseccomp bad architecture action anyway.
|
||||
+ lastSyscalls := make(lastSyscallMap)
|
||||
+ for arch := range scmpArchs {
|
||||
auditArch, err := scmpArchToAuditArch(arch)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot map architecture %v to AUDIT_ARCH_ constant: %w", arch, err)
|
||||
diff --git a/libcontainer/seccomp/patchbpf/enosys_linux_test.go b/libcontainer/seccomp/patchbpf/enosys_linux_test.go
|
||||
index bdfeff68adb3..3d442e1daa66 100644
|
||||
--- a/libcontainer/seccomp/patchbpf/enosys_linux_test.go
|
||||
+++ b/libcontainer/seccomp/patchbpf/enosys_linux_test.go
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
|
||||
libseccomp "github.com/seccomp/libseccomp-golang"
|
||||
+ "github.com/sirupsen/logrus"
|
||||
"golang.org/x/net/bpf"
|
||||
)
|
||||
|
||||
@@ -105,6 +106,18 @@ var testArches = []string{
|
||||
"ppc64le",
|
||||
"s390",
|
||||
"s390x",
|
||||
+ // Dummy value to indicate a configuration with no architecture specified.
|
||||
+ "native",
|
||||
+}
|
||||
+
|
||||
+var nativeArch string
|
||||
+
|
||||
+func init() {
|
||||
+ scmpNativeArch, err := libseccomp.GetNativeArch()
|
||||
+ if err != nil {
|
||||
+ logrus.Panicf("get native arch: %v", err)
|
||||
+ }
|
||||
+ nativeArch = scmpNativeArch.String()
|
||||
}
|
||||
|
||||
func testEnosysStub(t *testing.T, defaultAction configs.Action, arches []string) {
|
||||
@@ -155,6 +168,9 @@ func testEnosysStub(t *testing.T, defaultAction configs.Action, arches []string)
|
||||
expected uint32
|
||||
}
|
||||
|
||||
+ if arch == "native" {
|
||||
+ arch = nativeArch
|
||||
+ }
|
||||
scmpArch, err := libseccomp.GetArchFromString(arch)
|
||||
if err != nil {
|
||||
t.Fatalf("unknown libseccomp architecture %q: %v", arch, err)
|
||||
@@ -228,8 +244,15 @@ func testEnosysStub(t *testing.T, defaultAction configs.Action, arches []string)
|
||||
|
||||
// Test syscalls in the explicit list.
|
||||
for _, test := range syscallTests {
|
||||
- // Override the expected value in the two special cases.
|
||||
- if !archSet[arch] || isAllowAction(defaultAction) {
|
||||
+ // Override the expected value in the two special cases:
|
||||
+ // 1. If the default action is allow, the filter won't have
|
||||
+ // the stub prepended so we expect a fallthrough.
|
||||
+ // 2. If the executing architecture is not in the architecture
|
||||
+ // set, then the architecture is not handled by the stub --
|
||||
+ // *except* in the case of the native architecture (which
|
||||
+ // is always included in the stub).
|
||||
+ if isAllowAction(defaultAction) ||
|
||||
+ (!archSet[arch] && arch != nativeArch) {
|
||||
test.expected = retFallthrough
|
||||
}
|
||||
|
||||
@@ -263,7 +286,14 @@ var testActions = map[string]configs.Action{
|
||||
|
||||
func TestEnosysStub_SingleArch(t *testing.T) {
|
||||
for _, arch := range testArches {
|
||||
- arches := []string{arch}
|
||||
+ var arches []string
|
||||
+ // "native" indicates a blank architecture field for seccomp, to test
|
||||
+ // the case where the running architecture was not included in the
|
||||
+ // architecture list. Docker doesn't always set the architecture for some
|
||||
+ // reason (namely for ppc64le).
|
||||
+ if arch != "native" {
|
||||
+ arches = append(arches, arch)
|
||||
+ }
|
||||
t.Run("arch="+arch, func(t *testing.T) {
|
||||
for name, action := range testActions {
|
||||
t.Run("action="+name, func(t *testing.T) {
|
||||
@@ -277,7 +307,16 @@ func TestEnosysStub_SingleArch(t *testing.T) {
|
||||
func TestEnosysStub_MultiArch(t *testing.T) {
|
||||
for end := 0; end < len(testArches); end++ {
|
||||
for start := 0; start < end; start++ {
|
||||
- arches := testArches[start:end]
|
||||
+ var arches []string
|
||||
+ for _, arch := range testArches[start:end] {
|
||||
+ // "native" indicates a blank architecture field for seccomp, to test
|
||||
+ // the case where the running architecture was not included in the
|
||||
+ // architecture list. Docker doesn't always set the architecture for some
|
||||
+ // reason (namely for ppc64le).
|
||||
+ if arch != "native" {
|
||||
+ arches = append(arches, arch)
|
||||
+ }
|
||||
+ }
|
||||
if len(arches) <= 1 {
|
||||
continue
|
||||
}
|
||||
--
|
||||
2.46.0
|
||||
|
136
0004-bsc1214960-nsenter-cloned_binary-remove-bindfd-logic.patch
Normal file
136
0004-bsc1214960-nsenter-cloned_binary-remove-bindfd-logic.patch
Normal file
@ -0,0 +1,136 @@
|
||||
From 0f1f8e303cf1919c33952f4938e5637d8f77f907 Mon Sep 17 00:00:00 2001
|
||||
From: Aleksa Sarai <cyphar@cyphar.com>
|
||||
Date: Fri, 7 Jul 2023 22:45:44 +1000
|
||||
Subject: [PATCH 4/4] bsc1214960: nsenter: cloned_binary: remove bindfd logic
|
||||
entirely
|
||||
|
||||
(This is a cherry-pick of b999376fb237195265081a8b8ba3fd3bd6ef8c2c.)
|
||||
|
||||
While the ro-bind-mount trick did eliminate the memory overhead of
|
||||
copying the runc binary for each "runc init" invocation, on machines
|
||||
with very significant container churn, creating a temporary mount
|
||||
namespace on every container invocation can trigger severe lock
|
||||
contention on namespace_sem that makes containers fail to spawn.
|
||||
|
||||
The only reason we added bindfd in commit 16612d74de5f ("nsenter:
|
||||
cloned_binary: try to ro-bind /proc/self/exe before copying") was due to
|
||||
a Kubernetes e2e test failure where they had a ridiculously small memory
|
||||
limit. It seems incredibly unlikely that real workloads are running
|
||||
without 10MB to spare for the very short time that runc is interacting
|
||||
with the container.
|
||||
|
||||
In addition, since the original cloned_binary implementation, cgroupv2
|
||||
is now almost universally used on modern systems. Unlike cgroupv1, the
|
||||
cgroupv2 memcg implementation does not migrate memory usage when
|
||||
processes change cgroups (even cgroupv1 only did this if you had
|
||||
memory.move_charge_at_immigrate enabled). In addition, because we do the
|
||||
/proc/self/exe clone before synchronising the bootstrap data read, we
|
||||
are guaranteed to do the clone before "runc init" is moved into the
|
||||
container cgroup -- meaning that the memory used by the /proc/self/exe
|
||||
clone is charged against the root cgroup, and thus container workloads
|
||||
should not be affected at all with memfd cloning.
|
||||
|
||||
The long-term fix for this problem is to block the /proc/self/exe
|
||||
re-opening attack entirely in-kernel, which is something I'm working
|
||||
on[1]. Though it should also be noted that because the memfd is
|
||||
completely separate to the host binary, even attacks like Dirty COW
|
||||
against the runc binary can be defended against with the memfd approach.
|
||||
Of course, once we have in-kernel protection against the /proc/self/exe
|
||||
re-opening attack, we won't have that protection anymore...
|
||||
|
||||
[1]: https://lwn.net/Articles/934460/
|
||||
|
||||
SUSE-Bugs: https://bugzilla.suse.com/show_bug.cgi?id=1214960
|
||||
Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
|
||||
---
|
||||
libcontainer/nsenter/cloned_binary.c | 67 ----------------------------
|
||||
1 file changed, 67 deletions(-)
|
||||
|
||||
diff --git a/libcontainer/nsenter/cloned_binary.c b/libcontainer/nsenter/cloned_binary.c
|
||||
index d1b2d4c546f1..565748b13a4e 100644
|
||||
--- a/libcontainer/nsenter/cloned_binary.c
|
||||
+++ b/libcontainer/nsenter/cloned_binary.c
|
||||
@@ -396,61 +396,6 @@ static int seal_execfd(int *fd, int fdtype)
|
||||
return -1;
|
||||
}
|
||||
|
||||
-static int try_bindfd(void)
|
||||
-{
|
||||
- int fd, ret = -1;
|
||||
- char template[PATH_MAX] = { 0 };
|
||||
- char *prefix = getenv("_LIBCONTAINER_STATEDIR");
|
||||
-
|
||||
- if (!prefix || *prefix != '/')
|
||||
- prefix = "/tmp";
|
||||
- if (snprintf(template, sizeof(template), "%s/runc.XXXXXX", prefix) < 0)
|
||||
- return ret;
|
||||
-
|
||||
- /*
|
||||
- * We need somewhere to mount it, mounting anything over /proc/self is a
|
||||
- * BAD idea on the host -- even if we do it temporarily.
|
||||
- */
|
||||
- fd = mkstemp(template);
|
||||
- if (fd < 0)
|
||||
- return ret;
|
||||
- close(fd);
|
||||
-
|
||||
- /*
|
||||
- * For obvious reasons this won't work in rootless mode because we haven't
|
||||
- * created a userns+mntns -- but getting that to work will be a bit
|
||||
- * complicated and it's only worth doing if someone actually needs it.
|
||||
- */
|
||||
- ret = -EPERM;
|
||||
- if (mount("/proc/self/exe", template, "", MS_BIND, "") < 0)
|
||||
- goto out;
|
||||
- if (mount("", template, "", MS_REMOUNT | MS_BIND | MS_RDONLY, "") < 0)
|
||||
- goto out_umount;
|
||||
-
|
||||
- /* Get read-only handle that we're sure can't be made read-write. */
|
||||
- ret = open(template, O_PATH | O_CLOEXEC);
|
||||
-
|
||||
-out_umount:
|
||||
- /*
|
||||
- * Make sure the MNT_DETACH works, otherwise we could get remounted
|
||||
- * read-write and that would be quite bad (the fd would be made read-write
|
||||
- * too, invalidating the protection).
|
||||
- */
|
||||
- if (umount2(template, MNT_DETACH) < 0) {
|
||||
- if (ret >= 0)
|
||||
- close(ret);
|
||||
- ret = -ENOTRECOVERABLE;
|
||||
- }
|
||||
-
|
||||
-out:
|
||||
- /*
|
||||
- * We don't care about unlink errors, the worst that happens is that
|
||||
- * there's an empty file left around in STATEDIR.
|
||||
- */
|
||||
- unlink(template);
|
||||
- return ret;
|
||||
-}
|
||||
-
|
||||
static ssize_t fd_to_fd(int outfd, int infd)
|
||||
{
|
||||
ssize_t total = 0;
|
||||
@@ -485,18 +430,6 @@ static int clone_binary(void)
|
||||
size_t sent = 0;
|
||||
int fdtype = EFD_NONE;
|
||||
|
||||
- /*
|
||||
- * Before we resort to copying, let's try creating an ro-binfd in one shot
|
||||
- * by getting a handle for a read-only bind-mount of the execfd.
|
||||
- */
|
||||
- execfd = try_bindfd();
|
||||
- if (execfd >= 0)
|
||||
- return execfd;
|
||||
-
|
||||
- /*
|
||||
- * Dammit, that didn't work -- time to copy the binary to a safe place we
|
||||
- * can seal the contents.
|
||||
- */
|
||||
execfd = make_execfd(&fdtype);
|
||||
if (execfd < 0 || fdtype == EFD_NONE)
|
||||
return -ENOTRECOVERABLE;
|
||||
--
|
||||
2.46.0
|
||||
|
BIN
runc-1.1.12.tar.xz
(Stored with Git LFS)
BIN
runc-1.1.12.tar.xz
(Stored with Git LFS)
Binary file not shown.
@ -1,17 +0,0 @@
|
||||
-----BEGIN PGP SIGNATURE-----
|
||||
|
||||
iQJEBAABCAAuFiEEXzbGxhtUYBJKdfWmnhiqJn3bjbQFAmWvvCcQHGFzYXJhaUBz
|
||||
dXNlLmNvbQAKCRCeGKomfduNtG2oD/9yLwYdfbx4GU31kCuvTS3odH8XyplL4QLl
|
||||
TszoLO/50z/Y9r0QBNuLsDDvAWtsJAYTsRIwEwDgUuziHnbkbHCnE2C+6P7OWUKp
|
||||
7VS1mqWzWeVibt0hYBWcooJb8inA/ctwfppZlH8EnTdoyqp0bAuQKtj2muA+LTvN
|
||||
n/19qZ0/zAvErya5ugZCfnpJngOM0W//F5OSE/DKI3ct6o3AilxlzlhZuwkiYQud
|
||||
nwS5j4CvQp7GkJeuwDluUHGmsT8AW6P3McptS/BcT4wUKWhxcntJG1cdiZOFTW84
|
||||
3CLdwMPGQR0SVK5yPMbKogRtglODEW82Ytp4S8BB9sG5PS5rBsvnApSQxFluRMQT
|
||||
oaQsEKwPS+VSUwf44QR42iF3fB8dxmmmcautr5yaUiSx4DdFGj9jjrbMa9YCk2da
|
||||
J/5ExwJv5nP5R+uwOiH3ziZuFuuH1afbGLrT2ouv61/SMGiYiLEAyiegF94Zg2nu
|
||||
5RvMUz33LpEckLrlNN5u9q+/jbfJmZAUtdVafKQQTBRFKPCyHjOroKM11PzoHX6l
|
||||
3dsyEPbEfowZ+uM2z9wCfub529fNF8t9k9sUAIQsma5p7+l7xJMbOua2kd1kGiQU
|
||||
ec19+KD6ka4NHyDRwxe0iM6/AuFlKKUUTVGZjg2bD+ap0qgDjZ3R5lTmI1pJ8Win
|
||||
wfoEKZCm+A==
|
||||
=Sl8m
|
||||
-----END PGP SIGNATURE-----
|
BIN
runc-1.1.14.tar.xz
(Stored with Git LFS)
Normal file
BIN
runc-1.1.14.tar.xz
(Stored with Git LFS)
Normal file
Binary file not shown.
7
runc-1.1.14.tar.xz.asc
Normal file
7
runc-1.1.14.tar.xz.asc
Normal file
@ -0,0 +1,7 @@
|
||||
-----BEGIN PGP SIGNATURE-----
|
||||
|
||||
iHUEABYKAB0WIQS2TklVsp+j1GPyqQYol/rSt+lEbwUCZtZk+AAKCRAol/rSt+lE
|
||||
b0TGAQC6tc59nCVnmViX22aKK6fuV++saYQgQKKhIkqiyBs97wD/a49dqcnjgHIf
|
||||
OKO+WjeCGwFIwmHIsAeD3bdCb+XTqQI=
|
||||
=E21y
|
||||
-----END PGP SIGNATURE-----
|
40
runc.changes
40
runc.changes
@ -1,3 +1,43 @@
|
||||
-------------------------------------------------------------------
|
||||
Tue Sep 3 01:57:20 UTC 2024 - Aleksa Sarai <asarai@suse.com>
|
||||
|
||||
[ This was only ever released for SLES and Leap. ]
|
||||
|
||||
- Update to runc v1.1.14. Upstream changelog is available from
|
||||
<https://github.com/opencontainers/runc/releases/tag/v1.1.14>.
|
||||
Includes the patch for CVE-2024-45310. bsc#1230092
|
||||
|
||||
- Rebase patches:
|
||||
* 0001-bsc1221050-libct-seccomp-patchbpf-rm-duplicated-code.patch
|
||||
* 0002-bsc1221050-seccomp-patchbpf-rename-nativeArch-linuxA.patch
|
||||
* 0003-bsc1221050-seccomp-patchbpf-always-include-native-ar.patch
|
||||
* 0004-bsc1214960-nsenter-cloned_binary-remove-bindfd-logic.patch
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Mon Jul 22 13:08:06 UTC 2024 - Aleksa Sarai <asarai@suse.com>
|
||||
|
||||
[ This was only ever released for SLES and Leap. ]
|
||||
|
||||
- Update to runc v1.1.13. Upstream changelog is available from
|
||||
<https://github.com/opencontainers/runc/releases/tag/v1.1.13>.
|
||||
- Rebase patches:
|
||||
* 0001-bsc1221050-libct-seccomp-patchbpf-rm-duplicated-code.patch
|
||||
* 0002-bsc1221050-seccomp-patchbpf-rename-nativeArch-linuxA.patch
|
||||
* 0003-bsc1221050-seccomp-patchbpf-always-include-native-ar.patch
|
||||
- Backport <https://github.com/opencontainers/runc/pull/3931> to fix a
|
||||
performance issue when running lots of containers, caused by systemd getting
|
||||
too many mount notifications. bsc#1214960
|
||||
+ 0004-bsc1214960-nsenter-cloned_binary-remove-bindfd-logic.patch
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Thu Mar 21 03:46:48 UTC 2024 - Aleksa Sarai <asarai@suse.com>
|
||||
|
||||
- Add upstream patch <https://github.com/opencontainers/runc/pull/4219> to
|
||||
properly fix -ENOSYS stub on ppc64le. bsc#1192051 bsc#1221050
|
||||
+ 0001-bsc1221050-libct-seccomp-patchbpf-rm-duplicated-code.patch
|
||||
+ 0002-bsc1221050-seccomp-patchbpf-rename-nativeArch-linuxA.patch
|
||||
+ 0003-bsc1221050-seccomp-patchbpf-always-include-native-ar.patch
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Wed Jan 31 00:00:33 UTC 2024 - Aleksa Sarai <asarai@suse.com>
|
||||
|
||||
|
13
runc.spec
13
runc.spec
@ -18,13 +18,13 @@
|
||||
|
||||
|
||||
# MANUAL: Make sure you update this each time you update runc.
|
||||
%define git_version 51d5e94601ceffbbd85688df1c928ecccbfa4685
|
||||
%define git_short 51d5e94601ce
|
||||
%define git_version 2c9f5602f0ba3d9da1c2596322dfc4e156844890
|
||||
%define git_short 2c9f5602f0ba
|
||||
|
||||
%define project github.com/opencontainers/runc
|
||||
|
||||
Name: runc
|
||||
Version: 1.1.12
|
||||
Version: 1.1.14
|
||||
Release: 0
|
||||
Summary: Tool for spawning and running OCI containers
|
||||
License: Apache-2.0
|
||||
@ -33,6 +33,12 @@ URL: https://github.com/opencontainers/runc
|
||||
Source0: https://github.com/opencontainers/runc/releases/download/v%{version}/runc.tar.xz#/runc-%{version}.tar.xz
|
||||
Source1: https://github.com/opencontainers/runc/releases/download/v%{version}/runc.tar.xz.asc#/runc-%{version}.tar.xz.asc
|
||||
Source2: runc.keyring
|
||||
# SUSE-FIX-UPSTREAM: Backport of <https://github.com/opencontainers/runc/pull/4219>. bsc#1221050
|
||||
Patch10: 0001-bsc1221050-libct-seccomp-patchbpf-rm-duplicated-code.patch
|
||||
Patch11: 0002-bsc1221050-seccomp-patchbpf-rename-nativeArch-linuxA.patch
|
||||
Patch12: 0003-bsc1221050-seccomp-patchbpf-always-include-native-ar.patch
|
||||
# SUSE-FIX-UPSTREAM: Partial backport of <https://github.com/opencontainers/runc/pull/3931>. bsc#1214960
|
||||
Patch20: 0004-bsc1214960-nsenter-cloned_binary-remove-bindfd-logic.patch
|
||||
BuildRequires: diffutils
|
||||
BuildRequires: fdupes
|
||||
BuildRequires: go
|
||||
@ -64,6 +70,7 @@ and has grown to become a separate project entirely.
|
||||
|
||||
%prep
|
||||
%setup -q -n %{name}-%{version}
|
||||
%autopatch -p1
|
||||
|
||||
%build
|
||||
# build runc
|
||||
|
Loading…
Reference in New Issue
Block a user