Sync from SUSE:ALP:Source:Standard:1.0 runc revision 2133c931be9a58b516e6632c4b6eabdc

2024-10-11 10:55:14 +02:00 · 2024-10-11 10:55:14 +02:00 · 7737253f5a
commit 7737253f5a
parent 0a8f3713a3
10 changed files with 691 additions and 23 deletions
--- a/0001-bsc1221050-libct-seccomp-patchbpf-rm-duplicated-code.patch
+++ b/0001-bsc1221050-libct-seccomp-patchbpf-rm-duplicated-code.patch
@@ -0,0 +1,44 @@
+From 22eb87a32dc1c685425b685e96e8472b9ac1b5ca Mon Sep 17 00:00:00 2001
+From: Kir Kolyshkin <kolyshkin@gmail.com>
+Date: Fri, 14 Oct 2022 18:37:00 -0700
+Subject: [PATCH 1/4] bsc1221050: libct/seccomp/patchbpf: rm duplicated code
+
+(This is a cherry-pick of 2cd05e44b662fb79c46d5ebfd6c71e9ebc98d40c.)
+
+In findLastSyscalls, we convert libseccomp.ArchNative to the real
+libseccomp architecture, but archToNative already does that, so
+this code is redundant.
+
+Remove the redundant code, and move its comment to archToNative.
+
+Fixes: 7a8d7162f
+Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
+Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
+---
+ libcontainer/seccomp/patchbpf/enosys_linux.go | 10 ----------
+ 1 file changed, 10 deletions(-)
+
+diff --git a/libcontainer/seccomp/patchbpf/enosys_linux.go b/libcontainer/seccomp/patchbpf/enosys_linux.go
+index efe6dca58b21..c9c1d4ccb685 100644
+--- a/libcontainer/seccomp/patchbpf/enosys_linux.go
+++ b/libcontainer/seccomp/patchbpf/enosys_linux.go
+@@ -233,16 +233,6 @@ func findLastSyscalls(config *configs.Seccomp) (lastSyscallMap, error) {
+ 			return nil, fmt.Errorf("unable to validate seccomp architecture: %w", err)
+ 		}
+ 
+-		// Map native architecture to a real architecture value to avoid
+-		// doubling-up the lastSyscall mapping.
+-		if arch == libseccomp.ArchNative {
+-			nativeArch, err := libseccomp.GetNativeArch()
+-			if err != nil {
+-				return nil, fmt.Errorf("unable to get native architecture: %w", err)
+-			}
+-			arch = nativeArch
+-		}
+-
+ 		// Figure out native architecture representation of the architecture.
+ 		nativeArch, err := archToNative(arch)
+ 		if err != nil {
+-- 
+2.46.0
+
--- a/0002-bsc1221050-seccomp-patchbpf-rename-nativeArch-linuxA.patch
+++ b/0002-bsc1221050-seccomp-patchbpf-rename-nativeArch-linuxA.patch
@@ -0,0 +1,289 @@
+From 558c5ecf487a40001ba854cfcbd5c94223167501 Mon Sep 17 00:00:00 2001
+From: Aleksa Sarai <cyphar@cyphar.com>
+Date: Wed, 13 Mar 2024 13:40:16 +1100
+Subject: [PATCH 2/4] bsc1221050: seccomp: patchbpf: rename nativeArch ->
+ linuxAuditArch
+
+(This is a backport of 6167f5ffc3e3fd53e6a41a2effa592a4873ad046.)
+
+Calling the Linux AUDIT_* architecture constants "native" leads to
+confusing code when we are getting the actual native architecture of the
+running system.
+
+Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
+---
+ libcontainer/seccomp/patchbpf/enosys_linux.go | 81 ++++++++++---------
+ .../seccomp/patchbpf/enosys_linux_test.go     | 16 ++--
+ 2 files changed, 49 insertions(+), 48 deletions(-)
+
+diff --git a/libcontainer/seccomp/patchbpf/enosys_linux.go b/libcontainer/seccomp/patchbpf/enosys_linux.go
+index c9c1d4ccb685..1b67fda85c64 100644
+--- a/libcontainer/seccomp/patchbpf/enosys_linux.go
+++ b/libcontainer/seccomp/patchbpf/enosys_linux.go
+@@ -164,11 +164,11 @@ func disassembleFilter(filter *libseccomp.ScmpFilter) ([]bpf.Instruction, error)
+ 	return program, nil
+ }
+ 
+-type nativeArch uint32
+type linuxAuditArch uint32
+ 
+-const invalidArch nativeArch = 0
+const invalidArch linuxAuditArch = 0
+ 
+-func archToNative(arch libseccomp.ScmpArch) (nativeArch, error) {
+func scmpArchToAuditArch(arch libseccomp.ScmpArch) (linuxAuditArch, error) {
+ 	switch arch {
+ 	case libseccomp.ArchNative:
+ 		// Convert to actual native architecture.
+@@ -176,48 +176,48 @@ func archToNative(arch libseccomp.ScmpArch) (nativeArch, error) {
+ 		if err != nil {
+ 			return invalidArch, fmt.Errorf("unable to get native arch: %w", err)
+ 		}
+-		return archToNative(arch)
+		return scmpArchToAuditArch(arch)
+ 	case libseccomp.ArchX86:
+-		return nativeArch(C.C_AUDIT_ARCH_I386), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_I386), nil
+ 	case libseccomp.ArchAMD64, libseccomp.ArchX32:
+ 		// NOTE: x32 is treated like x86_64 except all x32 syscalls have the
+ 		//       30th bit of the syscall number set to indicate that it's not a
+ 		//       normal x86_64 syscall.
+-		return nativeArch(C.C_AUDIT_ARCH_X86_64), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_X86_64), nil
+ 	case libseccomp.ArchARM:
+-		return nativeArch(C.C_AUDIT_ARCH_ARM), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_ARM), nil
+ 	case libseccomp.ArchARM64:
+-		return nativeArch(C.C_AUDIT_ARCH_AARCH64), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_AARCH64), nil
+ 	case libseccomp.ArchMIPS:
+-		return nativeArch(C.C_AUDIT_ARCH_MIPS), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_MIPS), nil
+ 	case libseccomp.ArchMIPS64:
+-		return nativeArch(C.C_AUDIT_ARCH_MIPS64), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_MIPS64), nil
+ 	case libseccomp.ArchMIPS64N32:
+-		return nativeArch(C.C_AUDIT_ARCH_MIPS64N32), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_MIPS64N32), nil
+ 	case libseccomp.ArchMIPSEL:
+-		return nativeArch(C.C_AUDIT_ARCH_MIPSEL), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_MIPSEL), nil
+ 	case libseccomp.ArchMIPSEL64:
+-		return nativeArch(C.C_AUDIT_ARCH_MIPSEL64), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_MIPSEL64), nil
+ 	case libseccomp.ArchMIPSEL64N32:
+-		return nativeArch(C.C_AUDIT_ARCH_MIPSEL64N32), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_MIPSEL64N32), nil
+ 	case libseccomp.ArchPPC:
+-		return nativeArch(C.C_AUDIT_ARCH_PPC), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_PPC), nil
+ 	case libseccomp.ArchPPC64:
+-		return nativeArch(C.C_AUDIT_ARCH_PPC64), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_PPC64), nil
+ 	case libseccomp.ArchPPC64LE:
+-		return nativeArch(C.C_AUDIT_ARCH_PPC64LE), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_PPC64LE), nil
+ 	case libseccomp.ArchS390:
+-		return nativeArch(C.C_AUDIT_ARCH_S390), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_S390), nil
+ 	case libseccomp.ArchS390X:
+-		return nativeArch(C.C_AUDIT_ARCH_S390X), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_S390X), nil
+ 	case libseccomp.ArchRISCV64:
+-		return nativeArch(C.C_AUDIT_ARCH_RISCV64), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_RISCV64), nil
+ 	default:
+ 		return invalidArch, fmt.Errorf("unknown architecture: %v", arch)
+ 	}
+ }
+ 
+-type lastSyscallMap map[nativeArch]map[libseccomp.ScmpArch]libseccomp.ScmpSyscall
+type lastSyscallMap map[linuxAuditArch]map[libseccomp.ScmpArch]libseccomp.ScmpSyscall
+ 
+ // Figure out largest syscall number referenced in the filter for each
+ // architecture. We will be generating code based on the native architecture
+@@ -234,17 +234,17 @@ func findLastSyscalls(config *configs.Seccomp) (lastSyscallMap, error) {
+ 		}
+ 
+ 		// Figure out native architecture representation of the architecture.
+-		nativeArch, err := archToNative(arch)
+		auditArch, err := scmpArchToAuditArch(arch)
+ 		if err != nil {
+ 			return nil, fmt.Errorf("cannot map architecture %v to AUDIT_ARCH_ constant: %w", arch, err)
+ 		}
+ 
+-		if _, ok := lastSyscalls[nativeArch]; !ok {
+-			lastSyscalls[nativeArch] = map[libseccomp.ScmpArch]libseccomp.ScmpSyscall{}
+		if _, ok := lastSyscalls[auditArch]; !ok {
+			lastSyscalls[auditArch] = map[libseccomp.ScmpArch]libseccomp.ScmpSyscall{}
+ 		}
+-		if _, ok := lastSyscalls[nativeArch][arch]; ok {
+		if _, ok := lastSyscalls[auditArch][arch]; ok {
+ 			// Because of ArchNative we may hit the same entry multiple times.
+-			// Just skip it if we've seen this (nativeArch, ScmpArch)
+			// Just skip it if we've seen this (linuxAuditArch, ScmpArch)
+ 			// combination before.
+ 			continue
+ 		}
+@@ -262,10 +262,11 @@ func findLastSyscalls(config *configs.Seccomp) (lastSyscallMap, error) {
+ 			}
+ 		}
+ 		if largestSyscall != 0 {
+-			lastSyscalls[nativeArch][arch] = largestSyscall
+			logrus.Debugf("seccomp: largest syscall number for arch %v is %v", arch, largestSyscall)
+			lastSyscalls[auditArch][arch] = largestSyscall
+ 		} else {
+-			logrus.Warnf("could not find any syscalls for arch %s", ociArch)
+-			delete(lastSyscalls[nativeArch], arch)
+			logrus.Warnf("could not find any syscalls for arch %v", arch)
+			delete(lastSyscalls[auditArch], arch)
+ 		}
+ 	}
+ 	return lastSyscalls, nil
+@@ -283,10 +284,10 @@ func findLastSyscalls(config *configs.Seccomp) (lastSyscallMap, error) {
+ // close_range(2) which were added out-of-order in the syscall table between
+ // kernel releases.
+ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error) {
+-	// A jump-table for each nativeArch used to generate the initial
+	// A jump-table for each linuxAuditArch used to generate the initial
+ 	// conditional jumps -- measured from the *END* of the program so they
+ 	// remain valid after prepending to the tail.
+-	archJumpTable := map[nativeArch]uint32{}
+	archJumpTable := map[linuxAuditArch]uint32{}
+ 
+ 	// Generate our own -ENOSYS rules for each architecture. They have to be
+ 	// generated in reverse (prepended to the tail of the program) because the
+@@ -299,7 +300,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
+ 	}
+ 
+ 	// Generate the syscall -ENOSYS rules.
+-	for nativeArch, maxSyscalls := range lastSyscalls {
+	for auditArch, maxSyscalls := range lastSyscalls {
+ 		// The number of instructions from the tail of this section which need
+ 		// to be jumped in order to reach the -ENOSYS return. If the section
+ 		// does not jump, it will fall through to the actual filter.
+@@ -380,7 +381,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
+ 
+ 			// If we're on x86 we need to add a check for x32 and if we're in
+ 			// the wrong mode we jump over the section.
+-			if uint32(nativeArch) == uint32(C.C_AUDIT_ARCH_X86_64) {
+			if uint32(auditArch) == uint32(C.C_AUDIT_ARCH_X86_64) {
+ 				// Generate a prefix to check the mode.
+ 				switch scmpArch {
+ 				case libseccomp.ArchAMD64:
+@@ -409,8 +410,8 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
+ 			section = append(section, sectionTail...)
+ 		case 2:
+ 			// x32 and x86_64 are a unique case, we can't handle any others.
+-			if uint32(nativeArch) != uint32(C.C_AUDIT_ARCH_X86_64) {
+-				return nil, fmt.Errorf("unknown architecture overlap on native arch %#x", nativeArch)
+			if uint32(auditArch) != uint32(C.C_AUDIT_ARCH_X86_64) {
+				return nil, fmt.Errorf("unknown architecture overlap on native arch %#x", auditArch)
+ 			}
+ 
+ 			x32sysno, ok := maxSyscalls[libseccomp.ArchX32]
+@@ -487,7 +488,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
+ 		programTail = append(section, programTail...)
+ 
+ 		// Update jump table.
+-		archJumpTable[nativeArch] = uint32(len(programTail))
+		archJumpTable[auditArch] = uint32(len(programTail))
+ 	}
+ 
+ 	// Add a dummy "jump to filter" for any architecture we might miss below.
+@@ -507,9 +508,9 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
+ 	//       architectures based on how large the jumps are going to be, or
+ 	//       re-sort the candidate architectures each time to make sure that we
+ 	//       pick the largest jump which is going to be smaller than 255.
+-	for nativeArch := range lastSyscalls {
+	for auditArch := range lastSyscalls {
+ 		// We jump forwards but the jump table is calculated from the *END*.
+-		jump := uint32(len(programTail)) - archJumpTable[nativeArch]
+		jump := uint32(len(programTail)) - archJumpTable[auditArch]
+ 
+ 		// Same routine as above -- this is a basic jeq check, complicated
+ 		// slightly if it turns out that we need to do a long jump.
+@@ -518,7 +519,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
+ 				// jeq [arch],[jump]
+ 				bpf.JumpIf{
+ 					Cond:     bpf.JumpEqual,
+-					Val:      uint32(nativeArch),
+					Val:      uint32(auditArch),
+ 					SkipTrue: uint8(jump),
+ 				},
+ 			}, programTail...)
+@@ -527,7 +528,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
+ 				// jne [arch],1
+ 				bpf.JumpIf{
+ 					Cond:     bpf.JumpNotEqual,
+-					Val:      uint32(nativeArch),
+					Val:      uint32(auditArch),
+ 					SkipTrue: 1,
+ 				},
+ 				// ja [jump]
+diff --git a/libcontainer/seccomp/patchbpf/enosys_linux_test.go b/libcontainer/seccomp/patchbpf/enosys_linux_test.go
+index e2d363a43bd3..bdfeff68adb3 100644
+--- a/libcontainer/seccomp/patchbpf/enosys_linux_test.go
+++ b/libcontainer/seccomp/patchbpf/enosys_linux_test.go
+@@ -23,7 +23,7 @@ type seccompData struct {
+ }
+ 
+ // mockSyscallPayload creates a fake seccomp_data struct with the given data.
+-func mockSyscallPayload(t *testing.T, sysno libseccomp.ScmpSyscall, arch nativeArch, args ...uint64) []byte {
+func mockSyscallPayload(t *testing.T, sysno libseccomp.ScmpSyscall, arch linuxAuditArch, args ...uint64) []byte {
+ 	var buf bytes.Buffer
+ 
+ 	data := seccompData{
+@@ -150,8 +150,8 @@ func testEnosysStub(t *testing.T, defaultAction configs.Action, arches []string)
+ 
+ 		for _, arch := range testArches {
+ 			type syscallTest struct {
+-				syscall  string
+ 				sysno    libseccomp.ScmpSyscall
+				syscall  string
+ 				expected uint32
+ 			}
+ 
+@@ -160,7 +160,7 @@ func testEnosysStub(t *testing.T, defaultAction configs.Action, arches []string)
+ 				t.Fatalf("unknown libseccomp architecture %q: %v", arch, err)
+ 			}
+ 
+-			nativeArch, err := archToNative(scmpArch)
+			auditArch, err := scmpArchToAuditArch(scmpArch)
+ 			if err != nil {
+ 				t.Fatalf("unknown audit architecture %q: %v", arch, err)
+ 			}
+@@ -179,9 +179,9 @@ func testEnosysStub(t *testing.T, defaultAction configs.Action, arches []string)
+ 					t.Fatalf("unknown syscall %q on arch %q: %v", syscall, arch, err)
+ 				}
+ 				syscallTests = append(syscallTests, syscallTest{
+-					syscall,
+-					sysno,
+-					expected,
+					sysno:    sysno,
+					syscall:  syscall,
+					expected: expected,
+ 				})
+ 			}
+ 
+@@ -233,7 +233,7 @@ func testEnosysStub(t *testing.T, defaultAction configs.Action, arches []string)
+ 					test.expected = retFallthrough
+ 				}
+ 
+-				payload := mockSyscallPayload(t, test.sysno, nativeArch, 0x1337, 0xF00BA5)
+				payload := mockSyscallPayload(t, test.sysno, auditArch, 0x1337, 0xF00BA5)
+ 				// NOTE: golang.org/x/net/bpf returns int here rather
+ 				// than uint32.
+ 				rawRet, err := filter.Run(payload)
+@@ -247,7 +247,7 @@ func testEnosysStub(t *testing.T, defaultAction configs.Action, arches []string)
+ 						t.Logf("  [%4.1d] %s", idx, insn)
+ 					}
+ 					t.Logf("payload: %#v", payload)
+-					t.Errorf("filter %s(%d) %q(%d): got %#x, want %#x", arch, nativeArch, test.syscall, test.sysno, ret, test.expected)
+					t.Errorf("filter %s(%d) %q(%d): got %#x, want %#x", arch, auditArch, test.syscall, test.sysno, ret, test.expected)
+ 				}
+ 			}
+ 		}
+-- 
+2.46.0
+
--- a/0003-bsc1221050-seccomp-patchbpf-always-include-native-ar.patch
+++ b/0003-bsc1221050-seccomp-patchbpf-always-include-native-ar.patch
@@ -0,0 +1,162 @@
+From a1e9b2e4015a6b548a0d3e004bf27dd2e3f2cf35 Mon Sep 17 00:00:00 2001
+From: Aleksa Sarai <cyphar@cyphar.com>
+Date: Wed, 13 Mar 2024 16:12:51 +1100
+Subject: [PATCH 3/4] bsc1221050: seccomp: patchbpf: always include native
+ architecture in stub
+
+(This is a backport of 376417ba7646f05ddb1efa8fe30e2a3b53cf673b.)
+
+It turns out that on ppc64le (at least), Docker doesn't include any
+architectures in the list of allowed architectures. libseccomp
+interprets this as "just include the default architecture" but patchbpf
+would return a no-op ENOSYS stub, which would lead to the exact issues
+that commit 7a8d7162f9d7 ("seccomp: prepend -ENOSYS stub to all
+filters") fixed for other architectures.
+
+So, just always include the running architecture in the list. There's
+no real downside.
+
+SUSE-Bugs: 1192051 1221050
+Ref: https://bugzilla.suse.com/show_bug.cgi?id=1192051#c6
+Reported-by: Fabian Vogt <fvogt@suse.com>
+Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
+---
+ libcontainer/seccomp/patchbpf/enosys_linux.go | 22 +++++++--
+ .../seccomp/patchbpf/enosys_linux_test.go     | 47 +++++++++++++++++--
+ 2 files changed, 61 insertions(+), 8 deletions(-)
+
+diff --git a/libcontainer/seccomp/patchbpf/enosys_linux.go b/libcontainer/seccomp/patchbpf/enosys_linux.go
+index 1b67fda85c64..d459ba8792ca 100644
+--- a/libcontainer/seccomp/patchbpf/enosys_linux.go
+++ b/libcontainer/seccomp/patchbpf/enosys_linux.go
+@@ -224,16 +224,30 @@ type lastSyscallMap map[linuxAuditArch]map[libseccomp.ScmpArch]libseccomp.ScmpSy
+ // representation, but SCMP_ARCH_X32 means we have to track cases where the
+ // same architecture has different largest syscalls based on the mode.
+ func findLastSyscalls(config *configs.Seccomp) (lastSyscallMap, error) {
+-	lastSyscalls := make(lastSyscallMap)
+-	// Only loop over architectures which are present in the filter. Any other
+-	// architectures will get the libseccomp bad architecture action anyway.
+	scmpArchs := make(map[libseccomp.ScmpArch]struct{})
+ 	for _, ociArch := range config.Architectures {
+ 		arch, err := libseccomp.GetArchFromString(ociArch)
+ 		if err != nil {
+ 			return nil, fmt.Errorf("unable to validate seccomp architecture: %w", err)
+ 		}
+		scmpArchs[arch] = struct{}{}
+	}
+	// On architectures like ppc64le, Docker inexplicably doesn't include the
+	// native architecture in the architecture list which results in no
+	// architectures being present in the list at all (rendering the ENOSYS
+	// stub a no-op). So, always include the native architecture.
+	if nativeScmpArch, err := libseccomp.GetNativeArch(); err != nil {
+		return nil, fmt.Errorf("unable to get native arch: %w", err)
+	} else if _, ok := scmpArchs[nativeScmpArch]; !ok {
+		logrus.Debugf("seccomp: adding implied native architecture %v to config set", nativeScmpArch)
+		scmpArchs[nativeScmpArch] = struct{}{}
+	}
+	logrus.Debugf("seccomp: configured architecture set: %s", scmpArchs)
+ 
+-		// Figure out native architecture representation of the architecture.
+	// Only loop over architectures which are present in the filter. Any other
+	// architectures will get the libseccomp bad architecture action anyway.
+	lastSyscalls := make(lastSyscallMap)
+	for arch := range scmpArchs {
+ 		auditArch, err := scmpArchToAuditArch(arch)
+ 		if err != nil {
+ 			return nil, fmt.Errorf("cannot map architecture %v to AUDIT_ARCH_ constant: %w", arch, err)
+diff --git a/libcontainer/seccomp/patchbpf/enosys_linux_test.go b/libcontainer/seccomp/patchbpf/enosys_linux_test.go
+index bdfeff68adb3..3d442e1daa66 100644
+--- a/libcontainer/seccomp/patchbpf/enosys_linux_test.go
+++ b/libcontainer/seccomp/patchbpf/enosys_linux_test.go
+@@ -12,6 +12,7 @@ import (
+ 	"github.com/opencontainers/runc/libcontainer/configs"
+ 
+ 	libseccomp "github.com/seccomp/libseccomp-golang"
+	"github.com/sirupsen/logrus"
+ 	"golang.org/x/net/bpf"
+ )
+ 
+@@ -105,6 +106,18 @@ var testArches = []string{
+ 	"ppc64le",
+ 	"s390",
+ 	"s390x",
+	// Dummy value to indicate a configuration with no architecture specified.
+	"native",
+}
+
+var nativeArch string
+
+func init() {
+	scmpNativeArch, err := libseccomp.GetNativeArch()
+	if err != nil {
+		logrus.Panicf("get native arch: %v", err)
+	}
+	nativeArch = scmpNativeArch.String()
+ }
+ 
+ func testEnosysStub(t *testing.T, defaultAction configs.Action, arches []string) {
+@@ -155,6 +168,9 @@ func testEnosysStub(t *testing.T, defaultAction configs.Action, arches []string)
+ 				expected uint32
+ 			}
+ 
+			if arch == "native" {
+				arch = nativeArch
+			}
+ 			scmpArch, err := libseccomp.GetArchFromString(arch)
+ 			if err != nil {
+ 				t.Fatalf("unknown libseccomp architecture %q: %v", arch, err)
+@@ -228,8 +244,15 @@ func testEnosysStub(t *testing.T, defaultAction configs.Action, arches []string)
+ 
+ 			// Test syscalls in the explicit list.
+ 			for _, test := range syscallTests {
+-				// Override the expected value in the two special cases.
+-				if !archSet[arch] || isAllowAction(defaultAction) {
+				// Override the expected value in the two special cases:
+				//  1. If the default action is allow, the filter won't have
+				//     the stub prepended so we expect a fallthrough.
+				//  2. If the executing architecture is not in the architecture
+				//     set, then the architecture is not handled by the stub --
+				//     *except* in the case of the native architecture (which
+				//     is always included in the stub).
+				if isAllowAction(defaultAction) ||
+					(!archSet[arch] && arch != nativeArch) {
+ 					test.expected = retFallthrough
+ 				}
+ 
+@@ -263,7 +286,14 @@ var testActions = map[string]configs.Action{
+ 
+ func TestEnosysStub_SingleArch(t *testing.T) {
+ 	for _, arch := range testArches {
+-		arches := []string{arch}
+		var arches []string
+		// "native" indicates a blank architecture field for seccomp, to test
+		// the case where the running architecture was not included in the
+		// architecture list. Docker doesn't always set the architecture for some
+		// reason (namely for ppc64le).
+		if arch != "native" {
+			arches = append(arches, arch)
+		}
+ 		t.Run("arch="+arch, func(t *testing.T) {
+ 			for name, action := range testActions {
+ 				t.Run("action="+name, func(t *testing.T) {
+@@ -277,7 +307,16 @@ func TestEnosysStub_SingleArch(t *testing.T) {
+ func TestEnosysStub_MultiArch(t *testing.T) {
+ 	for end := 0; end < len(testArches); end++ {
+ 		for start := 0; start < end; start++ {
+-			arches := testArches[start:end]
+			var arches []string
+			for _, arch := range testArches[start:end] {
+				// "native" indicates a blank architecture field for seccomp, to test
+				// the case where the running architecture was not included in the
+				// architecture list. Docker doesn't always set the architecture for some
+				// reason (namely for ppc64le).
+				if arch != "native" {
+					arches = append(arches, arch)
+				}
+			}
+ 			if len(arches) <= 1 {
+ 				continue
+ 			}
+-- 
+2.46.0
+
--- a/0004-bsc1214960-nsenter-cloned_binary-remove-bindfd-logic.patch
+++ b/0004-bsc1214960-nsenter-cloned_binary-remove-bindfd-logic.patch
@@ -0,0 +1,136 @@
+From 0f1f8e303cf1919c33952f4938e5637d8f77f907 Mon Sep 17 00:00:00 2001
+From: Aleksa Sarai <cyphar@cyphar.com>
+Date: Fri, 7 Jul 2023 22:45:44 +1000
+Subject: [PATCH 4/4] bsc1214960: nsenter: cloned_binary: remove bindfd logic
+ entirely
+
+(This is a cherry-pick of b999376fb237195265081a8b8ba3fd3bd6ef8c2c.)
+
+While the ro-bind-mount trick did eliminate the memory overhead of
+copying the runc binary for each "runc init" invocation, on machines
+with very significant container churn, creating a temporary mount
+namespace on every container invocation can trigger severe lock
+contention on namespace_sem that makes containers fail to spawn.
+
+The only reason we added bindfd in commit 16612d74de5f ("nsenter:
+cloned_binary: try to ro-bind /proc/self/exe before copying") was due to
+a Kubernetes e2e test failure where they had a ridiculously small memory
+limit. It seems incredibly unlikely that real workloads are running
+without 10MB to spare for the very short time that runc is interacting
+with the container.
+
+In addition, since the original cloned_binary implementation, cgroupv2
+is now almost universally used on modern systems. Unlike cgroupv1, the
+cgroupv2 memcg implementation does not migrate memory usage when
+processes change cgroups (even cgroupv1 only did this if you had
+memory.move_charge_at_immigrate enabled). In addition, because we do the
+/proc/self/exe clone before synchronising the bootstrap data read, we
+are guaranteed to do the clone before "runc init" is moved into the
+container cgroup -- meaning that the memory used by the /proc/self/exe
+clone is charged against the root cgroup, and thus container workloads
+should not be affected at all with memfd cloning.
+
+The long-term fix for this problem is to block the /proc/self/exe
+re-opening attack entirely in-kernel, which is something I'm working
+on[1]. Though it should also be noted that because the memfd is
+completely separate to the host binary, even attacks like Dirty COW
+against the runc binary can be defended against with the memfd approach.
+Of course, once we have in-kernel protection against the /proc/self/exe
+re-opening attack, we won't have that protection anymore...
+
+[1]: https://lwn.net/Articles/934460/
+
+SUSE-Bugs: https://bugzilla.suse.com/show_bug.cgi?id=1214960
+Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
+---
+ libcontainer/nsenter/cloned_binary.c | 67 ----------------------------
+ 1 file changed, 67 deletions(-)
+
+diff --git a/libcontainer/nsenter/cloned_binary.c b/libcontainer/nsenter/cloned_binary.c
+index d1b2d4c546f1..565748b13a4e 100644
+--- a/libcontainer/nsenter/cloned_binary.c
+++ b/libcontainer/nsenter/cloned_binary.c
+@@ -396,61 +396,6 @@ static int seal_execfd(int *fd, int fdtype)
+ 	return -1;
+ }
+ 
+-static int try_bindfd(void)
+-{
+-	int fd, ret = -1;
+-	char template[PATH_MAX] = { 0 };
+-	char *prefix = getenv("_LIBCONTAINER_STATEDIR");
+-
+-	if (!prefix || *prefix != '/')
+-		prefix = "/tmp";
+-	if (snprintf(template, sizeof(template), "%s/runc.XXXXXX", prefix) < 0)
+-		return ret;
+-
+-	/*
+-	 * We need somewhere to mount it, mounting anything over /proc/self is a
+-	 * BAD idea on the host -- even if we do it temporarily.
+-	 */
+-	fd = mkstemp(template);
+-	if (fd < 0)
+-		return ret;
+-	close(fd);
+-
+-	/*
+-	 * For obvious reasons this won't work in rootless mode because we haven't
+-	 * created a userns+mntns -- but getting that to work will be a bit
+-	 * complicated and it's only worth doing if someone actually needs it.
+-	 */
+-	ret = -EPERM;
+-	if (mount("/proc/self/exe", template, "", MS_BIND, "") < 0)
+-		goto out;
+-	if (mount("", template, "", MS_REMOUNT | MS_BIND | MS_RDONLY, "") < 0)
+-		goto out_umount;
+-
+-	/* Get read-only handle that we're sure can't be made read-write. */
+-	ret = open(template, O_PATH | O_CLOEXEC);
+-
+-out_umount:
+-	/*
+-	 * Make sure the MNT_DETACH works, otherwise we could get remounted
+-	 * read-write and that would be quite bad (the fd would be made read-write
+-	 * too, invalidating the protection).
+-	 */
+-	if (umount2(template, MNT_DETACH) < 0) {
+-		if (ret >= 0)
+-			close(ret);
+-		ret = -ENOTRECOVERABLE;
+-	}
+-
+-out:
+-	/*
+-	 * We don't care about unlink errors, the worst that happens is that
+-	 * there's an empty file left around in STATEDIR.
+-	 */
+-	unlink(template);
+-	return ret;
+-}
+-
+ static ssize_t fd_to_fd(int outfd, int infd)
+ {
+ 	ssize_t total = 0;
+@@ -485,18 +430,6 @@ static int clone_binary(void)
+ 	size_t sent = 0;
+ 	int fdtype = EFD_NONE;
+ 
+-	/*
+-	 * Before we resort to copying, let's try creating an ro-binfd in one shot
+-	 * by getting a handle for a read-only bind-mount of the execfd.
+-	 */
+-	execfd = try_bindfd();
+-	if (execfd >= 0)
+-		return execfd;
+-
+-	/*
+-	 * Dammit, that didn't work -- time to copy the binary to a safe place we
+-	 * can seal the contents.
+-	 */
+ 	execfd = make_execfd(&fdtype);
+ 	if (execfd < 0 || fdtype == EFD_NONE)
+ 		return -ENOTRECOVERABLE;
+-- 
+2.46.0
+
--- a/runc-1.1.12.tar.xz
+++ b/runc-1.1.12.tar.xz
--- a/runc-1.1.12.tar.xz.asc
+++ b/runc-1.1.12.tar.xz.asc
@@ -1,17 +0,0 @@
-----BEGIN PGP SIGNATURE-----
-
-iQJEBAABCAAuFiEEXzbGxhtUYBJKdfWmnhiqJn3bjbQFAmWvvCcQHGFzYXJhaUBz
-dXNlLmNvbQAKCRCeGKomfduNtG2oD/9yLwYdfbx4GU31kCuvTS3odH8XyplL4QLl
-TszoLO/50z/Y9r0QBNuLsDDvAWtsJAYTsRIwEwDgUuziHnbkbHCnE2C+6P7OWUKp
-7VS1mqWzWeVibt0hYBWcooJb8inA/ctwfppZlH8EnTdoyqp0bAuQKtj2muA+LTvN
-n/19qZ0/zAvErya5ugZCfnpJngOM0W//F5OSE/DKI3ct6o3AilxlzlhZuwkiYQud
-nwS5j4CvQp7GkJeuwDluUHGmsT8AW6P3McptS/BcT4wUKWhxcntJG1cdiZOFTW84
-3CLdwMPGQR0SVK5yPMbKogRtglODEW82Ytp4S8BB9sG5PS5rBsvnApSQxFluRMQT
-oaQsEKwPS+VSUwf44QR42iF3fB8dxmmmcautr5yaUiSx4DdFGj9jjrbMa9YCk2da
-J/5ExwJv5nP5R+uwOiH3ziZuFuuH1afbGLrT2ouv61/SMGiYiLEAyiegF94Zg2nu
-5RvMUz33LpEckLrlNN5u9q+/jbfJmZAUtdVafKQQTBRFKPCyHjOroKM11PzoHX6l
-3dsyEPbEfowZ+uM2z9wCfub529fNF8t9k9sUAIQsma5p7+l7xJMbOua2kd1kGiQU
-ec19+KD6ka4NHyDRwxe0iM6/AuFlKKUUTVGZjg2bD+ap0qgDjZ3R5lTmI1pJ8Win
-wfoEKZCm+A==
-=Sl8m
-----END PGP SIGNATURE-----
--- a/runc-1.1.14.tar.xz
+++ b/runc-1.1.14.tar.xz
--- a/runc-1.1.14.tar.xz.asc
+++ b/runc-1.1.14.tar.xz.asc
@@ -0,0 +1,7 @@
+-----BEGIN PGP SIGNATURE-----
+
+iHUEABYKAB0WIQS2TklVsp+j1GPyqQYol/rSt+lEbwUCZtZk+AAKCRAol/rSt+lE
+b0TGAQC6tc59nCVnmViX22aKK6fuV++saYQgQKKhIkqiyBs97wD/a49dqcnjgHIf
+OKO+WjeCGwFIwmHIsAeD3bdCb+XTqQI=
+=E21y
+-----END PGP SIGNATURE-----
--- a/runc.changes
+++ b/runc.changes
@@ -1,3 +1,43 @@
+-------------------------------------------------------------------
+Tue Sep  3 01:57:20 UTC 2024 - Aleksa Sarai <asarai@suse.com>
+
+[ This was only ever released for SLES and Leap. ]
+
+- Update to runc v1.1.14. Upstream changelog is available from
+  <https://github.com/opencontainers/runc/releases/tag/v1.1.14>.
+  Includes the patch for CVE-2024-45310. bsc#1230092
+
+- Rebase patches:
+  * 0001-bsc1221050-libct-seccomp-patchbpf-rm-duplicated-code.patch
+  * 0002-bsc1221050-seccomp-patchbpf-rename-nativeArch-linuxA.patch
+  * 0003-bsc1221050-seccomp-patchbpf-always-include-native-ar.patch
+  * 0004-bsc1214960-nsenter-cloned_binary-remove-bindfd-logic.patch
+
+-------------------------------------------------------------------
+Mon Jul 22 13:08:06 UTC 2024 - Aleksa Sarai <asarai@suse.com>
+
+[ This was only ever released for SLES and Leap. ]
+
+- Update to runc v1.1.13. Upstream changelog is available from
+  <https://github.com/opencontainers/runc/releases/tag/v1.1.13>.
+- Rebase patches:
+  * 0001-bsc1221050-libct-seccomp-patchbpf-rm-duplicated-code.patch
+  * 0002-bsc1221050-seccomp-patchbpf-rename-nativeArch-linuxA.patch
+  * 0003-bsc1221050-seccomp-patchbpf-always-include-native-ar.patch
+- Backport <https://github.com/opencontainers/runc/pull/3931> to fix a
+  performance issue when running lots of containers, caused by systemd getting
+  too many mount notifications. bsc#1214960
+  + 0004-bsc1214960-nsenter-cloned_binary-remove-bindfd-logic.patch
+
+-------------------------------------------------------------------
+Thu Mar 21 03:46:48 UTC 2024 - Aleksa Sarai <asarai@suse.com>
+
+- Add upstream patch <https://github.com/opencontainers/runc/pull/4219> to
+  properly fix -ENOSYS stub on ppc64le. bsc#1192051 bsc#1221050
+  + 0001-bsc1221050-libct-seccomp-patchbpf-rm-duplicated-code.patch
+  + 0002-bsc1221050-seccomp-patchbpf-rename-nativeArch-linuxA.patch
+  + 0003-bsc1221050-seccomp-patchbpf-always-include-native-ar.patch
+
 -------------------------------------------------------------------
 Wed Jan 31 00:00:33 UTC 2024 - Aleksa Sarai <asarai@suse.com>

--- a/runc.spec
+++ b/runc.spec
@@ -18,13 +18,13 @@


 # MANUAL: Make sure you update this each time you update runc.
-%define git_version 51d5e94601ceffbbd85688df1c928ecccbfa4685
-%define git_short   51d5e94601ce
+%define git_version 2c9f5602f0ba3d9da1c2596322dfc4e156844890
+%define git_short   2c9f5602f0ba

 %define project github.com/opencontainers/runc

 Name:           runc
-Version:        1.1.12
+Version:        1.1.14
 Release:        0
 Summary:        Tool for spawning and running OCI containers
 License:        Apache-2.0
@@ -33,6 +33,12 @@ URL:            https://github.com/opencontainers/runc
 Source0:        https://github.com/opencontainers/runc/releases/download/v%{version}/runc.tar.xz#/runc-%{version}.tar.xz
 Source1:        https://github.com/opencontainers/runc/releases/download/v%{version}/runc.tar.xz.asc#/runc-%{version}.tar.xz.asc
 Source2:        runc.keyring
+# SUSE-FIX-UPSTREAM: Backport of <https://github.com/opencontainers/runc/pull/4219>. bsc#1221050
+Patch10:        0001-bsc1221050-libct-seccomp-patchbpf-rm-duplicated-code.patch
+Patch11:        0002-bsc1221050-seccomp-patchbpf-rename-nativeArch-linuxA.patch
+Patch12:        0003-bsc1221050-seccomp-patchbpf-always-include-native-ar.patch
+# SUSE-FIX-UPSTREAM: Partial backport of <https://github.com/opencontainers/runc/pull/3931>. bsc#1214960
+Patch20:        0004-bsc1214960-nsenter-cloned_binary-remove-bindfd-logic.patch
 BuildRequires:  diffutils
 BuildRequires:  fdupes
 BuildRequires:  go
@@ -64,6 +70,7 @@ and has grown to become a separate project entirely.

 %prep
 %setup -q -n %{name}-%{version}
+%autopatch -p1

 %build
 # build runc