SHA256
1
0
forked from pool/runc

Accepting request 978574 from home:cyphar:docker

- Backport <https://github.com/opencontainers/runc/pull/3474> to fix issues
  with newer syscalls (namely faccessat2) on older kernels on s390(x) caused by
  that platform's syscall multiplexing semantics. bsc#1192051 bsc#1199565
  + bsc1192051-0001-seccomp-enosys-always-return-ENOSYS-for-setup-2-on-s390x.patch
- Add ExcludeArch for s390 (not s390x) since we've never supported it.

OBS-URL: https://build.opensuse.org/request/show/978574
OBS-URL: https://build.opensuse.org/package/show/Virtualization:containers/runc?expand=0&rev=125
This commit is contained in:
Aleksa Sarai 2022-05-23 03:15:57 +00:00 committed by Git OBS Bridge
parent 2f40aa19ad
commit 456c3f8a79
3 changed files with 182 additions and 0 deletions

View File

@@ -0,0 +1,164 @@
From dcc3dc305307f530f8faf394c84d1dede29443ab Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <cyphar@cyphar.com>
Date: Fri, 20 May 2022 10:39:41 +1000
Subject: [PATCH] seccomp: enosys: always return -ENOSYS for setup(2) on
s390(x)
On s390x, syscalls above 255 are multiplexed using the (now otherwise
unused) setup(2) syscall (syscall number 0). If the kernel supports the
syscall then it will correctly translate the syscall number such that
seccomp will correctly detect it -- however, for unknown syscalls the
syscall number remains unchanged. This can be verified by running the
following program under strace:
int main(void)
{
scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_TRAP);
seccomp_load(ctx);
return syscall(439, AT_FDCWD, "asdf", X_OK, 0);
}
This will then die with the following signal (on pre-5.8 kernels):
--- SIGSYS {si_signo=SIGSYS, si_code=SYS_SECCOMP,
si_call_addr=0x3ffb3006c22, si_syscall=__NR_setup,
si_arch=AUDIT_ARCH_S390X} ---
(Note that the si_syscall is __NR_setup, not __NR_faccessat2.)
As a result, the -ENOSYS handling we had previously did not work
completely correctly on s390x because any syscall not supported by the
kernel would be treated as syscall number 0 rather than the actual
syscall number.
Always returning -ENOSYS will not cause any issues because in all of the
cases where this multiplexing occurs, seccomp will see the remapped
syscall number -- and no userspace program will call setup(2)
intentionally (the syscall has not existed in Linux for decades and was
originally a hack used early in Linux init prior to spawning pid1 -- so
you will get -ENOSYS from the kernel anyway).
SUSE-Bugs: bsc#1192051 bsc#1199565
Backport: <https://github.com/opencontainers/runc/pull/3474>
Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
---
libcontainer/seccomp/patchbpf/enosys_linux.go | 48 ++++++++++++++-----
.../seccomp/patchbpf/enosys_linux_test.go | 13 +++++
2 files changed, 50 insertions(+), 11 deletions(-)
diff --git a/libcontainer/seccomp/patchbpf/enosys_linux.go b/libcontainer/seccomp/patchbpf/enosys_linux.go
index 095fba7fd91f..6376512b086f 100644
--- a/libcontainer/seccomp/patchbpf/enosys_linux.go
+++ b/libcontainer/seccomp/patchbpf/enosys_linux.go
@@ -80,6 +80,11 @@ import "C"
var retErrnoEnosys = uint32(C.C_ACT_ERRNO_ENOSYS)
+// This syscall is used for multiplexing "large" syscalls on s390(x). Unknown
+// syscalls will end up with this syscall number, so we need to explicitly
+// return -ENOSYS for this syscall on those architectures.
+const s390xMultiplexSyscall libseccomp.ScmpSyscall = 0
+
func isAllowAction(action configs.Action) bool {
switch action {
// Trace is considered an "allow" action because a good tracer should
@@ -315,7 +320,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
// directly from the arch code so we need to do it here. Sadly we can't
// share this code between architecture branches.
section := []bpf.Instruction{
- // load [0]
+ // load [0] (syscall number)
bpf.LoadAbsolute{Off: 0, Size: 4}, // NOTE: We assume sizeof(int) == 4.
}
@@ -324,10 +329,37 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
// No syscalls found for this arch -- skip it and move on.
continue
case 1:
- // Get the only syscall in the map.
- var sysno libseccomp.ScmpSyscall
- for _, no := range maxSyscalls {
+ // Get the only syscall and scmpArch in the map.
+ var (
+ scmpArch libseccomp.ScmpArch
+ sysno libseccomp.ScmpSyscall
+ )
+ for arch, no := range maxSyscalls {
sysno = no
+ scmpArch = arch
+ }
+
+ switch scmpArch {
+ // Return -ENOSYS for setup(2) on s390(x). This syscall is used for
+ // multiplexing "large syscall number" syscalls, but if the syscall
+ // number is not known to the kernel then the syscall number is
+ // left unchanged (and because it is sysno=0, you'll end up with
+ // EPERM for syscalls the kernel doesn't know about).
+ //
+ // The actual setup(2) syscall is never used by userspace anymore
+ // (and hasn't existed for decades) outside of this multiplexing
+ // scheme so returning -ENOSYS is fine.
+ case libseccomp.ArchS390, libseccomp.ArchS390X:
+ section = append(section, []bpf.Instruction{
+ // jne [setup=0],1
+ bpf.JumpIf{
+ Cond: bpf.JumpNotEqual,
+ Val: uint32(s390xMultiplexSyscall),
+ SkipTrue: 1,
+ },
+ // ret [ENOSYS]
+ bpf.RetConstant{Val: retErrnoEnosys},
+ }...)
}
// The simplest case just boils down to a single jgt instruction,
@@ -359,12 +391,6 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
// If we're on x86 we need to add a check for x32 and if we're in
// the wrong mode we jump over the section.
if uint32(nativeArch) == uint32(C.C_AUDIT_ARCH_X86_64) {
- // Grab the only architecture in the map.
- var scmpArch libseccomp.ScmpArch
- for arch := range maxSyscalls {
- scmpArch = arch
- }
-
// Generate a prefix to check the mode.
switch scmpArch {
case libseccomp.ArchAMD64:
@@ -522,7 +548,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
// Prepend the load instruction for the architecture.
programTail = append([]bpf.Instruction{
- // load [4]
+ // load [4] (architecture)
bpf.LoadAbsolute{Off: 4, Size: 4}, // NOTE: We assume sizeof(int) == 4.
}, programTail...)
diff --git a/libcontainer/seccomp/patchbpf/enosys_linux_test.go b/libcontainer/seccomp/patchbpf/enosys_linux_test.go
index 727800aa50cd..e2d363a43bd3 100644
--- a/libcontainer/seccomp/patchbpf/enosys_linux_test.go
+++ b/libcontainer/seccomp/patchbpf/enosys_linux_test.go
@@ -213,6 +213,19 @@ func testEnosysStub(t *testing.T, defaultAction configs.Action, arches []string)
})
}
+ // If we're on s390(x) make sure you get -ENOSYS for the "setup"
+ // syscall (this is done to work around an issue with s390x's
+ // syscall multiplexing which results in unknown syscalls being reported
+ // as setup(2) invocations).
+ switch scmpArch {
+ case libseccomp.ArchS390, libseccomp.ArchS390X:
+ syscallTests = append(syscallTests, syscallTest{
+ sysno: s390xMultiplexSyscall,
+ syscall: "setup",
+ expected: retErrnoEnosys,
+ })
+ }
+
// Test syscalls in the explicit list.
for _, test := range syscallTests {
// Override the expected value in the two special cases.
--
2.36.1

View File

@@ -1,3 +1,16 @@
-------------------------------------------------------------------
Mon May 23 03:02:32 UTC 2022 - Aleksa Sarai <asarai@suse.com>
- Backport <https://github.com/opencontainers/runc/pull/3474> to fix issues
with newer syscalls (namely faccessat2) on older kernels on s390(x) caused by
that platform's syscall multiplexing semantics. bsc#1192051 bsc#1199565
+ bsc1192051-0001-seccomp-enosys-always-return-ENOSYS-for-setup-2-on-s390x.patch
-------------------------------------------------------------------
Thu May 12 10:04:57 UTC 2022 - Aleksa Sarai <asarai@suse.com>
- Add ExcludeArch for s390 (not s390x) since we've never supported it.
-------------------------------------------------------------------
Wed May 11 22:43:51 UTC 2022 - Aleksa Sarai <asarai@suse.com>

View File

@@ -36,6 +36,8 @@ URL: https://github.com/opencontainers/runc
Source0: https://github.com/opencontainers/runc/releases/download/v%{_version}/runc.tar.xz#/runc-%{version}.tar.xz
Source1: https://github.com/opencontainers/runc/releases/download/v%{_version}/runc.tar.xz.asc#/runc-%{version}.tar.xz.asc
Source2: runc.keyring
# OPENSUSE-FIX-UPSTREAM: Backport of <https://github.com/opencontainers/runc/pull/3474>. bsc#1192051 bsc#1199565
Patch1: bsc1192051-0001-seccomp-enosys-always-return-ENOSYS-for-setup-2-on-s390x.patch
BuildRequires: fdupes
BuildRequires: go-go-md2man
# Due to a limitation in openSUSE's Go packaging we cannot have a BuildRequires
@@ -55,6 +57,7 @@ Obsoletes: docker-runc-kubic < %{version}
Provides: docker-runc-kubic = %{version}
Obsoletes: docker-runc = 0.1.1+gitr2819_50a19c6
Obsoletes: docker-runc_50a19c6
ExcludeArch: s390
# Construct "git describe --dirty --long --always".
%define git_describe v%{_version}-0-g%{git_short}
@@ -67,6 +70,8 @@ and has grown to become a separate project entirely.
%prep
%setup -q -n %{name}-%{_version}
# bsc#1192051 bsc#1199565
%patch1 -p1
%build
# build runc