Accepting request 978574 from home:cyphar:docker
- Backport <https://github.com/opencontainers/runc/pull/3474> to fix issues with newer syscalls (namely faccessat2) on older kernels on s390(x) caused by that platform's syscall multiplexing semantics. bsc#1192051 bsc#1199565 + bsc1192051-0001-seccomp-enosys-always-return-ENOSYS-for-setup-2-on-s390x.patch - Add ExcludeArch for s390 (not s390x) since we've never supported it. OBS-URL: https://build.opensuse.org/request/show/978574 OBS-URL: https://build.opensuse.org/package/show/Virtualization:containers/runc?expand=0&rev=125
This commit is contained in:
parent
2f40aa19ad
commit
456c3f8a79
@ -0,0 +1,164 @@
|
|||||||
|
From dcc3dc305307f530f8faf394c84d1dede29443ab Mon Sep 17 00:00:00 2001
|
||||||
|
From: Aleksa Sarai <cyphar@cyphar.com>
|
||||||
|
Date: Fri, 20 May 2022 10:39:41 +1000
|
||||||
|
Subject: [PATCH] seccomp: enosys: always return -ENOSYS for setup(2) on
|
||||||
|
s390(x)
|
||||||
|
|
||||||
|
On s390x, syscalls above 255 are multiplexed using the (now otherwise
|
||||||
|
unused) setup(2) syscall (syscall number 0). If the kernel supports the
|
||||||
|
syscall then it will correctly translate the syscall number such that
|
||||||
|
seccomp will correctly detect it -- however, for unknown syscalls the
|
||||||
|
syscall number remains unchanged. This can be verified by running the
|
||||||
|
following program under strace:
|
||||||
|
|
||||||
|
int main(void)
|
||||||
|
{
|
||||||
|
scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_TRAP);
|
||||||
|
seccomp_load(ctx);
|
||||||
|
|
||||||
|
return syscall(439, AT_FDCWD, "asdf", X_OK, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
Which will then die with the following signal (on pre-5.8 kernels):
|
||||||
|
|
||||||
|
--- SIGSYS {si_signo=SIGSYS, si_code=SYS_SECCOMP,
|
||||||
|
si_call_addr=0x3ffb3006c22, si_syscall=__NR_setup,
|
||||||
|
si_arch=AUDIT_ARCH_S390X} ---
|
||||||
|
|
||||||
|
(Note that the si_syscall is __NR_setup, not __NR_faccessat2.)
|
||||||
|
|
||||||
|
As a result, the -ENOSYS handling we had previously did not work
|
||||||
|
completely correctly on s390x because any syscall not supported by the
|
||||||
|
kernel would be treated as syscall number 0 rather than the actual
|
||||||
|
syscall number.
|
||||||
|
|
||||||
|
Always returning -ENOSYS will not cause any issues because in all of the
|
||||||
|
cases where this multiplexing occurs, seccomp will see the remapped
|
||||||
|
syscall number -- and no userspace program will call setup(2)
|
||||||
|
intentionally (the syscall has not existed in Linux for decades and was
|
||||||
|
originally a hack used early in Linux init prior to spawning pid1 -- so
|
||||||
|
you will get -ENOSYS from the kernel anyway).
|
||||||
|
|
||||||
|
SUSE-Bugs: bsc#1192051 bsc#1199565
|
||||||
|
Backport: <https://github.com/opencontainers/runc/pull/3474>
|
||||||
|
Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
|
||||||
|
---
|
||||||
|
libcontainer/seccomp/patchbpf/enosys_linux.go | 48 ++++++++++++++-----
|
||||||
|
.../seccomp/patchbpf/enosys_linux_test.go | 13 +++++
|
||||||
|
2 files changed, 50 insertions(+), 11 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/libcontainer/seccomp/patchbpf/enosys_linux.go b/libcontainer/seccomp/patchbpf/enosys_linux.go
|
||||||
|
index 095fba7fd91f..6376512b086f 100644
|
||||||
|
--- a/libcontainer/seccomp/patchbpf/enosys_linux.go
|
||||||
|
+++ b/libcontainer/seccomp/patchbpf/enosys_linux.go
|
||||||
|
@@ -80,6 +80,11 @@ import "C"
|
||||||
|
|
||||||
|
var retErrnoEnosys = uint32(C.C_ACT_ERRNO_ENOSYS)
|
||||||
|
|
||||||
|
+// This syscall is used for multiplexing "large" syscalls on s390(x). Unknown
|
||||||
|
+// syscalls will end up with this syscall number, so we need to explicitly
|
||||||
|
+// return -ENOSYS for this syscall on those architectures.
|
||||||
|
+const s390xMultiplexSyscall libseccomp.ScmpSyscall = 0
|
||||||
|
+
|
||||||
|
func isAllowAction(action configs.Action) bool {
|
||||||
|
switch action {
|
||||||
|
// Trace is considered an "allow" action because a good tracer should
|
||||||
|
@@ -315,7 +320,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
|
||||||
|
// directly from the arch code so we need to do it here. Sadly we can't
|
||||||
|
// share this code between architecture branches.
|
||||||
|
section := []bpf.Instruction{
|
||||||
|
- // load [0]
|
||||||
|
+ // load [0] (syscall number)
|
||||||
|
bpf.LoadAbsolute{Off: 0, Size: 4}, // NOTE: We assume sizeof(int) == 4.
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -324,10 +329,37 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
|
||||||
|
// No syscalls found for this arch -- skip it and move on.
|
||||||
|
continue
|
||||||
|
case 1:
|
||||||
|
- // Get the only syscall in the map.
|
||||||
|
- var sysno libseccomp.ScmpSyscall
|
||||||
|
- for _, no := range maxSyscalls {
|
||||||
|
+ // Get the only syscall and scmpArch in the map.
|
||||||
|
+ var (
|
||||||
|
+ scmpArch libseccomp.ScmpArch
|
||||||
|
+ sysno libseccomp.ScmpSyscall
|
||||||
|
+ )
|
||||||
|
+ for arch, no := range maxSyscalls {
|
||||||
|
sysno = no
|
||||||
|
+ scmpArch = arch
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ switch scmpArch {
|
||||||
|
+ // Return -ENOSYS for setup(2) on s390(x). This syscall is used for
|
||||||
|
+ // multiplexing "large syscall number" syscalls, but if the syscall
|
||||||
|
+ // number is not known to the kernel then the syscall number is
|
||||||
|
+ // left unchanged (and because it is sysno=0, you'll end up with
|
||||||
|
+ // EPERM for syscalls the kernel doesn't know about).
|
||||||
|
+ //
|
||||||
|
+ // The actual setup(2) syscall is never used by userspace anymore
|
||||||
|
+ // (and hasn't existed for decades) outside of this multiplexing
|
||||||
|
+ // scheme so returning -ENOSYS is fine.
|
||||||
|
+ case libseccomp.ArchS390, libseccomp.ArchS390X:
|
||||||
|
+ section = append(section, []bpf.Instruction{
|
||||||
|
+ // jne [setup=0],1
|
||||||
|
+ bpf.JumpIf{
|
||||||
|
+ Cond: bpf.JumpNotEqual,
|
||||||
|
+ Val: uint32(s390xMultiplexSyscall),
|
||||||
|
+ SkipTrue: 1,
|
||||||
|
+ },
|
||||||
|
+ // ret [ENOSYS]
|
||||||
|
+ bpf.RetConstant{Val: retErrnoEnosys},
|
||||||
|
+ }...)
|
||||||
|
}
|
||||||
|
|
||||||
|
// The simplest case just boils down to a single jgt instruction,
|
||||||
|
@@ -359,12 +391,6 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
|
||||||
|
// If we're on x86 we need to add a check for x32 and if we're in
|
||||||
|
// the wrong mode we jump over the section.
|
||||||
|
if uint32(nativeArch) == uint32(C.C_AUDIT_ARCH_X86_64) {
|
||||||
|
- // Grab the only architecture in the map.
|
||||||
|
- var scmpArch libseccomp.ScmpArch
|
||||||
|
- for arch := range maxSyscalls {
|
||||||
|
- scmpArch = arch
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
// Generate a prefix to check the mode.
|
||||||
|
switch scmpArch {
|
||||||
|
case libseccomp.ArchAMD64:
|
||||||
|
@@ -522,7 +548,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
|
||||||
|
|
||||||
|
// Prepend the load instruction for the architecture.
|
||||||
|
programTail = append([]bpf.Instruction{
|
||||||
|
- // load [4]
|
||||||
|
+ // load [4] (architecture)
|
||||||
|
bpf.LoadAbsolute{Off: 4, Size: 4}, // NOTE: We assume sizeof(int) == 4.
|
||||||
|
}, programTail...)
|
||||||
|
|
||||||
|
diff --git a/libcontainer/seccomp/patchbpf/enosys_linux_test.go b/libcontainer/seccomp/patchbpf/enosys_linux_test.go
|
||||||
|
index 727800aa50cd..e2d363a43bd3 100644
|
||||||
|
--- a/libcontainer/seccomp/patchbpf/enosys_linux_test.go
|
||||||
|
+++ b/libcontainer/seccomp/patchbpf/enosys_linux_test.go
|
||||||
|
@@ -213,6 +213,19 @@ func testEnosysStub(t *testing.T, defaultAction configs.Action, arches []string)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
+ // If we're on s390(x) make sure you get -ENOSYS for the "setup"
|
||||||
|
+ // syscall (this is done to work around an issue with s390x's
|
||||||
|
+ // syscall multiplexing which results in unknown syscalls being a
|
||||||
|
+ // setup(2) invocation).
|
||||||
|
+ switch scmpArch {
|
||||||
|
+ case libseccomp.ArchS390, libseccomp.ArchS390X:
|
||||||
|
+ syscallTests = append(syscallTests, syscallTest{
|
||||||
|
+ sysno: s390xMultiplexSyscall,
|
||||||
|
+ syscall: "setup",
|
||||||
|
+ expected: retErrnoEnosys,
|
||||||
|
+ })
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
// Test syscalls in the explicit list.
|
||||||
|
for _, test := range syscallTests {
|
||||||
|
// Override the expected value in the two special cases.
|
||||||
|
--
|
||||||
|
2.36.1
|
||||||
|
|
13
runc.changes
13
runc.changes
@ -1,3 +1,16 @@
|
|||||||
|
-------------------------------------------------------------------
|
||||||
|
Mon May 23 03:02:32 UTC 2022 - Aleksa Sarai <asarai@suse.com>
|
||||||
|
|
||||||
|
- Backport <https://github.com/opencontainers/runc/pull/3474> to fix issues
|
||||||
|
with newer syscalls (namely faccessat2) on older kernels on s390(x) caused by
|
||||||
|
that platform's syscall multiplexing semantics. bsc#1192051 bsc#1199565
|
||||||
|
+ bsc1192051-0001-seccomp-enosys-always-return-ENOSYS-for-setup-2-on-s390x.patch
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Thu May 12 10:04:57 UTC 2022 - Aleksa Sarai <asarai@suse.com>
|
||||||
|
|
||||||
|
- Add ExcludeArch for s390 (not s390x) since we've never supported it.
|
||||||
|
|
||||||
-------------------------------------------------------------------
|
-------------------------------------------------------------------
|
||||||
Wed May 11 22:43:51 UTC 2022 - Aleksa Sarai <asarai@suse.com>
|
Wed May 11 22:43:51 UTC 2022 - Aleksa Sarai <asarai@suse.com>
|
||||||
|
|
||||||
|
@ -36,6 +36,8 @@ URL: https://github.com/opencontainers/runc
|
|||||||
Source0: https://github.com/opencontainers/runc/releases/download/v%{_version}/runc.tar.xz#/runc-%{version}.tar.xz
|
Source0: https://github.com/opencontainers/runc/releases/download/v%{_version}/runc.tar.xz#/runc-%{version}.tar.xz
|
||||||
Source1: https://github.com/opencontainers/runc/releases/download/v%{_version}/runc.tar.xz.asc#/runc-%{version}.tar.xz.asc
|
Source1: https://github.com/opencontainers/runc/releases/download/v%{_version}/runc.tar.xz.asc#/runc-%{version}.tar.xz.asc
|
||||||
Source2: runc.keyring
|
Source2: runc.keyring
|
||||||
|
# OPENSUSE-FIX-UPSTREAM: Backport of <https://github.com/opencontainers/runc/pull/3474>. bsc#1192051 bsc#1199565
|
||||||
|
Patch1: bsc1192051-0001-seccomp-enosys-always-return-ENOSYS-for-setup-2-on-s390x.patch
|
||||||
BuildRequires: fdupes
|
BuildRequires: fdupes
|
||||||
BuildRequires: go-go-md2man
|
BuildRequires: go-go-md2man
|
||||||
# Due to a limitation in openSUSE's Go packaging we cannot have a BuildRequires
|
# Due to a limitation in openSUSE's Go packaging we cannot have a BuildRequires
|
||||||
@ -55,6 +57,7 @@ Obsoletes: docker-runc-kubic < %{version}
|
|||||||
Provides: docker-runc-kubic = %{version}
|
Provides: docker-runc-kubic = %{version}
|
||||||
Obsoletes: docker-runc = 0.1.1+gitr2819_50a19c6
|
Obsoletes: docker-runc = 0.1.1+gitr2819_50a19c6
|
||||||
Obsoletes: docker-runc_50a19c6
|
Obsoletes: docker-runc_50a19c6
|
||||||
|
ExcludeArch: s390
|
||||||
|
|
||||||
# Construct "git describe --dirty --long --always".
|
# Construct "git describe --dirty --long --always".
|
||||||
%define git_describe v%{_version}-0-g%{git_short}
|
%define git_describe v%{_version}-0-g%{git_short}
|
||||||
@ -67,6 +70,8 @@ and has grown to become a separate project entirely.
|
|||||||
|
|
||||||
%prep
|
%prep
|
||||||
%setup -q -n %{name}-%{_version}
|
%setup -q -n %{name}-%{_version}
|
||||||
|
# bsc#1192051 bsc#1199565
|
||||||
|
%patch1 -p1
|
||||||
|
|
||||||
%build
|
%build
|
||||||
# build runc
|
# build runc
|
||||||
|
Loading…
Reference in New Issue
Block a user