runc/CVE-2016-9962.patch

226 lines
8.5 KiB
Diff

From 9729fc3127c5e27d59a0126f52e91c61037173ae Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <asarai@suse.de>
Date: Tue, 29 Nov 2016 01:25:06 +1100
Subject: [PATCH] libcontainer: init: only pass stateDirFd when creating a
container
If we pass a file descriptor to the host filesystem while joining a
container, there is a race condition where a process inside the
container can ptrace(2) the joining process and stop it from closing
the stateDirFd file descriptor. The process can then access the *host*
filesystem through that file descriptor.
To fix this, don't open or pass the stateDirFd to the init process
unless we're creating a new container. A proper fix for this would be to
remove the need for even passing around directory file descriptors
(which are quite dangerous in the context of mount namespaces).
SUSE: This is an additional patch which was the original "fix" for the
CVE. It was agreed that the patch was too complicated to be used
as a fix for a security issue -- but it will be upstreamed once
the embargo passes.
Fixes: CVE-2016-9962
Signed-off-by: Aleksa Sarai <asarai@suse.de>
---
libcontainer/container_linux.go | 30 ++++++++++++++++++------------
libcontainer/factory_linux.go | 37 ++++++++++++++++++++++---------------
libcontainer/init_linux.go | 3 +--
libcontainer/process_linux.go | 2 --
libcontainer/setns_init_linux.go | 7 +------
5 files changed, 42 insertions(+), 37 deletions(-)
diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go
index 82c6d8e..369b5d5 100644
--- a/libcontainer/container_linux.go
+++ b/libcontainer/container_linux.go
@@ -297,21 +297,29 @@ func (c *linuxContainer) newParentProcess(p *Process, doInit bool) (parentProces
if err != nil {
return nil, newSystemErrorWithCause(err, "creating new init pipe")
}
- rootDir, err := os.Open(c.root)
- if err != nil {
- return nil, err
- }
- cmd, err := c.commandTemplate(p, childPipe, rootDir)
+ cmd, err := c.commandTemplate(p, childPipe)
if err != nil {
return nil, newSystemErrorWithCause(err, "creating new command template")
}
if !doInit {
- return c.newSetnsProcess(p, cmd, parentPipe, childPipe, rootDir)
+ return c.newSetnsProcess(p, cmd, parentPipe, childPipe)
}
+
+ // We only set up rootDir if we're not doing a `runc exec`. The reason for
+ // this is to avoid cases where a racing, unprivileged process inside the
+ // container can get access to the statedir file descriptor (which would
+ // allow for container rootfs escape).
+ rootDir, err := os.Open(c.root)
+ if err != nil {
+ return nil, err
+ }
+ cmd.ExtraFiles = append(cmd.ExtraFiles, rootDir)
+ cmd.Env = append(cmd.Env,
+ fmt.Sprintf("_LIBCONTAINER_STATEDIR=%d", stdioFdCount+len(cmd.ExtraFiles)-1))
return c.newInitProcess(p, cmd, parentPipe, childPipe, rootDir)
}
-func (c *linuxContainer) commandTemplate(p *Process, childPipe, rootDir *os.File) (*exec.Cmd, error) {
+func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.Cmd, error) {
cmd := exec.Command(c.initArgs[0], c.initArgs[1:]...)
cmd.Stdin = p.Stdin
cmd.Stdout = p.Stdout
@@ -320,10 +328,9 @@ func (c *linuxContainer) commandTemplate(p *Process, childPipe, rootDir *os.File
if cmd.SysProcAttr == nil {
cmd.SysProcAttr = &syscall.SysProcAttr{}
}
- cmd.ExtraFiles = append(p.ExtraFiles, childPipe, rootDir)
+ cmd.ExtraFiles = append(p.ExtraFiles, childPipe)
cmd.Env = append(cmd.Env,
- fmt.Sprintf("_LIBCONTAINER_INITPIPE=%d", stdioFdCount+len(cmd.ExtraFiles)-2),
- fmt.Sprintf("_LIBCONTAINER_STATEDIR=%d", stdioFdCount+len(cmd.ExtraFiles)-1))
+ fmt.Sprintf("_LIBCONTAINER_INITPIPE=%d", stdioFdCount+len(cmd.ExtraFiles)-1))
// NOTE: when running a container with no PID namespace and the parent process spawning the container is
// PID1 the pdeathsig is being delivered to the container's init process by the kernel for some reason
// even with the parent still running.
@@ -360,7 +367,7 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
}, nil
}
-func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe, rootDir *os.File) (*setnsProcess, error) {
+func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*setnsProcess, error) {
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns))
state, err := c.currentState()
if err != nil {
@@ -381,7 +388,6 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe,
config: c.newInitConfig(p),
process: p,
bootstrapData: data,
- rootDir: rootDir,
}, nil
}
diff --git a/libcontainer/factory_linux.go b/libcontainer/factory_linux.go
index cc336c147fb9..e3478d5b50e3 100644
--- a/libcontainer/factory_linux.go
+++ b/libcontainer/factory_linux.go
@@ -222,27 +222,34 @@ func (l *LinuxFactory) Type() string {
// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state
// This is a low level implementation detail of the reexec and should not be consumed externally
func (l *LinuxFactory) StartInitialization() (err error) {
- var pipefd, rootfd int
- for _, pair := range []struct {
- k string
- v *int
- }{
- {"_LIBCONTAINER_INITPIPE", &pipefd},
- {"_LIBCONTAINER_STATEDIR", &rootfd},
- } {
-
- s := os.Getenv(pair.k)
+ var (
+ pipefd, rootfd int
+ envInitPipe = os.Getenv("_LIBCONTAINER_INITPIPE")
+ envStateDir = os.Getenv("_LIBCONTAINER_STATEDIR")
+ )
- i, err := strconv.Atoi(s)
- if err != nil {
- return fmt.Errorf("unable to convert %s=%s to int", pair.k, s)
- }
- *pair.v = i
+ // Get the INITPIPE.
+ pipefd, err = strconv.Atoi(envInitPipe)
+ if err != nil {
+ return fmt.Errorf("unable to convert _LIBCONTAINER_INITPIPE=%s to int: %s", envInitPipe, err)
}
+
var (
pipe = os.NewFile(uintptr(pipefd), "pipe")
it = initType(os.Getenv("_LIBCONTAINER_INITTYPE"))
)
+
+ // Only get the STATEDIR if we're an init process. It's a bit late to do
+ // anything about this if we've already brought in an fd (a racing process
+ // could've opened the fd by now because we're in the PID namespace).
+ rootfd = -1
+ if it == initStandard {
+ rootfd, err = strconv.Atoi(envStateDir)
+ if err != nil {
+ return fmt.Errorf("unable to convert _LIBCONTAINER_STATEDIR=%s to int: %s", envStateDir, err)
+ }
+ }
+
// clear the current process's environment to clean any libcontainer
// specific env vars.
os.Clearenv()
diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go
index 4043d51c0bd0..b1e6762ecdf3 100644
--- a/libcontainer/init_linux.go
+++ b/libcontainer/init_linux.go
@@ -77,8 +77,7 @@ func newContainerInit(t initType, pipe *os.File, stateDirFD int) (initer, error)
switch t {
case initSetns:
return &linuxSetnsInit{
- config: config,
- stateDirFD: stateDirFD,
+ config: config,
}, nil
case initStandard:
return &linuxStandardInit{
diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go
index 5b81317fd711..7b90c6b8faae 100644
--- a/libcontainer/process_linux.go
+++ b/libcontainer/process_linux.go
@@ -51,7 +51,6 @@ type setnsProcess struct {
fds []string
process *Process
bootstrapData io.Reader
- rootDir *os.File
}
func (p *setnsProcess) startTime() (string, error) {
@@ -70,7 +69,6 @@ func (p *setnsProcess) start() (err error) {
defer p.parentPipe.Close()
err = p.cmd.Start()
p.childPipe.Close()
- p.rootDir.Close()
if err != nil {
return newSystemErrorWithCause(err, "starting setns process")
}
diff --git a/libcontainer/setns_init_linux.go b/libcontainer/setns_init_linux.go
index 7f5f182402cc..2a8f34528179 100644
--- a/libcontainer/setns_init_linux.go
+++ b/libcontainer/setns_init_linux.go
@@ -5,7 +5,6 @@ package libcontainer
import (
"fmt"
"os"
- "syscall"
"github.com/opencontainers/runc/libcontainer/apparmor"
"github.com/opencontainers/runc/libcontainer/keys"
@@ -17,8 +16,7 @@ import (
// linuxSetnsInit performs the container's initialization for running a new process
// inside an existing container.
type linuxSetnsInit struct {
- config *initConfig
- stateDirFD int
+ config *initConfig
}
func (l *linuxSetnsInit) getSessionRingName() string {
@@ -51,8 +49,5 @@ func (l *linuxSetnsInit) Init() error {
if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
return err
}
- // close the statedir fd before exec because the kernel resets dumpable in the wrong order
- // https://github.com/torvalds/linux/blob/v4.9/fs/exec.c#L1290-L1318
- syscall.Close(l.stateDirFD)
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
}
--
2.11.0