226 lines
8.6 KiB
Diff
226 lines
8.6 KiB
Diff
|
From 9729fc3127c5e27d59a0126f52e91c61037173ae Mon Sep 17 00:00:00 2001
|
||
|
From: Aleksa Sarai <asarai@suse.de>
|
||
|
Date: Tue, 29 Nov 2016 01:25:06 +1100
|
||
|
Subject: [PATCH] libcontainer: init: only pass stateDirFd when creating a
|
||
|
container
|
||
|
|
||
|
If we pass a file descriptor to the host filesystem while joining a
|
||
|
container, there is a race condition where a process inside the
|
||
|
container can ptrace(2) the joining process and stop it from closing its
|
||
|
stateDirFd file descriptor. Then the process can access the *host*
|
||
|
filesystem from that file descriptor.
|
||
|
|
||
|
To fix this, don't open or pass the stateDirFd to the init process
|
||
|
unless we're creating a new container. A proper fix for this would be to
|
||
|
remove the need for even passing around directory file descriptors
|
||
|
(which are quite dangerous in the context of mount namespaces).
|
||
|
|
||
|
SUSE: This is an additional patch which was the original "fix" for the
|
||
|
CVE. It was agreed that the patch was too complicated to be used
|
||
|
as a fix for a security issue -- but it will be upstreamed once
|
||
|
the embargo passes.
|
||
|
|
||
|
Fixes: CVE-2016-8649
|
||
|
Signed-off-by: Aleksa Sarai <asarai@suse.de>
|
||
|
---
|
||
|
libcontainer/container_linux.go | 30 ++++++++++++++++++------------
|
||
|
libcontainer/factory_linux.go | 37 ++++++++++++++++++++++---------------
|
||
|
libcontainer/init_linux.go | 3 +--
|
||
|
libcontainer/process_linux.go | 2 --
|
||
|
libcontainer/setns_init_linux.go | 7 +------
|
||
|
5 files changed, 42 insertions(+), 37 deletions(-)
|
||
|
|
||
|
diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go
|
||
|
index 29c8b3437be3..4110af6fd89d 100644
|
||
|
--- a/libcontainer/container_linux.go
|
||
|
+++ b/libcontainer/container_linux.go
|
||
|
@@ -294,21 +294,29 @@ func (c *linuxContainer) newParentProcess(p *Process, doInit bool) (parentProces
|
||
|
if err != nil {
|
||
|
return nil, newSystemErrorWithCause(err, "creating new init pipe")
|
||
|
}
|
||
|
- rootDir, err := os.Open(c.root)
|
||
|
- if err != nil {
|
||
|
- return nil, err
|
||
|
- }
|
||
|
- cmd, err := c.commandTemplate(p, childPipe, rootDir)
|
||
|
+ cmd, err := c.commandTemplate(p, childPipe)
|
||
|
if err != nil {
|
||
|
return nil, newSystemErrorWithCause(err, "creating new command template")
|
||
|
}
|
||
|
if !doInit {
|
||
|
- return c.newSetnsProcess(p, cmd, parentPipe, childPipe, rootDir)
|
||
|
+ return c.newSetnsProcess(p, cmd, parentPipe, childPipe)
|
||
|
}
|
||
|
+
|
||
|
+ // We only set up rootDir if we're not doing a `runc exec`. The reason for
|
||
|
+ // this is to avoid cases where a racing, unprivileged process inside the
|
||
|
+ // container can get access to the statedir file descriptor (which would
|
||
|
+ // allow for container rootfs escape).
|
||
|
+ rootDir, err := os.Open(c.root)
|
||
|
+ if err != nil {
|
||
|
+ return nil, err
|
||
|
+ }
|
||
|
+ cmd.ExtraFiles = append(cmd.ExtraFiles, rootDir)
|
||
|
+ cmd.Env = append(cmd.Env,
|
||
|
+ fmt.Sprintf("_LIBCONTAINER_STATEDIR=%d", stdioFdCount+len(cmd.ExtraFiles)-1))
|
||
|
return c.newInitProcess(p, cmd, parentPipe, childPipe, rootDir)
|
||
|
}
|
||
|
|
||
|
-func (c *linuxContainer) commandTemplate(p *Process, childPipe, rootDir *os.File) (*exec.Cmd, error) {
|
||
|
+func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.Cmd, error) {
|
||
|
cmd := exec.Command(c.initArgs[0], c.initArgs[1:]...)
|
||
|
cmd.Stdin = p.Stdin
|
||
|
cmd.Stdout = p.Stdout
|
||
|
@@ -317,10 +325,9 @@ func (c *linuxContainer) commandTemplate(p *Process, childPipe, rootDir *os.File
|
||
|
if cmd.SysProcAttr == nil {
|
||
|
cmd.SysProcAttr = &syscall.SysProcAttr{}
|
||
|
}
|
||
|
- cmd.ExtraFiles = append(p.ExtraFiles, childPipe, rootDir)
|
||
|
+ cmd.ExtraFiles = append(p.ExtraFiles, childPipe)
|
||
|
cmd.Env = append(cmd.Env,
|
||
|
- fmt.Sprintf("_LIBCONTAINER_INITPIPE=%d", stdioFdCount+len(cmd.ExtraFiles)-2),
|
||
|
- fmt.Sprintf("_LIBCONTAINER_STATEDIR=%d", stdioFdCount+len(cmd.ExtraFiles)-1))
|
||
|
+ fmt.Sprintf("_LIBCONTAINER_INITPIPE=%d", stdioFdCount+len(cmd.ExtraFiles)-1))
|
||
|
// NOTE: when running a container with no PID namespace and the parent process spawning the container is
|
||
|
// PID1 the pdeathsig is being delivered to the container's init process by the kernel for some reason
|
||
|
// even with the parent still running.
|
||
|
@@ -357,7 +364,7 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
|
||
|
}, nil
|
||
|
}
|
||
|
|
||
|
-func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe, rootDir *os.File) (*setnsProcess, error) {
|
||
|
+func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*setnsProcess, error) {
|
||
|
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns))
|
||
|
state, err := c.currentState()
|
||
|
if err != nil {
|
||
|
@@ -378,7 +385,6 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe,
|
||
|
config: c.newInitConfig(p),
|
||
|
process: p,
|
||
|
bootstrapData: data,
|
||
|
- rootDir: rootDir,
|
||
|
}, nil
|
||
|
}
|
||
|
|
||
|
diff --git a/libcontainer/factory_linux.go b/libcontainer/factory_linux.go
|
||
|
index cc336c147fb9..e3478d5b50e3 100644
|
||
|
--- a/libcontainer/factory_linux.go
|
||
|
+++ b/libcontainer/factory_linux.go
|
||
|
@@ -222,27 +222,34 @@ func (l *LinuxFactory) Type() string {
|
||
|
// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state
|
||
|
// This is a low level implementation detail of the reexec and should not be consumed externally
|
||
|
func (l *LinuxFactory) StartInitialization() (err error) {
|
||
|
- var pipefd, rootfd int
|
||
|
- for _, pair := range []struct {
|
||
|
- k string
|
||
|
- v *int
|
||
|
- }{
|
||
|
- {"_LIBCONTAINER_INITPIPE", &pipefd},
|
||
|
- {"_LIBCONTAINER_STATEDIR", &rootfd},
|
||
|
- } {
|
||
|
-
|
||
|
- s := os.Getenv(pair.k)
|
||
|
+ var (
|
||
|
+ pipefd, rootfd int
|
||
|
+ envInitPipe = os.Getenv("_LIBCONTAINER_INITPIPE")
|
||
|
+ envStateDir = os.Getenv("_LIBCONTAINER_STATEDIR")
|
||
|
+ )
|
||
|
|
||
|
- i, err := strconv.Atoi(s)
|
||
|
- if err != nil {
|
||
|
- return fmt.Errorf("unable to convert %s=%s to int", pair.k, s)
|
||
|
- }
|
||
|
- *pair.v = i
|
||
|
+ // Get the INITPIPE.
|
||
|
+ pipefd, err = strconv.Atoi(envInitPipe)
|
||
|
+ if err != nil {
|
||
|
+ return fmt.Errorf("unable to convert _LIBCONTAINER_INITPIPE=%s to int: %s", envInitPipe, err)
|
||
|
}
|
||
|
+
|
||
|
var (
|
||
|
pipe = os.NewFile(uintptr(pipefd), "pipe")
|
||
|
it = initType(os.Getenv("_LIBCONTAINER_INITTYPE"))
|
||
|
)
|
||
|
+
|
||
|
+ // Only get the STATEDIR if we're an init process. It's a bit late to do
|
||
|
+ // anything about this if we've already brought in an fd (a racing process
|
||
|
+ // could've opened the fd by now because we're in the PID namespace).
|
||
|
+ rootfd = -1
|
||
|
+ if it == initStandard {
|
||
|
+ rootfd, err = strconv.Atoi(envStateDir)
|
||
|
+ if err != nil {
|
||
|
+ return fmt.Errorf("unable to convert _LIBCONTAINER_STATEDIR=%s to int: %s", envStateDir, err)
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
// clear the current process's environment to clean any libcontainer
|
||
|
// specific env vars.
|
||
|
os.Clearenv()
|
||
|
diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go
|
||
|
index 4043d51c0bd0..b1e6762ecdf3 100644
|
||
|
--- a/libcontainer/init_linux.go
|
||
|
+++ b/libcontainer/init_linux.go
|
||
|
@@ -77,8 +77,7 @@ func newContainerInit(t initType, pipe *os.File, stateDirFD int) (initer, error)
|
||
|
switch t {
|
||
|
case initSetns:
|
||
|
return &linuxSetnsInit{
|
||
|
- config: config,
|
||
|
- stateDirFD: stateDirFD,
|
||
|
+ config: config,
|
||
|
}, nil
|
||
|
case initStandard:
|
||
|
return &linuxStandardInit{
|
||
|
diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go
|
||
|
index 5b81317fd711..7b90c6b8faae 100644
|
||
|
--- a/libcontainer/process_linux.go
|
||
|
+++ b/libcontainer/process_linux.go
|
||
|
@@ -51,7 +51,6 @@ type setnsProcess struct {
|
||
|
fds []string
|
||
|
process *Process
|
||
|
bootstrapData io.Reader
|
||
|
- rootDir *os.File
|
||
|
}
|
||
|
|
||
|
func (p *setnsProcess) startTime() (string, error) {
|
||
|
@@ -70,7 +69,6 @@ func (p *setnsProcess) start() (err error) {
|
||
|
defer p.parentPipe.Close()
|
||
|
err = p.cmd.Start()
|
||
|
p.childPipe.Close()
|
||
|
- p.rootDir.Close()
|
||
|
if err != nil {
|
||
|
return newSystemErrorWithCause(err, "starting setns process")
|
||
|
}
|
||
|
diff --git a/libcontainer/setns_init_linux.go b/libcontainer/setns_init_linux.go
|
||
|
index 7f5f182402cc..2a8f34528179 100644
|
||
|
--- a/libcontainer/setns_init_linux.go
|
||
|
+++ b/libcontainer/setns_init_linux.go
|
||
|
@@ -5,7 +5,6 @@ package libcontainer
|
||
|
import (
|
||
|
"fmt"
|
||
|
"os"
|
||
|
- "syscall"
|
||
|
|
||
|
"github.com/opencontainers/runc/libcontainer/apparmor"
|
||
|
"github.com/opencontainers/runc/libcontainer/keys"
|
||
|
@@ -17,8 +16,7 @@ import (
|
||
|
// linuxSetnsInit performs the container's initialization for running a new process
|
||
|
// inside an existing container.
|
||
|
type linuxSetnsInit struct {
|
||
|
- config *initConfig
|
||
|
- stateDirFD int
|
||
|
+ config *initConfig
|
||
|
}
|
||
|
|
||
|
func (l *linuxSetnsInit) getSessionRingName() string {
|
||
|
@@ -51,8 +49,5 @@ func (l *linuxSetnsInit) Init() error {
|
||
|
if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
|
||
|
return err
|
||
|
}
|
||
|
- // close the statedir fd before exec because the kernel resets dumpable in the wrong order
|
||
|
- // https://github.com/torvalds/linux/blob/v4.9/fs/exec.c#L1290-L1318
|
||
|
- syscall.Close(l.stateDirFD)
|
||
|
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
|
||
|
}
|
||
|
--
|
||
|
2.11.0
|
||
|
|