265 lines
7.0 KiB
Diff
265 lines
7.0 KiB
Diff
|
From e461afd8cfa6d0781ae0c5c10e89b6ef1ca6da32 Mon Sep 17 00:00:00 2001
|
||
|
From: Alastair Houghton <alastair@alastairs-place.net>
|
||
|
Date: Tue, 29 Dec 2020 14:02:39 +0000
|
||
|
Subject: [PATCH] cifs.upcall: try to use container ipc/uts/net/pid/mnt/user
|
||
|
namespaces
|
||
|
|
||
|
In certain scenarios (e.g. kerberos multimount), when a process does
|
||
|
syscalls, the kernel sometimes has to query information or trigger
|
||
|
some actions in userspace. To do so it calls the cifs.upcall binary
|
||
|
with information on the process that triggered the syscall in the
|
||
|
first place.
|
||
|
|
||
|
ls(pid=10) ====> open("foo") ====> kernel
|
||
|
|
||
|
that user doesn't have an SMB
|
||
|
session, let's create one using his
|
||
|
kerberos credential cache
|
||
|
|
||
|
call cifs.upcall and ask for krb info
|
||
|
for whoever owns pid=10
|
||
|
|
|
||
|
cifs.upcall --pid 10 <=================+
|
||
|
|
||
|
...gather info...
|
||
|
return binary blob used
|
||
|
when establishing SMB session
|
||
|
===================> kernel
|
||
|
open SMB session, handle
|
||
|
open() syscall
|
||
|
ls <=================================== return open() result to ls
|
||
|
|
||
|
On a system using containers, the kernel is still calling the host
|
||
|
cifs.upcall and using the host configuration (for network, pid, etc).
|
||
|
|
||
|
This patch changes the behaviour of cifs.upcall so that it uses the
|
||
|
calling process namespaces (ls in the example) when doing its
|
||
|
job.
|
||
|
|
||
|
Note that the kernel still calls the binary in the host, but the
|
||
|
binary will place itself in the context of the calling process's
|
||
|
namespaces.
|
||
|
|
||
|
This code makes use of (but shouldn't require) the following kernel
|
||
|
config options and syscall flags:
|
||
|
|
||
|
approx. year |
|
||
|
introduced | config/flags
|
||
|
---------------+----------------
|
||
|
2008 | CONFIG_NAMESPACES=y
|
||
|
2007 | CONFIG_UTS_NS=y
|
||
|
2020 | CONFIG_TIME_NS=y
|
||
|
2006 | CONFIG_IPC_NS=y
|
||
|
2007 | CONFIG_USER_NS
|
||
|
2008 | CONFIG_PID_NS=y
|
||
|
2007 | CONFIG_NET_NS=y
|
||
|
2007 | CONFIG_CGROUPS
|
||
|
2016 | CLONE_NEWCGROUP setns() flag
|
||
|
|
||
|
Signed-off-by: Aurelien Aptel <aaptel@suse.com>
|
||
|
Signed-off-by: Alastair Houghton <alastair@alastairs-place.net>
|
||
|
---
|
||
|
cifs.upcall.c | 172 ++++++++++++++++++++++++++++++++++++++++++++++++++
|
||
|
1 file changed, 172 insertions(+)
|
||
|
|
||
|
diff --git a/cifs.upcall.c b/cifs.upcall.c
|
||
|
index 400b42d..e413934 100644
|
||
|
--- a/cifs.upcall.c
|
||
|
+++ b/cifs.upcall.c
|
||
|
@@ -51,6 +51,7 @@
|
||
|
#include <grp.h>
|
||
|
#include <stdbool.h>
|
||
|
#include <errno.h>
|
||
|
+#include <sched.h>
|
||
|
|
||
|
#include "data_blob.h"
|
||
|
#include "spnego.h"
|
||
|
@@ -240,6 +241,164 @@ err_cache:
|
||
|
return credtime;
|
||
|
}
|
||
|
|
||
|
+static struct namespace_file {
|
||
|
+ int nstype;
|
||
|
+ const char *name;
|
||
|
+ int fd;
|
||
|
+} namespace_files[] = {
|
||
|
+
|
||
|
+#ifdef CLONE_NEWCGROUP
|
||
|
+ { CLONE_NEWCGROUP, "cgroup", -1 },
|
||
|
+#endif
|
||
|
+
|
||
|
+#ifdef CLONE_NEWIPC
|
||
|
+ { CLONE_NEWIPC, "ipc", -1 },
|
||
|
+#endif
|
||
|
+
|
||
|
+#ifdef CLONE_NEWUTS
|
||
|
+ { CLONE_NEWUTS, "uts", -1 },
|
||
|
+#endif
|
||
|
+
|
||
|
+#ifdef CLONE_NEWNET
|
||
|
+ { CLONE_NEWNET, "net", -1 },
|
||
|
+#endif
|
||
|
+
|
||
|
+#ifdef CLONE_NEWPID
|
||
|
+ { CLONE_NEWPID, "pid", -1 },
|
||
|
+#endif
|
||
|
+
|
||
|
+#ifdef CLONE_NEWTIME
|
||
|
+ { CLONE_NEWTIME, "time", -1 },
|
||
|
+#endif
|
||
|
+
|
||
|
+#ifdef CLONE_NEWNS
|
||
|
+ { CLONE_NEWNS, "mnt", -1 },
|
||
|
+#endif
|
||
|
+
|
||
|
+#ifdef CLONE_NEWUSER
|
||
|
+ { CLONE_NEWUSER, "user", -1 },
|
||
|
+#endif
|
||
|
+};
|
||
|
+
|
||
|
+#define NS_PATH_FMT "/proc/%d/ns/%s"
|
||
|
+#define NS_PATH_MAXLEN (6 + 10 + 4 + 6 + 1)
|
||
|
+
|
||
|
+/**
|
||
|
+ * in_same_user_ns - return true if two processes are in the same user
|
||
|
+ * namespace.
|
||
|
+ * @pid_a: the pid of the first process
|
||
|
+ * @pid_b: the pid of the second process
|
||
|
+ *
|
||
|
+ * Works by comparing the inode numbers for /proc/<pid>/ns/user.
|
||
|
+ */
|
||
|
+static int
|
||
|
+in_same_user_ns(pid_t pid_a, pid_t pid_b)
|
||
|
+{
|
||
|
+ char path[NS_PATH_MAXLEN];
|
||
|
+ ino_t a_ino, b_ino;
|
||
|
+ struct stat st;
|
||
|
+
|
||
|
+ snprintf(path, sizeof(path), NS_PATH_FMT, pid_a, "user");
|
||
|
+ if (stat(path, &st) != 0)
|
||
|
+ return 0;
|
||
|
+ a_ino = st.st_ino;
|
||
|
+
|
||
|
+ snprintf(path, sizeof(path), NS_PATH_FMT, pid_b, "user");
|
||
|
+ if (stat(path, &st) != 0)
|
||
|
+ return 0;
|
||
|
+ b_ino = st.st_ino;
|
||
|
+
|
||
|
+ return a_ino == b_ino;
|
||
|
+}
|
||
|
+
|
||
|
+/**
|
||
|
+ * switch_to_process_ns - change the namespace to the one for the specified
|
||
|
+ * process.
|
||
|
+ * @pid: initiating pid value from the upcall string
|
||
|
+ *
|
||
|
+ * Uses setns() to switch process namespace.
|
||
|
+ * This ensures that we have the same access and configuration as the
|
||
|
+ * process that triggered the lookup.
|
||
|
+ */
|
||
|
+static int
|
||
|
+switch_to_process_ns(pid_t pid)
|
||
|
+{
|
||
|
+ int count = sizeof(namespace_files) / sizeof(struct namespace_file);
|
||
|
+ int n, err = 0;
|
||
|
+ int rc = 0;
|
||
|
+
|
||
|
+ /* First, open all the namespace fds. We do this first because
|
||
|
+ the namespace changes might prohibit us from opening them. */
|
||
|
+ for (n = 0; n < count; ++n) {
|
||
|
+ char nspath[NS_PATH_MAXLEN];
|
||
|
+ int ret, fd;
|
||
|
+
|
||
|
+#ifdef CLONE_NEWUSER
|
||
|
+ if (namespace_files[n].nstype == CLONE_NEWUSER
|
||
|
+ && in_same_user_ns(getpid(), pid)) {
|
||
|
+ /* Switching to the same user namespace is forbidden,
|
||
|
+ because switching to a user namespace grants all
|
||
|
+ capabilities in that namespace regardless of uid. */
|
||
|
+ namespace_files[n].fd = -1;
|
||
|
+ continue;
|
||
|
+ }
|
||
|
+#endif
|
||
|
+
|
||
|
+ ret = snprintf(nspath, NS_PATH_MAXLEN, NS_PATH_FMT,
|
||
|
+ pid, namespace_files[n].name);
|
||
|
+ if (ret >= NS_PATH_MAXLEN) {
|
||
|
+ syslog(LOG_DEBUG, "%s: unterminated path!\n", __func__);
|
||
|
+ err = ENAMETOOLONG;
|
||
|
+ rc = -1;
|
||
|
+ goto out;
|
||
|
+ }
|
||
|
+
|
||
|
+ fd = open(nspath, O_RDONLY);
|
||
|
+ if (fd < 0 && errno != ENOENT) {
|
||
|
+ /*
|
||
|
+ * don't stop on non-existing ns
|
||
|
+ * but stop for other errors
|
||
|
+ */
|
||
|
+ err = errno;
|
||
|
+ rc = -1;
|
||
|
+ goto out;
|
||
|
+ }
|
||
|
+
|
||
|
+ namespace_files[n].fd = fd;
|
||
|
+ }
|
||
|
+
|
||
|
+ /* Next, call setns for each of them */
|
||
|
+ for (n = 0; n < count; ++n) {
|
||
|
+ /* skip non-existing ns */
|
||
|
+ if (namespace_files[n].fd < 0)
|
||
|
+ continue;
|
||
|
+
|
||
|
+ rc = setns(namespace_files[n].fd, namespace_files[n].nstype);
|
||
|
+
|
||
|
+ if (rc < 0) {
|
||
|
+ syslog(LOG_DEBUG, "%s: setns() failed for %s\n",
|
||
|
+ __func__, namespace_files[n].name);
|
||
|
+ err = errno;
|
||
|
+ goto out;
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
+out:
|
||
|
+ /* Finally, close all the fds */
|
||
|
+ for (n = 0; n < count; ++n) {
|
||
|
+ if (namespace_files[n].fd != -1) {
|
||
|
+ close(namespace_files[n].fd);
|
||
|
+ namespace_files[n].fd = -1;
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
+ if (rc != 0) {
|
||
|
+ errno = err;
|
||
|
+ }
|
||
|
+
|
||
|
+ return rc;
|
||
|
+}
|
||
|
+
|
||
|
#define ENV_PATH_FMT "/proc/%d/environ"
|
||
|
#define ENV_PATH_MAXLEN (6 + 10 + 8 + 1)
|
||
|
|
||
|
@@ -1109,6 +1268,19 @@ int main(const int argc, char *const argv[])
|
||
|
env_cachename =
|
||
|
get_cachename_from_process_env(env_probe ? arg.pid : 0);
|
||
|
|
||
|
+ /*
|
||
|
+ * Change to the process's namespace. This means that things will work
|
||
|
+ * acceptably in containers, because we'll be looking at the correct
|
||
|
+ * filesystem and have the correct network configuration.
|
||
|
+ */
|
||
|
+ rc = switch_to_process_ns(arg.pid);
|
||
|
+ if (rc == -1) {
|
||
|
+ syslog(LOG_ERR, "unable to switch to process namespace: %s",
|
||
|
+ strerror(errno));
|
||
|
+ rc = 1;
|
||
|
+ goto out;
|
||
|
+ }
|
||
|
+
|
||
|
rc = setuid(uid);
|
||
|
if (rc == -1) {
|
||
|
syslog(LOG_ERR, "setuid: %s", strerror(errno));
|
||
|
--
|
||
|
2.30.0
|
||
|
|