cifs-utils/0001-cifs.upcall-try-to-use-container-ipc-uts-net-pid-mnt.patch

265 lines
7.0 KiB
Diff
Raw Normal View History

From e461afd8cfa6d0781ae0c5c10e89b6ef1ca6da32 Mon Sep 17 00:00:00 2001
From: Alastair Houghton <alastair@alastairs-place.net>
Date: Tue, 29 Dec 2020 14:02:39 +0000
Subject: [PATCH] cifs.upcall: try to use container ipc/uts/net/pid/mnt/user
namespaces
In certain scenarios (e.g. kerberos multimount), when a process does
syscalls, the kernel sometimes has to query information or trigger
some actions in userspace. To do so it calls the cifs.upcall binary
with information on the process that triggered the syscall in the
first place.
  ls(pid=10) ====> open("foo") ====> kernel

                                     that user doesn't have an SMB
                                     session, let's create one using his
                                     kerberos credential cache

                                     call cifs.upcall and ask for krb info
                                     for whoever owns pid=10
                                         |
  cifs.upcall --pid 10 <=================+

  ...gather info...
  return binary blob used
  when establishing SMB session
  ===================> kernel

                                     open SMB session, handle
                                     open() syscall
  ls <=================================== return open() result to ls
On a system using containers, the kernel is still calling the host
cifs.upcall and using the host configuration (for network, pid, etc).
This patch changes the behaviour of cifs.upcall so that it uses the
calling process namespaces (ls in the example) when doing its
job.
Note that the kernel still calls the binary in the host, but the
binary will place itself in the context of the calling process's
namespaces.
This code makes use of (but shouldn't require) the following kernel
config options and syscall flags:
approx. year   |
introduced     | config/flags
---------------+----------------
2008           | CONFIG_NAMESPACES=y
2007           | CONFIG_UTS_NS=y
2020           | CONFIG_TIME_NS=y
2006           | CONFIG_IPC_NS=y
2007           | CONFIG_USER_NS
2008           | CONFIG_PID_NS=y
2007           | CONFIG_NET_NS=y
2007           | CONFIG_CGROUPS
2016           | CLONE_NEWCGROUP setns() flag
Signed-off-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Alastair Houghton <alastair@alastairs-place.net>
---
cifs.upcall.c | 172 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 172 insertions(+)
diff --git a/cifs.upcall.c b/cifs.upcall.c
index 400b42d..e413934 100644
--- a/cifs.upcall.c
+++ b/cifs.upcall.c
@@ -51,6 +51,7 @@
#include <grp.h>
#include <stdbool.h>
#include <errno.h>
+#include <sched.h>
#include "data_blob.h"
#include "spnego.h"
@@ -240,6 +241,164 @@ err_cache:
return credtime;
}
+static struct namespace_file {
+ int nstype; /* CLONE_NEW* flag passed to setns() for this entry */
+ const char *name; /* entry name under /proc/<pid>/ns/ */
+ int fd; /* open fd for that entry, or -1 when none is open */
+} namespace_files[] = {
+ /* switch_to_process_ns() walks this table front to back, so the order here is the setns() order. */
+#ifdef CLONE_NEWCGROUP
+ { CLONE_NEWCGROUP, "cgroup", -1 },
+#endif
+
+#ifdef CLONE_NEWIPC
+ { CLONE_NEWIPC, "ipc", -1 },
+#endif
+
+#ifdef CLONE_NEWUTS
+ { CLONE_NEWUTS, "uts", -1 },
+#endif
+
+#ifdef CLONE_NEWNET
+ { CLONE_NEWNET, "net", -1 },
+#endif
+
+#ifdef CLONE_NEWPID
+ { CLONE_NEWPID, "pid", -1 },
+#endif
+
+#ifdef CLONE_NEWTIME
+ { CLONE_NEWTIME, "time", -1 },
+#endif
+
+#ifdef CLONE_NEWNS
+ { CLONE_NEWNS, "mnt", -1 },
+#endif
+
+#ifdef CLONE_NEWUSER
+ { CLONE_NEWUSER, "user", -1 },
+#endif
+};
+
+#define NS_PATH_FMT "/proc/%d/ns/%s"
+#define NS_PATH_MAXLEN (6 + 10 + 4 + 6 + 1) /* "/proc/" + pid (assumed <= 10 digits) + "/ns/" + longest name ("cgroup") + NUL */
+
+/**
+ * in_same_user_ns - return true if two processes are in the same user
+ * namespace.
+ * @pid_a: the pid of the first process
+ * @pid_b: the pid of the second process
+ *
+ * Works by comparing the device and inode numbers of /proc/<pid>/ns/user.
+ * Returns 0 when either link cannot be stat()ed, so such a pair is
+ * reported as not sharing a namespace.
+ */
+static int
+in_same_user_ns(pid_t pid_a, pid_t pid_b)
+{
+	char path[NS_PATH_MAXLEN];
+	struct stat st_a, st_b;
+
+	snprintf(path, sizeof(path), NS_PATH_FMT, pid_a, "user");
+	if (stat(path, &st_a) != 0)
+		return 0;
+
+	snprintf(path, sizeof(path), NS_PATH_FMT, pid_b, "user");
+	if (stat(path, &st_b) != 0)
+		return 0;
+
+	/* namespaces(7): same namespace means same device ID and inode */
+	return st_a.st_dev == st_b.st_dev && st_a.st_ino == st_b.st_ino;
+}
+
+/**
+ * switch_to_process_ns - join the namespaces of the specified process.
+ * @pid: initiating pid value from the upcall string
+ *
+ * Opens each /proc/<pid>/ns/<name> listed in namespace_files, then calls
+ * setns() on every one that exists, so that we have the same access and
+ * configuration as the process that triggered the lookup.
+ * Returns 0 on success, or -1 with errno set to the cause of the failure.
+ */
+static int
+switch_to_process_ns(pid_t pid)
+{
+	int count = sizeof(namespace_files) / sizeof(struct namespace_file);
+	int n, err = 0;
+	int rc = 0;
+
+	/* First, open all the namespace fds. We do this first because
+	   the namespace changes might prohibit us from opening them. */
+	for (n = 0; n < count; ++n) {
+		char nspath[NS_PATH_MAXLEN];
+		int ret, fd;
+
+#ifdef CLONE_NEWUSER
+		if (namespace_files[n].nstype == CLONE_NEWUSER
+		    && in_same_user_ns(getpid(), pid)) {
+			/* Switching to the same user namespace is forbidden,
+			   because switching to a user namespace grants all
+			   capabilities in that namespace regardless of uid. */
+			namespace_files[n].fd = -1;
+			continue;
+		}
+#endif
+
+		ret = snprintf(nspath, NS_PATH_MAXLEN, NS_PATH_FMT,
+			       pid, namespace_files[n].name);
+		if (ret >= NS_PATH_MAXLEN) {
+			syslog(LOG_DEBUG, "%s: unterminated path!\n", __func__);
+			err = ENAMETOOLONG;
+			rc = -1;
+			goto out;
+		}
+
+		fd = open(nspath, O_RDONLY);
+		if (fd < 0 && errno != ENOENT) {
+			/*
+			 * don't stop on non-existing ns
+			 * but stop for other errors
+			 */
+			err = errno;
+			rc = -1;
+			goto out;
+		}
+
+		namespace_files[n].fd = fd;
+	}
+
+	/* Next, call setns for each of them */
+	for (n = 0; n < count; ++n) {
+		/* skip non-existing ns */
+		if (namespace_files[n].fd < 0)
+			continue;
+
+		rc = setns(namespace_files[n].fd, namespace_files[n].nstype);
+
+		if (rc < 0) {
+			err = errno;	/* save before syslog() can clobber it */
+			syslog(LOG_DEBUG, "%s: setns() failed for %s\n",
+			       __func__, namespace_files[n].name);
+			goto out;
+		}
+	}
+
+out:
+	/* Finally, close all the fds */
+	for (n = 0; n < count; ++n) {
+		if (namespace_files[n].fd != -1) {
+			close(namespace_files[n].fd);
+			namespace_files[n].fd = -1;
+		}
+	}
+
+	/* close() may have changed errno; restore the saved cause */
+	if (rc != 0)
+		errno = err;
+
+	return rc;
+}
+
#define ENV_PATH_FMT "/proc/%d/environ"
#define ENV_PATH_MAXLEN (6 + 10 + 8 + 1)
@@ -1109,6 +1268,19 @@ int main(const int argc, char *const argv[])
env_cachename =
get_cachename_from_process_env(env_probe ? arg.pid : 0);
+ /*
+ * Change to the process's namespace. This means that things will work
+ * acceptably in containers, because we'll be looking at the correct
+ * filesystem and have the correct network configuration.
+ */
+ rc = switch_to_process_ns(arg.pid);
+ if (rc == -1) {
+ syslog(LOG_ERR, "unable to switch to process namespace: %s",
+ strerror(errno));
+ rc = 1;
+ goto out;
+ }
+
rc = setuid(uid);
if (rc == -1) {
syslog(LOG_ERR, "setuid: %s", strerror(errno));
--
2.30.0