336 lines
13 KiB
Diff
336 lines
13 KiB
Diff
|
From: Vivek Goyal <vgoyal@redhat.com>
|
||
|
Date: Mon, 8 Feb 2021 17:40:24 -0500
|
||
|
Subject: viriofsd: Add support for FUSE_HANDLE_KILLPRIV_V2
|
||
|
|
||
|
Git-commit: d64907acbf6e436099fd26fbb6312fd56f9fb29d
|
||
|
References: bsc#1183373, CVE-2021-20263
|
||
|
|
||
|
This patch adds basic support for FUSE_HANDLE_KILLPRIV_V2. virtiofsd
|
||
|
can enable/disable this by specifying option "-o killpriv_v2/no_killpriv_v2".
|
||
|
By default this is enabled as long as client supports it
|
||
|
|
||
|
Enabling this option helps with performance in write path. Without this
|
||
|
option, currently every write is first preceeded with a getxattr() operation
|
||
|
to find out if security.capability is set. (Write is supposed to clear
|
||
|
security.capability). With this option enabled, server is signing up for
|
||
|
clearing security.capability on every WRITE and also clearing suid/sgid
|
||
|
subject to certain rules. This gets rid of extra getxattr() call for every
|
||
|
WRITE and improves performance. This is true when virtiofsd is run with
|
||
|
option -o xattr.
|
||
|
|
||
|
What does enabling FUSE_HANDLE_KILLPRIV_V2 mean for file server implementation.
|
||
|
It needs to adhere to following rules. Thanks to Miklos for this summary.
|
||
|
|
||
|
- clear "security.capability" on write, truncate and chown unconditionally
|
||
|
- clear suid/sgid in case of following. Note, sgid is cleared only if
|
||
|
group executable bit is set.
|
||
|
o setattr has FATTR_SIZE and FATTR_KILL_SUIDGID set.
|
||
|
o setattr has FATTR_UID or FATTR_GID
|
||
|
o open has O_TRUNC and FUSE_OPEN_KILL_SUIDGID
|
||
|
o create has O_TRUNC and FUSE_OPEN_KILL_SUIDGID flag set.
|
||
|
o write has FUSE_WRITE_KILL_SUIDGID
|
||
|
|
||
|
>From Linux VFS client perspective, here are the requirements.
|
||
|
|
||
|
- caps are always cleared on chown/write/truncate
|
||
|
- suid is always cleared on chown, while for truncate/write it is cleared
|
||
|
only if caller does not have CAP_FSETID.
|
||
|
- sgid is always cleared on chown, while for truncate/write it is cleared
|
||
|
only if caller does not have CAP_FSETID as well as file has group execute
|
||
|
permission.
|
||
|
|
||
|
virtiofsd implementation has not changed much to adhere to above ruls. And
|
||
|
reason being that current assumption is that we are running on Linux
|
||
|
and on top of filesystems like ext4/xfs which already follow above rules.
|
||
|
On write, truncate, chown, seucurity.capability is cleared. And virtiofsd
|
||
|
drops CAP_FSETID if need be and that will lead to clearing of suid/sgid.
|
||
|
|
||
|
But if virtiofsd is running on top a filesystem which breaks above assumptions,
|
||
|
then it will have to take extra actions to emulate above. That's a TODO
|
||
|
for later when need arises.
|
||
|
|
||
|
Note: create normally is supposed to be called only when file does not
|
||
|
exist. So generally there should not be any question of clearing
|
||
|
setuid/setgid. But it is possible that after client checks that
|
||
|
file is not present, some other client creates file on server
|
||
|
and this race can trigger sending FUSE_CREATE. In that case, if
|
||
|
O_TRUNC is set, we should clear suid/sgid if FUSE_OPEN_KILL_SUIDGID
|
||
|
is also set.
|
||
|
|
||
|
v3:
|
||
|
- Resolved conflicts due to lo_inode_open() changes.
|
||
|
- Moved capability code in lo_do_open() so that both lo_open() and
|
||
|
lo_create() can benefit from common code.
|
||
|
- Dropped changes to kernel headers as these are part of qemu already.
|
||
|
|
||
|
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
|
||
|
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||
|
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||
|
Message-Id: <20210208224024.43555-3-vgoyal@redhat.com>
|
||
|
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||
|
Signed-off-by: Bruce Rogers <brogers@suse.com>
|
||
|
---
|
||
|
tools/virtiofsd/fuse_common.h | 15 ++++++
|
||
|
tools/virtiofsd/fuse_lowlevel.c | 11 ++++-
|
||
|
tools/virtiofsd/fuse_lowlevel.h | 1 +
|
||
|
tools/virtiofsd/passthrough_ll.c | 84 +++++++++++++++++++++++++++++---
|
||
|
4 files changed, 103 insertions(+), 8 deletions(-)
|
||
|
|
||
|
diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h
|
||
|
index 5aee5193eb29ea10de8e6ce46b63..6f4a1ff3a9227964ea98d547d110 100644
|
||
|
--- a/tools/virtiofsd/fuse_common.h
|
||
|
+++ b/tools/virtiofsd/fuse_common.h
|
||
|
@@ -359,6 +359,21 @@ struct fuse_file_info {
|
||
|
*/
|
||
|
#define FUSE_CAP_SUBMOUNTS (1 << 27)
|
||
|
|
||
|
+/**
|
||
|
+ * Indicates that the filesystem is responsible for clearing
|
||
|
+ * security.capability xattr and clearing setuid and setgid bits. Following
|
||
|
+ * are the rules.
|
||
|
+ * - clear "security.capability" on write, truncate and chown unconditionally
|
||
|
+ * - clear suid/sgid if following is true. Note, sgid is cleared only if
|
||
|
+ * group executable bit is set.
|
||
|
+ * o setattr has FATTR_SIZE and FATTR_KILL_SUIDGID set.
|
||
|
+ * o setattr has FATTR_UID or FATTR_GID
|
||
|
+ * o open has O_TRUNC and FUSE_OPEN_KILL_SUIDGID
|
||
|
+ * o create has O_TRUNC and FUSE_OPEN_KILL_SUIDGID flag set.
|
||
|
+ * o write has FUSE_WRITE_KILL_SUIDGID
|
||
|
+ */
|
||
|
+#define FUSE_CAP_HANDLE_KILLPRIV_V2 (1 << 28)
|
||
|
+
|
||
|
/**
|
||
|
* Ioctl flags
|
||
|
*
|
||
|
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
|
||
|
index c70fb16a9a5313160a4cc53faf86..65f01a3fe31de0864948230b2e47 100644
|
||
|
--- a/tools/virtiofsd/fuse_lowlevel.c
|
||
|
+++ b/tools/virtiofsd/fuse_lowlevel.c
|
||
|
@@ -865,7 +865,7 @@ static void do_setattr(fuse_req_t req, fuse_ino_t nodeid,
|
||
|
FUSE_SET_ATTR_GID | FUSE_SET_ATTR_SIZE |
|
||
|
FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME |
|
||
|
FUSE_SET_ATTR_ATIME_NOW | FUSE_SET_ATTR_MTIME_NOW |
|
||
|
- FUSE_SET_ATTR_CTIME;
|
||
|
+ FUSE_SET_ATTR_CTIME | FUSE_SET_ATTR_KILL_SUIDGID;
|
||
|
|
||
|
req->se->op.setattr(req, nodeid, &stbuf, arg->valid, fi);
|
||
|
} else {
|
||
|
@@ -1079,6 +1079,7 @@ static void do_create(fuse_req_t req, fuse_ino_t nodeid,
|
||
|
|
||
|
memset(&fi, 0, sizeof(fi));
|
||
|
fi.flags = arg->flags;
|
||
|
+ fi.kill_priv = arg->open_flags & FUSE_OPEN_KILL_SUIDGID;
|
||
|
|
||
|
req->ctx.umask = arg->umask;
|
||
|
|
||
|
@@ -1102,6 +1103,7 @@ static void do_open(fuse_req_t req, fuse_ino_t nodeid,
|
||
|
|
||
|
memset(&fi, 0, sizeof(fi));
|
||
|
fi.flags = arg->flags;
|
||
|
+ fi.kill_priv = arg->open_flags & FUSE_OPEN_KILL_SUIDGID;
|
||
|
|
||
|
if (req->se->op.open) {
|
||
|
req->se->op.open(req, nodeid, &fi);
|
||
|
@@ -1993,6 +1995,9 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid,
|
||
|
if (arg->flags & FUSE_SUBMOUNTS) {
|
||
|
se->conn.capable |= FUSE_CAP_SUBMOUNTS;
|
||
|
}
|
||
|
+ if (arg->flags & FUSE_HANDLE_KILLPRIV_V2) {
|
||
|
+ se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV_V2;
|
||
|
+ }
|
||
|
#ifdef HAVE_SPLICE
|
||
|
#ifdef HAVE_VMSPLICE
|
||
|
se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE;
|
||
|
@@ -2124,6 +2129,10 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid,
|
||
|
outarg.congestion_threshold = se->conn.congestion_threshold;
|
||
|
outarg.time_gran = se->conn.time_gran;
|
||
|
|
||
|
+ if (se->conn.want & FUSE_CAP_HANDLE_KILLPRIV_V2) {
|
||
|
+ outarg.flags |= FUSE_HANDLE_KILLPRIV_V2;
|
||
|
+ }
|
||
|
+
|
||
|
fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, outarg.minor);
|
||
|
fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags);
|
||
|
fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", outarg.max_readahead);
|
||
|
diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h
|
||
|
index 9c06240f9e61ea259241a3ec77e9..96d10defc8936b00e8410589a183 100644
|
||
|
--- a/tools/virtiofsd/fuse_lowlevel.h
|
||
|
+++ b/tools/virtiofsd/fuse_lowlevel.h
|
||
|
@@ -146,6 +146,7 @@ struct fuse_forget_data {
|
||
|
#define FUSE_SET_ATTR_ATIME_NOW (1 << 7)
|
||
|
#define FUSE_SET_ATTR_MTIME_NOW (1 << 8)
|
||
|
#define FUSE_SET_ATTR_CTIME (1 << 10)
|
||
|
+#define FUSE_SET_ATTR_KILL_SUIDGID (1 << 11)
|
||
|
|
||
|
/*
|
||
|
* Request methods and replies
|
||
|
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
|
||
|
index 200a1d26bd11bcde5729c4f33195..f330fb72b93f24331174f52bf6b9 100644
|
||
|
--- a/tools/virtiofsd/passthrough_ll.c
|
||
|
+++ b/tools/virtiofsd/passthrough_ll.c
|
||
|
@@ -180,6 +180,7 @@ struct lo_data {
|
||
|
|
||
|
/* An O_PATH file descriptor to /proc/self/fd/ */
|
||
|
int proc_self_fd;
|
||
|
+ int user_killpriv_v2, killpriv_v2;
|
||
|
};
|
||
|
|
||
|
static const struct fuse_opt lo_opts[] = {
|
||
|
@@ -210,6 +211,8 @@ static const struct fuse_opt lo_opts[] = {
|
||
|
{ "allow_direct_io", offsetof(struct lo_data, allow_direct_io), 1 },
|
||
|
{ "no_allow_direct_io", offsetof(struct lo_data, allow_direct_io), 0 },
|
||
|
{ "announce_submounts", offsetof(struct lo_data, announce_submounts), 1 },
|
||
|
+ { "killpriv_v2", offsetof(struct lo_data, user_killpriv_v2), 1 },
|
||
|
+ { "no_killpriv_v2", offsetof(struct lo_data, user_killpriv_v2), 0 },
|
||
|
FUSE_OPT_END
|
||
|
};
|
||
|
static bool use_syslog = false;
|
||
|
@@ -642,6 +645,34 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn)
|
||
|
"does not support it\n");
|
||
|
lo->announce_submounts = false;
|
||
|
}
|
||
|
+
|
||
|
+ if (lo->user_killpriv_v2 == 1) {
|
||
|
+ /*
|
||
|
+ * User explicitly asked for this option. Enable it unconditionally.
|
||
|
+ * If connection does not have this capability, it should fail
|
||
|
+ * in fuse_lowlevel.c
|
||
|
+ */
|
||
|
+ fuse_log(FUSE_LOG_DEBUG, "lo_init: enabling killpriv_v2\n");
|
||
|
+ conn->want |= FUSE_CAP_HANDLE_KILLPRIV_V2;
|
||
|
+ lo->killpriv_v2 = 1;
|
||
|
+ } else if (lo->user_killpriv_v2 == -1 &&
|
||
|
+ conn->capable & FUSE_CAP_HANDLE_KILLPRIV_V2) {
|
||
|
+ /*
|
||
|
+ * User did not specify a value for killpriv_v2. By default enable it
|
||
|
+ * if connection offers this capability
|
||
|
+ */
|
||
|
+ fuse_log(FUSE_LOG_DEBUG, "lo_init: enabling killpriv_v2\n");
|
||
|
+ conn->want |= FUSE_CAP_HANDLE_KILLPRIV_V2;
|
||
|
+ lo->killpriv_v2 = 1;
|
||
|
+ } else {
|
||
|
+ /*
|
||
|
+ * Either user specified to disable killpriv_v2, or connection does
|
||
|
+ * not offer this capability. Disable killpriv_v2 in both the cases
|
||
|
+ */
|
||
|
+ fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling killpriv_v2\n");
|
||
|
+ conn->want &= ~FUSE_CAP_HANDLE_KILLPRIV_V2;
|
||
|
+ lo->killpriv_v2 = 0;
|
||
|
+ }
|
||
|
}
|
||
|
|
||
|
static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
|
||
|
@@ -726,7 +757,10 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
|
||
|
}
|
||
|
if (valid & FUSE_SET_ATTR_SIZE) {
|
||
|
int truncfd;
|
||
|
+ bool kill_suidgid;
|
||
|
+ bool cap_fsetid_dropped = false;
|
||
|
|
||
|
+ kill_suidgid = lo->killpriv_v2 && (valid & FUSE_SET_ATTR_KILL_SUIDGID);
|
||
|
if (fi) {
|
||
|
truncfd = fd;
|
||
|
} else {
|
||
|
@@ -737,8 +771,25 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
|
||
|
}
|
||
|
}
|
||
|
|
||
|
+ if (kill_suidgid) {
|
||
|
+ res = drop_effective_cap("FSETID", &cap_fsetid_dropped);
|
||
|
+ if (res != 0) {
|
||
|
+ saverr = res;
|
||
|
+ if (!fi) {
|
||
|
+ close(truncfd);
|
||
|
+ }
|
||
|
+ goto out_err;
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
res = ftruncate(truncfd, attr->st_size);
|
||
|
saverr = res == -1 ? errno : 0;
|
||
|
+
|
||
|
+ if (cap_fsetid_dropped) {
|
||
|
+ if (gain_effective_cap("FSETID")) {
|
||
|
+ fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_FSETID\n");
|
||
|
+ }
|
||
|
+ }
|
||
|
if (!fi) {
|
||
|
close(truncfd);
|
||
|
}
|
||
|
@@ -1719,11 +1770,27 @@ static int lo_do_open(struct lo_data *lo, struct lo_inode *inode,
|
||
|
{
|
||
|
ssize_t fh;
|
||
|
int fd = existing_fd;
|
||
|
+ int err;
|
||
|
+ bool cap_fsetid_dropped = false;
|
||
|
+ bool kill_suidgid = lo->killpriv_v2 && fi->kill_priv;
|
||
|
|
||
|
update_open_flags(lo->writeback, lo->allow_direct_io, fi);
|
||
|
|
||
|
if (fd < 0) {
|
||
|
+ if (kill_suidgid) {
|
||
|
+ err = drop_effective_cap("FSETID", &cap_fsetid_dropped);
|
||
|
+ if (err) {
|
||
|
+ return err;
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
fd = lo_inode_open(lo, inode, fi->flags);
|
||
|
+
|
||
|
+ if (cap_fsetid_dropped) {
|
||
|
+ if (gain_effective_cap("FSETID")) {
|
||
|
+ fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_FSETID\n");
|
||
|
+ }
|
||
|
+ }
|
||
|
if (fd < 0) {
|
||
|
return -fd;
|
||
|
}
|
||
|
@@ -1757,8 +1824,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
|
||
|
int err;
|
||
|
struct lo_cred old = {};
|
||
|
|
||
|
- fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", parent,
|
||
|
- name);
|
||
|
+ fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)"
|
||
|
+ " kill_priv=%d\n", parent, name, fi->kill_priv);
|
||
|
|
||
|
if (!is_safe_path_component(name)) {
|
||
|
fuse_reply_err(req, EINVAL);
|
||
|
@@ -1981,8 +2048,8 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
|
||
|
struct lo_inode *inode = lo_inode(req, ino);
|
||
|
int err;
|
||
|
|
||
|
- fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino,
|
||
|
- fi->flags);
|
||
|
+ fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d, kill_priv=%d)"
|
||
|
+ "\n", ino, fi->flags, fi->kill_priv);
|
||
|
|
||
|
if (!inode) {
|
||
|
fuse_reply_err(req, EBADF);
|
||
|
@@ -2112,12 +2179,14 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino,
|
||
|
out_buf.buf[0].pos = off;
|
||
|
|
||
|
fuse_log(FUSE_LOG_DEBUG,
|
||
|
- "lo_write_buf(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ino,
|
||
|
- out_buf.buf[0].size, (unsigned long)off);
|
||
|
+ "lo_write_buf(ino=%" PRIu64 ", size=%zd, off=%lu kill_priv=%d)\n",
|
||
|
+ ino, out_buf.buf[0].size, (unsigned long)off, fi->kill_priv);
|
||
|
|
||
|
/*
|
||
|
* If kill_priv is set, drop CAP_FSETID which should lead to kernel
|
||
|
- * clearing setuid/setgid on file.
|
||
|
+ * clearing setuid/setgid on file. Note, for WRITE, we need to do
|
||
|
+ * this even if killpriv_v2 is not enabled. fuse direct write path
|
||
|
+ * relies on this.
|
||
|
*/
|
||
|
if (fi->kill_priv) {
|
||
|
res = drop_effective_cap("FSETID", &cap_fsetid_dropped);
|
||
|
@@ -3496,6 +3565,7 @@ int main(int argc, char *argv[])
|
||
|
.posix_lock = 0,
|
||
|
.allow_direct_io = 0,
|
||
|
.proc_self_fd = -1,
|
||
|
+ .user_killpriv_v2 = -1,
|
||
|
};
|
||
|
struct lo_map_elem *root_elem;
|
||
|
struct lo_map_elem *reserve_elem;
|