Based on fdd25311706bd32580ec4d43211cdf4665d2f9de Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Wed, 28 May 2014 18:37:11 +0800
Subject: [PATCH] virt: rework container detection logic

Instead of accessing /proc/1/environ directly, trying to read the
$container variable from it, let's make PID 1 save the contents of that
variable to /run/systemd/container. This allows us to detect containers
without the need for CAP_SYS_PTRACE, which allows us to drop it from a
number of daemons and from the file capabilities of systemd-detect-virt.

Also, don't consider chroot a container technology anymore. After all,
we don't consider file system namespaces container technology anymore,
and hence chroot() should be considered a container even less.
---
 Makefile.am       |    3 ---
 configure.ac      |    2 --
 src/core/main.c   |   12 ++++++++++++
 src/shared/virt.c |   48 ++++++++++++++++++++++++++++++------------------
 4 files changed, 42 insertions(+), 23 deletions(-)

diff --git Makefile.am Makefile.am
index 5b26bc3..f66ef42 100644
--- Makefile.am
+++ Makefile.am
@@ -1798,9 +1798,6 @@ systemd_detect_virt_SOURCES = \
 systemd_detect_virt_LDADD = \
 	libsystemd-shared.la
 
-systemd-detect-virt-install-hook:
-	-$(SETCAP) cap_dac_override,cap_sys_ptrace=ep $(DESTDIR)$(bindir)/systemd-detect-virt
-
 INSTALL_EXEC_HOOKS += \
 	systemd-detect-virt-install-hook
 
--- configure.ac
+++ configure.ac	2014-06-03 14:16:45.046237826 +0000
@@ -68,8 +68,6 @@ AC_PATH_PROG([XSLTPROC], [xsltproc])
 AC_PATH_PROG([QUOTAON], [quotaon], [/usr/sbin/quotaon])
 AC_PATH_PROG([QUOTACHECK], [quotacheck], [/usr/sbin/quotacheck])
 
-AC_PATH_PROG([SETCAP], [setcap], [/usr/sbin/setcap])
-
 AC_PATH_PROG([KILL], [kill], [/usr/bin/kill])
 
 AC_PATH_PROG([KMOD], [kmod], [/usr/bin/kmod])
diff --git src/core/main.c src/core/main.c
index 77cc2fb..d5d1ee2 100644
--- src/core/main.c
+++ src/core/main.c
@@ -1261,6 +1261,16 @@ static int status_welcome(void) {
                              isempty(pretty_name) ? "Linux" : pretty_name);
 }
 
+/* Save PID 1's $container to /run/systemd/container; detect_container() reads that file. */
+static int write_container_id(void) {
+        const char *value = getenv("container");
+
+        if (isempty(value))
+                return 0; /* nothing to save */
+
+        return write_string_file("/run/systemd/container", value);
+}
+
 int main(int argc, char *argv[]) {
         Manager *m = NULL;
         int r, retval = EXIT_FAILURE;
@@ -1544,6 +1554,8 @@ int main(int argc, char *argv[]) {
                 if (virtualization)
                         log_info("Detected virtualization '%s'.", virtualization);
 
+                write_container_id();
+
                 log_info("Detected architecture '%s'.", architecture_to_string(uname_architecture()));
 
                 if (in_initrd())
diff --git src/shared/virt.c src/shared/virt.c
index 0db0514..1e227c5 100644
--- src/shared/virt.c
+++ src/shared/virt.c
@@ -217,8 +217,8 @@ int detect_container(const char **id) {
         static thread_local int cached_found = -1;
         static thread_local const char *cached_id = NULL;
 
-        _cleanup_free_ char *e = NULL;
-        const char *_id = NULL;
+        _cleanup_free_ char *m = NULL;
+        const char *_id = NULL, *e = NULL;
         int r;
 
         if (_likely_(cached_found >= 0)) {
@@ -229,17 +229,6 @@ int detect_container(const char **id) {
                 return cached_found;
         }
 
-        /* Unfortunately many of these operations require root access
-         * in one way or another */
-
-        r = running_in_chroot();
-        if (r < 0)
-                return r;
-        if (r > 0) {
-                _id = "chroot";
-                goto finish;
-        }
-
         /* /proc/vz exists in container and outside of the container,
          * /proc/bc only outside of the container. */
         if (access("/proc/vz", F_OK) >= 0 &&
@@ -249,11 +238,32 @@ int detect_container(const char **id) {
                 goto finish;
         }
 
-        r = getenv_for_pid(1, "container", &e);
-        if (r < 0)
-                return r;
-        if (r == 0)
-                goto finish;
+        if (getpid() == 1) {
+                /* If we are PID 1 we can just check our own
+                 * environment variable */
+
+                e = getenv("container");
+                if (isempty(e)) {
+                        r = 0;
+                        goto finish;
+                }
+        } else {
+
+                /* Otherwise, PID 1 dropped this information into a
+                 * file in /run. This is better than accessing
+                 * /proc/1/environ, since we don't need CAP_SYS_PTRACE
+                 * for that. */
+
+                r = read_one_line_file("/run/systemd/container", &m);
+                if (r == -ENOENT) {
+                        r = 0;
+                        goto finish;
+                }
+                if (r < 0)
+                        return r;
+
+                e = m;
+        }
 
         /* We only recognize a selected few here, since we want to
          * enforce a redacted namespace */
@@ -266,6 +276,8 @@ int detect_container(const char **id) {
         else
                 _id = "other";
 
+        r = 1;
+
 finish:
         cached_found = r;
 
-- 
1.7.9.2