mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-02-23 18:52:09 +01:00
gmain: fix race with waitpid() and child watcher sources
GChildWatchSource uses waitpid(), among pidfd and GetExitCodeProcess(). It thus only works for child processes which the user must ensure to exist and not being reaped yet. Also, the user must not kill() the PID after the child process is reaped and must not race kill() against waitpid(). Also, the user must not call waitpid()/kill() after the child process is reaped. Previously, GChildWatchSource would call waitpid() already when adding the source (g_child_watch_source_new()) and from the worker thread (dispatch_unix_signals_unlocked()). That is racy: - if a child watcher is attached and did not yet fire, you cannot call kill() on the PID without racing against the PID being reaped on the worker thread. That would then lead to ESRCH or even worse, killing the wrong process. - if you g_source_destroy() the source that didn't fire yet, the user doesn't know whether the PID was reaped in the background. Any subsequent kill()/waitpid() may fail with ESRCH/ECHILD or even address the wrong process. The race is most visible on Unix without pidfd support, because then the process gets reaped on the worker thread or during g_child_watch_source_new(). But it's also with Windows and pidfd, because we would have waited for the process in g_child_watch_check(), where other callbacks could fire between reaping the process status and emitting the source's callback. Fix all that by calling waitpid() right before dispatching the callback.
This commit is contained in:
parent
b71ae65f14
commit
ccbebd3bd2
189
glib/gmain.c
189
glib/gmain.c
@ -350,11 +350,10 @@ struct _GChildWatchSource
|
||||
{
|
||||
GSource source;
|
||||
GPid pid;
|
||||
gint child_status;
|
||||
/* @poll is always used on Windows, and used on Unix iff @using_pidfd is set: */
|
||||
GPollFD poll;
|
||||
#ifndef G_OS_WIN32
|
||||
gboolean child_exited; /* (atomic); not used iff @using_pidfd is set */
|
||||
gboolean child_maybe_exited; /* (atomic) */
|
||||
gboolean using_pidfd;
|
||||
#endif /* G_OS_WIN32 */
|
||||
};
|
||||
@ -5252,7 +5251,7 @@ g_child_watch_prepare (GSource *source,
|
||||
|
||||
child_watch_source = (GChildWatchSource *) source;
|
||||
|
||||
return g_atomic_int_get (&child_watch_source->child_exited);
|
||||
return !child_watch_source->using_pidfd && g_atomic_int_get (&child_watch_source->child_maybe_exited);
|
||||
}
|
||||
#endif /* G_OS_WIN32 */
|
||||
}
|
||||
@ -5267,57 +5266,15 @@ g_child_watch_check (GSource *source)
|
||||
|
||||
#ifdef G_OS_WIN32
|
||||
child_exited = child_watch_source->poll.revents & G_IO_IN;
|
||||
|
||||
if (child_exited)
|
||||
{
|
||||
DWORD child_status;
|
||||
|
||||
/*
|
||||
* Note: We do _not_ check for the special value of STILL_ACTIVE
|
||||
* since we know that the process has exited and doing so runs into
|
||||
* problems if the child process "happens to return STILL_ACTIVE(259)"
|
||||
* as Microsoft's Platform SDK puts it.
|
||||
*/
|
||||
if (!GetExitCodeProcess (child_watch_source->pid, &child_status))
|
||||
{
|
||||
gchar *emsg = g_win32_error_message (GetLastError ());
|
||||
g_warning (G_STRLOC ": GetExitCodeProcess() failed: %s", emsg);
|
||||
g_free (emsg);
|
||||
|
||||
child_watch_source->child_status = -1;
|
||||
}
|
||||
else
|
||||
child_watch_source->child_status = child_status;
|
||||
}
|
||||
#else /* G_OS_WIN32 */
|
||||
#ifdef HAVE_PIDFD
|
||||
if (child_watch_source->using_pidfd)
|
||||
{
|
||||
child_exited = child_watch_source->poll.revents & G_IO_IN;
|
||||
|
||||
if (child_exited)
|
||||
{
|
||||
siginfo_t child_info = { 0, };
|
||||
|
||||
/* Get the exit status */
|
||||
if (waitid (P_PIDFD, child_watch_source->poll.fd, &child_info, WEXITED | WNOHANG) >= 0 &&
|
||||
child_info.si_pid != 0)
|
||||
{
|
||||
/* waitid() helpfully provides the wait status in a decomposed
|
||||
* form which is quite useful. Unfortunately we have to report it
|
||||
* to the #GChildWatchFunc as a waitpid()-style platform-specific
|
||||
* wait status, so that the user code in #GChildWatchFunc can then
|
||||
* call WIFEXITED() (etc.) on it. That means re-composing the
|
||||
* status information. */
|
||||
child_watch_source->child_status = siginfo_t_to_wait_status (&child_info);
|
||||
child_watch_source->child_exited = TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
return child_exited;
|
||||
}
|
||||
#endif /* HAVE_PIDFD */
|
||||
child_exited = g_atomic_int_get (&child_watch_source->child_exited);
|
||||
child_exited = g_atomic_int_get (&child_watch_source->child_maybe_exited);
|
||||
#endif /* G_OS_WIN32 */
|
||||
|
||||
return child_exited;
|
||||
@ -5426,31 +5383,10 @@ dispatch_unix_signals_unlocked (void)
|
||||
for (node = unix_child_watches; node; node = node->next)
|
||||
{
|
||||
GChildWatchSource *source = node->data;
|
||||
pid_t pid;
|
||||
|
||||
if (g_atomic_int_get (&source->child_exited))
|
||||
continue;
|
||||
|
||||
do
|
||||
{
|
||||
g_assert (source->pid > 0);
|
||||
|
||||
pid = waitpid (source->pid, &source->child_status, WNOHANG);
|
||||
if (pid > 0)
|
||||
{
|
||||
g_atomic_int_set (&source->child_exited, TRUE);
|
||||
if (g_atomic_int_compare_and_exchange (&source->child_maybe_exited, FALSE, TRUE))
|
||||
wake_source ((GSource *) source);
|
||||
}
|
||||
else if (pid == -1 && errno == ECHILD)
|
||||
{
|
||||
g_warning ("GChildWatchSource: Exit status of a child process was requested but ECHILD was received by waitpid(). See the documentation of g_child_watch_source_new() for possible causes.");
|
||||
source->child_status = 0;
|
||||
g_atomic_int_set (&source->child_exited, TRUE);
|
||||
wake_source ((GSource *) source);
|
||||
}
|
||||
}
|
||||
while (pid == -1 && errno == EINTR);
|
||||
}
|
||||
}
|
||||
|
||||
/* handle GUnixSignalWatchSource instances */
|
||||
@ -5662,9 +5598,106 @@ g_child_watch_dispatch (GSource *source,
|
||||
{
|
||||
GChildWatchSource *child_watch_source;
|
||||
GChildWatchFunc child_watch_callback = (GChildWatchFunc) callback;
|
||||
int wait_status;
|
||||
|
||||
child_watch_source = (GChildWatchSource *) source;
|
||||
|
||||
/* We only (try to) reap the child process right before dispatching the callback.
|
||||
* That way, the caller can rely that the process is there until the callback
|
||||
* is invoked; or, if the caller calls g_source_destroy() without the callback
|
||||
* being dispatched, the process is still not reaped. */
|
||||
|
||||
#ifdef G_OS_WIN32
|
||||
{
|
||||
DWORD child_status;
|
||||
|
||||
/*
|
||||
* Note: We do _not_ check for the special value of STILL_ACTIVE
|
||||
* since we know that the process has exited and doing so runs into
|
||||
* problems if the child process "happens to return STILL_ACTIVE(259)"
|
||||
* as Microsoft's Platform SDK puts it.
|
||||
*/
|
||||
if (!GetExitCodeProcess (child_watch_source->pid, &child_status))
|
||||
{
|
||||
gchar *emsg = g_win32_error_message (GetLastError ());
|
||||
g_warning (G_STRLOC ": GetExitCodeProcess() failed: %s", emsg);
|
||||
g_free (emsg);
|
||||
|
||||
/* Unknown error. We got signaled that the process might be exited,
|
||||
* but now we failed to reap it? Assume the process is gone and proceed. */
|
||||
wait_status = -1;
|
||||
}
|
||||
else
|
||||
wait_status = child_status;
|
||||
}
|
||||
#else /* G_OS_WIN32 */
|
||||
{
|
||||
gboolean child_exited = FALSE;
|
||||
|
||||
wait_status = -1;
|
||||
|
||||
#ifdef HAVE_PIDFD
|
||||
if (child_watch_source->using_pidfd)
|
||||
{
|
||||
siginfo_t child_info = {
|
||||
0,
|
||||
};
|
||||
|
||||
/* Get the exit status */
|
||||
if (waitid (P_PIDFD, child_watch_source->poll.fd, &child_info, WEXITED | WNOHANG) >= 0 &&
|
||||
child_info.si_pid != 0)
|
||||
{
|
||||
/* waitid() helpfully provides the wait status in a decomposed
|
||||
* form which is quite useful. Unfortunately we have to report it
|
||||
* to the #GChildWatchFunc as a waitpid()-style platform-specific
|
||||
* wait status, so that the user code in #GChildWatchFunc can then
|
||||
* call WIFEXITED() (etc.) on it. That means re-composing the
|
||||
* status information. */
|
||||
wait_status = siginfo_t_to_wait_status (&child_info);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Unknown error. We got signaled that the process might be exited,
|
||||
* but now we failed to reap it? Assume the process is gone and proceed. */
|
||||
g_warning (G_STRLOC ": pidfd signaled ready but failed");
|
||||
}
|
||||
child_exited = TRUE;
|
||||
}
|
||||
#endif /* HAVE_PIDFD*/
|
||||
|
||||
if (!child_exited)
|
||||
{
|
||||
pid_t pid;
|
||||
int wstatus;
|
||||
|
||||
waitpid_again:
|
||||
|
||||
/* We must reset the flag before waitpid(). Otherwise, there would be a
|
||||
* race. */
|
||||
g_atomic_int_set (&child_watch_source->child_maybe_exited, FALSE);
|
||||
|
||||
pid = waitpid (child_watch_source->pid, &wstatus, WNOHANG);
|
||||
|
||||
if (pid == 0)
|
||||
{
|
||||
/* Not exited yet. Wait longer. */
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
if (pid > 0)
|
||||
wait_status = wstatus;
|
||||
else if (errno == ECHILD)
|
||||
g_warning ("GChildWatchSource: Exit status of a child process was requested but ECHILD was received by waitpid(). See the documentation of g_child_watch_source_new() for possible causes.");
|
||||
else if (errno == EINTR)
|
||||
goto waitpid_again;
|
||||
else
|
||||
{
|
||||
/* Unexpected error. Whatever happened, we are done waiting for this child. */
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* G_OS_WIN32 */
|
||||
|
||||
if (!callback)
|
||||
{
|
||||
g_warning ("Child watch source dispatched without callback. "
|
||||
@ -5672,7 +5705,7 @@ g_child_watch_dispatch (GSource *source,
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
(child_watch_callback) (child_watch_source->pid, child_watch_source->child_status, user_data);
|
||||
(child_watch_callback) (child_watch_source->pid, wait_status, user_data);
|
||||
|
||||
/* We never keep a child watch source around as the child is gone */
|
||||
return FALSE;
|
||||
@ -5731,6 +5764,14 @@ g_unix_signal_handler (int signum)
|
||||
* mechanism, including `waitpid(pid, ...)` or a second child-watch
|
||||
* source for the same @pid
|
||||
* * the application must not ignore `SIGCHLD`
|
||||
* * Before 2.78, the application could not send a signal (`kill()`) to the
|
||||
* watched @pid in a race free manner. Since 2.78, you can do that while the
|
||||
* associated #GMainContext is acquired.
|
||||
* * Before 2.78, even after destroying the #GSource, you could not
|
||||
* be sure that @pid wasn't already reaped. Hence, it was also not
|
||||
* safe to `kill()` or `waitpid()` on the process ID after the child watch
|
||||
* source was gone. Destroying the source before it fired made it
|
||||
* impossible to reliably reap the process.
|
||||
*
|
||||
* If any of those conditions are not met, this and related APIs will
|
||||
* not work correctly. This can often be diagnosed via a GLib warning
|
||||
@ -5791,19 +5832,19 @@ g_child_watch_source_new (GPid pid)
|
||||
|
||||
return source;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
g_debug ("pidfd_open(%" G_PID_FORMAT ") failed with error: %s",
|
||||
pid, g_strerror (errsv));
|
||||
/* Fall through; likely the kernel isn’t new enough to support pidfd_open() */
|
||||
}
|
||||
#endif /* HAVE_PIDFD */
|
||||
|
||||
/* We can do that without atomic, as the source is not yet added in
|
||||
* unix_child_watches (which we do next under a lock). */
|
||||
child_watch_source->child_maybe_exited = TRUE;
|
||||
|
||||
G_LOCK (unix_signal_lock);
|
||||
ref_unix_signal_handler_unlocked (SIGCHLD);
|
||||
unix_child_watches = g_slist_prepend (unix_child_watches, child_watch_source);
|
||||
if (waitpid (pid, &child_watch_source->child_status, WNOHANG) > 0)
|
||||
child_watch_source->child_exited = TRUE;
|
||||
G_UNLOCK (unix_signal_lock);
|
||||
#endif /* !G_OS_WIN32 */
|
||||
|
||||
|
@ -330,6 +330,99 @@ test_get_passwd_entry_nonexistent (void)
|
||||
g_clear_error (&local_error);
|
||||
}
|
||||
|
||||
static void
|
||||
_child_wait_watch_cb (GPid pid,
|
||||
gint wait_status,
|
||||
gpointer user_data)
|
||||
{
|
||||
gboolean *p_got_callback = user_data;
|
||||
|
||||
g_assert_nonnull (p_got_callback);
|
||||
g_assert_false (*p_got_callback);
|
||||
*p_got_callback = TRUE;
|
||||
}
|
||||
|
||||
static void
|
||||
test_child_wait (void)
|
||||
{
|
||||
gboolean r;
|
||||
GPid pid;
|
||||
guint id;
|
||||
pid_t pid2;
|
||||
int wstatus;
|
||||
gboolean got_callback = FALSE;
|
||||
gboolean iterate_maincontext = g_test_rand_bit ();
|
||||
char **argv;
|
||||
int errsv;
|
||||
|
||||
/* - We spawn a trivial child process that exits after a short time.
|
||||
* - We schedule a g_child_watch_add()
|
||||
* - we may iterate the GMainContext a bit. Randomly we either get the
|
||||
* child-watcher callback or not.
|
||||
* - if we didn't get the callback, we g_source_remove() the child watcher.
|
||||
*
|
||||
* Afterwards, if the callback didn't fire, we check that we are able to waitpid()
|
||||
* on the process ourselves. Of course, if the child watcher notified, the waitpid()
|
||||
* will fail with ECHILD.
|
||||
*/
|
||||
|
||||
argv = g_test_rand_bit () ? ((char *[]){ "/bin/sleep", "0.05", NULL }) : ((char *[]){ "/bin/true", NULL });
|
||||
|
||||
r = g_spawn_async (NULL,
|
||||
argv,
|
||||
NULL,
|
||||
G_SPAWN_DO_NOT_REAP_CHILD,
|
||||
NULL,
|
||||
NULL,
|
||||
&pid,
|
||||
NULL);
|
||||
if (!r)
|
||||
{
|
||||
/* Some odd system without /bin/sleep? Skip the test. */
|
||||
g_test_skip ("failure to spawn test process in test_child_wait()");
|
||||
return;
|
||||
}
|
||||
|
||||
g_assert_cmpint (pid, >=, 1);
|
||||
|
||||
if (g_test_rand_bit ())
|
||||
g_usleep (g_test_rand_int_range (0, (G_USEC_PER_SEC / 10)));
|
||||
|
||||
id = g_child_watch_add (pid, _child_wait_watch_cb, &got_callback);
|
||||
|
||||
if (g_test_rand_bit ())
|
||||
g_usleep (g_test_rand_int_range (0, (G_USEC_PER_SEC / 10)));
|
||||
|
||||
if (iterate_maincontext)
|
||||
{
|
||||
gint64 start_usec = g_get_monotonic_time ();
|
||||
gint64 end_usec = start_usec + g_test_rand_int_range (0, (G_USEC_PER_SEC / 10));
|
||||
|
||||
while (!got_callback && g_get_monotonic_time () < end_usec)
|
||||
g_main_context_iteration (NULL, FALSE);
|
||||
}
|
||||
|
||||
if (!got_callback)
|
||||
g_source_remove (id);
|
||||
|
||||
errno = 0;
|
||||
pid2 = waitpid (pid, &wstatus, 0);
|
||||
errsv = errno;
|
||||
if (got_callback)
|
||||
{
|
||||
g_assert_true (iterate_maincontext);
|
||||
g_assert_cmpint (errsv, ==, ECHILD);
|
||||
g_assert_cmpint (pid2, <, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
g_assert_cmpint (errsv, ==, 0);
|
||||
g_assert_cmpint (pid2, ==, pid);
|
||||
g_assert_true (WIFEXITED (wstatus));
|
||||
g_assert_cmpint (WEXITSTATUS (wstatus), ==, 0);
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main (int argc,
|
||||
char *argv[])
|
||||
|
Loading…
x
Reference in New Issue
Block a user