diff --git a/glib/gmain.c b/glib/gmain.c index aec04314c..d1ab421fb 100644 --- a/glib/gmain.c +++ b/glib/gmain.c @@ -350,12 +350,11 @@ struct _GChildWatchSource { GSource source; GPid pid; - gint child_status; /* @poll is always used on Windows, and used on Unix iff @using_pidfd is set: */ GPollFD poll; #ifndef G_OS_WIN32 - gboolean child_exited; /* (atomic); not used iff @using_pidfd is set */ - gboolean using_pidfd; + gboolean child_maybe_exited; /* (atomic) */ + gboolean using_pidfd; #endif /* G_OS_WIN32 */ }; @@ -5238,7 +5237,7 @@ siginfo_t_to_wait_status (const siginfo_t *info) return W_STOPCODE (info->si_status); } } -#endif /* HAVE_PIDFD */ +#endif /* HAVE_PIDFD */ static gboolean g_child_watch_prepare (GSource *source, @@ -5246,19 +5245,19 @@ g_child_watch_prepare (GSource *source, { #ifdef G_OS_WIN32 return FALSE; -#else /* G_OS_WIN32 */ +#else /* G_OS_WIN32 */ { GChildWatchSource *child_watch_source; child_watch_source = (GChildWatchSource *) source; - return g_atomic_int_get (&child_watch_source->child_exited); + return !child_watch_source->using_pidfd && g_atomic_int_get (&child_watch_source->child_maybe_exited); } #endif /* G_OS_WIN32 */ } static gboolean -g_child_watch_check (GSource *source) +g_child_watch_check (GSource *source) { GChildWatchSource *child_watch_source; gboolean child_exited; @@ -5267,57 +5266,15 @@ g_child_watch_check (GSource *source) #ifdef G_OS_WIN32 child_exited = child_watch_source->poll.revents & G_IO_IN; - - if (child_exited) - { - DWORD child_status; - - /* - * Note: We do _not_ check for the special value of STILL_ACTIVE - * since we know that the process has exited and doing so runs into - * problems if the child process "happens to return STILL_ACTIVE(259)" - * as Microsoft's Platform SDK puts it. - */ - if (!GetExitCodeProcess (child_watch_source->pid, &child_status)) - { - gchar *emsg = g_win32_error_message (GetLastError ()); - g_warning (G_STRLOC ": GetExitCodeProcess() failed: %s", emsg); - g_free (emsg); - - child_watch_source->child_status = -1; - } - else - child_watch_source->child_status = child_status; - } #else /* G_OS_WIN32 */ #ifdef HAVE_PIDFD if (child_watch_source->using_pidfd) { child_exited = child_watch_source->poll.revents & G_IO_IN; - - if (child_exited) - { - siginfo_t child_info = { 0, }; - - /* Get the exit status */ - if (waitid (P_PIDFD, child_watch_source->poll.fd, &child_info, WEXITED | WNOHANG) >= 0 && - child_info.si_pid != 0) - { - /* waitid() helpfully provides the wait status in a decomposed - * form which is quite useful. Unfortunately we have to report it - * to the #GChildWatchFunc as a waitpid()-style platform-specific - * wait status, so that the user code in #GChildWatchFunc can then - * call WIFEXITED() (etc.) on it. That means re-composing the - * status information. */ - child_watch_source->child_status = siginfo_t_to_wait_status (&child_info); - child_watch_source->child_exited = TRUE; - } - } - return child_exited; } -#endif /* HAVE_PIDFD */ - child_exited = g_atomic_int_get (&child_watch_source->child_exited); +#endif /* HAVE_PIDFD */ + child_exited = g_atomic_int_get (&child_watch_source->child_maybe_exited); #endif /* G_OS_WIN32 */ return child_exited; @@ -5426,30 +5383,9 @@ dispatch_unix_signals_unlocked (void) for (node = unix_child_watches; node; node = node->next) { GChildWatchSource *source = node->data; - pid_t pid; - if (g_atomic_int_get (&source->child_exited)) - continue; - - do - { - g_assert (source->pid > 0); - - pid = waitpid (source->pid, &source->child_status, WNOHANG); - if (pid > 0) - { - g_atomic_int_set (&source->child_exited, TRUE); - wake_source ((GSource *) source); - } - else if (pid == -1 && errno == ECHILD) - { - g_warning ("GChildWatchSource: Exit status of a child process was requested but ECHILD was received by waitpid(). See the documentation of g_child_watch_source_new() for possible causes."); - source->child_status = 0; - g_atomic_int_set (&source->child_exited, TRUE); - wake_source ((GSource *) source); - } - } - while (pid == -1 && errno == EINTR); + if (g_atomic_int_compare_and_exchange (&source->child_maybe_exited, FALSE, TRUE)) + wake_source ((GSource *) source); } } @@ -5662,9 +5598,106 @@ g_child_watch_dispatch (GSource *source, { GChildWatchSource *child_watch_source; GChildWatchFunc child_watch_callback = (GChildWatchFunc) callback; + int wait_status; child_watch_source = (GChildWatchSource *) source; + /* We only (try to) reap the child process right before dispatching the callback. + * That way, the caller can rely that the process is there until the callback + * is invoked; or, if the caller calls g_source_destroy() without the callback + * being dispatched, the process is still not reaped. */ + +#ifdef G_OS_WIN32 + { + DWORD child_status; + + /* + * Note: We do _not_ check for the special value of STILL_ACTIVE + * since we know that the process has exited and doing so runs into + * problems if the child process "happens to return STILL_ACTIVE(259)" + * as Microsoft's Platform SDK puts it. + */ + if (!GetExitCodeProcess (child_watch_source->pid, &child_status)) + { + gchar *emsg = g_win32_error_message (GetLastError ()); + g_warning (G_STRLOC ": GetExitCodeProcess() failed: %s", emsg); + g_free (emsg); + + /* Unknown error. We got signaled that the process might be exited, + * but now we failed to reap it? Assume the process is gone and proceed. */ + wait_status = -1; + } + else + wait_status = child_status; + } +#else /* G_OS_WIN32 */ + { + gboolean child_exited = FALSE; + + wait_status = -1; + +#ifdef HAVE_PIDFD + if (child_watch_source->using_pidfd) + { + siginfo_t child_info = { + 0, + }; + + /* Get the exit status */ + if (waitid (P_PIDFD, child_watch_source->poll.fd, &child_info, WEXITED | WNOHANG) >= 0 && + child_info.si_pid != 0) + { + /* waitid() helpfully provides the wait status in a decomposed + * form which is quite useful. Unfortunately we have to report it + * to the #GChildWatchFunc as a waitpid()-style platform-specific + * wait status, so that the user code in #GChildWatchFunc can then + * call WIFEXITED() (etc.) on it. That means re-composing the + * status information. */ + wait_status = siginfo_t_to_wait_status (&child_info); + } + else + { + /* Unknown error. We got signaled that the process might be exited, + * but now we failed to reap it? Assume the process is gone and proceed. */ + g_warning (G_STRLOC ": pidfd signaled ready but failed"); + } + child_exited = TRUE; + } +#endif /* HAVE_PIDFD*/ + + if (!child_exited) + { + pid_t pid; + int wstatus; + + waitpid_again: + + /* We must reset the flag before waitpid(). Otherwise, there would be a + * race. */ + g_atomic_int_set (&child_watch_source->child_maybe_exited, FALSE); + + pid = waitpid (child_watch_source->pid, &wstatus, WNOHANG); + + if (pid == 0) + { + /* Not exited yet. Wait longer. */ + return TRUE; + } + + if (pid > 0) + wait_status = wstatus; + else if (errno == ECHILD) + g_warning ("GChildWatchSource: Exit status of a child process was requested but ECHILD was received by waitpid(). See the documentation of g_child_watch_source_new() for possible causes."); + else if (errno == EINTR) + goto waitpid_again; + else + { + /* Unexpected error. Whatever happened, we are done waiting for this child. */ + } + } + } +#endif /* G_OS_WIN32 */ + if (!callback) { g_warning ("Child watch source dispatched without callback. " @@ -5672,7 +5705,7 @@ g_child_watch_dispatch (GSource *source, return FALSE; } - (child_watch_callback) (child_watch_source->pid, child_watch_source->child_status, user_data); + (child_watch_callback) (child_watch_source->pid, wait_status, user_data); /* We never keep a child watch source around as the child is gone */ return FALSE; @@ -5731,6 +5764,14 @@ g_unix_signal_handler (int signum) * mechanism, including `waitpid(pid, ...)` or a second child-watch * source for the same @pid * * the application must not ignore `SIGCHLD` + * * Before 2.78, the application could not send a signal (`kill()`) to the + * watched @pid in a race free manner. Since 2.78, you can do that while the + * associated #GMainContext is acquired. + * * Before 2.78, even after destroying the #GSource, you could not + * be sure that @pid wasn't already reaped. Hence, it was also not + * safe to `kill()` or `waitpid()` on the process ID after the child watch + * source was gone. Destroying the source before it fired made it + * impossible to reliably reap the process. * * If any of those conditions are not met, this and related APIs will * not work correctly. This can often be diagnosed via a GLib warning @@ -5791,19 +5832,19 @@ g_child_watch_source_new (GPid pid) return source; } - else - { - g_debug ("pidfd_open(%" G_PID_FORMAT ") failed with error: %s", - pid, g_strerror (errsv)); - /* Fall through; likely the kernel isn’t new enough to support pidfd_open() */ - } -#endif /* HAVE_PIDFD */ + + g_debug ("pidfd_open(%" G_PID_FORMAT ") failed with error: %s", + pid, g_strerror (errsv)); + /* Fall through; likely the kernel isn’t new enough to support pidfd_open() */ +#endif /* HAVE_PIDFD */ + + /* We can do that without atomic, as the source is not yet added in + * unix_child_watches (which we do next under a lock). */ + child_watch_source->child_maybe_exited = TRUE; G_LOCK (unix_signal_lock); ref_unix_signal_handler_unlocked (SIGCHLD); unix_child_watches = g_slist_prepend (unix_child_watches, child_watch_source); - if (waitpid (pid, &child_watch_source->child_status, WNOHANG) > 0) - child_watch_source->child_exited = TRUE; G_UNLOCK (unix_signal_lock); #endif /* !G_OS_WIN32 */ diff --git a/glib/tests/unix.c b/glib/tests/unix.c index 7639d066a..6f40ff893 100644 --- a/glib/tests/unix.c +++ b/glib/tests/unix.c @@ -330,6 +330,99 @@ test_get_passwd_entry_nonexistent (void) g_clear_error (&local_error); } +static void +_child_wait_watch_cb (GPid pid, + gint wait_status, + gpointer user_data) +{ + gboolean *p_got_callback = user_data; + + g_assert_nonnull (p_got_callback); + g_assert_false (*p_got_callback); + *p_got_callback = TRUE; +} + +static void +test_child_wait (void) +{ + gboolean r; + GPid pid; + guint id; + pid_t pid2; + int wstatus; + gboolean got_callback = FALSE; + gboolean iterate_maincontext = g_test_rand_bit (); + char **argv; + int errsv; + + /* - We spawn a trivial child process that exits after a short time. + * - We schedule a g_child_watch_add() + * - we may iterate the GMainContext a bit. Randomly we either get the + * child-watcher callback or not. + * - if we didn't get the callback, we g_source_remove() the child watcher. + * + * Afterwards, if the callback didn't fire, we check that we are able to waitpid() + * on the process ourselves. Of course, if the child watcher notified, the waitpid() + * will fail with ECHILD. + */ + + argv = g_test_rand_bit () ? ((char *[]){ "/bin/sleep", "0.05", NULL }) : ((char *[]){ "/bin/true", NULL }); + + r = g_spawn_async (NULL, + argv, + NULL, + G_SPAWN_DO_NOT_REAP_CHILD, + NULL, + NULL, + &pid, + NULL); + if (!r) + { + /* Some odd system without /bin/sleep? Skip the test. */ + g_test_skip ("failure to spawn test process in test_child_wait()"); + return; + } + + g_assert_cmpint (pid, >=, 1); + + if (g_test_rand_bit ()) + g_usleep (g_test_rand_int_range (0, (G_USEC_PER_SEC / 10))); + + id = g_child_watch_add (pid, _child_wait_watch_cb, &got_callback); + + if (g_test_rand_bit ()) + g_usleep (g_test_rand_int_range (0, (G_USEC_PER_SEC / 10))); + + if (iterate_maincontext) + { + gint64 start_usec = g_get_monotonic_time (); + gint64 end_usec = start_usec + g_test_rand_int_range (0, (G_USEC_PER_SEC / 10)); + + while (!got_callback && g_get_monotonic_time () < end_usec) + g_main_context_iteration (NULL, FALSE); + } + + if (!got_callback) + g_source_remove (id); + + errno = 0; + pid2 = waitpid (pid, &wstatus, 0); + errsv = errno; + if (got_callback) + { + g_assert_true (iterate_maincontext); + g_assert_cmpint (errsv, ==, ECHILD); + g_assert_cmpint (pid2, <, 0); + } + else + { + g_assert_cmpint (errsv, ==, 0); + g_assert_cmpint (pid2, ==, pid); + g_assert_true (WIFEXITED (wstatus)); + g_assert_cmpint (WEXITSTATUS (wstatus), ==, 0); + } +} + int main (int argc, char *argv[])