From d1e558f49227aeb7b651371c5b9b774d7b0d8f89 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Wed, 29 Mar 2023 16:21:52 +0200 Subject: [PATCH 1/8] gmain/tests: add unit test for g_child_watch_add() reaping processes without notifying The test currently tests broken behavior. It will be fixed next. --- glib/tests/unix.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/glib/tests/unix.c b/glib/tests/unix.c index 0c1ec21e9..0d922367f 100644 --- a/glib/tests/unix.c +++ b/glib/tests/unix.c @@ -372,6 +372,105 @@ test_get_passwd_entry_nonexistent (void) g_clear_error (&local_error); } +static void +_child_wait_watch_cb (GPid pid, + gint wait_status, + gpointer user_data) +{ + gboolean *p_got_callback = user_data; + + g_assert_nonnull (p_got_callback); + g_assert_false (*p_got_callback); + *p_got_callback = TRUE; +} + +static void +test_child_wait (void) +{ + gboolean r; + GPid pid; + guint id; + pid_t pid2; + int wstatus; + gboolean got_callback = FALSE; + gboolean iterate_maincontext = g_test_rand_bit (); + char **argv; + int errsv; + + /* - We spawn a trivial child process that exits after a short time. + * - We schedule a g_child_watch_add() + * - we may iterate the GMainContext a bit. Randomly we either get the + * child-watcher callback or not. + * - if we didn't get the callback, we g_source_remove() the child watcher. + * + * Afterwards, if the callback didn't fire, we check that we are able to waitpid() + * on the process ourselves. Of course, if the child watcher notified, the waitpid() + * will fail with ECHILD. + */ + + argv = g_test_rand_bit () ? ((char *[]){ "/bin/sleep", "0.05", NULL }) : ((char *[]){ "/bin/true", NULL }); + + r = g_spawn_async (NULL, + argv, + NULL, + G_SPAWN_DO_NOT_REAP_CHILD, + NULL, + NULL, + &pid, + NULL); + if (!r) + { + /* Some odd system without /bin/sleep? Skip the test. */ + g_test_skip ("failure to spawn test process in test_child_wait()"); + return; + } + + g_assert_cmpint (pid, >=, 1); + + if (g_test_rand_bit ()) + g_usleep (g_test_rand_int_range (0, (G_USEC_PER_SEC / 10))); + + id = g_child_watch_add (pid, _child_wait_watch_cb, &got_callback); + + if (g_test_rand_bit ()) + g_usleep (g_test_rand_int_range (0, (G_USEC_PER_SEC / 10))); + + if (iterate_maincontext) + { + gint64 start_usec = g_get_monotonic_time (); + gint64 end_usec = start_usec + g_test_rand_int_range (0, (G_USEC_PER_SEC / 10)); + + while (!got_callback && g_get_monotonic_time () < end_usec) + g_main_context_iteration (NULL, FALSE); + } + + if (!got_callback) + g_source_remove (id); + + errno = 0; + pid2 = waitpid (pid, &wstatus, 0); + errsv = errno; + if (got_callback) + { + g_assert_true (iterate_maincontext); + g_assert_cmpint (errsv, ==, ECHILD); + g_assert_cmpint (pid2, <, 0); + } + else if (errsv == ECHILD) + { + /* FIXME: This is a bug. We didn't get the callback from the child + * watcher, but still the child is already reaped. */ + g_assert_cmpint (pid2, <, 0); + } + else + { + g_assert_cmpint (errsv, ==, 0); + g_assert_cmpint (pid2, ==, pid); + g_assert_true (WIFEXITED (wstatus)); + g_assert_cmpint (WEXITSTATUS (wstatus), ==, 0); + } +} + int main (int argc, char *argv[]) @@ -390,6 +489,7 @@ main (int argc, g_test_add_func ("/glib-unix/callback_after_signal", test_callback_after_signal); g_test_add_func ("/glib-unix/get-passwd-entry/root", test_get_passwd_entry_root); g_test_add_func ("/glib-unix/get-passwd-entry/nonexistent", test_get_passwd_entry_nonexistent); + g_test_add_func ("/glib-unix/child-wait", test_child_wait); return g_test_run(); } From 9315a211fac793d5c015401ab9e03e73ed26b3ee Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Tue, 28 Mar 2023 14:38:08 +0200 Subject: [PATCH 2/8] gmain: unify win/unix implementations for child watcher Let's move the difference between the win/unix implementations closer to where the difference is. Thereby, we easier see the two implementations side by side. Splitting it at a higher layer makes the code harder to read. This is just a preparation for what comes next. --- glib/gmain.c | 192 ++++++++++++++++++++++++--------------------------- 1 file changed, 92 insertions(+), 100 deletions(-) diff --git a/glib/gmain.c b/glib/gmain.c index b91e68d36..d50fd8315 100644 --- a/glib/gmain.c +++ b/glib/gmain.c @@ -488,6 +488,11 @@ static gboolean g_child_watch_dispatch (GSource *source, GSourceFunc callback, gpointer user_data); static void g_child_watch_finalize (GSource *source); + +#ifndef G_OS_WIN32 +static void unref_unix_signal_handler_unlocked (int signum); +#endif + #ifdef G_OS_UNIX static void g_unix_signal_handler (int signum); static gboolean g_unix_signal_watch_prepare (GSource *source, @@ -5479,17 +5484,49 @@ g_timeout_add_seconds_once (guint interval, /* Child watch functions */ -#ifdef G_OS_WIN32 +#ifdef HAVE_PIDFD +static int +siginfo_t_to_wait_status (const siginfo_t *info) +{ + /* Each of these returns is essentially the inverse of WIFEXITED(), + * WIFSIGNALED(), etc. */ + switch (info->si_code) + { + case CLD_EXITED: + return W_EXITCODE (info->si_status, 0); + case CLD_KILLED: + return W_EXITCODE (0, info->si_status); + case CLD_DUMPED: + return W_EXITCODE (0, info->si_status | WCOREFLAG); + case CLD_CONTINUED: + return __W_CONTINUED; + case CLD_STOPPED: + case CLD_TRAPPED: + default: + return W_STOPCODE (info->si_status); + } +} +#endif /* HAVE_PIDFD */ static gboolean g_child_watch_prepare (GSource *source, gint *timeout) { +#ifdef G_OS_WIN32 *timeout = -1; return FALSE; +#else /* G_OS_WIN32 */ + { + GChildWatchSource *child_watch_source; + + child_watch_source = (GChildWatchSource *) source; + + return g_atomic_int_get (&child_watch_source->child_exited); + } +#endif /* G_OS_WIN32 */ } -static gboolean +static gboolean g_child_watch_check (GSource *source) { GChildWatchSource *child_watch_source; @@ -5497,6 +5534,7 @@ g_child_watch_check (GSource *source) child_watch_source = (GChildWatchSource *) source; +#ifdef G_OS_WIN32 child_exited = child_watch_source->poll.revents & G_IO_IN; if (child_exited) @@ -5511,15 +5549,45 @@ g_child_watch_check (GSource *source) */ if (!GetExitCodeProcess (child_watch_source->pid, &child_status)) { - gchar *emsg = g_win32_error_message (GetLastError ()); - g_warning (G_STRLOC ": GetExitCodeProcess() failed: %s", emsg); - g_free (emsg); + gchar *emsg = g_win32_error_message (GetLastError ()); + g_warning (G_STRLOC ": GetExitCodeProcess() failed: %s", emsg); + g_free (emsg); - child_watch_source->child_status = -1; - } + child_watch_source->child_status = -1; + } else - child_watch_source->child_status = child_status; + child_watch_source->child_status = child_status; } +#else /* G_OS_WIN32 */ +#ifdef HAVE_PIDFD + if (child_watch_source->using_pidfd) + { + child_exited = child_watch_source->poll.revents & G_IO_IN; + + if (child_exited) + { + siginfo_t child_info = { 0, }; + + /* Get the exit status */ + if (waitid (P_PIDFD, child_watch_source->poll.fd, &child_info, WEXITED | WNOHANG) >= 0 && + child_info.si_pid != 0) + { + /* waitid() helpfully provides the wait status in a decomposed + * form which is quite useful. Unfortunately we have to report it + * to the #GChildWatchFunc as a waitpid()-style platform-specific + * wait status, so that the user code in #GChildWatchFunc can then + * call WIFEXITED() (etc.) on it. That means re-composing the + * status information. */ + child_watch_source->child_status = siginfo_t_to_wait_status (&child_info); + child_watch_source->child_exited = TRUE; + } + } + + return child_exited; + } +#endif /* HAVE_PIDFD */ + child_exited = g_atomic_int_get (&child_watch_source->child_exited); +#endif /* G_OS_WIN32 */ return child_exited; } @@ -5527,9 +5595,24 @@ g_child_watch_check (GSource *source) static void g_child_watch_finalize (GSource *source) { +#ifndef G_OS_WIN32 + GChildWatchSource *child_watch_source = (GChildWatchSource *) source; + + if (child_watch_source->using_pidfd) + { + if (child_watch_source->poll.fd >= 0) + close (child_watch_source->poll.fd); + return; + } + + G_LOCK (unix_signal_lock); + unix_child_watches = g_slist_remove (unix_child_watches, source); + unref_unix_signal_handler_unlocked (SIGCHLD); + G_UNLOCK (unix_signal_lock); +#endif /* G_OS_WIN32 */ } -#else /* G_OS_WIN32 */ +#ifndef G_OS_WIN32 static void wake_source (GSource *source) @@ -5662,79 +5745,6 @@ dispatch_unix_signals (void) G_UNLOCK(unix_signal_lock); } -static gboolean -g_child_watch_prepare (GSource *source, - gint *timeout) -{ - GChildWatchSource *child_watch_source; - - child_watch_source = (GChildWatchSource *) source; - - return g_atomic_int_get (&child_watch_source->child_exited); -} - -#ifdef HAVE_PIDFD -static int -siginfo_t_to_wait_status (const siginfo_t *info) -{ - /* Each of these returns is essentially the inverse of WIFEXITED(), - * WIFSIGNALED(), etc. */ - switch (info->si_code) - { - case CLD_EXITED: - return W_EXITCODE (info->si_status, 0); - case CLD_KILLED: - return W_EXITCODE (0, info->si_status); - case CLD_DUMPED: - return W_EXITCODE (0, info->si_status | WCOREFLAG); - case CLD_CONTINUED: - return __W_CONTINUED; - case CLD_STOPPED: - case CLD_TRAPPED: - default: - return W_STOPCODE (info->si_status); - } -} -#endif /* HAVE_PIDFD */ - -static gboolean -g_child_watch_check (GSource *source) -{ - GChildWatchSource *child_watch_source; - - child_watch_source = (GChildWatchSource *) source; - -#ifdef HAVE_PIDFD - if (child_watch_source->using_pidfd) - { - gboolean child_exited = child_watch_source->poll.revents & G_IO_IN; - - if (child_exited) - { - siginfo_t child_info = { 0, }; - - /* Get the exit status */ - if (waitid (P_PIDFD, child_watch_source->poll.fd, &child_info, WEXITED | WNOHANG) >= 0 && - child_info.si_pid != 0) - { - /* waitid() helpfully provides the wait status in a decomposed - * form which is quite useful. Unfortunately we have to report it - * to the #GChildWatchFunc as a waitpid()-style platform-specific - * wait status, so that the user code in #GChildWatchFunc can then - * call WIFEXITED() (etc.) on it. That means re-composing the - * status information. */ - child_watch_source->child_status = siginfo_t_to_wait_status (&child_info); - child_watch_source->child_exited = TRUE; - } - } - - return child_exited; - } -#endif /* HAVE_PIDFD */ - - return g_atomic_int_get (&child_watch_source->child_exited); -} - static gboolean g_unix_signal_watch_prepare (GSource *source, gint *timeout) @@ -5913,24 +5923,6 @@ g_unix_signal_watch_finalize (GSource *source) G_UNLOCK (unix_signal_lock); } -static void -g_child_watch_finalize (GSource *source) -{ - GChildWatchSource *child_watch_source = (GChildWatchSource *) source; - - if (child_watch_source->using_pidfd) - { - if (child_watch_source->poll.fd >= 0) - close (child_watch_source->poll.fd); - return; - } - - G_LOCK (unix_signal_lock); - unix_child_watches = g_slist_remove (unix_child_watches, source); - unref_unix_signal_handler_unlocked (SIGCHLD); - G_UNLOCK (unix_signal_lock); -} - #endif /* G_OS_WIN32 */ static gboolean From a71b0c04617e036bc2ba9446d6b08719c057a364 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Tue, 28 Mar 2023 15:44:30 +0200 Subject: [PATCH 3/8] gmain: simplify handling child watchers in dispatch_unix_signals_unlocked() - if a child watch source has "using_pidfd", it is never linked in the unix_child_watches list. Drop that check. - replace the deep nested if, with an early "continue" in the loop, if we detect there is nothing to do. It makes the code easier to read. --- glib/gmain.c | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/glib/gmain.c b/glib/gmain.c index d50fd8315..2e829476f 100644 --- a/glib/gmain.c +++ b/glib/gmain.c @@ -5695,31 +5695,30 @@ dispatch_unix_signals_unlocked (void) for (node = unix_child_watches; node; node = node->next) { GChildWatchSource *source = node->data; + pid_t pid; - if (!source->using_pidfd && - !g_atomic_int_get (&source->child_exited)) + if (g_atomic_int_get (&source->child_exited)) + continue; + + do { - pid_t pid; - do - { - g_assert (source->pid > 0); + g_assert (source->pid > 0); - pid = waitpid (source->pid, &source->child_status, WNOHANG); - if (pid > 0) - { - g_atomic_int_set (&source->child_exited, TRUE); - wake_source ((GSource *) source); - } - else if (pid == -1 && errno == ECHILD) - { - g_warning ("GChildWatchSource: Exit status of a child process was requested but ECHILD was received by waitpid(). See the documentation of g_child_watch_source_new() for possible causes."); - source->child_status = 0; - g_atomic_int_set (&source->child_exited, TRUE); - wake_source ((GSource *) source); - } + pid = waitpid (source->pid, &source->child_status, WNOHANG); + if (pid > 0) + { + g_atomic_int_set (&source->child_exited, TRUE); + wake_source ((GSource *) source); + } + else if (pid == -1 && errno == ECHILD) + { + g_warning ("GChildWatchSource: Exit status of a child process was requested but ECHILD was received by waitpid(). See the documentation of g_child_watch_source_new() for possible causes."); + source->child_status = 0; + g_atomic_int_set (&source->child_exited, TRUE); + wake_source ((GSource *) source); } - while (pid == -1 && errno == EINTR); } + while (pid == -1 && errno == EINTR); } } From 30b5418608d7d4553d9b941295f845f5c49cc2e9 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Wed, 17 May 2023 08:04:02 +0200 Subject: [PATCH 4/8] gmain: remove unnecessary initialization of source_timeout in g_main_context_prepare_unlocked() Note that the variable source_timeout is already initialized upon definition, at the beginning of the block. It's easy to see, that no code changes the variable between the variable definition, and the place where it was initialized. It was thus unnecessary. It's not about dropping the unnecessary code (the compiler could do that just fine too). It's that there is the other branch of the "if/else", where the variable is also not initialized. But the other branch also requires that the variable is in fact initialized to -1, because prepare() callbacks are free not to explicitly set the output value. So both branches require the variable to be initialized to -1, but only one of them did. This poses unnecessary questions about whether anything is wrong. Avoid that by dropping the redundant code. --- glib/gmain.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/glib/gmain.c b/glib/gmain.c index 2e829476f..ee6a699f3 100644 --- a/glib/gmain.c +++ b/glib/gmain.c @@ -3855,10 +3855,7 @@ g_main_context_prepare_unlocked (GMainContext *context, context->in_check_or_prepare--; } else - { - source_timeout = -1; - result = FALSE; - } + result = FALSE; if (result == FALSE && source->priv->ready_time != -1) { From cdda194844c9fa9e9ba681879942fd41d1a06c64 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Wed, 17 May 2023 08:15:16 +0200 Subject: [PATCH 5/8] gmain: remove unnecessary initialization of *timeout in prepare() callbacks Note that the prepare callback only has one caller, which pre-initializes the timeout argument to -1. That may be an implementation detail and not publicly promised, but it wouldn't make sense to do it any other way in the caller. Also, note that g_unix_signal_watch_prepare() and the UNIX branch of g_child_watch_prepare() already relied on that. --- gio/gsocket.c | 2 -- glib/giounix.c | 2 -- glib/giowin32.c | 2 -- glib/gmain.c | 1 - glib/tests/mainloop.c | 2 ++ 5 files changed, 2 insertions(+), 7 deletions(-) diff --git a/gio/gsocket.c b/gio/gsocket.c index 3411b7785..a4b57d909 100644 --- a/gio/gsocket.c +++ b/gio/gsocket.c @@ -4003,8 +4003,6 @@ socket_source_prepare (GSource *source, { GSocketSource *socket_source = (GSocketSource *)source; - *timeout = -1; - #ifdef G_OS_WIN32 if ((socket_source->pollfd.revents & G_IO_NVAL) != 0) return TRUE; diff --git a/glib/giounix.c b/glib/giounix.c index 9d9492b28..84e81357c 100644 --- a/glib/giounix.c +++ b/glib/giounix.c @@ -135,8 +135,6 @@ g_io_unix_prepare (GSource *source, GIOUnixWatch *watch = (GIOUnixWatch *)source; GIOCondition buffer_condition = g_io_channel_get_buffer_condition (watch->channel); - *timeout = -1; - /* Only return TRUE here if _all_ bits in watch->condition will be set */ return ((watch->condition & buffer_condition) == watch->condition); diff --git a/glib/giowin32.c b/glib/giowin32.c index b9b481921..f0e025d4a 100644 --- a/glib/giowin32.c +++ b/glib/giowin32.c @@ -709,8 +709,6 @@ g_io_win32_prepare (GSource *source, GIOWin32Channel *channel = (GIOWin32Channel *)watch->channel; int event_mask; - *timeout = -1; - if (channel->debug) g_print ("g_io_win32_prepare: source=%p channel=%p", source, channel); diff --git a/glib/gmain.c b/glib/gmain.c index ee6a699f3..5fb2746f8 100644 --- a/glib/gmain.c +++ b/glib/gmain.c @@ -5510,7 +5510,6 @@ g_child_watch_prepare (GSource *source, gint *timeout) { #ifdef G_OS_WIN32 - *timeout = -1; return FALSE; #else /* G_OS_WIN32 */ { diff --git a/glib/tests/mainloop.c b/glib/tests/mainloop.c index 5c34cf4ae..152e74b87 100644 --- a/glib/tests/mainloop.c +++ b/glib/tests/mainloop.c @@ -37,6 +37,8 @@ cb (gpointer data) static gboolean prepare (GSource *source, gint *time) { + g_assert_nonnull (time); + g_assert_cmpint (*time, ==, -1); return FALSE; } static gboolean From cecbb25eeb31257302d5e26ecf6c505a332be860 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Tue, 28 Mar 2023 14:30:58 +0200 Subject: [PATCH 6/8] gmain: fix race with waitpid() and child watcher sources GChildWatchSource uses waitpid(), among pidfd and GetExitCodeProcess(). It thus only works for child processes which the user must ensure to exist and not being reaped yet. Also, the user must not kill() the PID after the child process is reaped and must not race kill() against waitpid(). Also, the user must not call waitpid()/kill() after the child process is reaped. Previously, GChildWatchSource would call waitpid() already when adding the source (g_child_watch_source_new()) and from the worker thread (dispatch_unix_signals_unlocked()). That is racy: - if a child watcher is attached and did not yet fire, you cannot call kill() on the PID without racing against the PID being reaped on the worker thread. That would then lead to ESRCH or even worse, killing the wrong process. - if you g_source_destroy() the source that didn't fire yet, the user doesn't know whether the PID was reaped in the background. Any subsequent kill()/waitpid() may fail with ESRCH/ECHILD or even address the wrong process. The race is most visible on Unix without pidfd support, because then the process gets reaped on the worker thread or during g_child_watch_source_new(). But it's also with Windows and pidfd, because we would have waited for the process in g_child_watch_check(), where other callbacks could fire between reaping the process status and emitting the source's callback. Fix all that by calling waitpid() right before dispatching the callback. --- glib/gmain.c | 211 +++++++++++++++++++++++++++------------------- glib/tests/unix.c | 6 -- 2 files changed, 125 insertions(+), 92 deletions(-) diff --git a/glib/gmain.c b/glib/gmain.c index 5fb2746f8..167521883 100644 --- a/glib/gmain.c +++ b/glib/gmain.c @@ -374,14 +374,11 @@ struct _GChildWatchSource { GSource source; GPid pid; - /* On Unix this is a wait status, which is the thing you pass to WEXITSTATUS() - * to get the status returned from the process’ main() or passed to exit(): */ - gint child_status; /* @poll is always used on Windows, and used on Unix iff @using_pidfd is set: */ GPollFD poll; #ifndef G_OS_WIN32 - gboolean child_exited; /* (atomic); not used iff @using_pidfd is set */ - gboolean using_pidfd; + gboolean child_maybe_exited; /* (atomic) */ + gboolean using_pidfd; #endif /* G_OS_WIN32 */ }; @@ -5503,7 +5500,7 @@ siginfo_t_to_wait_status (const siginfo_t *info) return W_STOPCODE (info->si_status); } } -#endif /* HAVE_PIDFD */ +#endif /* HAVE_PIDFD */ static gboolean g_child_watch_prepare (GSource *source, @@ -5511,19 +5508,19 @@ g_child_watch_prepare (GSource *source, { #ifdef G_OS_WIN32 return FALSE; -#else /* G_OS_WIN32 */ +#else /* G_OS_WIN32 */ { GChildWatchSource *child_watch_source; child_watch_source = (GChildWatchSource *) source; - return g_atomic_int_get (&child_watch_source->child_exited); + return !child_watch_source->using_pidfd && g_atomic_int_get (&child_watch_source->child_maybe_exited); } #endif /* G_OS_WIN32 */ } static gboolean -g_child_watch_check (GSource *source) +g_child_watch_check (GSource *source) { GChildWatchSource *child_watch_source; gboolean child_exited; @@ -5532,57 +5529,15 @@ g_child_watch_check (GSource *source) #ifdef G_OS_WIN32 child_exited = child_watch_source->poll.revents & G_IO_IN; - - if (child_exited) - { - DWORD child_status; - - /* - * Note: We do _not_ check for the special value of STILL_ACTIVE - * since we know that the process has exited and doing so runs into - * problems if the child process "happens to return STILL_ACTIVE(259)" - * as Microsoft's Platform SDK puts it. - */ - if (!GetExitCodeProcess (child_watch_source->pid, &child_status)) - { - gchar *emsg = g_win32_error_message (GetLastError ()); - g_warning (G_STRLOC ": GetExitCodeProcess() failed: %s", emsg); - g_free (emsg); - - child_watch_source->child_status = -1; - } - else - child_watch_source->child_status = child_status; - } #else /* G_OS_WIN32 */ #ifdef HAVE_PIDFD if (child_watch_source->using_pidfd) { child_exited = child_watch_source->poll.revents & G_IO_IN; - - if (child_exited) - { - siginfo_t child_info = { 0, }; - - /* Get the exit status */ - if (waitid (P_PIDFD, child_watch_source->poll.fd, &child_info, WEXITED | WNOHANG) >= 0 && - child_info.si_pid != 0) - { - /* waitid() helpfully provides the wait status in a decomposed - * form which is quite useful. Unfortunately we have to report it - * to the #GChildWatchFunc as a waitpid()-style platform-specific - * wait status, so that the user code in #GChildWatchFunc can then - * call WIFEXITED() (etc.) on it. That means re-composing the - * status information. */ - child_watch_source->child_status = siginfo_t_to_wait_status (&child_info); - child_watch_source->child_exited = TRUE; - } - } - return child_exited; } -#endif /* HAVE_PIDFD */ - child_exited = g_atomic_int_get (&child_watch_source->child_exited); +#endif /* HAVE_PIDFD */ + child_exited = g_atomic_int_get (&child_watch_source->child_maybe_exited); #endif /* G_OS_WIN32 */ return child_exited; @@ -5691,30 +5646,9 @@ dispatch_unix_signals_unlocked (void) for (node = unix_child_watches; node; node = node->next) { GChildWatchSource *source = node->data; - pid_t pid; - if (g_atomic_int_get (&source->child_exited)) - continue; - - do - { - g_assert (source->pid > 0); - - pid = waitpid (source->pid, &source->child_status, WNOHANG); - if (pid > 0) - { - g_atomic_int_set (&source->child_exited, TRUE); - wake_source ((GSource *) source); - } - else if (pid == -1 && errno == ECHILD) - { - g_warning ("GChildWatchSource: Exit status of a child process was requested but ECHILD was received by waitpid(). See the documentation of g_child_watch_source_new() for possible causes."); - source->child_status = 0; - g_atomic_int_set (&source->child_exited, TRUE); - wake_source ((GSource *) source); - } - } - while (pid == -1 && errno == EINTR); + if (g_atomic_int_compare_and_exchange (&source->child_maybe_exited, FALSE, TRUE)) + wake_source ((GSource *) source); } } @@ -5927,9 +5861,106 @@ g_child_watch_dispatch (GSource *source, { GChildWatchSource *child_watch_source; GChildWatchFunc child_watch_callback = (GChildWatchFunc) callback; + int wait_status; child_watch_source = (GChildWatchSource *) source; + /* We only (try to) reap the child process right before dispatching the callback. + * That way, the caller can rely that the process is there until the callback + * is invoked; or, if the caller calls g_source_destroy() without the callback + * being dispatched, the process is still not reaped. */ + +#ifdef G_OS_WIN32 + { + DWORD child_status; + + /* + * Note: We do _not_ check for the special value of STILL_ACTIVE + * since we know that the process has exited and doing so runs into + * problems if the child process "happens to return STILL_ACTIVE(259)" + * as Microsoft's Platform SDK puts it. + */ + if (!GetExitCodeProcess (child_watch_source->pid, &child_status)) + { + gchar *emsg = g_win32_error_message (GetLastError ()); + g_warning (G_STRLOC ": GetExitCodeProcess() failed: %s", emsg); + g_free (emsg); + + /* Unknown error. We got signaled that the process might be exited, + * but now we failed to reap it? Assume the process is gone and proceed. */ + wait_status = -1; + } + else + wait_status = child_status; + } +#else /* G_OS_WIN32 */ + { + gboolean child_exited = FALSE; + + wait_status = -1; + +#ifdef HAVE_PIDFD + if (child_watch_source->using_pidfd) + { + siginfo_t child_info = { + 0, + }; + + /* Get the exit status */ + if (waitid (P_PIDFD, child_watch_source->poll.fd, &child_info, WEXITED | WNOHANG) >= 0 && + child_info.si_pid != 0) + { + /* waitid() helpfully provides the wait status in a decomposed + * form which is quite useful. Unfortunately we have to report it + * to the #GChildWatchFunc as a waitpid()-style platform-specific + * wait status, so that the user code in #GChildWatchFunc can then + * call WIFEXITED() (etc.) on it. That means re-composing the + * status information. */ + wait_status = siginfo_t_to_wait_status (&child_info); + } + else + { + /* Unknown error. We got signaled that the process might be exited, + * but now we failed to reap it? Assume the process is gone and proceed. */ + g_warning (G_STRLOC ": pidfd signaled ready but failed"); + } + child_exited = TRUE; + } +#endif /* HAVE_PIDFD*/ + + if (!child_exited) + { + pid_t pid; + int wstatus; + + waitpid_again: + + /* We must reset the flag before waitpid(). Otherwise, there would be a + * race. */ + g_atomic_int_set (&child_watch_source->child_maybe_exited, FALSE); + + pid = waitpid (child_watch_source->pid, &wstatus, WNOHANG); + + if (pid == 0) + { + /* Not exited yet. Wait longer. */ + return TRUE; + } + + if (pid > 0) + wait_status = wstatus; + else if (errno == ECHILD) + g_warning ("GChildWatchSource: Exit status of a child process was requested but ECHILD was received by waitpid(). See the documentation of g_child_watch_source_new() for possible causes."); + else if (errno == EINTR) + goto waitpid_again; + else + { + /* Unexpected error. Whatever happened, we are done waiting for this child. */ + } + } + } +#endif /* G_OS_WIN32 */ + if (!callback) { g_warning ("Child watch source dispatched without callback. " @@ -5937,7 +5968,7 @@ g_child_watch_dispatch (GSource *source, return FALSE; } - (child_watch_callback) (child_watch_source->pid, child_watch_source->child_status, user_data); + (child_watch_callback) (child_watch_source->pid, wait_status, user_data); /* We never keep a child watch source around as the child is gone */ return FALSE; @@ -5996,6 +6027,14 @@ g_unix_signal_handler (int signum) * mechanism, including `waitpid(pid, ...)` or a second child-watch * source for the same @pid * * the application must not ignore `SIGCHLD` + * * Before 2.78, the application could not send a signal (`kill()`) to the + * watched @pid in a race free manner. Since 2.78, you can do that while the + * associated #GMainContext is acquired. + * * Before 2.78, even after destroying the #GSource, you could not + * be sure that @pid wasn't already reaped. Hence, it was also not + * safe to `kill()` or `waitpid()` on the process ID after the child watch + * source was gone. Destroying the source before it fired made it + * impossible to reliably reap the process. * * If any of those conditions are not met, this and related APIs will * not work correctly. This can often be diagnosed via a GLib warning @@ -6056,19 +6095,19 @@ g_child_watch_source_new (GPid pid) return source; } - else - { - g_debug ("pidfd_open(%" G_PID_FORMAT ") failed with error: %s", - pid, g_strerror (errsv)); - /* Fall through; likely the kernel isn’t new enough to support pidfd_open() */ - } -#endif /* HAVE_PIDFD */ + + g_debug ("pidfd_open(%" G_PID_FORMAT ") failed with error: %s", + pid, g_strerror (errsv)); + /* Fall through; likely the kernel isn’t new enough to support pidfd_open() */ +#endif /* HAVE_PIDFD */ + + /* We can do that without atomic, as the source is not yet added in + * unix_child_watches (which we do next under a lock). */ + child_watch_source->child_maybe_exited = TRUE; G_LOCK (unix_signal_lock); ref_unix_signal_handler_unlocked (SIGCHLD); unix_child_watches = g_slist_prepend (unix_child_watches, child_watch_source); - if (waitpid (pid, &child_watch_source->child_status, WNOHANG) > 0) - child_watch_source->child_exited = TRUE; G_UNLOCK (unix_signal_lock); #endif /* !G_OS_WIN32 */ diff --git a/glib/tests/unix.c b/glib/tests/unix.c index 0d922367f..90345eda0 100644 --- a/glib/tests/unix.c +++ b/glib/tests/unix.c @@ -456,12 +456,6 @@ test_child_wait (void) g_assert_cmpint (errsv, ==, ECHILD); g_assert_cmpint (pid2, <, 0); } - else if (errsv == ECHILD) - { - /* FIXME: This is a bug. We didn't get the callback from the child - * watcher, but still the child is already reaped. */ - g_assert_cmpint (pid2, <, 0); - } else { g_assert_cmpint (errsv, ==, 0); From d5d6ef8f1b004159db9204b549f144ff0cedbb72 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Tue, 28 Mar 2023 19:53:02 +0200 Subject: [PATCH 7/8] gmain: drop redundant using_pidfd field from GChildWatchSource It's redundant, which leads to impossible code like: if (child_watch_source->using_pidfd) { if (child_watch_source->poll.fd >= 0) close (child_watch_source->poll.fd); --- glib/gmain.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/glib/gmain.c b/glib/gmain.c index 167521883..7272d8485 100644 --- a/glib/gmain.c +++ b/glib/gmain.c @@ -374,11 +374,11 @@ struct _GChildWatchSource { GSource source; GPid pid; - /* @poll is always used on Windows, and used on Unix iff @using_pidfd is set: */ + /* @poll is always used on Windows. + * On Unix, poll.fd will be negative if PIDFD is unavailable. */ GPollFD poll; #ifndef G_OS_WIN32 gboolean child_maybe_exited; /* (atomic) */ - gboolean using_pidfd; #endif /* G_OS_WIN32 */ }; @@ -5514,7 +5514,10 @@ g_child_watch_prepare (GSource *source, child_watch_source = (GChildWatchSource *) source; - return !child_watch_source->using_pidfd && g_atomic_int_get (&child_watch_source->child_maybe_exited); + if (child_watch_source->poll.fd >= 0) + return FALSE; + + return g_atomic_int_get (&child_watch_source->child_maybe_exited); } #endif /* G_OS_WIN32 */ } @@ -5531,7 +5534,7 @@ g_child_watch_check (GSource *source) child_exited = child_watch_source->poll.revents & G_IO_IN; #else /* G_OS_WIN32 */ #ifdef HAVE_PIDFD - if (child_watch_source->using_pidfd) + if (child_watch_source->poll.fd >= 0) { child_exited = child_watch_source->poll.revents & G_IO_IN; return child_exited; @@ -5549,10 +5552,9 @@ g_child_watch_finalize (GSource *source) #ifndef G_OS_WIN32 GChildWatchSource *child_watch_source = (GChildWatchSource *) source; - if (child_watch_source->using_pidfd) + if (child_watch_source->poll.fd >= 0) { - if (child_watch_source->poll.fd >= 0) - close (child_watch_source->poll.fd); + close (child_watch_source->poll.fd); return; } @@ -5900,7 +5902,7 @@ g_child_watch_dispatch (GSource *source, wait_status = -1; #ifdef HAVE_PIDFD - if (child_watch_source->using_pidfd) + if (child_watch_source->poll.fd >= 0) { siginfo_t child_info = { 0, @@ -6085,17 +6087,15 @@ g_child_watch_source_new (GPid pid) * better than SIGCHLD. */ child_watch_source->poll.fd = (int) syscall (SYS_pidfd_open, pid, 0); - errsv = errno; if (child_watch_source->poll.fd >= 0) { - child_watch_source->using_pidfd = TRUE; child_watch_source->poll.events = G_IO_IN; g_source_add_poll (source, &child_watch_source->poll); - return source; } + errsv = errno; g_debug ("pidfd_open(%" G_PID_FORMAT ") failed with error: %s", pid, g_strerror (errsv)); /* Fall through; likely the kernel isn’t new enough to support pidfd_open() */ @@ -6104,6 +6104,7 @@ g_child_watch_source_new (GPid pid) /* We can do that without atomic, as the source is not yet added in * unix_child_watches (which we do next under a lock). */ child_watch_source->child_maybe_exited = TRUE; + child_watch_source->poll.fd = -1; G_LOCK (unix_signal_lock); ref_unix_signal_handler_unlocked (SIGCHLD); From 3694dac9832f6839500b9c4b48611432ac83eb9b Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Wed, 29 Mar 2023 07:51:26 +0200 Subject: [PATCH 8/8] gmain: ensure boolean value in g_child_watch_check() is strictly 0 or 1 No problem in practice, but it seems nice to ensure that a gboolean is always either FALSE or TRUE. --- glib/gmain.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/glib/gmain.c b/glib/gmain.c index 7272d8485..1a753753c 100644 --- a/glib/gmain.c +++ b/glib/gmain.c @@ -5531,12 +5531,12 @@ g_child_watch_check (GSource *source) child_watch_source = (GChildWatchSource *) source; #ifdef G_OS_WIN32 - child_exited = child_watch_source->poll.revents & G_IO_IN; + child_exited = !!(child_watch_source->poll.revents & G_IO_IN); #else /* G_OS_WIN32 */ #ifdef HAVE_PIDFD if (child_watch_source->poll.fd >= 0) { - child_exited = child_watch_source->poll.revents & G_IO_IN; + child_exited = !!(child_watch_source->poll.revents & G_IO_IN); return child_exited; } #endif /* HAVE_PIDFD */