From 8d78fa7887d035cd19df40d18bf7a4299d0e2d8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20=C3=85dahl?= Date: Fri, 19 May 2023 12:20:14 +0200 Subject: [PATCH 1/2] main: Don't treat si_pid from pidfd as child exiting We might repeatedly get si_pid == 0 for a child that hasn't exited, meaning we won't get a correct exit status. This seems to happen when the glib application tracks a ptrace():ed child process; the correct exit status of the process using e.g. a BPF program, where one can observe that glib appears to get it wrong. Fixes: #3071 --- glib/gmain.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/glib/gmain.c b/glib/gmain.c index 906e947ac..e4378177d 100644 --- a/glib/gmain.c +++ b/glib/gmain.c @@ -5907,24 +5907,34 @@ g_child_watch_dispatch (GSource *source, }; /* Get the exit status */ - if (waitid (P_PIDFD, child_watch_source->poll.fd, &child_info, WEXITED | WNOHANG) >= 0 && - child_info.si_pid != 0) + if (waitid (P_PIDFD, child_watch_source->poll.fd, &child_info, WEXITED | WNOHANG) >= 0) { - /* waitid() helpfully provides the wait status in a decomposed - * form which is quite useful. Unfortunately we have to report it - * to the #GChildWatchFunc as a waitpid()-style platform-specific - * wait status, so that the user code in #GChildWatchFunc can then - * call WIFEXITED() (etc.) on it. That means re-composing the - * status information. */ - wait_status = siginfo_t_to_wait_status (&child_info); + if (child_info.si_pid != 0) + { + /* waitid() helpfully provides the wait status in a decomposed + * form which is quite useful. Unfortunately we have to report it + * to the #GChildWatchFunc as a waitpid()-style platform-specific + * wait status, so that the user code in #GChildWatchFunc can then + * call WIFEXITED() (etc.) on it. That means re-composing the + * status information. */ + wait_status = siginfo_t_to_wait_status (&child_info); + child_exited = TRUE; + } + else + { + g_debug (G_STRLOC ": pidfd signaled but pid %d didn't exit", + child_watch_source->pid); + return TRUE; + } } else { /* Unknown error. We got signaled that the process might be exited, * but now we failed to reap it? Assume the process is gone and proceed. */ - g_warning (G_STRLOC ": pidfd signaled ready but failed"); + g_warning (G_STRLOC ": pidfd signaled ready but failed for pid %d", + child_watch_source->pid); + child_exited = TRUE; } - child_exited = TRUE; } #endif /* HAVE_PIDFD*/ From cf55c31170a5d79beb1119164c6f5ea3c4ea06a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20=C3=85dahl?= Date: Tue, 16 May 2023 12:38:47 +0200 Subject: [PATCH 2/2] gio/tests: Add test case for exit status when being ptraced The test case will fail with the g_assert_false (g_subprocess_get_successful (proc)); assert failing. Without the fix, it'll hit sometimes, but rather unreliably. When running `meson test --repeat 100`, it'll reproduce anywhere between the first or much later, but mostly before the 20th iteration on my system. Helps: #3071 --- gio/tests/gsubprocess-testprog.c | 67 ++++++++++++++++++++ gio/tests/gsubprocess.c | 104 ++++++++++++++++++++++++++++++- 2 files changed, 170 insertions(+), 1 deletion(-) diff --git a/gio/tests/gsubprocess-testprog.c b/gio/tests/gsubprocess-testprog.c index eee759dcd..610b8f3be 100644 --- a/gio/tests/gsubprocess-testprog.c +++ b/gio/tests/gsubprocess-testprog.c @@ -5,6 +5,7 @@ #include #ifdef G_OS_UNIX #include +#include #else #include #endif @@ -241,6 +242,66 @@ printenv_mode (int argc, char **argv) return 0; } +#ifdef G_OS_UNIX +static void +on_sleep_exited (GObject *object, + GAsyncResult *result, + gpointer user_data) +{ + GSubprocess *subprocess = G_SUBPROCESS (object); + gboolean *done = user_data; + GError *local_error = NULL; + gboolean ret; + + ret = g_subprocess_wait_finish (subprocess, result, &local_error); + g_assert_no_error (local_error); + g_assert_true (ret); + + *done = TRUE; + g_main_context_wakeup (NULL); +} + +static int +sleep_and_kill (int argc, char **argv) +{ + GPtrArray *args = NULL; + GSubprocessLauncher *launcher = NULL; + GSubprocess *proc = NULL; + GError *local_error = NULL; + pid_t sleep_pid; + gboolean done = FALSE; + + args = g_ptr_array_new_with_free_func (g_free); + + /* Run sleep "forever" in a shell; this will trigger PTRACE_EVENT_EXEC */ + g_ptr_array_add (args, g_strdup ("sh")); + g_ptr_array_add (args, g_strdup ("-c")); + g_ptr_array_add (args, g_strdup ("sleep infinity")); + g_ptr_array_add (args, NULL); + launcher = g_subprocess_launcher_new (G_SUBPROCESS_FLAGS_NONE); + proc = g_subprocess_launcher_spawnv (launcher, (const gchar **) args->pdata, &local_error); + g_assert_no_error (local_error); + g_assert_nonnull (proc); + + sleep_pid = atoi (g_subprocess_get_identifier (proc)); + + g_subprocess_wait_async (proc, NULL, on_sleep_exited, &done); + + kill (sleep_pid, SIGKILL); + + while (!done) + g_main_context_iteration (NULL, TRUE); + + g_assert_false (g_subprocess_get_successful (proc)); + + g_clear_pointer (&args, g_ptr_array_unref); + g_clear_object (&launcher); + g_clear_object (&proc); + + return EXIT_SUCCESS; +} +#endif + int main (int argc, char **argv) { @@ -267,6 +328,8 @@ main (int argc, char **argv) return 1; } + g_log_writer_default_set_use_stderr (TRUE); + mode = argv[1]; if (strcmp (mode, "noop") == 0) return 0; @@ -297,6 +360,10 @@ main (int argc, char **argv) return cwd_mode (argc, argv); else if (strcmp (mode, "printenv") == 0) return printenv_mode (argc, argv); +#ifdef G_OS_UNIX + else if (strcmp (mode, "sleep-and-kill") == 0) + return sleep_and_kill (argc, argv); +#endif else { g_printerr ("Unknown MODE %s\n", argv[1]); diff --git a/gio/tests/gsubprocess.c b/gio/tests/gsubprocess.c index 30947596e..c7f2ea489 100644 --- a/gio/tests/gsubprocess.c +++ b/gio/tests/gsubprocess.c @@ -2,6 +2,7 @@ #include #ifdef G_OS_UNIX +#include #include #include #include @@ -1989,7 +1990,107 @@ test_fd_conflation_child_err_report_fd (void) do_test_fd_conflation (G_SUBPROCESS_FLAGS_NONE, empty_child_setup, TRUE); } -#endif +/* Handle ptrace events on @main_child, and assert that when it exits, it does + * so with status %EXIT_SUCCESS, rather than signalling. Other than that, this + * just calls %PTRACE_CONT for all trace events. */ +static void +trace_children (pid_t main_child) +{ + int wstatus; + + g_assert_no_errno (waitpid (main_child, &wstatus, 0)); + g_assert_no_errno (ptrace (PTRACE_SETOPTIONS, main_child, NULL, + (PTRACE_O_TRACEFORK | + PTRACE_O_EXITKILL | + PTRACE_O_TRACEVFORK | + PTRACE_O_TRACECLONE | + PTRACE_O_TRACEEXEC))); + g_assert_no_errno (ptrace (PTRACE_CONT, main_child, NULL, 0)); + + while (TRUE) + { + pid_t pid; + int wstatus; + int stop_signum; + int ptrace_event; + + pid = waitpid (-1, &wstatus, 0); + if (pid == -1 && errno == ECHILD) + break; + + g_assert_cmpint (errno, ==, 0); + g_assert_cmpint (pid, >=, 0); + + if (WIFSTOPPED (wstatus)) + stop_signum = WSTOPSIG (wstatus); + else + stop_signum = 0; + + switch (stop_signum) + { + case SIGTRAP: + ptrace_event = (wstatus >> 16) & 0xffff; + switch (ptrace_event) + { + case 0: + g_assert_no_errno (ptrace (PTRACE_CONT, pid, NULL, stop_signum)); + break; + default: + g_assert_no_errno (ptrace (PTRACE_CONT, pid, NULL, 0)); + break; + } + break; + case SIGSTOP: + g_assert_no_errno (ptrace (PTRACE_CONT, pid, NULL, 0)); + break; + default: + if (!WIFEXITED (wstatus) && !WIFSIGNALED (wstatus)) + g_assert_no_errno (ptrace (PTRACE_CONT, pid, NULL, stop_signum)); + break; + } + + if (pid == main_child) + { + g_assert_false (WIFSIGNALED (wstatus)); + if (WIFEXITED (wstatus)) + { + g_assert_cmpint (WEXITSTATUS (wstatus), ==, EXIT_SUCCESS); + break; + } + } + } +} + +static void +test_exit_status_trapped (void) +{ + GPtrArray *args = NULL; + pid_t test_child; + + g_test_summary ("Test that exit status is reported correctly for ptrace()d child processes"); + g_test_bug ("https://gitlab.gnome.org/GNOME/glib/-/merge_requests/3433"); + + /* Call fork() directly here, rather than using #GSubprocess, so that we can + * safely call waitpid() on it ourselves without interfering with the internals + * of #GSubprocess. + * See https://gitlab.gnome.org/GNOME/glib/-/merge_requests/3433#note_1749055 */ + args = get_test_subprocess_args ("sleep-and-kill", NULL); + test_child = fork (); + if (test_child == 0) + { + /* Between fork() and exec() we can only call async-signal-safe functions. */ + if (ptrace (PTRACE_TRACEME, 0, NULL, NULL) < 0) + abort (); + + g_assert_no_errno (execvp (args->pdata[0], (char * const *) args->pdata)); + } + + trace_children (test_child); + + g_clear_pointer (&args, g_ptr_array_unref); +} + +#endif /* G_OS_UNIX */ static void test_launcher_environment (void) @@ -2133,6 +2234,7 @@ main (int argc, char **argv) g_test_add_func ("/gsubprocess/fd-conflation/empty-child-setup", test_fd_conflation_empty_child_setup); g_test_add_func ("/gsubprocess/fd-conflation/inherit-fds", test_fd_conflation_inherit_fds); g_test_add_func ("/gsubprocess/fd-conflation/child-err-report-fd", test_fd_conflation_child_err_report_fd); + g_test_add_func ("/gsubprocess/exit-status/trapped", test_exit_status_trapped); #endif g_test_add_func ("/gsubprocess/launcher-environment", test_launcher_environment);