gsubprocess: Add UTF-8 variants of communicate()

Over many years of writing code interacting with subprocesses, a pattern
that comes up a lot is to run a child and get its output as UTF-8, to
put inside a JSON document or render in a GtkTextBuffer, etc.

It's very important to validate at the boundaries, and not, say, deep
inside Pango.

We could do this a bit more efficiently if done in a streaming fashion,
but realistically this should be OK for now.
This commit is contained in:
Colin Walters 2013-10-15 15:04:54 +01:00 committed by Ryan Lortie
parent 0e1a3ee345
commit 9318d5a429
3 changed files with 296 additions and 11 deletions

View File

@ -1189,6 +1189,8 @@ typedef struct
gsize stdin_length;
gsize stdin_offset;
gboolean add_nul;
GInputStream *stdin_buf;
GMemoryOutputStream *stdout_buf;
GMemoryOutputStream *stderr_buf;
@ -1224,7 +1226,25 @@ g_subprocess_communicate_made_progress (GObject *source_object,
source == state->stdout_buf ||
source == state->stderr_buf)
{
(void) g_output_stream_splice_finish ((GOutputStream*)source, result, &error);
if (!g_output_stream_splice_finish ((GOutputStream*)source, result, &error))
goto out;
if (source == state->stdout_buf ||
source == state->stderr_buf)
{
/* This is a memory stream, so it can't be cancelled or return
* an error really.
*/
if (state->add_nul)
{
gsize bytes_written;
if (!g_output_stream_write_all (source, "\0", 1, &bytes_written,
NULL, &error))
goto out;
}
if (!g_output_stream_close (source, NULL, &error))
goto out;
}
}
else if (source == subprocess)
{
@ -1233,6 +1253,7 @@ g_subprocess_communicate_made_progress (GObject *source_object,
else
g_assert_not_reached ();
out:
if (error)
{
/* Only report the first error we see.
@ -1286,6 +1307,7 @@ g_subprocess_communicate_state_free (gpointer data)
static CommunicateState *
g_subprocess_communicate_internal (GSubprocess *subprocess,
gboolean add_nul,
GBytes *stdin_buf,
GCancellable *cancellable,
GAsyncReadyCallback callback,
@ -1299,6 +1321,7 @@ g_subprocess_communicate_internal (GSubprocess *subprocess,
g_task_set_task_data (task, state, g_subprocess_communicate_state_free);
state->cancellable = g_cancellable_new ();
state->add_nul = add_nul;
if (cancellable)
{
@ -1323,7 +1346,7 @@ g_subprocess_communicate_internal (GSubprocess *subprocess,
{
state->stdout_buf = (GMemoryOutputStream*)g_memory_output_stream_new_resizable ();
g_output_stream_splice_async ((GOutputStream*)state->stdout_buf, subprocess->stdout_pipe,
G_OUTPUT_STREAM_SPLICE_CLOSE_SOURCE | G_OUTPUT_STREAM_SPLICE_CLOSE_TARGET,
G_OUTPUT_STREAM_SPLICE_CLOSE_SOURCE,
G_PRIORITY_DEFAULT, state->cancellable,
g_subprocess_communicate_made_progress, g_object_ref (task));
state->outstanding_ops++;
@ -1333,7 +1356,7 @@ g_subprocess_communicate_internal (GSubprocess *subprocess,
{
state->stderr_buf = (GMemoryOutputStream*)g_memory_output_stream_new_resizable ();
g_output_stream_splice_async ((GOutputStream*)state->stderr_buf, subprocess->stderr_pipe,
G_OUTPUT_STREAM_SPLICE_CLOSE_SOURCE | G_OUTPUT_STREAM_SPLICE_CLOSE_TARGET,
G_OUTPUT_STREAM_SPLICE_CLOSE_SOURCE,
G_PRIORITY_DEFAULT, state->cancellable,
g_subprocess_communicate_made_progress, g_object_ref (task));
state->outstanding_ops++;
@ -1418,7 +1441,8 @@ g_subprocess_communicate (GSubprocess *subprocess,
g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
g_subprocess_sync_setup ();
g_subprocess_communicate_internal (subprocess, stdin_buf, cancellable, g_subprocess_sync_done, &result);
g_subprocess_communicate_internal (subprocess, FALSE, stdin_buf, cancellable,
g_subprocess_sync_done, &result);
g_subprocess_sync_complete (&result);
success = g_subprocess_communicate_finish (subprocess, result, stdout_buf, stderr_buf, error);
g_object_unref (result);
@ -1448,7 +1472,7 @@ g_subprocess_communicate_async (GSubprocess *subprocess,
g_return_if_fail (stdin_buf == NULL || (subprocess->flags & G_SUBPROCESS_FLAGS_STDIN_PIPE));
g_return_if_fail (cancellable == NULL || G_IS_CANCELLABLE (cancellable));
g_subprocess_communicate_internal (subprocess, stdin_buf, cancellable, callback, user_data);
g_subprocess_communicate_internal (subprocess, FALSE, stdin_buf, cancellable, callback, user_data);
}
/**
@ -1491,3 +1515,150 @@ g_subprocess_communicate_finish (GSubprocess *subprocess,
g_object_unref (result);
return success;
}
/**
 * g_subprocess_communicate_utf8:
 * @subprocess: a #GSubprocess
 * @stdin_buf: data to send to the stdin of the subprocess, or %NULL
 * @cancellable: a #GCancellable, or %NULL
 * @stdout_buf: (out): data read from the subprocess stdout
 * @stderr_buf: (out): data read from the subprocess stderr
 * @error: a pointer to a %NULL #GError pointer, or %NULL
 *
 * Like g_subprocess_communicate(), but validates the output of the
 * process as UTF-8, and returns it as a regular NUL terminated string.
 *
 * Returns: the result of g_subprocess_communicate_utf8_finish() for
 *   the completed operation
 */
gboolean
g_subprocess_communicate_utf8 (GSubprocess   *subprocess,
                               const char    *stdin_buf,
                               GCancellable  *cancellable,
                               char         **stdout_buf,
                               char         **stderr_buf,
                               GError       **error)
{
  GAsyncResult *result = NULL;
  gboolean success;
  GBytes *stdin_bytes;
  gsize stdin_buf_len = 0;

  g_return_val_if_fail (G_IS_SUBPROCESS (subprocess), FALSE);
  g_return_val_if_fail (stdin_buf == NULL || (subprocess->flags & G_SUBPROCESS_FLAGS_STDIN_PIPE), FALSE);
  g_return_val_if_fail (cancellable == NULL || G_IS_CANCELLABLE (cancellable), FALSE);
  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);

  /* @stdin_buf may legitimately be %NULL; strlen (NULL) is undefined
   * behaviour, so only measure the string when there is one.  A %NULL
   * input is forwarded as an empty #GBytes.
   */
  if (stdin_buf != NULL)
    stdin_buf_len = strlen (stdin_buf);
  stdin_bytes = g_bytes_new (stdin_buf, stdin_buf_len);

  /* Run the async machinery to completion on behalf of the caller. */
  g_subprocess_sync_setup ();
  g_subprocess_communicate_internal (subprocess, TRUE, stdin_bytes, cancellable,
                                     g_subprocess_sync_done, &result);
  g_subprocess_sync_complete (&result);
  success = g_subprocess_communicate_utf8_finish (subprocess, result, stdout_buf, stderr_buf, error);
  g_object_unref (result);

  g_bytes_unref (stdin_bytes);
  return success;
}
/**
 * g_subprocess_communicate_utf8_async:
 * @subprocess: Self
 * @stdin_buf: Input data, or %NULL
 * @cancellable: Cancellable
 * @callback: Callback
 * @user_data: User data
 *
 * Asynchronous version of g_subprocess_communicate_utf8().  Complete
 * invocation with g_subprocess_communicate_utf8_finish().
 */
void
g_subprocess_communicate_utf8_async (GSubprocess         *subprocess,
                                     const char          *stdin_buf,
                                     GCancellable        *cancellable,
                                     GAsyncReadyCallback  callback,
                                     gpointer             user_data)
{
  GBytes *stdin_bytes;
  gsize stdin_buf_len = 0;

  g_return_if_fail (G_IS_SUBPROCESS (subprocess));
  g_return_if_fail (stdin_buf == NULL || (subprocess->flags & G_SUBPROCESS_FLAGS_STDIN_PIPE));
  g_return_if_fail (cancellable == NULL || G_IS_CANCELLABLE (cancellable));

  /* The precondition above accepts a %NULL @stdin_buf, so it must not
   * be handed to strlen().  A %NULL input becomes an empty #GBytes.
   */
  if (stdin_buf != NULL)
    stdin_buf_len = strlen (stdin_buf);
  stdin_bytes = g_bytes_new (stdin_buf, stdin_buf_len);

  /* TRUE asks the internal helper to NUL-terminate the captured output. */
  g_subprocess_communicate_internal (subprocess, TRUE, stdin_bytes, cancellable, callback, user_data);

  g_bytes_unref (stdin_bytes);
}
/* Moves the captured data out of @buffer into @return_location and
 * checks that it is valid UTF-8.
 *
 * @stream_name: label ("stdout"/"stderr") used in the error message
 * @return_location: where to store the stolen string, or NULL to skip
 *   this stream entirely
 * @buffer: memory stream holding the child's output, or NULL if that
 *   stream was not captured (then *@return_location is set to NULL)
 * @error: return location for a G_IO_ERROR_FAILED error
 *
 * Returns TRUE on success.  On invalid UTF-8 the stolen data is freed,
 * *@return_location is reset to NULL and FALSE is returned.
 */
static gboolean
communicate_result_validate_utf8 (const char            *stream_name,
                                  char                 **return_location,
                                  GMemoryOutputStream   *buffer,
                                  GError               **error)
{
  if (return_location == NULL)
    return TRUE;

  if (buffer)
    {
      const char *end;

      *return_location = g_memory_output_stream_steal_data (buffer);
      if (!g_utf8_validate (*return_location, -1, &end))
        {
          /* Work out the offset while the buffer is still alive, then
           * release it and clear the out pointer so the caller never
           * sees a dangling address.
           */
          unsigned long bad_offset = (unsigned long) (end - *return_location);

          g_free (*return_location);
          *return_location = NULL;
          g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
                       "Invalid UTF-8 in child %s at offset %lu",
                       stream_name,
                       bad_offset);
          return FALSE;
        }
    }
  else
    *return_location = NULL;

  return TRUE;
}
/**
 * g_subprocess_communicate_utf8_finish:
 * @subprocess: Self
 * @result: Result
 * @stdout_buf: (out): Return location for stdout data; free with g_free()
 * @stderr_buf: (out): Return location for stderr data; free with g_free()
 * @error: Error
 *
 * Complete an invocation of g_subprocess_communicate_utf8_async().
 *
 * If the operation failed, or either requested stream is not valid
 * UTF-8, %FALSE is returned, @error is set and both @stdout_buf and
 * @stderr_buf (when given) are set to %NULL.
 *
 * Returns: %TRUE on success, %FALSE otherwise
 */
gboolean
g_subprocess_communicate_utf8_finish (GSubprocess   *subprocess,
                                      GAsyncResult  *result,
                                      char         **stdout_buf,
                                      char         **stderr_buf,
                                      GError       **error)
{
  gboolean ret = FALSE;
  CommunicateState *state;

  g_return_val_if_fail (G_IS_SUBPROCESS (subprocess), FALSE);
  g_return_val_if_fail (g_task_is_valid (result, subprocess), FALSE);
  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);

  /* Give the out parameters a defined value on every exit path. */
  if (stdout_buf != NULL)
    *stdout_buf = NULL;
  if (stderr_buf != NULL)
    *stderr_buf = NULL;

  g_object_ref (result);

  state = g_task_get_task_data ((GTask*)result);
  if (!g_task_propagate_boolean ((GTask*)result, error))
    goto out;

  /* TODO - validate UTF-8 while streaming, rather than all at once.
   */
  if (!communicate_result_validate_utf8 ("stdout", stdout_buf,
                                         state->stdout_buf,
                                         error))
    {
      /* The validator frees the rejected data itself; only make sure
       * no stale address is handed back.
       */
      if (stdout_buf != NULL)
        *stdout_buf = NULL;
      goto out;
    }
  if (!communicate_result_validate_utf8 ("stderr", stderr_buf,
                                         state->stderr_buf,
                                         error))
    {
      if (stderr_buf != NULL)
        *stderr_buf = NULL;
      /* stdout validated fine and was already stored for the caller;
       * release it so a FALSE return does not leak it.
       */
      if (stdout_buf != NULL)
        {
          g_free (*stdout_buf);
          *stdout_buf = NULL;
        }
      goto out;
    }

  ret = TRUE;
 out:
  g_object_unref (result);
  return ret;
}

View File

@ -143,6 +143,27 @@ gboolean g_subprocess_communicate_finish (GSubprocess *s
GBytes **stderr_buf,
GError **error);
/* Synchronous communicate variant that takes a NUL-terminated string as
 * stdin data and returns the child's stdout/stderr as UTF-8-validated,
 * NUL-terminated strings. */
GLIB_AVAILABLE_IN_2_40
gboolean g_subprocess_communicate_utf8 (GSubprocess *subprocess,
const char *stdin_buf,
GCancellable *cancellable,
char **stdout_buf,
char **stderr_buf,
GError **error);
/* Asynchronous form of g_subprocess_communicate_utf8(); complete it with
 * g_subprocess_communicate_utf8_finish() from @callback. */
GLIB_AVAILABLE_IN_2_40
void g_subprocess_communicate_utf8_async (GSubprocess *subprocess,
const char *stdin_buf,
GCancellable *cancellable,
GAsyncReadyCallback callback,
gpointer user_data);
/* Completes g_subprocess_communicate_utf8_async(): validates the captured
 * output as UTF-8 and hands it back through @stdout_buf / @stderr_buf. */
GLIB_AVAILABLE_IN_2_40
gboolean g_subprocess_communicate_utf8_finish (GSubprocess *subprocess,
GAsyncResult *result,
char **stdout_buf,
char **stderr_buf,
GError **error);
G_END_DECLS
#endif /* __G_SUBPROCESS_H__ */

View File

@ -546,6 +546,8 @@ test_multi_1 (void)
}
/* State shared between an async communicate test and its completion
 * callback, on_communicate_complete(). */
typedef struct {
/* TRUE: the callback finishes with g_subprocess_communicate_utf8_finish()
 * instead of g_subprocess_communicate_finish(). */
gboolean is_utf8;
/* TRUE: the callback expects a G_IO_ERROR_FAILED error and returns
 * without inspecting the output. */
gboolean is_invalid_utf8;
/* Set by the test before spinning the main context; cleared by the
 * callback to end the wait loop. */
gboolean running;
/* Receives the error (if any) reported by the finish call. */
GError *error;
} TestAsyncCommunicateData;
@ -556,21 +558,41 @@ on_communicate_complete (GObject *proc,
gpointer user_data)
{
TestAsyncCommunicateData *data = user_data;
GBytes *stdout;
GBytes *stdout = NULL;
char *stdout_str = NULL;
const guint8 *stdout_data;
gsize stdout_len;
data->running = FALSE;
(void) g_subprocess_communicate_finish ((GSubprocess*)proc, result,
&stdout, NULL, &data->error);
if (data->is_utf8)
(void) g_subprocess_communicate_utf8_finish ((GSubprocess*)proc, result,
&stdout_str, NULL, &data->error);
else
(void) g_subprocess_communicate_finish ((GSubprocess*)proc, result,
&stdout, NULL, &data->error);
if (data->is_invalid_utf8)
{
g_assert_error (data->error, G_IO_ERROR, G_IO_ERROR_FAILED);
return;
}
g_assert_no_error (data->error);
stdout_data = g_bytes_get_data (stdout, &stdout_len);
if (!data->is_utf8)
{
stdout_data = g_bytes_get_data (stdout, &stdout_len);
}
else
{
stdout_data = (guint8*)stdout_str;
stdout_len = strlen (stdout_str);
}
g_assert_cmpint (stdout_len, ==, 11);
g_assert (memcmp (stdout_data, "hello world", 11) == 0);
g_bytes_unref (stdout);
if (stdout)
g_bytes_unref (stdout);
g_free (stdout_str);
}
static void
@ -583,6 +605,7 @@ test_communicate (void)
GSubprocess *proc;
GCancellable *cancellable = NULL;
GBytes *input;
const char *hellostring;
args = get_test_subprocess_args ("cat", NULL);
proc = g_subprocess_newv ((const gchar* const*)args->pdata,
@ -591,7 +614,8 @@ test_communicate (void)
g_assert_no_error (local_error);
g_ptr_array_free (args, TRUE);
input = g_bytes_new_static ("hello world", strlen ("hello world"));
hellostring = "hello world";
input = g_bytes_new_static (hellostring, strlen (hellostring));
data.error = local_error;
g_subprocess_communicate_async (proc, input,
@ -608,6 +632,73 @@ test_communicate (void)
g_object_unref (proc);
}
/* Feeds "hello world" to a "cat" child through the async UTF-8
 * communicate API and spins the default main context until
 * on_communicate_complete() reports completion; the callback checks
 * the echoed text.
 */
static void
test_communicate_utf8 (void)
{
  TestAsyncCommunicateData data = { 0, };
  GError *local_error = NULL;
  GPtrArray *args = get_test_subprocess_args ("cat", NULL);
  GSubprocess *proc;

  proc = g_subprocess_newv ((const gchar* const*)args->pdata,
                            G_SUBPROCESS_FLAGS_STDIN_PIPE | G_SUBPROCESS_FLAGS_STDOUT_PIPE,
                            &local_error);
  g_assert_no_error (local_error);
  g_ptr_array_free (args, TRUE);

  data.is_utf8 = TRUE;
  data.error = local_error;
  g_subprocess_communicate_utf8_async (proc, "hello world",
                                       NULL,
                                       on_communicate_complete,
                                       &data);

  /* Mark the operation as pending, then iterate until the callback
   * clears the flag.
   */
  for (data.running = TRUE; data.running; )
    g_main_context_iteration (NULL, TRUE);

  g_assert_no_error (local_error);

  g_object_unref (proc);
}
static void
test_communicate_utf8_invalid (void)
{
GError *local_error = NULL;
GError **error = &local_error;
GPtrArray *args;
TestAsyncCommunicateData data = { 0, };
GSubprocess *proc;
GCancellable *cancellable = NULL;
args = get_test_subprocess_args ("cat", NULL);
proc = g_subprocess_newv ((const gchar* const*)args->pdata,
G_SUBPROCESS_FLAGS_STDIN_PIPE | G_SUBPROCESS_FLAGS_STDOUT_PIPE,
error);
g_assert_no_error (local_error);
g_ptr_array_free (args, TRUE);
data.error = local_error;
data.is_utf8 = TRUE;
data.is_invalid_utf8 = TRUE;
g_subprocess_communicate_utf8_async (proc, "\xFF\xFF",
cancellable,
on_communicate_complete,
&data);
data.running = TRUE;
while (data.running)
g_main_context_iteration (NULL, TRUE);
g_assert_no_error (local_error);
g_object_unref (proc);
}
static gboolean
send_terminate (gpointer user_data)
{
@ -905,6 +996,8 @@ main (int argc, char **argv)
g_test_add_func ("/gsubprocess/cat-eof", test_cat_eof);
g_test_add_func ("/gsubprocess/multi1", test_multi_1);
g_test_add_func ("/gsubprocess/communicate", test_communicate);
g_test_add_func ("/gsubprocess/communicate-utf8", test_communicate_utf8);
g_test_add_func ("/gsubprocess/communicate-utf8-invalid", test_communicate_utf8_invalid);
g_test_add_func ("/gsubprocess/terminate", test_terminate);
#ifdef G_OS_UNIX
g_test_add_func ("/gsubprocess/stdout-file", test_stdout_file);