GDataInputStream: Add _utf8() variants of _read_line

These validate the resulting line and return a conversion error if it is
not valid UTF-8. In practice they will likely be used mostly by bindings,
but they are also useful for C apps that don't want to explode if the
text file they're reading into Pango actually contains invalid UTF-8.

https://bugzilla.gnome.org/show_bug.cgi?id=652758
This commit is contained in:
Colin Walters 2011-06-16 14:00:36 -04:00
parent ff2f46a7f4
commit 28254a38a7
4 changed files with 166 additions and 0 deletions

View File

@ -813,6 +813,49 @@ g_data_input_stream_read_line (GDataInputStream *stream,
return line; return line;
} }
/**
 * g_data_input_stream_read_line_utf8:
 * @stream: a given #GDataInputStream.
 * @length: (out): a #gsize to get the length of the data read in.
 * @cancellable: (allow-none): optional #GCancellable object, %NULL to ignore.
 * @error: #GError for error reporting.
 *
 * Reads a line from the data input stream with
 * g_data_input_stream_read_line() and checks that it is valid UTF-8.
 *
 * If @cancellable is not %NULL, then the operation can be cancelled by
 * triggering the cancellable object from another thread. If the operation
 * was cancelled, the error %G_IO_ERROR_CANCELLED will be returned.
 *
 * Returns: (transfer full): a NUL terminated UTF-8 string with the
 *  line that was read in (without the newlines). Set @length to a
 *  #gsize to get the length of the read line. On an error, it will
 *  return %NULL and @error will be set. If the line is not valid
 *  UTF-8, the line is freed, %NULL is returned and @error is set to
 *  %G_CONVERT_ERROR_ILLEGAL_SEQUENCE in the %G_CONVERT_ERROR domain.
 *  If there's no content to read, it will still return %NULL, but
 *  @error won't be set.
 **/
char *
g_data_input_stream_read_line_utf8 (GDataInputStream  *stream,
                                    gsize             *length,
                                    GCancellable      *cancellable,
                                    GError           **error)
{
  char *line = g_data_input_stream_read_line (stream, length, cancellable, error);

  /* A NULL line (error or nothing to read) is passed through untouched;
   * only an actual line is subject to validation. */
  if (line != NULL && !g_utf8_validate (line, -1, NULL))
    {
      g_set_error_literal (error, G_CONVERT_ERROR,
                           G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
                           _("Invalid byte sequence in conversion input"));
      g_free (line);
      line = NULL;
    }

  return line;
}
static gssize static gssize
scan_for_chars (GDataInputStream *stream, scan_for_chars (GDataInputStream *stream,
gsize *checked_out, gsize *checked_out,
@ -1211,6 +1254,45 @@ g_data_input_stream_read_line_finish (GDataInputStream *stream,
return g_data_input_stream_read_finish (stream, result, length, error); return g_data_input_stream_read_finish (stream, result, length, error);
} }
/**
 * g_data_input_stream_read_line_finish_utf8:
 * @stream: a given #GDataInputStream.
 * @result: the #GAsyncResult that was provided to the callback.
 * @length: (out): a #gsize to get the length of the data read in.
 * @error: #GError for error reporting.
 *
 * Finish an asynchronous call started by
 * g_data_input_stream_read_line_async(), additionally checking that
 * the line that was read is valid UTF-8.
 *
 * Returns: (transfer full): a string with the line that was read in
 *  (without the newlines). Set @length to a #gsize to get the length
 *  of the read line. On an error, it will return %NULL and @error
 *  will be set. For UTF-8 conversion errors, the set error domain is
 *  %G_CONVERT_ERROR. If there's no content to read, it will still
 *  return %NULL, but @error won't be set.
 *
 * Since: 2.30
 */
gchar *
g_data_input_stream_read_line_finish_utf8 (GDataInputStream  *stream,
                                           GAsyncResult      *result,
                                           gsize             *length,
                                           GError           **error)
{
  gchar *res;

  res = g_data_input_stream_read_line_finish (stream, result, length, error);

  /* NULL means either a read error (with @error already set) or no more
   * content. In both cases there is nothing to validate, and @error must
   * not be overwritten with a conversion error. */
  if (!res)
    return NULL;

  if (!g_utf8_validate (res, -1, NULL))
    {
      g_set_error_literal (error, G_CONVERT_ERROR,
                           G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
                           _("Invalid byte sequence in conversion input"));
      g_free (res);
      return NULL;
    }

  return res;
}
/** /**
* g_data_input_stream_read_until_finish: * g_data_input_stream_read_until_finish:
* @stream: a given #GDataInputStream. * @stream: a given #GDataInputStream.

View File

@ -102,6 +102,10 @@ char * g_data_input_stream_read_line (GDataInputStrea
gsize *length, gsize *length,
GCancellable *cancellable, GCancellable *cancellable,
GError **error); GError **error);
char * g_data_input_stream_read_line_utf8 (GDataInputStream *stream,
gsize *length,
GCancellable *cancellable,
GError **error);
void g_data_input_stream_read_line_async (GDataInputStream *stream, void g_data_input_stream_read_line_async (GDataInputStream *stream,
gint io_priority, gint io_priority,
GCancellable *cancellable, GCancellable *cancellable,
@ -111,6 +115,10 @@ char * g_data_input_stream_read_line_finish (GDataInputStrea
GAsyncResult *result, GAsyncResult *result,
gsize *length, gsize *length,
GError **error); GError **error);
char * g_data_input_stream_read_line_finish_utf8(GDataInputStream *stream,
GAsyncResult *result,
gsize *length,
GError **error);
char * g_data_input_stream_read_until (GDataInputStream *stream, char * g_data_input_stream_read_until (GDataInputStream *stream,
const gchar *stop_chars, const gchar *stop_chars,
gsize *length, gsize *length,

View File

@ -180,8 +180,10 @@ g_data_input_stream_read_uint32
g_data_input_stream_read_int64 g_data_input_stream_read_int64
g_data_input_stream_read_uint64 g_data_input_stream_read_uint64
g_data_input_stream_read_line g_data_input_stream_read_line
g_data_input_stream_read_line_utf8
g_data_input_stream_read_line_async g_data_input_stream_read_line_async
g_data_input_stream_read_line_finish g_data_input_stream_read_line_finish
g_data_input_stream_read_line_finish_utf8
g_data_input_stream_read_until g_data_input_stream_read_until
g_data_input_stream_read_until_async g_data_input_stream_read_until_async
g_data_input_stream_read_until_finish g_data_input_stream_read_until_finish

View File

@ -150,6 +150,78 @@ test_read_lines_any (void)
test_read_lines (G_DATA_STREAM_NEWLINE_TYPE_ANY); test_read_lines (G_DATA_STREAM_NEWLINE_TYPE_ANY);
} }
/* Feeds three LF-terminated lines (one containing a multi-byte UTF-8
 * character) through g_data_input_stream_read_line_utf8() and checks
 * that every read succeeds without an error and that exactly three
 * lines come back before the function returns NULL. */
static void
test_read_lines_LF_valid_utf8 (void)
{
  GInputStream *base_stream = g_memory_input_stream_new ();
  GInputStream *stream = G_INPUT_STREAM (g_data_input_stream_new (base_stream));
  GError *error = NULL;
  guint n_lines = 0;

  g_memory_input_stream_add_data (G_MEMORY_INPUT_STREAM (base_stream),
                                  "foo\nthis is valid UTF-8 ☺!\nbar\n", -1, NULL);

  /* Read until the stream reports that there is nothing left. */
  for (;;)
    {
      gsize length = -1;
      char *line;

      line = g_data_input_stream_read_line_utf8 (G_DATA_INPUT_STREAM (stream),
                                                 &length, NULL, &error);
      g_assert_no_error (error);

      if (line == NULL)
        break;

      g_free (line);
      n_lines++;
    }

  g_assert_cmpint (n_lines, ==, 3);

  g_object_unref (base_stream);
  g_object_unref (stream);
}
static void
test_read_lines_LF_invalid_utf8 (void)
{
GInputStream *stream;
GInputStream *base_stream;
GError *error = NULL;
char *line;
guint n_lines = 0;
base_stream = g_memory_input_stream_new ();
stream = G_INPUT_STREAM (g_data_input_stream_new (base_stream));
g_memory_input_stream_add_data (G_MEMORY_INPUT_STREAM (base_stream),
"foo\nthis is not valid UTF-8 \xE5 =(\nbar\n", -1, NULL);
/* Test read line */
error = NULL;
while (TRUE)
{
gsize length = -1;
line = g_data_input_stream_read_line_utf8 (G_DATA_INPUT_STREAM (stream), &length, NULL, &error);
if (n_lines == 0)
g_assert_no_error (error);
else
{
g_assert (error != NULL);
g_clear_error (&error);
g_free (line);
break;
}
n_lines++;
g_free (line);
}
g_assert_cmpint (n_lines, ==, 1);
g_object_unref (base_stream);
g_object_unref (stream);
}
static void static void
test_read_until (void) test_read_until (void)
{ {
@ -417,6 +489,8 @@ main (int argc,
g_test_add_func ("/data-input-stream/basic", test_basic); g_test_add_func ("/data-input-stream/basic", test_basic);
g_test_add_func ("/data-input-stream/read-lines-LF", test_read_lines_LF); g_test_add_func ("/data-input-stream/read-lines-LF", test_read_lines_LF);
g_test_add_func ("/data-input-stream/read-lines-LF-valid-utf8", test_read_lines_LF_valid_utf8);
g_test_add_func ("/data-input-stream/read-lines-LF-invalid-utf8", test_read_lines_LF_invalid_utf8);
g_test_add_func ("/data-input-stream/read-lines-CR", test_read_lines_CR); g_test_add_func ("/data-input-stream/read-lines-CR", test_read_lines_CR);
g_test_add_func ("/data-input-stream/read-lines-CR-LF", test_read_lines_CR_LF); g_test_add_func ("/data-input-stream/read-lines-CR-LF", test_read_lines_CR_LF);
g_test_add_func ("/data-input-stream/read-lines-any", test_read_lines_any); g_test_add_func ("/data-input-stream/read-lines-any", test_read_lines_any);