Merge branch 'wip/otte/large-files' into 'main'

Handle files >4GB in g_file_load_contents()

Closes #3397

See merge request GNOME/glib!3373
This commit is contained in:
Michael Catanzaro 2024-06-17 15:44:37 +00:00
commit 496d8a164c
3 changed files with 206 additions and 28 deletions

View File

@ -8086,7 +8086,8 @@ g_file_load_contents (GFile *file,
GError **error)
{
GFileInputStream *in;
GByteArray *content;
char *data;
gsize size;
gsize pos;
gssize res;
GFileInfo *info;
@ -8098,17 +8099,22 @@ g_file_load_contents (GFile *file,
if (in == NULL)
return FALSE;
content = g_byte_array_new ();
size = GET_CONTENT_BLOCK_SIZE;
data = g_malloc (GET_CONTENT_BLOCK_SIZE);
pos = 0;
g_byte_array_set_size (content, pos + GET_CONTENT_BLOCK_SIZE + 1);
while ((res = g_input_stream_read (G_INPUT_STREAM (in),
content->data + pos,
data + pos,
GET_CONTENT_BLOCK_SIZE,
cancellable, error)) > 0)
{
pos += res;
g_byte_array_set_size (content, pos + GET_CONTENT_BLOCK_SIZE + 1);
if (size - pos < GET_CONTENT_BLOCK_SIZE)
{
g_assert (size <= G_MAXSIZE / 2);
size *= 2;
data = g_realloc (data, size);
}
}
if (etag_out)
@ -8133,17 +8139,19 @@ g_file_load_contents (GFile *file,
if (res < 0)
{
/* error is set already */
g_byte_array_free (content, TRUE);
g_free (data);
return FALSE;
}
if (length)
*length = pos;
/* Zero terminate (we got an extra byte allocated for this */
content->data[pos] = 0;
/* Zero terminate (allocating extra bytes if needed) */
if (pos >= size)
data = g_realloc (data, pos + 1);
data[pos] = 0;
*contents = (char *)g_byte_array_free (content, FALSE);
*contents = g_steal_pointer (&data);
return TRUE;
}
@ -8151,7 +8159,8 @@ g_file_load_contents (GFile *file,
/* State shared by the load_contents_*() helpers and callbacks in this file.
 * NOTE(review): this listing is a rendered diff, so both the GByteArray
 * `content` field and the `data`/`size` pair appear here; presumably `content`
 * is the field being replaced — confirm against the final file. */
typedef struct {
GTask *task;
GFileReadMoreCallback read_more_callback;
GByteArray *content;
char *data; /* read buffer; allocated and grown by load_contents_data_ensure_space() */
gsize size; /* number of bytes currently allocated for data */
gsize pos; /* number of bytes read so far; the next read is placed at data + pos */
char *etag;
} LoadContentsData;
@ -8160,12 +8169,31 @@ typedef struct {
/* Frees a LoadContentsData along with the buffer and etag string it holds.
 * NOTE(review): this listing is a rendered diff, so the GByteArray release and
 * the g_clear_pointer() call both appear; presumably only the latter remains
 * in the final file — confirm. */
static void
load_contents_data_free (LoadContentsData *data)
{
if (data->content)
g_byte_array_free (data->content, TRUE);
/* data->data is NULL here if the buffer was handed to the caller with
 * g_steal_pointer(); g_clear_pointer() skips the free in that case. */
g_clear_pointer (&data->data, g_free);
g_free (data->etag);
g_free (data);
}
/*
 * load_contents_data_ensure_space:
 * @data: load state whose buffer should be grown
 * @space: number of free bytes required after @data->pos
 *
 * Makes sure that at least @space bytes are available in @data->data
 * starting at offset @data->pos.
 *
 * The first allocation is exactly @space bytes. After that the buffer is
 * grown by doubling, repeated until the request fits, so a @space larger
 * than the current buffer is still honoured.
 */
static void
load_contents_data_ensure_space (LoadContentsData *data,
                                 gsize             space)
{
  gsize new_size;

  /* Enough room already: nothing to do. */
  if (data->size - data->pos >= space)
    return;

  if (data->data == NULL)
    {
      data->size = space;
      data->data = g_malloc (space);
      return;
    }

  /* Keep doubling until the free tail is large enough. The assertion
   * guards the multiplication against wrapping around G_MAXSIZE. */
  new_size = data->size;
  do
    {
      g_assert (new_size <= G_MAXSIZE / 2);
      new_size *= 2;
    }
  while (new_size - data->pos < space);

  data->size = new_size;
  data->data = g_realloc (data->data, new_size);
}
static void
load_contents_close_callback (GObject *obj,
GAsyncResult *close_res,
@ -8238,12 +8266,10 @@ load_contents_read_callback (GObject *obj,
{
data->pos += read_size;
g_byte_array_set_size (data->content,
data->pos + GET_CONTENT_BLOCK_SIZE);
load_contents_data_ensure_space (data, GET_CONTENT_BLOCK_SIZE);
if (data->read_more_callback &&
!data->read_more_callback ((char *)data->content->data, data->pos,
!data->read_more_callback (data->data, data->pos,
g_async_result_get_user_data (G_ASYNC_RESULT (data->task))))
g_file_input_stream_query_info_async (G_FILE_INPUT_STREAM (stream),
G_FILE_ATTRIBUTE_ETAG_VALUE,
@ -8253,7 +8279,7 @@ load_contents_read_callback (GObject *obj,
data);
else
g_input_stream_read_async (stream,
data->content->data + data->pos,
data->data + data->pos,
GET_CONTENT_BLOCK_SIZE,
0,
g_task_get_cancellable (data->task),
@ -8276,10 +8302,9 @@ load_contents_open_callback (GObject *obj,
if (stream)
{
g_byte_array_set_size (data->content,
data->pos + GET_CONTENT_BLOCK_SIZE);
load_contents_data_ensure_space (data, GET_CONTENT_BLOCK_SIZE);
g_input_stream_read_async (G_INPUT_STREAM (stream),
data->content->data + data->pos,
data->data + data->pos,
GET_CONTENT_BLOCK_SIZE,
0,
g_task_get_cancellable (data->task),
@ -8329,7 +8354,6 @@ g_file_load_partial_contents_async (GFile *file,
data = g_new0 (LoadContentsData, 1);
data->read_more_callback = read_more_callback;
data->content = g_byte_array_new ();
data->task = g_task_new (file, cancellable, callback, user_data);
g_task_set_source_tag (data->task, g_file_load_partial_contents_async);
@ -8398,11 +8422,10 @@ g_file_load_partial_contents_finish (GFile *file,
}
/* Zero terminate */
g_byte_array_set_size (data->content, data->pos + 1);
data->content->data[data->pos] = 0;
load_contents_data_ensure_space (data, 1);
data->data[data->pos] = 0;
*contents = (char *)g_byte_array_free (data->content, FALSE);
data->content = NULL;
*contents = g_steal_pointer (&data->data);
return TRUE;
}

View File

@ -6,6 +6,7 @@
#include <stdlib.h>
#include <gio/gio.h>
#include <gio/gfiledescriptorbased.h>
#include <glib/gstdio.h>
#ifdef G_OS_UNIX
#include <sys/stat.h>
#endif
@ -2855,7 +2856,8 @@ test_load_bytes (void)
len = strlen ("test_load_bytes");
ret = write (fd, "test_load_bytes", len);
g_assert_cmpint (ret, ==, len);
close (fd);
g_clear_fd (&fd, &error);
g_assert_no_error (error);
file = g_file_new_for_path (filename);
bytes = g_file_load_bytes (file, NULL, NULL, &error);
@ -2898,6 +2900,7 @@ test_load_bytes_async (void)
{
LoadBytesAsyncData data = { 0 };
gchar filename[] = "g_file_load_bytes_XXXXXX";
GError *error = NULL;
int len;
int fd;
int ret;
@ -2907,7 +2910,8 @@ test_load_bytes_async (void)
len = strlen ("test_load_bytes_async");
ret = write (fd, "test_load_bytes_async", len);
g_assert_cmpint (ret, ==, len);
close (fd);
g_clear_fd (&fd, &error);
g_assert_no_error (error);
data.main_loop = g_main_loop_new (NULL, FALSE);
data.file = g_file_new_for_path (filename);
@ -2924,6 +2928,153 @@ test_load_bytes_async (void)
g_main_loop_unref (data.main_loop);
}
/* Size in bytes of the files created by create_testfile_4gb_or_skip().
 * NOTE(review): `(gsize) 1 << 32` is only well-defined when gsize is wider
 * than 32 bits — confirm these tests are never built for 32-bit targets. */
static const gsize testfile_4gb_size = ((gsize) 1 << 32) + (1 << 16); /* 4GB + a bit */
/*
 * create_testfile_4gb_or_skip:
 * @filename: template passed to g_mkstemp(); it is modified in place to
 *   hold the name of the created file
 *
 * Creates a temporary file and extends it to testfile_4gb_size bytes with
 * ftruncate(). The current test is marked as skipped when slow tests are
 * not enabled or when the file cannot be extended; in the latter case the
 * temporary file is removed again.
 *
 * Returns: TRUE if the file exists and has the requested size, FALSE if
 *   the test was skipped
 */
static gboolean
create_testfile_4gb_or_skip (char *filename)
{
  GError *error = NULL;
  int fd;
  int ret;

  /* Reading each 4GB test file takes about 5s on a fast machine, and another 7s
   * to compare its contents once it's been read. That's too slow for a normal
   * test run, and there's no way to speed it up. */
  if (!g_test_slow ())
    {
      g_test_skip ("Skipping slow >4GB file test");
      return FALSE;
    }

  fd = g_mkstemp (filename);
  g_assert_cmpint (fd, !=, -1);

  ret = ftruncate (fd, testfile_4gb_size);
  g_clear_fd (&fd, &error);
  g_assert_no_error (error);

  /* ftruncate() returns 0 on success and -1 on failure, so any non-zero
   * result means the large file could not be created. */
  if (ret != 0)
    {
      g_test_skip ("Could not create testfile >4GB");
      g_assert_no_errno (g_unlink (filename));
      return FALSE;
    }

  return TRUE;
}
/*
 * check_testfile_4gb_contents:
 * @data: buffer holding the loaded file contents
 * @len: number of bytes in @data
 *
 * Asserts that @data is non-NULL, that @len equals testfile_4gb_size, and
 * that every byte of the buffer is zero.
 */
static void
check_testfile_4gb_contents (const char *data,
                             gsize       len)
{
  gsize i;

  g_assert_nonnull (data);
  g_assert_cmpuint (testfile_4gb_size, ==, len);

  /* Stop at the first non-zero byte; reaching the end means all were zero. */
  for (i = 0; i < testfile_4gb_size; i++)
    {
      if (data[i] != 0)
        break;
    }

  /* Both operands are gsize, so use the unsigned comparison macro. */
  g_assert_cmpuint (i, ==, testfile_4gb_size);
}
static void
test_load_contents_4gb (void)
{
char filename[] = "g_file_load_contents_4gb_XXXXXX";
GError *error = NULL;
gboolean result;
char *data;
gsize len;
GFile *file;
if (!create_testfile_4gb_or_skip (filename))
return;
file = g_file_new_for_path (filename);
result = g_file_load_contents (file, NULL, &data, &len, NULL, &error);
g_assert_no_error (error);
g_assert_true (result);
check_testfile_4gb_contents (data, len);
g_file_delete (file, NULL, NULL);
g_free (data);
g_object_unref (file);
}
/*
 * load_contents_4gb_cb:
 * @object: source object of the asynchronous operation (unused)
 * @result: result of the asynchronous operation
 * @user_data: address of a GAsyncResult pointer that must still be NULL
 *
 * Stores a new reference to @result in the location given by @user_data
 * and wakes up the default main context so a waiting loop can re-check it.
 */
static void
load_contents_4gb_cb (GObject      *object,
                      GAsyncResult *result,
                      gpointer      user_data)
{
  GAsyncResult **result_out = user_data;

  /* g_assert_null() rather than g_assert(): the latter can be compiled out
   * with G_DISABLE_ASSERT, which would silently drop this test check. */
  g_assert_null (*result_out);
  *result_out = g_object_ref (result);

  g_main_context_wakeup (NULL);
}
static void
test_load_contents_4gb_async (void)
{
char filename[] = "g_file_load_contents_4gb_async_XXXXXX";
GFile *file;
GAsyncResult *async_result = NULL;
GError *error = NULL;
char *data;
gsize len;
gboolean ret;
if (!create_testfile_4gb_or_skip (filename))
return;
file = g_file_new_for_path (filename);
g_file_load_contents_async (file, NULL, load_contents_4gb_cb, &async_result);
while (async_result == NULL)
g_main_context_iteration (NULL, TRUE);
ret = g_file_load_contents_finish (file, async_result, &data, &len, NULL, &error);
g_assert_no_error (error);
g_assert_true (ret);
check_testfile_4gb_contents (data, len);
g_file_delete (file, NULL, NULL);
g_free (data);
g_object_unref (async_result);
g_object_unref (file);
}
static void
test_load_bytes_4gb (void)
{
char filename[] = "g_file_load_bytes_4gb_XXXXXX";
GError *error = NULL;
GBytes *bytes;
GFile *file;
if (!create_testfile_4gb_or_skip (filename))
return;
file = g_file_new_for_path (filename);
bytes = g_file_load_bytes (file, NULL, NULL, &error);
g_assert_no_error (error);
g_assert_true (bytes);
check_testfile_4gb_contents (g_bytes_get_data (bytes, NULL), g_bytes_get_size (bytes));
g_file_delete (file, NULL, NULL);
g_bytes_unref (bytes);
g_object_unref (file);
}
static void
test_writev_helper (GOutputVector *vectors,
gsize n_vectors,
@ -4221,11 +4372,14 @@ main (int argc, char *argv[])
g_test_add_func ("/file/async-make-symlink", test_async_make_symlink);
g_test_add_func ("/file/copy-preserve-mode", test_copy_preserve_mode);
g_test_add_func ("/file/copy/progress", test_copy_progress);
g_test_add_func ("/file/copy-async-with-closurse", test_copy_async_with_closures);
g_test_add_func ("/file/copy-async-with-closures", test_copy_async_with_closures);
g_test_add_func ("/file/measure", test_measure);
g_test_add_func ("/file/measure-async", test_measure_async);
g_test_add_func ("/file/load-bytes", test_load_bytes);
g_test_add_func ("/file/load-bytes-async", test_load_bytes_async);
g_test_add_func ("/file/load-bytes-4gb", test_load_bytes_4gb);
g_test_add_func ("/file/load-contents-4gb", test_load_contents_4gb);
g_test_add_func ("/file/load-contents-4gb-async", test_load_contents_4gb_async);
g_test_add_func ("/file/writev", test_writev);
g_test_add_func ("/file/writev/no-bytes-written", test_writev_no_bytes_written);
g_test_add_func ("/file/writev/no-vectors", test_writev_no_vectors);

View File

@ -1068,6 +1068,7 @@ g_array_maybe_expand (GRealArray *array,
if (want_len > array->elt_capacity)
{
gsize want_alloc = g_nearest_pow (g_array_elt_len (array, want_len));
g_assert (want_alloc >= g_array_elt_len (array, want_len));
want_alloc = MAX (want_alloc, MIN_ARRAY_SIZE);
array->data = g_realloc (array->data, want_alloc);