Merge branch 'th/datalist-shrink' into 'main'

[th/datalist-shrink] shrink the interal buffer of `GData`

See merge request GNOME/glib!3873
This commit is contained in:
Philip Withnall 2024-02-05 15:10:33 +00:00
commit bcc22d48b0
2 changed files with 230 additions and 107 deletions

View File

@ -40,6 +40,7 @@
#include "gslice.h" #include "gslice.h"
#include "gdatasetprivate.h" #include "gdatasetprivate.h"
#include "gutilsprivate.h"
#include "ghash.h" #include "ghash.h"
#include "gquark.h" #include "gquark.h"
#include "gstrfuncs.h" #include "gstrfuncs.h"
@ -164,18 +165,26 @@ datalist_append (GData **data, GQuark key_id, gpointer new_data, GDestroyNotify
GData *d; GData *d;
d = *data; d = *data;
if (!d) if (!d)
{ {
d = g_malloc (sizeof (GData)); d = g_malloc (G_STRUCT_OFFSET (GData, data) + 2u * sizeof (GDataElt));
d->len = 0; d->len = 0;
d->alloc = 1; d->alloc = 2u;
*data = d; *data = d;
reallocated = TRUE; reallocated = TRUE;
} }
else if (d->len == d->alloc) else if (d->len == d->alloc)
{ {
d->alloc = d->alloc * 2u; d->alloc = d->alloc * 2u;
#if G_ENABLE_DEBUG
/* d->alloc is always a power of two. It thus overflows the first time
* when going to (G_MAXUINT32+1), or when requesting 2^31+1 elements.
*
* This is not handled, and we just crash. That's because we track the GData
* in a linear list, which horribly degrades long before we add 2 billion entries.
* Don't ever try to do that. */
g_assert (d->alloc > d->len);
#endif
d = g_realloc (d, G_STRUCT_OFFSET (GData, data) + d->alloc * sizeof (GDataElt)); d = g_realloc (d, G_STRUCT_OFFSET (GData, data) + d->alloc * sizeof (GDataElt));
*data = d; *data = d;
reallocated = TRUE; reallocated = TRUE;
@ -193,6 +202,78 @@ datalist_append (GData **data, GQuark key_id, gpointer new_data, GDestroyNotify
return reallocated; return reallocated;
} }
static void
datalist_remove (GData *data, guint32 idx)
{
#if G_ENABLE_DEBUG
g_assert (idx < data->len);
#endif
/* g_data_remove_internal() relies on the fact, that this function removes
* the entry similar to g_array_remove_index_fast(). That is, the entries up
* to @idx are left unchanged, and the last entry at moved to position @idx.
* */
data->len--;
if (idx != data->len)
data->data[idx] = data->data[data->len];
}
static gboolean
datalist_shrink (GData **data, GData **d_to_free)
{
guint32 alloc_by_4;
guint32 v;
GData *d;
d = *data;
alloc_by_4 = d->alloc / 4u;
if (G_LIKELY (d->len > alloc_by_4))
{
/* No shrinking */
return FALSE;
}
if (d->len == 0)
{
/* The list became empty. We drop the allocated memory altogether. */
/* The caller will free the buffer after releasing the lock, to minimize
* the time we hold the lock. Transfer it out. */
*d_to_free = d;
*data = NULL;
return TRUE;
}
/* If the buffer is filled not more than 25%. Shrink to double the current length. */
v = d->len;
if (v != alloc_by_4)
{
/* d->alloc is a power of two. Usually, we remove one element at a
* time, then we will just reach reach a quarter of that.
*
* However, with g_datalist_id_remove_multiple(), len can be smaller
* at once. In that case, find first the next power of two. */
v = g_nearest_pow (v);
}
v *= 2u;
#if G_ENABLE_DEBUG
g_assert (v > d->len);
g_assert (v <= d->alloc / 2u);
#endif
d->alloc = v;
d = g_realloc (d, G_STRUCT_OFFSET (GData, data) + (v * sizeof (GDataElt)));
*d_to_free = NULL;
*data = d;
return TRUE;
}
static GDataElt * static GDataElt *
datalist_find (GData *data, GQuark key_id, guint32 *out_idx) datalist_find (GData *data, GQuark key_id, guint32 *out_idx)
{ {
@ -234,10 +315,15 @@ g_datalist_clear (GData **datalist)
g_return_if_fail (datalist != NULL); g_return_if_fail (datalist != NULL);
data = g_datalist_lock_and_get (datalist); data = g_datalist_lock_and_get (datalist);
if (!data)
{
g_datalist_unlock (datalist);
return;
}
g_datalist_unlock_and_set (datalist, NULL); g_datalist_unlock_and_set (datalist, NULL);
if (data)
{
for (i = 0; i < data->len; i++) for (i = 0; i < data->len; i++)
{ {
if (data->data[i].data && data->data[i].destroy) if (data->data[i].data && data->data[i].destroy)
@ -246,7 +332,6 @@ g_datalist_clear (GData **datalist)
g_free (data); g_free (data);
} }
}
/* HOLDS: g_dataset_global_lock */ /* HOLDS: g_dataset_global_lock */
static inline GDataset* static inline GDataset*
@ -348,33 +433,26 @@ g_data_set_internal (GData **datalist,
{ {
if (data) if (data)
{ {
GData *d_to_free;
old = *data; old = *data;
if (idx != d->len - 1u)
*data = d->data[d->len - 1u];
d->len--;
/* We don't bother to shrink, but if all data are now gone datalist_remove (d, idx);
* we at least free the memory if (datalist_shrink (&d, &d_to_free))
*/
if (d->len == 0)
{ {
/* datalist may be situated in dataset, so must not be g_datalist_unlock_and_set (datalist, d);
* unlocked when we free it
*/
g_datalist_unlock_and_set (datalist, NULL);
g_free (d);
/* the dataset destruction *must* be done /* the dataset destruction *must* be done
* prior to invocation of the data destroy function * prior to invocation of the data destroy function
*/ */
if (dataset) if (dataset && !d)
g_dataset_destroy_internal (dataset); g_dataset_destroy_internal (dataset);
if (d_to_free)
g_free (d_to_free);
} }
else else
{
g_datalist_unlock (datalist); g_datalist_unlock (datalist);
}
/* We found and removed an old value /* We found and removed an old value
* the GData struct *must* already be unlinked * the GData struct *must* already be unlinked
@ -449,10 +527,10 @@ g_data_remove_internal (GData **datalist,
GData *d; GData *d;
GDataElt *old; GDataElt *old;
GDataElt *old_to_free = NULL; GDataElt *old_to_free = NULL;
GDataElt *data; GData *d_to_free;
GDataElt *data_end;
gsize found_keys; gsize found_keys;
gboolean free_d = FALSE; gsize i_keys;
guint32 i_data;
d = g_datalist_lock_and_get (datalist); d = g_datalist_lock_and_get (datalist);
@ -478,68 +556,50 @@ g_data_remove_internal (GData **datalist,
old = old_to_free; old = old_to_free;
} }
data = d->data; i_data = 0;
data_end = data + d->len;
found_keys = 0; found_keys = 0;
while (i_data < d->len && found_keys < n_keys)
while (data < data_end && found_keys < n_keys)
{ {
GDataElt *data = &d->data[i_data];
gboolean remove = FALSE; gboolean remove = FALSE;
for (gsize i = 0; i < n_keys; i++) for (i_keys = 0; i_keys < n_keys; i_keys++)
{ {
if (data->key == keys[i]) if (data->key == keys[i_keys])
{ {
old[i] = *data; /* We must invoke the destroy notifications in the order of @keys.
* Hence, build up the list @old at index @i_keys. */
old[i_keys] = *data;
found_keys++;
remove = TRUE; remove = TRUE;
break; break;
} }
} }
if (remove) if (!remove)
{ {
GDataElt *data_last = data_end - 1; i_data++;
continue;
found_keys++;
if (data < data_last)
*data = *data_last;
data_end--;
d->len--;
/* We don't bother to shrink, but if all data are now gone
* we at least free the memory
*/
if (d->len == 0)
{
free_d = TRUE;
break;
}
}
else
{
data++;
}
} }
if (free_d) datalist_remove (d, i_data);
}
if (found_keys > 0 && datalist_shrink (&d, &d_to_free))
{ {
g_datalist_unlock_and_set (datalist, NULL); g_datalist_unlock_and_set (datalist, d);
g_free (d); if (d_to_free)
g_free (d_to_free);
} }
else else
g_datalist_unlock (datalist); g_datalist_unlock (datalist);
if (found_keys > 0) if (found_keys > 0)
{ {
for (gsize i = 0; i < n_keys; i++) for (i_keys = 0; i_keys < n_keys; i_keys++)
{ {
/* If keys[i] was not found, then old[i].destroy is NULL. if (old[i_keys].destroy)
* Call old[i].destroy() only if keys[i] was found, and old[i_keys].destroy (old[i_keys].data);
* is associated with a destroy notifier: */
if (old[i].destroy)
old[i].destroy (old[i].data);
} }
} }
@ -736,13 +796,17 @@ g_datalist_id_set_data_full (GData **datalist,
* g_datalist_id_remove_multiple: * g_datalist_id_remove_multiple:
* @datalist: a datalist * @datalist: a datalist
* @keys: (array length=n_keys): keys to remove * @keys: (array length=n_keys): keys to remove
* @n_keys: length of @keys, must be <= 16 * @n_keys: length of @keys.
* *
* Removes multiple keys from a datalist. * Removes multiple keys from a datalist.
* *
* This is more efficient than calling g_datalist_id_remove_data() * This is more efficient than calling g_datalist_id_remove_data()
* multiple times in a row. * multiple times in a row.
* *
* Before 2.80, @n_keys had to be not larger than 16. Now it can be larger, but
* note that GData does a linear search, so an excessive number of keys will
* perform badly.
*
* Since: 2.74 * Since: 2.74
*/ */
void void
@ -750,8 +814,6 @@ g_datalist_id_remove_multiple (GData **datalist,
GQuark *keys, GQuark *keys,
gsize n_keys) gsize n_keys)
{ {
g_return_if_fail (n_keys <= 16);
g_data_remove_internal (datalist, keys, n_keys); g_data_remove_internal (datalist, keys, n_keys);
} }
@ -890,25 +952,19 @@ g_datalist_id_update_atomic (GData **datalist,
if (data && !new_data) if (data && !new_data)
{ {
GData *d_to_free;
/* Remove. The callback indicates to drop the entry. /* Remove. The callback indicates to drop the entry.
* *
* The old data->data was stolen by callback(). */ * The old data->data was stolen by callback(). */
d->len--; datalist_remove (d, idx);
if (datalist_shrink (&d, &d_to_free))
/* We don't bother to shrink, but if all data are now gone
* we at least free the memory
*/
if (d->len == 0)
{ {
g_datalist_unlock_and_set (datalist, NULL); g_datalist_unlock_and_set (datalist, d);
g_free (d); if (d_to_free)
g_free (d_to_free);
to_unlock = FALSE; to_unlock = FALSE;
} }
else
{
if (idx != d->len)
*data = d->data[d->len];
}
} }
else if (data) else if (data)
{ {
@ -1106,10 +1162,9 @@ g_datalist_id_replace_data (GData **datalist,
{ {
gpointer val = NULL; gpointer val = NULL;
GData *d; GData *d;
GData *new_d = NULL;
GDataElt *data; GDataElt *data;
gboolean free_d = FALSE; GData *d_to_free = NULL;
gboolean set_new_d = FALSE; gboolean set_d = FALSE;
guint32 idx; guint32 idx;
g_return_val_if_fail (datalist != NULL, FALSE); g_return_val_if_fail (datalist != NULL, FALSE);
@ -1135,18 +1190,9 @@ g_datalist_id_replace_data (GData **datalist,
} }
else else
{ {
if (idx != d->len - 1u) datalist_remove (d, idx);
*data = d->data[d->len - 1u]; if (datalist_shrink (&d, &d_to_free))
d->len--; set_d = TRUE;
/* We don't bother to shrink, but if all data are now gone
* we at least free the memory
*/
if (d->len == 0)
{
set_new_d = TRUE;
free_d = TRUE;
}
} }
} }
} }
@ -1155,18 +1201,17 @@ g_datalist_id_replace_data (GData **datalist,
{ {
if (datalist_append (&d, key_id, newval, destroy)) if (datalist_append (&d, key_id, newval, destroy))
{ {
new_d = d; set_d = TRUE;
set_new_d = TRUE;
} }
} }
if (set_new_d) if (set_d)
g_datalist_unlock_and_set (datalist, new_d); g_datalist_unlock_and_set (datalist, d);
else else
g_datalist_unlock (datalist); g_datalist_unlock (datalist);
if (free_d) if (d_to_free)
g_free (d); g_free (d_to_free);
return val == oldval; return val == oldval;
} }

View File

@ -285,6 +285,83 @@ test_datalist_id_remove_multiple (void)
g_assert_cmpint (destroy_count, ==, 0); g_assert_cmpint (destroy_count, ==, 0);
} }
static void
test_datalist_id_remove_multiple_resize (void)
{
GQuark *quarks;
GQuark *quarks2;
const guint N = 1000;
const guint PRIME = 1048583u;
guint i;
char sbuf[100];
GData *list = NULL;
guint i_run;
quarks = g_new (GQuark, N);
quarks2 = g_new (GQuark, N);
for (i = 0; i < N; i++)
{
g_snprintf (sbuf, sizeof (sbuf), "%d", i);
quarks[i] = g_quark_from_string (sbuf);
}
for (i = 0; i < N; i++)
g_datalist_id_set_data (&list, quarks[i], GINT_TO_POINTER (i));
/* Now we perform a list of random operations (remove/add quarks). */
for (i_run = 0; TRUE; i_run++)
{
int MODE = ((guint) g_test_rand_int ()) % 4;
guint n;
guint j;
n = ((guint) g_test_rand_int ()) % (N + 1);
j = ((guint) g_test_rand_int ()) % N;
if (i_run > 20)
{
/* After a few runs, we only remove elements, until the list
* is empty. */
if (!list)
break;
MODE = 0;
if (i_run > 30)
n = N;
}
switch (MODE)
{
case 0:
case 1:
case 2:
/* Mode: add or remove a number of random quarks. */
for (i = 0; i < n; i++)
{
j = (j + PRIME) % N;
if (MODE == 0)
g_datalist_id_remove_data (&list, quarks[j]);
else
g_datalist_id_set_data (&list, quarks[j], GINT_TO_POINTER (j));
}
break;
case 3:
/* Mode: remove a list of (random) quarks. */
for (i = 0; i < n; i++)
{
j = (j + PRIME) % N;
quarks2[i] = quarks[j];
}
g_datalist_id_remove_multiple (&list, quarks2, n);
break;
}
}
g_free (quarks);
g_free (quarks2);
}
static void static void
destroy_func (gpointer data) destroy_func (gpointer data)
{ {
@ -389,6 +466,7 @@ main (int argc, char *argv[])
g_test_add_func ("/datalist/id", test_datalist_id); g_test_add_func ("/datalist/id", test_datalist_id);
g_test_add_func ("/datalist/recursive-clear", test_datalist_clear); g_test_add_func ("/datalist/recursive-clear", test_datalist_clear);
g_test_add_func ("/datalist/id-remove-multiple", test_datalist_id_remove_multiple); g_test_add_func ("/datalist/id-remove-multiple", test_datalist_id_remove_multiple);
g_test_add_func ("/datalist/id-remove-multiple/resize", test_datalist_id_remove_multiple_resize);
g_test_add_func ("/datalist/id-remove-multiple-destroy-order", g_test_add_func ("/datalist/id-remove-multiple-destroy-order",
test_datalist_id_remove_multiple_destroy_order); test_datalist_id_remove_multiple_destroy_order);
g_test_add_func ("/datalist/update-atomic", test_datalist_update_atomic); g_test_add_func ("/datalist/update-atomic", test_datalist_update_atomic);