Merge branch 'th/datalist-shrink' into 'main'

[th/datalist-shrink] shrink the interal buffer of `GData`

See merge request GNOME/glib!3873
This commit is contained in:
Philip Withnall 2024-02-05 15:10:33 +00:00
commit bcc22d48b0
2 changed files with 230 additions and 107 deletions

View File

@ -40,6 +40,7 @@
#include "gslice.h"
#include "gdatasetprivate.h"
#include "gutilsprivate.h"
#include "ghash.h"
#include "gquark.h"
#include "gstrfuncs.h"
@ -164,18 +165,26 @@ datalist_append (GData **data, GQuark key_id, gpointer new_data, GDestroyNotify
GData *d;
d = *data;
if (!d)
{
d = g_malloc (sizeof (GData));
d = g_malloc (G_STRUCT_OFFSET (GData, data) + 2u * sizeof (GDataElt));
d->len = 0;
d->alloc = 1;
d->alloc = 2u;
*data = d;
reallocated = TRUE;
}
else if (d->len == d->alloc)
{
d->alloc = d->alloc * 2u;
#if G_ENABLE_DEBUG
/* d->alloc is always a power of two. It thus overflows the first time
* when going to (G_MAXUINT32+1), or when requesting 2^31+1 elements.
*
* This is not handled, and we just crash. That's because we track the GData
* in a linear list, which horribly degrades long before we add 2 billion entries.
* Don't ever try to do that. */
g_assert (d->alloc > d->len);
#endif
d = g_realloc (d, G_STRUCT_OFFSET (GData, data) + d->alloc * sizeof (GDataElt));
*data = d;
reallocated = TRUE;
@ -193,6 +202,78 @@ datalist_append (GData **data, GQuark key_id, gpointer new_data, GDestroyNotify
return reallocated;
}
static void
datalist_remove (GData *data, guint32 idx)
{
#if G_ENABLE_DEBUG
g_assert (idx < data->len);
#endif
/* g_data_remove_internal() relies on the fact, that this function removes
* the entry similar to g_array_remove_index_fast(). That is, the entries up
* to @idx are left unchanged, and the last entry at moved to position @idx.
* */
data->len--;
if (idx != data->len)
data->data[idx] = data->data[data->len];
}
static gboolean
datalist_shrink (GData **data, GData **d_to_free)
{
guint32 alloc_by_4;
guint32 v;
GData *d;
d = *data;
alloc_by_4 = d->alloc / 4u;
if (G_LIKELY (d->len > alloc_by_4))
{
/* No shrinking */
return FALSE;
}
if (d->len == 0)
{
/* The list became empty. We drop the allocated memory altogether. */
/* The caller will free the buffer after releasing the lock, to minimize
* the time we hold the lock. Transfer it out. */
*d_to_free = d;
*data = NULL;
return TRUE;
}
/* If the buffer is filled not more than 25%. Shrink to double the current length. */
v = d->len;
if (v != alloc_by_4)
{
/* d->alloc is a power of two. Usually, we remove one element at a
* time, then we will just reach reach a quarter of that.
*
* However, with g_datalist_id_remove_multiple(), len can be smaller
* at once. In that case, find first the next power of two. */
v = g_nearest_pow (v);
}
v *= 2u;
#if G_ENABLE_DEBUG
g_assert (v > d->len);
g_assert (v <= d->alloc / 2u);
#endif
d->alloc = v;
d = g_realloc (d, G_STRUCT_OFFSET (GData, data) + (v * sizeof (GDataElt)));
*d_to_free = NULL;
*data = d;
return TRUE;
}
static GDataElt *
datalist_find (GData *data, GQuark key_id, guint32 *out_idx)
{
@ -234,10 +315,15 @@ g_datalist_clear (GData **datalist)
g_return_if_fail (datalist != NULL);
data = g_datalist_lock_and_get (datalist);
if (!data)
{
g_datalist_unlock (datalist);
return;
}
g_datalist_unlock_and_set (datalist, NULL);
if (data)
{
for (i = 0; i < data->len; i++)
{
if (data->data[i].data && data->data[i].destroy)
@ -246,7 +332,6 @@ g_datalist_clear (GData **datalist)
g_free (data);
}
}
/* HOLDS: g_dataset_global_lock */
static inline GDataset*
@ -348,33 +433,26 @@ g_data_set_internal (GData **datalist,
{
if (data)
{
GData *d_to_free;
old = *data;
if (idx != d->len - 1u)
*data = d->data[d->len - 1u];
d->len--;
/* We don't bother to shrink, but if all data are now gone
* we at least free the memory
*/
if (d->len == 0)
datalist_remove (d, idx);
if (datalist_shrink (&d, &d_to_free))
{
/* datalist may be situated in dataset, so must not be
* unlocked when we free it
*/
g_datalist_unlock_and_set (datalist, NULL);
g_free (d);
g_datalist_unlock_and_set (datalist, d);
/* the dataset destruction *must* be done
* prior to invocation of the data destroy function
*/
if (dataset)
if (dataset && !d)
g_dataset_destroy_internal (dataset);
if (d_to_free)
g_free (d_to_free);
}
else
{
g_datalist_unlock (datalist);
}
/* We found and removed an old value
* the GData struct *must* already be unlinked
@ -449,10 +527,10 @@ g_data_remove_internal (GData **datalist,
GData *d;
GDataElt *old;
GDataElt *old_to_free = NULL;
GDataElt *data;
GDataElt *data_end;
GData *d_to_free;
gsize found_keys;
gboolean free_d = FALSE;
gsize i_keys;
guint32 i_data;
d = g_datalist_lock_and_get (datalist);
@ -478,68 +556,50 @@ g_data_remove_internal (GData **datalist,
old = old_to_free;
}
data = d->data;
data_end = data + d->len;
i_data = 0;
found_keys = 0;
while (data < data_end && found_keys < n_keys)
while (i_data < d->len && found_keys < n_keys)
{
GDataElt *data = &d->data[i_data];
gboolean remove = FALSE;
for (gsize i = 0; i < n_keys; i++)
for (i_keys = 0; i_keys < n_keys; i_keys++)
{
if (data->key == keys[i])
if (data->key == keys[i_keys])
{
old[i] = *data;
/* We must invoke the destroy notifications in the order of @keys.
* Hence, build up the list @old at index @i_keys. */
old[i_keys] = *data;
found_keys++;
remove = TRUE;
break;
}
}
if (remove)
if (!remove)
{
GDataElt *data_last = data_end - 1;
found_keys++;
if (data < data_last)
*data = *data_last;
data_end--;
d->len--;
/* We don't bother to shrink, but if all data are now gone
* we at least free the memory
*/
if (d->len == 0)
{
free_d = TRUE;
break;
}
}
else
{
data++;
}
i_data++;
continue;
}
if (free_d)
datalist_remove (d, i_data);
}
if (found_keys > 0 && datalist_shrink (&d, &d_to_free))
{
g_datalist_unlock_and_set (datalist, NULL);
g_free (d);
g_datalist_unlock_and_set (datalist, d);
if (d_to_free)
g_free (d_to_free);
}
else
g_datalist_unlock (datalist);
if (found_keys > 0)
{
for (gsize i = 0; i < n_keys; i++)
for (i_keys = 0; i_keys < n_keys; i_keys++)
{
/* If keys[i] was not found, then old[i].destroy is NULL.
* Call old[i].destroy() only if keys[i] was found, and
* is associated with a destroy notifier: */
if (old[i].destroy)
old[i].destroy (old[i].data);
if (old[i_keys].destroy)
old[i_keys].destroy (old[i_keys].data);
}
}
@ -736,13 +796,17 @@ g_datalist_id_set_data_full (GData **datalist,
* g_datalist_id_remove_multiple:
* @datalist: a datalist
* @keys: (array length=n_keys): keys to remove
* @n_keys: length of @keys, must be <= 16
* @n_keys: length of @keys.
*
* Removes multiple keys from a datalist.
*
* This is more efficient than calling g_datalist_id_remove_data()
* multiple times in a row.
*
* Before 2.80, @n_keys had to be not larger than 16. Now it can be larger, but
* note that GData does a linear search, so an excessive number of keys will
* perform badly.
*
* Since: 2.74
*/
void
@ -750,8 +814,6 @@ g_datalist_id_remove_multiple (GData **datalist,
GQuark *keys,
gsize n_keys)
{
g_return_if_fail (n_keys <= 16);
g_data_remove_internal (datalist, keys, n_keys);
}
@ -890,25 +952,19 @@ g_datalist_id_update_atomic (GData **datalist,
if (data && !new_data)
{
GData *d_to_free;
/* Remove. The callback indicates to drop the entry.
*
* The old data->data was stolen by callback(). */
d->len--;
/* We don't bother to shrink, but if all data are now gone
* we at least free the memory
*/
if (d->len == 0)
datalist_remove (d, idx);
if (datalist_shrink (&d, &d_to_free))
{
g_datalist_unlock_and_set (datalist, NULL);
g_free (d);
g_datalist_unlock_and_set (datalist, d);
if (d_to_free)
g_free (d_to_free);
to_unlock = FALSE;
}
else
{
if (idx != d->len)
*data = d->data[d->len];
}
}
else if (data)
{
@ -1106,10 +1162,9 @@ g_datalist_id_replace_data (GData **datalist,
{
gpointer val = NULL;
GData *d;
GData *new_d = NULL;
GDataElt *data;
gboolean free_d = FALSE;
gboolean set_new_d = FALSE;
GData *d_to_free = NULL;
gboolean set_d = FALSE;
guint32 idx;
g_return_val_if_fail (datalist != NULL, FALSE);
@ -1135,18 +1190,9 @@ g_datalist_id_replace_data (GData **datalist,
}
else
{
if (idx != d->len - 1u)
*data = d->data[d->len - 1u];
d->len--;
/* We don't bother to shrink, but if all data are now gone
* we at least free the memory
*/
if (d->len == 0)
{
set_new_d = TRUE;
free_d = TRUE;
}
datalist_remove (d, idx);
if (datalist_shrink (&d, &d_to_free))
set_d = TRUE;
}
}
}
@ -1155,18 +1201,17 @@ g_datalist_id_replace_data (GData **datalist,
{
if (datalist_append (&d, key_id, newval, destroy))
{
new_d = d;
set_new_d = TRUE;
set_d = TRUE;
}
}
if (set_new_d)
g_datalist_unlock_and_set (datalist, new_d);
if (set_d)
g_datalist_unlock_and_set (datalist, d);
else
g_datalist_unlock (datalist);
if (free_d)
g_free (d);
if (d_to_free)
g_free (d_to_free);
return val == oldval;
}

View File

@ -285,6 +285,83 @@ test_datalist_id_remove_multiple (void)
g_assert_cmpint (destroy_count, ==, 0);
}
static void
test_datalist_id_remove_multiple_resize (void)
{
GQuark *quarks;
GQuark *quarks2;
const guint N = 1000;
const guint PRIME = 1048583u;
guint i;
char sbuf[100];
GData *list = NULL;
guint i_run;
quarks = g_new (GQuark, N);
quarks2 = g_new (GQuark, N);
for (i = 0; i < N; i++)
{
g_snprintf (sbuf, sizeof (sbuf), "%d", i);
quarks[i] = g_quark_from_string (sbuf);
}
for (i = 0; i < N; i++)
g_datalist_id_set_data (&list, quarks[i], GINT_TO_POINTER (i));
/* Now we perform a list of random operations (remove/add quarks). */
for (i_run = 0; TRUE; i_run++)
{
int MODE = ((guint) g_test_rand_int ()) % 4;
guint n;
guint j;
n = ((guint) g_test_rand_int ()) % (N + 1);
j = ((guint) g_test_rand_int ()) % N;
if (i_run > 20)
{
/* After a few runs, we only remove elements, until the list
* is empty. */
if (!list)
break;
MODE = 0;
if (i_run > 30)
n = N;
}
switch (MODE)
{
case 0:
case 1:
case 2:
/* Mode: add or remove a number of random quarks. */
for (i = 0; i < n; i++)
{
j = (j + PRIME) % N;
if (MODE == 0)
g_datalist_id_remove_data (&list, quarks[j]);
else
g_datalist_id_set_data (&list, quarks[j], GINT_TO_POINTER (j));
}
break;
case 3:
/* Mode: remove a list of (random) quarks. */
for (i = 0; i < n; i++)
{
j = (j + PRIME) % N;
quarks2[i] = quarks[j];
}
g_datalist_id_remove_multiple (&list, quarks2, n);
break;
}
}
g_free (quarks);
g_free (quarks2);
}
static void
destroy_func (gpointer data)
{
@ -389,6 +466,7 @@ main (int argc, char *argv[])
g_test_add_func ("/datalist/id", test_datalist_id);
g_test_add_func ("/datalist/recursive-clear", test_datalist_clear);
g_test_add_func ("/datalist/id-remove-multiple", test_datalist_id_remove_multiple);
g_test_add_func ("/datalist/id-remove-multiple/resize", test_datalist_id_remove_multiple_resize);
g_test_add_func ("/datalist/id-remove-multiple-destroy-order",
test_datalist_id_remove_multiple_destroy_order);
g_test_add_func ("/datalist/update-atomic", test_datalist_update_atomic);