From 48a1d8c695594736d166ed7272049cdeae0d57e9 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Fri, 2 Feb 2024 18:52:01 +0100 Subject: [PATCH 1/6] dataset/tests: add test adding many queue data and remove them --- glib/tests/dataset.c | 79 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/glib/tests/dataset.c b/glib/tests/dataset.c index 9226b5132..190b4ff99 100644 --- a/glib/tests/dataset.c +++ b/glib/tests/dataset.c @@ -285,6 +285,84 @@ test_datalist_id_remove_multiple (void) g_assert_cmpint (destroy_count, ==, 0); } +static void +test_datalist_id_remove_multiple_resize (void) +{ + GQuark *quarks; + GQuark *quarks2; + const guint N = 1000; + const guint PRIME = 1048583u; + guint i; + char sbuf[100]; + GData *list = NULL; + guint i_run; + + quarks = g_new (GQuark, N); + quarks2 = g_new (GQuark, N); + + for (i = 0; i < N; i++) + { + g_snprintf (sbuf, sizeof (sbuf), "%d", i); + quarks[i] = g_quark_from_string (sbuf); + } + + for (i = 0; i < N; i++) + g_datalist_id_set_data (&list, quarks[i], GINT_TO_POINTER (i)); + + /* Now we perform a list of random operations (remove/add quarks). */ + for (i_run = 0; TRUE; i_run++) + { + int MODE = ((guint) g_test_rand_int ()) % 4; + guint n; + guint j; + + n = ((guint) g_test_rand_int ()) % (N + 1); + j = ((guint) g_test_rand_int ()) % N; + + if (i_run > 20) + { + /* After a few runs, we only remove elements, until the list + * is empty. */ + if (!list) + break; + MODE = 0; + if (i_run > 30) + n = N; + } + + switch (MODE) + { + case 0: + case 1: + case 2: + /* Mode: add or remove a number of random quarks. */ + for (i = 0; i < n; i++) + { + j = (j + PRIME) % N; + if (MODE == 0) + g_datalist_id_remove_data (&list, quarks[j]); + else + g_datalist_id_set_data (&list, quarks[j], GINT_TO_POINTER (j)); + } + break; + case 3: + /* Mode: remove a list of (random) quarks. */ + n = n % 16; + for (i = 0; i < n; i++) + { + j = (j + PRIME) % N; + quarks2[i] = quarks[j]; + } + + g_datalist_id_remove_multiple (&list, quarks2, n); + break; + } + } + + g_free (quarks); + g_free (quarks2); +} + static void destroy_func (gpointer data) { @@ -389,6 +467,7 @@ main (int argc, char *argv[]) g_test_add_func ("/datalist/id", test_datalist_id); g_test_add_func ("/datalist/recursive-clear", test_datalist_clear); g_test_add_func ("/datalist/id-remove-multiple", test_datalist_id_remove_multiple); + g_test_add_func ("/datalist/id-remove-multiple/resize", test_datalist_id_remove_multiple_resize); g_test_add_func ("/datalist/id-remove-multiple-destroy-order", test_datalist_id_remove_multiple_destroy_order); g_test_add_func ("/datalist/update-atomic", test_datalist_update_atomic); From 759ebf3663ba069c64f4cb04aae8e49f61c1c0ab Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Fri, 2 Feb 2024 19:52:55 +0100 Subject: [PATCH 2/6] gdatalist: remove restriction of number of keys in g_datalist_id_remove_multiple() If too many keys are requested, they temporary buffer is allocated on the heap. There is no problem in principle, to remove more than 16 keys. Well, the problem is that GData tracks entries in a linear list, so performance will degrade when it grows too much. That is a problem, and users should be careful to not add unreasonably many keys. But it's not the task of g_datalist_id_remove_multiple() to decide what is reasonable. This limitation was present from the beginning, in commit 0415bf94127c ('Add g_datalist_id_remove_multiple'). It's no longer necessary since commit eada6be364b4 ('gdataset: cleanup g_data_remove_internal()'). --- glib/gdataset.c | 8 +++++--- glib/tests/dataset.c | 1 - 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/glib/gdataset.c b/glib/gdataset.c index cebca8fb6..1792a76bd 100644 --- a/glib/gdataset.c +++ b/glib/gdataset.c @@ -736,13 +736,17 @@ g_datalist_id_set_data_full (GData **datalist, * g_datalist_id_remove_multiple: * @datalist: a datalist * @keys: (array length=n_keys): keys to remove - * @n_keys: length of @keys, must be <= 16 + * @n_keys: length of @keys. * * Removes multiple keys from a datalist. * * This is more efficient than calling g_datalist_id_remove_data() * multiple times in a row. * + * Before 2.80, @n_keys had to be not larger than 16. Now it can be larger, but + * note that GData does a linear search, so an excessive number of keys will + * perform badly. + * * Since: 2.74 */ void @@ -750,8 +754,6 @@ g_datalist_id_remove_multiple (GData **datalist, GQuark *keys, gsize n_keys) { - g_return_if_fail (n_keys <= 16); - g_data_remove_internal (datalist, keys, n_keys); } diff --git a/glib/tests/dataset.c b/glib/tests/dataset.c index 190b4ff99..8c479857a 100644 --- a/glib/tests/dataset.c +++ b/glib/tests/dataset.c @@ -347,7 +347,6 @@ test_datalist_id_remove_multiple_resize (void) break; case 3: /* Mode: remove a list of (random) quarks. */ - n = n % 16; for (i = 0; i < n; i++) { j = (j + PRIME) % N; From dbae6b3484daf58233ed5eae97ac21b62a44b350 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Fri, 2 Feb 2024 18:17:33 +0100 Subject: [PATCH 3/6] gdatalist: rework g_datalist_clear() to return early g_datalist_unlock() is probably faster than g_datalist_unlock_and_set(). Move the "if (data)" check (that we anyway had) earlier, so we can call g_datalist_unlock() and return early. --- glib/gdataset.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/glib/gdataset.c b/glib/gdataset.c index 1792a76bd..b10983a6d 100644 --- a/glib/gdataset.c +++ b/glib/gdataset.c @@ -234,18 +234,22 @@ g_datalist_clear (GData **datalist) g_return_if_fail (datalist != NULL); data = g_datalist_lock_and_get (datalist); + + if (!data) + { + g_datalist_unlock (datalist); + return; + } + g_datalist_unlock_and_set (datalist, NULL); - if (data) + for (i = 0; i < data->len; i++) { - for (i = 0; i < data->len; i++) - { - if (data->data[i].data && data->data[i].destroy) - data->data[i].destroy (data->data[i].data); - } - - g_free (data); + if (data->data[i].data && data->data[i].destroy) + data->data[i].destroy (data->data[i].data); } + + g_free (data); } /* HOLDS: g_dataset_global_lock */ From 927075277c74a47a47da8bcb71e3de88f0e20bd9 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Fri, 2 Feb 2024 16:43:56 +0100 Subject: [PATCH 4/6] gdatalist: extract helper function for removing element Extract helper functions datalist_remove() and datalist_shrink(). This is to reduce duplicate code, but also to have a default way how to do this. In particular, later datalist_shrink() might do more aggressive shrinking. We need to have that code in one place. --- glib/gdataset.c | 113 ++++++++++++++++++++++++++---------------------- 1 file changed, 62 insertions(+), 51 deletions(-) diff --git a/glib/gdataset.c b/glib/gdataset.c index b10983a6d..53d8ee96e 100644 --- a/glib/gdataset.c +++ b/glib/gdataset.c @@ -193,6 +193,41 @@ datalist_append (GData **data, GQuark key_id, gpointer new_data, GDestroyNotify return reallocated; } +static void +datalist_remove (GData *data, guint32 idx) +{ +#if G_ENABLE_DEBUG + g_assert (idx < data->len); +#endif + + data->len--; + + if (idx != data->len) + data->data[idx] = data->data[data->len]; +} + +static gboolean +datalist_shrink (GData **data, GData **d_to_free) +{ + GData *d; + + d = *data; + + if (d->len == 0) + { + /* The list became empty. We drop the allocated memory altogether. */ + + /* The caller will free the buffer after releasing the lock, to minimize + * the time we hold the lock. Transfer it out. */ + *d_to_free = d; + *data = NULL; + return TRUE; + } + + /* not reallocated. */ + return FALSE; +} + static GDataElt * datalist_find (GData *data, GQuark key_id, guint32 *out_idx) { @@ -352,33 +387,26 @@ g_data_set_internal (GData **datalist, { if (data) { + GData *d_to_free; + old = *data; - if (idx != d->len - 1u) - *data = d->data[d->len - 1u]; - d->len--; - /* We don't bother to shrink, but if all data are now gone - * we at least free the memory - */ - if (d->len == 0) + datalist_remove (d, idx); + if (datalist_shrink (&d, &d_to_free)) { - /* datalist may be situated in dataset, so must not be - * unlocked when we free it - */ - g_datalist_unlock_and_set (datalist, NULL); - - g_free (d); + g_datalist_unlock_and_set (datalist, d); /* the dataset destruction *must* be done * prior to invocation of the data destroy function */ - if (dataset) + if (dataset && !d) g_dataset_destroy_internal (dataset); + + if (d_to_free) + g_free (d_to_free); } else - { - g_datalist_unlock (datalist); - } + g_datalist_unlock (datalist); /* We found and removed an old value * the GData struct *must* already be unlinked @@ -896,25 +924,19 @@ g_datalist_id_update_atomic (GData **datalist, if (data && !new_data) { + GData *d_to_free; + /* Remove. The callback indicates to drop the entry. * * The old data->data was stolen by callback(). */ - d->len--; - - /* We don't bother to shrink, but if all data are now gone - * we at least free the memory - */ - if (d->len == 0) + datalist_remove (d, idx); + if (datalist_shrink (&d, &d_to_free)) { - g_datalist_unlock_and_set (datalist, NULL); - g_free (d); + g_datalist_unlock_and_set (datalist, d); + if (d_to_free) + g_free (d_to_free); to_unlock = FALSE; } - else - { - if (idx != d->len) - *data = d->data[d->len]; - } } else if (data) { @@ -1112,10 +1134,9 @@ g_datalist_id_replace_data (GData **datalist, { gpointer val = NULL; GData *d; - GData *new_d = NULL; GDataElt *data; - gboolean free_d = FALSE; - gboolean set_new_d = FALSE; + GData *d_to_free = NULL; + gboolean set_d = FALSE; guint32 idx; g_return_val_if_fail (datalist != NULL, FALSE); @@ -1141,18 +1162,9 @@ g_datalist_id_replace_data (GData **datalist, } else { - if (idx != d->len - 1u) - *data = d->data[d->len - 1u]; - d->len--; - - /* We don't bother to shrink, but if all data are now gone - * we at least free the memory - */ - if (d->len == 0) - { - set_new_d = TRUE; - free_d = TRUE; - } + datalist_remove (d, idx); + if (datalist_shrink (&d, &d_to_free)) + set_d = TRUE; } } } @@ -1161,18 +1173,17 @@ g_datalist_id_replace_data (GData **datalist, { if (datalist_append (&d, key_id, newval, destroy)) { - new_d = d; - set_new_d = TRUE; + set_d = TRUE; } } - if (set_new_d) - g_datalist_unlock_and_set (datalist, new_d); + if (set_d) + g_datalist_unlock_and_set (datalist, d); else g_datalist_unlock (datalist); - if (free_d) - g_free (d); + if (d_to_free) + g_free (d_to_free); return val == oldval; } From 6c0d4c884f006f506fdf281b3b70241182c0a681 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Fri, 2 Feb 2024 17:45:31 +0100 Subject: [PATCH 5/6] gdatalist: rework g_data_remove_internal() to use datalist_shrink() The main point here is to reuse datalist_remove() and datalist_shrink(). Especially, datalist_shrink() will become more interesting next, when it actually shrinks the buffer. Also, I find the previous implementation with "data_end" confusing. Instead, only use index "i_data" to iterate over the data. --- glib/gdataset.c | 71 ++++++++++++++++++++----------------------------- 1 file changed, 29 insertions(+), 42 deletions(-) diff --git a/glib/gdataset.c b/glib/gdataset.c index 53d8ee96e..d35f2b9a6 100644 --- a/glib/gdataset.c +++ b/glib/gdataset.c @@ -200,6 +200,11 @@ datalist_remove (GData *data, guint32 idx) g_assert (idx < data->len); #endif + /* g_data_remove_internal() relies on the fact, that this function removes + * the entry similar to g_array_remove_index_fast(). That is, the entries up + * to @idx are left unchanged, and the last entry at moved to position @idx. + * */ + data->len--; if (idx != data->len) @@ -481,10 +486,10 @@ g_data_remove_internal (GData **datalist, GData *d; GDataElt *old; GDataElt *old_to_free = NULL; - GDataElt *data; - GDataElt *data_end; + GData *d_to_free; gsize found_keys; - gboolean free_d = FALSE; + gsize i_keys; + guint32 i_data; d = g_datalist_lock_and_get (datalist); @@ -510,68 +515,50 @@ g_data_remove_internal (GData **datalist, old = old_to_free; } - data = d->data; - data_end = data + d->len; + i_data = 0; found_keys = 0; - - while (data < data_end && found_keys < n_keys) + while (i_data < d->len && found_keys < n_keys) { + GDataElt *data = &d->data[i_data]; gboolean remove = FALSE; - for (gsize i = 0; i < n_keys; i++) + for (i_keys = 0; i_keys < n_keys; i_keys++) { - if (data->key == keys[i]) + if (data->key == keys[i_keys]) { - old[i] = *data; + /* We must invoke the destroy notifications in the order of @keys. + * Hence, build up the list @old at index @i_keys. */ + old[i_keys] = *data; + found_keys++; remove = TRUE; break; } } - if (remove) + if (!remove) { - GDataElt *data_last = data_end - 1; - - found_keys++; - - if (data < data_last) - *data = *data_last; - - data_end--; - d->len--; - - /* We don't bother to shrink, but if all data are now gone - * we at least free the memory - */ - if (d->len == 0) - { - free_d = TRUE; - break; - } - } - else - { - data++; + i_data++; + continue; } + + datalist_remove (d, i_data); } - if (free_d) + if (found_keys > 0 && datalist_shrink (&d, &d_to_free)) { - g_datalist_unlock_and_set (datalist, NULL); - g_free (d); + g_datalist_unlock_and_set (datalist, d); + if (d_to_free) + g_free (d_to_free); } else g_datalist_unlock (datalist); if (found_keys > 0) { - for (gsize i = 0; i < n_keys; i++) + for (i_keys = 0; i_keys < n_keys; i_keys++) { - /* If keys[i] was not found, then old[i].destroy is NULL. - * Call old[i].destroy() only if keys[i] was found, and - * is associated with a destroy notifier: */ - if (old[i].destroy) - old[i].destroy (old[i].data); + if (old[i_keys].destroy) + old[i_keys].destroy (old[i_keys].data); } } From 29314690c75cdbd538091845feb580bfe7a3ed5f Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Fri, 2 Feb 2024 18:25:51 +0100 Subject: [PATCH 6/6] gdatalist: shrink the buffer when it becomes 75% empty The amount of used memory should stay in relation to the number of entries we have. If we delete most (75%) of the entries, let's also reallocate the buffer down to 50% of its size. datalist_append() now starts with 2 elements. This works together with the shrinking. If we only have one entry left, we will shrink the buffer back to size 2. In general, d->alloc is always a power of two (unless it overflows after G_MAXUINT32/2, which we assume will never happen). The previous buffer growth strategy of never shrinking is not necessarily bad. It has the advantage to not require any checks for shrinking, and it works well in cases where the amount of data actually does not shrink (as we'd often expect). Also, it's questionable what a realloc() to a smaller size really brings. Is that really gonna help and will the allocator do something useful? Anyway. This patch introduces shrinking. The check for whether to shrink changes from `if (d->len == 0)` to `if (d->len <= d->alloc / 4u)`, which is probably cheap even if most of the time we don't need to shrink. For most cases, that's the only change that this patch brings. However, once we find out that 75% of the buffer are empty, calling realloc() seems a sensible thing to do. --- glib/gdataset.c | 51 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/glib/gdataset.c b/glib/gdataset.c index d35f2b9a6..442389b19 100644 --- a/glib/gdataset.c +++ b/glib/gdataset.c @@ -40,6 +40,7 @@ #include "gslice.h" #include "gdatasetprivate.h" +#include "gutilsprivate.h" #include "ghash.h" #include "gquark.h" #include "gstrfuncs.h" @@ -164,18 +165,26 @@ datalist_append (GData **data, GQuark key_id, gpointer new_data, GDestroyNotify GData *d; d = *data; - if (!d) { - d = g_malloc (sizeof (GData)); + d = g_malloc (G_STRUCT_OFFSET (GData, data) + 2u * sizeof (GDataElt)); d->len = 0; - d->alloc = 1; + d->alloc = 2u; *data = d; reallocated = TRUE; } else if (d->len == d->alloc) { d->alloc = d->alloc * 2u; +#if G_ENABLE_DEBUG + /* d->alloc is always a power of two. It thus overflows the first time + * when going to (G_MAXUINT32+1), or when requesting 2^31+1 elements. + * + * This is not handled, and we just crash. That's because we track the GData + * in a linear list, which horribly degrades long before we add 2 billion entries. + * Don't ever try to do that. */ + g_assert (d->alloc > d->len); +#endif d = g_realloc (d, G_STRUCT_OFFSET (GData, data) + d->alloc * sizeof (GDataElt)); *data = d; reallocated = TRUE; @@ -214,10 +223,20 @@ datalist_remove (GData *data, guint32 idx) static gboolean datalist_shrink (GData **data, GData **d_to_free) { + guint32 alloc_by_4; + guint32 v; GData *d; d = *data; + alloc_by_4 = d->alloc / 4u; + + if (G_LIKELY (d->len > alloc_by_4)) + { + /* No shrinking */ + return FALSE; + } + if (d->len == 0) { /* The list became empty. We drop the allocated memory altogether. */ @@ -229,8 +248,30 @@ datalist_shrink (GData **data, GData **d_to_free) return TRUE; } - /* not reallocated. */ - return FALSE; + /* If the buffer is filled not more than 25%. Shrink to double the current length. */ + + v = d->len; + if (v != alloc_by_4) + { + /* d->alloc is a power of two. Usually, we remove one element at a + * time, then we will just reach reach a quarter of that. + * + * However, with g_datalist_id_remove_multiple(), len can be smaller + * at once. In that case, find first the next power of two. */ + v = g_nearest_pow (v); + } + v *= 2u; + +#if G_ENABLE_DEBUG + g_assert (v > d->len); + g_assert (v <= d->alloc / 2u); +#endif + + d->alloc = v; + d = g_realloc (d, G_STRUCT_OFFSET (GData, data) + (v * sizeof (GDataElt))); + *d_to_free = NULL; + *data = d; + return TRUE; } static GDataElt *