From 6063cdb79fb945907fe7ce312be7a5e68997c08b Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Tue, 6 Feb 2024 08:54:55 +0100 Subject: [PATCH 1/8] gdataset/tests: improve "/datalist/id-remove-multiple/resize" test --- glib/tests/dataset.c | 63 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 59 insertions(+), 4 deletions(-) diff --git a/glib/tests/dataset.c b/glib/tests/dataset.c index b0b8d947d..e97545152 100644 --- a/glib/tests/dataset.c +++ b/glib/tests/dataset.c @@ -305,6 +305,7 @@ test_datalist_id_remove_multiple_resize (void) { GQuark *quarks; GQuark *quarks2; + gboolean *has; const guint N = 1000; const guint PRIME = 1048583u; guint i; @@ -314,6 +315,7 @@ test_datalist_id_remove_multiple_resize (void) quarks = g_new (GQuark, N); quarks2 = g_new (GQuark, N); + has = g_new0 (gboolean, N); for (i = 0; i < N; i++) { @@ -322,12 +324,15 @@ test_datalist_id_remove_multiple_resize (void) } for (i = 0; i < N; i++) - g_datalist_id_set_data (&list, quarks[i], GINT_TO_POINTER (i)); + { + g_datalist_id_set_data (&list, quarks[i], (gpointer) g_quark_to_string (quarks[i])); + has[i] = TRUE; + } /* Now we perform a list of random operations (remove/add quarks). */ for (i_run = 0; TRUE; i_run++) { - int MODE = ((guint) g_test_rand_int ()) % 4; + int MODE = ((guint) g_test_rand_int ()) % 6; guint n; guint j; @@ -355,9 +360,15 @@ test_datalist_id_remove_multiple_resize (void) { j = (j + PRIME) % N; if (MODE == 0) - g_datalist_id_remove_data (&list, quarks[j]); + { + g_datalist_id_remove_data (&list, quarks[j]); + has[j] = FALSE; + } else - g_datalist_id_set_data (&list, quarks[j], GINT_TO_POINTER (j)); + { + g_datalist_id_set_data (&list, quarks[j], (gpointer) g_quark_to_string (quarks[j])); + has[j] = TRUE; + } } break; case 3: @@ -366,15 +377,59 @@ test_datalist_id_remove_multiple_resize (void) { j = (j + PRIME) % N; quarks2[i] = quarks[j]; + has[j] = FALSE; } g_datalist_id_remove_multiple (&list, quarks2, n); break; + case 4: + /* Mode: lookup string via g_datalist_get_data()*/ + for (i = 0; i < n; i++) + { + const char *data; + const char *data2; + const char *key; + + j = (j + PRIME) % N; + + key = g_quark_to_string (quarks[j]); + + data = g_datalist_id_get_data (&list, quarks[j]); + data2 = g_datalist_get_data (&list, key); + g_assert_true (data == data2); + if (data) + g_assert_true (data == key); + g_assert_true ((!!data) == has[j]); + } + break; + case 5: + /* Fill/empty the list completely. */ + switch (((guint) g_test_rand_int ()) % 5) + { + case 0: + g_datalist_clear (&list); + for (i = 0; i < N; i++) + has[i] = FALSE; + break; + case 1: + for (i = 0; i < N; i++) + { + j = (j + PRIME) % N; + g_datalist_id_set_data (&list, quarks[j], (gpointer) g_quark_to_string (quarks[j])); + has[i] = TRUE; + } + break; + default: + /* Most of the time we do nothing. The case where we fill/empty + * the list entirely is less interesting. */ + break; + } } } g_free (quarks); g_free (quarks2); + g_free (has); } static void From 29005752832ac5ab42faa8650349bdbce1f221d0 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Thu, 8 Feb 2024 09:02:46 +0100 Subject: [PATCH 2/8] gdataset: extract datalist_destroy() helper function for releasing GData Avoid duplicated code. Also, the "if (data->data[i].data && " check is unnecessary. The data pointer is never NULL, because g_datalist_id_set_data() will treat that as indication to remove the entry. --- glib/gdataset.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/glib/gdataset.c b/glib/gdataset.c index 992e17368..b3e63ba1a 100644 --- a/glib/gdataset.c +++ b/glib/gdataset.c @@ -274,6 +274,22 @@ datalist_shrink (GData **data, GData **d_to_free) return TRUE; } +static void +datalist_destroy (GData *data) +{ + guint32 i; + + /* Must be called without lock. Will free @data and invoke the + * destroy() notifications. */ + for (i = 0; i < data->len; i++) + { + if (data->data[i].destroy) + data->data[i].destroy (data->data[i].data); + } + + g_free (data); +} + static GDataElt * datalist_find (GData *data, GQuark key_id, guint32 *out_idx) { @@ -310,7 +326,6 @@ void g_datalist_clear (GData **datalist) { GData *data; - guint i; g_return_if_fail (datalist != NULL); @@ -324,13 +339,7 @@ g_datalist_clear (GData **datalist) g_datalist_unlock_and_set (datalist, NULL); - for (i = 0; i < data->len; i++) - { - if (data->data[i].data && data->data[i].destroy) - data->data[i].destroy (data->data[i].data); - } - - g_free (data); + datalist_destroy (data); } /* HOLDS: g_dataset_global_lock */ @@ -359,7 +368,6 @@ g_dataset_destroy_internal (GDataset *dataset) while (dataset) { GData *data; - guint i; data = G_DATALIST_GET_POINTER (&dataset->datalist); @@ -376,12 +384,7 @@ g_dataset_destroy_internal (GDataset *dataset) G_UNLOCK (g_dataset_global); - for (i = 0; i < data->len; i++) - { - if (data->data[i].data && data->data[i].destroy) - data->data[i].destroy (data->data[i].data); - } - g_free (data); + datalist_destroy (data); G_LOCK (g_dataset_global); dataset = g_dataset_lookup (dataset_location); From 9a8e2ab2633c853fce31e856d2e4f6d0c47b5057 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Mon, 5 Feb 2024 21:24:26 +0100 Subject: [PATCH 3/8] gdataset: extract helper function for struct size --- glib/gdataset.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/glib/gdataset.c b/glib/gdataset.c index b3e63ba1a..552cf7ce1 100644 --- a/glib/gdataset.c +++ b/glib/gdataset.c @@ -158,6 +158,13 @@ g_datalist_unlock_and_set (GData **datalist, gpointer ptr) g_pointer_bit_unlock_and_set ((void **) datalist, DATALIST_LOCK_BIT, ptr, G_DATALIST_FLAGS_MASK_INTERNAL); } +static gsize +datalist_alloc_size (guint32 alloc) +{ + return G_STRUCT_OFFSET (GData, data) + + (((gsize) alloc) * sizeof (GDataElt)); +} + static gboolean datalist_append (GData **data, GQuark key_id, gpointer new_data, GDestroyNotify destroy_func) { @@ -167,7 +174,7 @@ datalist_append (GData **data, GQuark key_id, gpointer new_data, GDestroyNotify d = *data; if (!d) { - d = g_malloc (G_STRUCT_OFFSET (GData, data) + 2u * sizeof (GDataElt)); + d = g_malloc (datalist_alloc_size (2u)); d->len = 0; d->alloc = 2u; *data = d; @@ -185,7 +192,7 @@ datalist_append (GData **data, GQuark key_id, gpointer new_data, GDestroyNotify * Don't ever try to do that. */ g_assert (d->alloc > d->len); #endif - d = g_realloc (d, G_STRUCT_OFFSET (GData, data) + d->alloc * sizeof (GDataElt)); + d = g_realloc (d, datalist_alloc_size (d->alloc)); *data = d; reallocated = TRUE; } @@ -268,7 +275,7 @@ datalist_shrink (GData **data, GData **d_to_free) #endif d->alloc = v; - d = g_realloc (d, G_STRUCT_OFFSET (GData, data) + (v * sizeof (GDataElt))); + d = g_realloc (d, datalist_alloc_size (v)); *d_to_free = NULL; *data = d; return TRUE; From cc53252c5b9ab6f0534a8059fa0a2308fcdfce47 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Mon, 5 Feb 2024 22:04:18 +0100 Subject: [PATCH 4/8] gdataset: add datalist_realloc() helper This will be more useful next. Also, try to detect whether realloc() actually moved the pointer. If it doesn't, we can optimize a bit (not use g_datalist_unlock_and_set()). Next, we will do more useful optimization in this case. Note that we cannot just compare dangling pointers. That's would be undefined behavior. In practice, we probably often compare dangling pointers, and this tends to work just fine. Still avoid this and compare only the guintptr values. --- glib/gdataset.c | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/glib/gdataset.c b/glib/gdataset.c index 552cf7ce1..3c3c0d642 100644 --- a/glib/gdataset.c +++ b/glib/gdataset.c @@ -165,6 +165,30 @@ datalist_alloc_size (guint32 alloc) (((gsize) alloc) * sizeof (GDataElt)); } +static GData * +datalist_realloc (GData *data, guint32 alloc, gboolean *out_reallocated) +{ + guintptr data_old; + gboolean reallocated; + + data_old = (guintptr) ((gpointer) data); + + data = g_realloc (data, datalist_alloc_size (alloc)); + + /* Determine whether realloc() moves the pointer. After a move, the old + * pointer would be dangling and comparing it would be undefined behavior. + * Avoid that by casting to uintptr_t. + */ + reallocated = (((guintptr) ((gpointer) (data))) != data_old); + + data->alloc = alloc; + + if (out_reallocated) + *out_reallocated = reallocated; + + return data; +} + static gboolean datalist_append (GData **data, GQuark key_id, gpointer new_data, GDestroyNotify destroy_func) { @@ -182,7 +206,8 @@ datalist_append (GData **data, GQuark key_id, gpointer new_data, GDestroyNotify } else if (d->len == d->alloc) { - d->alloc = d->alloc * 2u; + guint32 alloc = d->alloc * 2u; + #if G_ENABLE_DEBUG /* d->alloc is always a power of two. It thus overflows the first time * when going to (G_MAXUINT32+1), or when requesting 2^31+1 elements. @@ -190,11 +215,10 @@ datalist_append (GData **data, GQuark key_id, gpointer new_data, GDestroyNotify * This is not handled, and we just crash. That's because we track the GData * in a linear list, which horribly degrades long before we add 2 billion entries. * Don't ever try to do that. */ - g_assert (d->alloc > d->len); + g_assert (alloc > d->len); #endif - d = g_realloc (d, datalist_alloc_size (d->alloc)); + d = datalist_realloc (d, alloc, &reallocated); *data = d; - reallocated = TRUE; } else reallocated = FALSE; @@ -230,6 +254,7 @@ datalist_remove (GData *data, guint32 idx) static gboolean datalist_shrink (GData **data, GData **d_to_free) { + gboolean reallocated; guint32 alloc_by_4; guint32 v; GData *d; @@ -274,11 +299,10 @@ datalist_shrink (GData **data, GData **d_to_free) g_assert (v <= d->alloc / 2u); #endif - d->alloc = v; - d = g_realloc (d, datalist_alloc_size (v)); + d = datalist_realloc (d, v, &reallocated); *d_to_free = NULL; *data = d; - return TRUE; + return reallocated; } static void From 0746df490670e15980d2faaa945f18f6a7d18efc Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Mon, 5 Feb 2024 22:03:05 +0100 Subject: [PATCH 5/8] gdataset: add GHashTable lookup index to GData GData tracks the entries in a linear array, and performs a linear search. That is very good, as long as the number of entries is small. However, when it grows, performance gets worse. It's not clear what the exact threshold is, or what the recommended maximum is. Also, GData is used to attach arbitrary user data to GObject. Which is a great feature, that a user might want to use. In that case, they may be less concerned about performance to get the benefits of the feature, and thus add more data than is best (though, it's unclear how much is too much). Also, GObject adds entries in its qdata. Hence, also for basic GObject operations, it's important that this performs well. Note that access to GData happens while holding a lock, we want to minimize the time while holding that lock. This patch ensures that access to GData is O(1) (and reasonably fast). It thus allows to use GData in ways that wasn't advised previously. There are alternatives, like using a binary search tree or always use a GHashTable. Instead, we keep the linear buffer. Only when the buffer grows to ALLOC_THRESHOLD_INDEX entries, we will start generating and maintaining a GHashTable index. So the common case where objects have few entries does not change. The memory overhead is only there, when the list grows. ALLOC_THRESHOLD_INDEX is set to 64 entries. We will allocate such a large buffer when adding the 33rd entry. During shrink, we will drop the buffer again when shrinking down to 16 entries. The reason for not always using the GHashTable is to save the memory in common cases where there are few entries. We use g_hash_table_add() to exploit the GHashTable optimization. We also let it point to the GDataElt from the linear array. The benefit is that we don't require individual allocations. The downside is that during reallocation we need to regenerate the entire index. In my tests this actually performs well. For example, following are timings for calling g_dataset_id_get_data() in a loop. This is the lookup time for an entry that doesn't exist. Obviously, the linear search would scale always best when we would only lookup the first entry in the list. num-entries time-before time-after 1 0.144 0.145 2 0.146 0.146 5 0.149 0.150 10 0.155 0.157 20 0.172 0.170 32 0.248 0.254 33 0.249 0.184 <== index in use 40 0.284 0.183 50 0.317 0.189 75 0.370 0.184 100 0.442 0.183 300 1.044 0.186 1000 3.170 0.184 10000 31.597 0.189 --- glib/gdataset.c | 173 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 167 insertions(+), 6 deletions(-) diff --git a/glib/gdataset.c b/glib/gdataset.c index 3c3c0d642..150186aa1 100644 --- a/glib/gdataset.c +++ b/glib/gdataset.c @@ -68,6 +68,22 @@ #define G_DATALIST_FLAGS_MASK_INTERNAL 0x7 +/* When GData.alloc grows to size ALLOC_THRESHOLD_INDEX, we reserve one additional + * GHashTable* at &data->data[data->alloc]. This will contain the index for fast + * lookup. See datalist_index*() helpers. + * + * Note that we grow the GData.data buffer by doubling the allocation size. So + * we first allocate 64 entries, when adding the 33 entry. + * + * Conversely, we exponentially shrink the buffer. That means, when we remove + * entries and reach 16 (or lower), we will shrink the buffer from 64 to 32 + * entries (and stop using the index). + * + * So we start using the index when adding >= 33 entries. And stop using it + * when removing to <= 16 entries. + */ +#define ALLOC_THRESHOLD_INDEX 64u + #define G_DATALIST_CLEAN_POINTER(ptr) \ ((GData *) ((gpointer) (((guintptr) (ptr)) & ~((guintptr) G_DATALIST_FLAGS_MASK_INTERNAL)))) @@ -161,8 +177,59 @@ g_datalist_unlock_and_set (GData **datalist, gpointer ptr) static gsize datalist_alloc_size (guint32 alloc) { + /* GDataElt also contains pointer. It thus is suitable aligned for pointers, + * and we can just append the pointer for the index at the end. */ return G_STRUCT_OFFSET (GData, data) + - (((gsize) alloc) * sizeof (GDataElt)); + (((gsize) alloc) * sizeof (GDataElt)) + + (G_UNLIKELY (alloc >= ALLOC_THRESHOLD_INDEX) ? sizeof (GHashTable *) : 0u); +} + +G_ALWAYS_INLINE static inline GHashTable ** +datalist_index_get_ptr (GData *data) +{ + if (G_LIKELY (data->alloc < ALLOC_THRESHOLD_INDEX)) + return NULL; + + return (gpointer) (&(data->data[data->alloc])); +} + +G_ALWAYS_INLINE static inline GHashTable * +datalist_index_get (GData *data) +{ + GHashTable **p_index; + + p_index = datalist_index_get_ptr (data); + +#if G_ENABLE_DEBUG + g_assert (!p_index || *p_index); +#endif + + return G_UNLIKELY (p_index) ? *p_index : NULL; +} + +static guint +_datalist_index_hash (gconstpointer key) +{ + const GQuark *ptr = key; + + G_STATIC_ASSERT (G_STRUCT_OFFSET (GDataElt, key) == 0); + + return *ptr; +} + +static gboolean +_datalist_index_equal (gconstpointer a, gconstpointer b) +{ + const GQuark *ptr_a = a; + const GQuark *ptr_b = b; + + return *ptr_a == *ptr_b; +} + +G_ALWAYS_INLINE static inline GHashTable * +datalist_index_new (void) +{ + return g_hash_table_new (_datalist_index_hash, _datalist_index_equal); } static GData * @@ -170,8 +237,12 @@ datalist_realloc (GData *data, guint32 alloc, gboolean *out_reallocated) { guintptr data_old; gboolean reallocated; + GHashTable *index; + GHashTable **p_index; + guint32 i; data_old = (guintptr) ((gpointer) data); + index = datalist_index_get (data); data = g_realloc (data, datalist_alloc_size (alloc)); @@ -186,12 +257,47 @@ datalist_realloc (GData *data, guint32 alloc, gboolean *out_reallocated) if (out_reallocated) *out_reallocated = reallocated; + /* Note that if data was @reallocated, then @index contains only dangling pointers. + * We can only destroy/remove-all, which we rely on not following those pointers. */ + + p_index = datalist_index_get_ptr (data); + + if (G_LIKELY (!p_index)) + { + if (G_UNLIKELY (index)) + g_hash_table_unref (index); + } + else if (!reallocated && index) + { + /* The index is still fine and the pointers are all still valid. We + * can keep it. */ + *p_index = index; + } + else + { + if (G_UNLIKELY (index)) + { + /* Note that the GHashTable's keys are now all dangling pointers! + * We rely on remove-all to not following them. */ + g_hash_table_remove_all (index); + } + else + index = datalist_index_new (); + + *p_index = index; + + for (i = 0; i < data->len; i++) + g_hash_table_add (index, &data->data[i]); + } + return data; } static gboolean datalist_append (GData **data, GQuark key_id, gpointer new_data, GDestroyNotify destroy_func) { + GDataElt *data_elt; + GHashTable *index; gboolean reallocated; GData *d; @@ -201,6 +307,10 @@ datalist_append (GData **data, GQuark key_id, gpointer new_data, GDestroyNotify d = g_malloc (datalist_alloc_size (2u)); d->len = 0; d->alloc = 2u; + + if (2u >= ALLOC_THRESHOLD_INDEX) + *(datalist_index_get_ptr (d)) = datalist_index_new (); + *data = d; reallocated = TRUE; } @@ -223,19 +333,26 @@ datalist_append (GData **data, GQuark key_id, gpointer new_data, GDestroyNotify else reallocated = FALSE; - d->data[d->len] = (GDataElt){ + data_elt = &d->data[d->len]; + *data_elt = (GDataElt){ .key = key_id, .data = new_data, .destroy = destroy_func, }; d->len++; + index = datalist_index_get (d); + if (G_UNLIKELY (index)) + g_hash_table_add (index, data_elt); + return reallocated; } static void datalist_remove (GData *data, guint32 idx) { + GHashTable *index; + #if G_ENABLE_DEBUG g_assert (idx < data->len); #endif @@ -245,10 +362,18 @@ datalist_remove (GData *data, guint32 idx) * to @idx are left unchanged, and the last entry is moved to position @idx. * */ + index = datalist_index_get (data); + if (G_UNLIKELY (index)) + g_hash_table_remove (index, &data->data[idx]); + data->len--; if (idx != data->len) - data->data[idx] = data->data[data->len]; + { + data->data[idx] = data->data[data->len]; + if (G_UNLIKELY (index)) + g_hash_table_add (index, &data->data[idx]); + } } static gboolean @@ -271,10 +396,17 @@ datalist_shrink (GData **data, GData **d_to_free) if (d->len == 0) { + GHashTable *index; + /* The list became empty. We drop the allocated memory altogether. */ /* The caller will free the buffer after releasing the lock, to minimize * the time we hold the lock. Transfer it out. */ + + index = datalist_index_get (d); + if (G_UNLIKELY (index)) + g_hash_table_unref (index); + *d_to_free = d; *data = NULL; return TRUE; @@ -308,10 +440,16 @@ datalist_shrink (GData **data, GData **d_to_free) static void datalist_destroy (GData *data) { + GHashTable *index; guint32 i; /* Must be called without lock. Will free @data and invoke the * destroy() notifications. */ + + index = datalist_index_get (data); + if (G_UNLIKELY (index)) + g_hash_table_unref (index); + for (i = 0; i < data->len; i++) { if (data->data[i].destroy) @@ -324,14 +462,21 @@ datalist_destroy (GData *data) static GDataElt * datalist_find (GData *data, GQuark key_id, guint32 *out_idx) { + GDataElt *data_elt; + GHashTable *index; guint32 i; - if (data) + if (!data) + goto out_not_found; + + index = datalist_index_get (data); + + if (G_LIKELY (!index)) { + /* We have no index. Do a linear search. */ for (i = 0; i < data->len; i++) { - GDataElt *data_elt = &data->data[i]; - + data_elt = &data->data[i]; if (data_elt->key == key_id) { if (out_idx) @@ -339,7 +484,23 @@ datalist_find (GData *data, GQuark key_id, guint32 *out_idx) return data_elt; } } + + goto out_not_found; } + + data_elt = g_hash_table_lookup (index, &key_id); + if (!data_elt) + goto out_not_found; + +#if G_ENABLE_DEBUG + g_assert (data_elt >= data->data && data_elt < &data->data[data->len]); +#endif + + if (out_idx) + *out_idx = (data_elt - data->data); + return data_elt; + +out_not_found: if (out_idx) *out_idx = G_MAXUINT32; return NULL; From 9abc4d9be6e8b48b8dadc1e67e2c1f14375020f9 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Tue, 6 Feb 2024 08:48:32 +0100 Subject: [PATCH 6/8] gdataset: use hash lookup in g_datalist_get_data() g_datalist_get_data() tries to avoid g_quark_try_string() and instead do the lock-less g_quark_to_string(). For small number of entries that may perform better. At least, this was an optimization from the past. However, if we have a GHashTable index, and presumably a larger number of entries, we should use the index instead. Unfortunately, that requires us again to use g_quark_try_string() and take a global lock. --- glib/gdataset.c | 61 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 21 deletions(-) diff --git a/glib/gdataset.c b/glib/gdataset.c index 150186aa1..77f07e1e2 100644 --- a/glib/gdataset.c +++ b/glib/gdataset.c @@ -1423,37 +1423,56 @@ g_datalist_id_replace_data (GData **datalist, * is not found. **/ gpointer -g_datalist_get_data (GData **datalist, - const gchar *key) +g_datalist_get_data (GData **datalist, + const gchar *key) { + GQuark key_id; + GHashTable *index; gpointer res = NULL; + GDataElt *data_elt; GData *d; - GDataElt *data, *data_end; g_return_val_if_fail (datalist != NULL, NULL); d = g_datalist_lock_and_get (datalist); - if (d) + + if (!d) + goto out; + + index = datalist_index_get (d); + + if (G_LIKELY (!index)) { - data = d->data; - data_end = data + d->len; - while (data < data_end) - { - /* Here we intentionally compare by strings, instead of calling - * g_quark_try_string() first. - * - * See commit 1cceda49b60b ('Make g_datalist_get_data not look up the - * quark'). - */ - if (g_strcmp0 (g_quark_to_string (data->key), key) == 0) - { - res = data->data; - break; - } - data++; - } + guint32 i; + + for (i = 0; i < d->len; i++) + { + data_elt = &d->data[i]; + /* Here we intentionally compare by strings, instead of calling + * g_quark_try_string() first. + * + * See commit 1cceda49b60b ('Make g_datalist_get_data not look up the + * quark'). + */ + if (g_strcmp0 (g_quark_to_string (data_elt->key), key) == 0) + { + res = data_elt->data; + goto out; + } + } + goto out; } + key_id = g_quark_try_string (key); + if (key_id == 0 && key) + goto out; + + data_elt = g_hash_table_lookup (index, &key_id); + + if (data_elt) + res = data_elt->data; + +out: g_datalist_unlock (datalist); return res; From af4a43b064881173355f80a3191ff92fded66398 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Tue, 6 Feb 2024 09:03:39 +0100 Subject: [PATCH 7/8] gdataset: handle up to G_MAXUINT32 entries GData is a thread safe container. For that reason, it probably is anyway not suitable to add 4 billion entries. That is even with the new index, where we now scale with O(1). Still, the code should not just break down at half of what we theoretically could add. Handle overflow and huge sizes correctly. --- glib/gdataset.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/glib/gdataset.c b/glib/gdataset.c index 77f07e1e2..362d4ae36 100644 --- a/glib/gdataset.c +++ b/glib/gdataset.c @@ -318,15 +318,12 @@ datalist_append (GData **data, GQuark key_id, gpointer new_data, GDestroyNotify { guint32 alloc = d->alloc * 2u; -#if G_ENABLE_DEBUG - /* d->alloc is always a power of two. It thus overflows the first time - * when going to (G_MAXUINT32+1), or when requesting 2^31+1 elements. - * - * This is not handled, and we just crash. That's because we track the GData - * in a linear list, which horribly degrades long before we add 2 billion entries. - * Don't ever try to do that. */ - g_assert (alloc > d->len); -#endif + if (G_UNLIKELY (alloc < d->alloc)) + { + if (d->alloc == G_MAXUINT32) + g_error ("GData cannot contain more than 4294967295 entries"); + alloc = G_MAXUINT32; + } d = datalist_realloc (d, alloc, &reallocated); *data = d; } @@ -417,8 +414,9 @@ datalist_shrink (GData **data, GData **d_to_free) v = d->len; if (v != alloc_by_4) { - /* d->alloc is a power of two. Usually, we remove one element at a - * time, then we will just reach reach a quarter of that. + /* d->alloc is a power of two (unless it's G_MAXUINT32). Usually, we + * remove one element at a time, then we will just reach reach a quarter + * of that. * * However, with g_datalist_id_remove_multiple(), len can be smaller * at once. In that case, find first the next power of two. */ @@ -428,7 +426,7 @@ datalist_shrink (GData **data, GData **d_to_free) #if G_ENABLE_DEBUG g_assert (v > d->len); - g_assert (v <= d->alloc / 2u); + g_assert (v <= (d->alloc == G_MAXUINT32 ? 0x80000000u : d->alloc / 2u)); #endif d = datalist_realloc (d, v, &reallocated); From 3437414dd4f781da32b654eb966a385891e624b3 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Tue, 6 Feb 2024 09:13:51 +0100 Subject: [PATCH 8/8] gdataset: use lookup index in g_datalist_id_remove_multiple() In order to reuse datalist_find(), we turn the loops around. First loop over the keys, and then (internally in datalist_find()) loop over the entries. --- glib/gdataset.c | 54 +++++++++++++++++++------------------------------ 1 file changed, 21 insertions(+), 33 deletions(-) diff --git a/glib/gdataset.c b/glib/gdataset.c index 362d4ae36..42fd4cd68 100644 --- a/glib/gdataset.c +++ b/glib/gdataset.c @@ -723,7 +723,9 @@ g_data_remove_internal (GData **datalist, GData *d_to_free; gsize found_keys; gsize i_keys; - guint32 i_data; + + if (n_keys == 0) + return; d = g_datalist_lock_and_get (datalist); @@ -749,33 +751,23 @@ g_data_remove_internal (GData **datalist, old = old_to_free; } - i_data = 0; found_keys = 0; - while (i_data < d->len && found_keys < n_keys) + for (i_keys = 0; i_keys < n_keys; i_keys++) { - GDataElt *data = &d->data[i_data]; - gboolean remove = FALSE; + GDataElt *data_elt; + guint32 idx; - for (i_keys = 0; i_keys < n_keys; i_keys++) - { - if (data->key == keys[i_keys]) - { - /* We must invoke the destroy notifications in the order of @keys. - * Hence, build up the list @old at index @i_keys. */ - old[i_keys] = *data; - found_keys++; - remove = TRUE; - break; - } - } + data_elt = datalist_find (d, keys[i_keys], &idx); + if (!data_elt) + continue; - if (!remove) - { - i_data++; - continue; - } - - datalist_remove (d, i_data); + /* We must destroy the keys in the order in which they are specified. + * We achieve that here. + * + * Note that even if @keys contains duplicates, we correctly only + * find them once, as we remove the found entry right away. */ + old[found_keys++] = *data_elt; + datalist_remove (d, idx); } if (found_keys > 0 && datalist_shrink (&d, &d_to_free)) @@ -787,13 +779,10 @@ g_data_remove_internal (GData **datalist, else g_datalist_unlock (datalist); - if (found_keys > 0) + for (i_keys = 0; i_keys < found_keys; i_keys++) { - for (i_keys = 0; i_keys < n_keys; i_keys++) - { - if (old[i_keys].destroy) - old[i_keys].destroy (old[i_keys].data); - } + if (old[i_keys].destroy) + old[i_keys].destroy (old[i_keys].data); } if (G_UNLIKELY (old_to_free)) @@ -996,9 +985,8 @@ g_datalist_id_set_data_full (GData **datalist, * This is more efficient than calling g_datalist_id_remove_data() * multiple times in a row. * - * Before 2.80, @n_keys had to be not larger than 16. Now it can be larger, but - * note that GData does a linear search, so an excessive number of keys will - * perform badly. + * Before 2.80, @n_keys had to be not larger than 16. + * Since 2.84, performance is improved for larger number of keys. * * Since: 2.74 */