From 29314690c75cdbd538091845feb580bfe7a3ed5f Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Fri, 2 Feb 2024 18:25:51 +0100 Subject: [PATCH] gdatalist: shrink the buffer when it becomes 75% empty The amount of used memory should stay in relation to the number of entries we have. If we delete most (75%) of the entries, let's also reallocate the buffer down to 50% of its size. datalist_append() now starts with 2 elements. This works together with the shrinking. If we only have one entry left, we will shrink the buffer back to size 2. In general, d->alloc is always a power of two (unless it overflows after G_MAXUINT32/2, which we assume will never happen). The previous buffer growth strategy of never shrinking is not necessarily bad. It has the advantage to not require any checks for shrinking, and it works well in cases where the amount of data actually does not shrink (as we'd often expect). Also, it's questionable what a realloc() to a smaller size really brings. Is that really gonna help and will the allocator do something useful? Anyway. This patch introduces shrinking. The check for whether to shrink changes from `if (d->len == 0)` to `if (d->len <= d->alloc / 4u)`, which is probably cheap even if most of the time we don't need to shrink. For most cases, that's the only change that this patch brings. However, once we find out that 75% of the buffer are empty, calling realloc() seems a sensible thing to do. --- glib/gdataset.c | 51 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/glib/gdataset.c b/glib/gdataset.c index d35f2b9a6..442389b19 100644 --- a/glib/gdataset.c +++ b/glib/gdataset.c @@ -40,6 +40,7 @@ #include "gslice.h" #include "gdatasetprivate.h" +#include "gutilsprivate.h" #include "ghash.h" #include "gquark.h" #include "gstrfuncs.h" @@ -164,18 +165,26 @@ datalist_append (GData **data, GQuark key_id, gpointer new_data, GDestroyNotify GData *d; d = *data; - if (!d) { - d = g_malloc (sizeof (GData)); + d = g_malloc (G_STRUCT_OFFSET (GData, data) + 2u * sizeof (GDataElt)); d->len = 0; - d->alloc = 1; + d->alloc = 2u; *data = d; reallocated = TRUE; } else if (d->len == d->alloc) { d->alloc = d->alloc * 2u; +#if G_ENABLE_DEBUG + /* d->alloc is always a power of two. It thus overflows the first time + * when going to (G_MAXUINT32+1), or when requesting 2^31+1 elements. + * + * This is not handled, and we just crash. That's because we track the GData + * in a linear list, which horribly degrades long before we add 2 billion entries. + * Don't ever try to do that. */ + g_assert (d->alloc > d->len); +#endif d = g_realloc (d, G_STRUCT_OFFSET (GData, data) + d->alloc * sizeof (GDataElt)); *data = d; reallocated = TRUE; @@ -214,10 +223,20 @@ datalist_remove (GData *data, guint32 idx) static gboolean datalist_shrink (GData **data, GData **d_to_free) { + guint32 alloc_by_4; + guint32 v; GData *d; d = *data; + alloc_by_4 = d->alloc / 4u; + + if (G_LIKELY (d->len > alloc_by_4)) + { + /* No shrinking */ + return FALSE; + } + if (d->len == 0) { /* The list became empty. We drop the allocated memory altogether. */ @@ -229,8 +248,30 @@ datalist_shrink (GData **data, GData **d_to_free) return TRUE; } - /* not reallocated. */ - return FALSE; + /* If the buffer is filled not more than 25%. Shrink to double the current length. */ + + v = d->len; + if (v != alloc_by_4) + { + /* d->alloc is a power of two. Usually, we remove one element at a + * time, then we will just reach reach a quarter of that. + * + * However, with g_datalist_id_remove_multiple(), len can be smaller + * at once. In that case, find first the next power of two. */ + v = g_nearest_pow (v); + } + v *= 2u; + +#if G_ENABLE_DEBUG + g_assert (v > d->len); + g_assert (v <= d->alloc / 2u); +#endif + + d->alloc = v; + d = g_realloc (d, G_STRUCT_OFFSET (GData, data) + (v * sizeof (GDataElt))); + *d_to_free = NULL; + *data = d; + return TRUE; } static GDataElt *