mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-01-28 23:16:14 +01:00
gdataset: add GHashTable lookup index to GData
GData tracks the entries in a linear array, and performs a linear search. That is very good, as long as the number of entries is small. However, when it grows, performance gets worse. It's not clear what the exact threshold is, or what the recommended maximum is. Also, GData is used to attach arbitrary user data to GObject. Which is a great feature, that a user might want to use. In that case, they may be less concerned about performance to get the benefits of the feature, and thus add more data than is best (though, it's unclear how much is too much). Also, GObject adds entries in its qdata. Hence, also for basic GObject operations, it's important that this performs well. Note that access to GData happens while holding a lock, we want to minimize the time while holding that lock. This patch ensures that access to GData is O(1) (and reasonably fast). It thus allows to use GData in ways that wasn't advised previously. There are alternatives, like using a binary search tree or always use a GHashTable. Instead, we keep the linear buffer. Only when the buffer grows to ALLOC_THRESHOLD_INDEX entries, we will start generating and maintaining a GHashTable index. So the common case where objects have few entries does not change. The memory overhead is only there, when the list grows. ALLOC_THRESHOLD_INDEX is set to 64 entries. We will allocate such a large buffer when adding the 33rd entry. During shrink, we will drop the buffer again when shrinking down to 16 entries. The reason for not always using the GHashTable is to save the memory in common cases where there are few entries. We use g_hash_table_add() to exploit the GHashTable optimization. We also let it point to the GDataElt from the linear array. The benefit is that we don't require individual allocations. The downside is that during reallocation we need to regenerate the entire index. In my tests this actually performs well. For example, following are timings for calling g_dataset_id_get_data() in a loop. This is the lookup time for an entry that doesn't exist. Obviously, the linear search would scale always best when we would only lookup the first entry in the list. num-entries time-before time-after 1 0.144 0.145 2 0.146 0.146 5 0.149 0.150 10 0.155 0.157 20 0.172 0.170 32 0.248 0.254 33 0.249 0.184 <== index in use 40 0.284 0.183 50 0.317 0.189 75 0.370 0.184 100 0.442 0.183 300 1.044 0.186 1000 3.170 0.184 10000 31.597 0.189
This commit is contained in:
parent
cc53252c5b
commit
0746df4906
173
glib/gdataset.c
173
glib/gdataset.c
@ -68,6 +68,22 @@
|
||||
|
||||
#define G_DATALIST_FLAGS_MASK_INTERNAL 0x7
|
||||
|
||||
/* When GData.alloc grows to size ALLOC_THRESHOLD_INDEX, we reserve one additional
|
||||
* GHashTable* at &data->data[data->alloc]. This will contain the index for fast
|
||||
* lookup. See datalist_index*() helpers.
|
||||
*
|
||||
* Note that we grow the GData.data buffer by doubling the allocation size. So
|
||||
* we first allocate 64 entries, when adding the 33 entry.
|
||||
*
|
||||
* Conversely, we exponentially shrink the buffer. That means, when we remove
|
||||
* entries and reach 16 (or lower), we will shrink the buffer from 64 to 32
|
||||
* entries (and stop using the index).
|
||||
*
|
||||
* So we start using the index when adding >= 33 entries. And stop using it
|
||||
* when removing to <= 16 entries.
|
||||
*/
|
||||
#define ALLOC_THRESHOLD_INDEX 64u
|
||||
|
||||
#define G_DATALIST_CLEAN_POINTER(ptr) \
|
||||
((GData *) ((gpointer) (((guintptr) (ptr)) & ~((guintptr) G_DATALIST_FLAGS_MASK_INTERNAL))))
|
||||
|
||||
@ -161,8 +177,59 @@ g_datalist_unlock_and_set (GData **datalist, gpointer ptr)
|
||||
static gsize
|
||||
datalist_alloc_size (guint32 alloc)
|
||||
{
|
||||
/* GDataElt also contains pointer. It thus is suitable aligned for pointers,
|
||||
* and we can just append the pointer for the index at the end. */
|
||||
return G_STRUCT_OFFSET (GData, data) +
|
||||
(((gsize) alloc) * sizeof (GDataElt));
|
||||
(((gsize) alloc) * sizeof (GDataElt)) +
|
||||
(G_UNLIKELY (alloc >= ALLOC_THRESHOLD_INDEX) ? sizeof (GHashTable *) : 0u);
|
||||
}
|
||||
|
||||
G_ALWAYS_INLINE static inline GHashTable **
|
||||
datalist_index_get_ptr (GData *data)
|
||||
{
|
||||
if (G_LIKELY (data->alloc < ALLOC_THRESHOLD_INDEX))
|
||||
return NULL;
|
||||
|
||||
return (gpointer) (&(data->data[data->alloc]));
|
||||
}
|
||||
|
||||
G_ALWAYS_INLINE static inline GHashTable *
|
||||
datalist_index_get (GData *data)
|
||||
{
|
||||
GHashTable **p_index;
|
||||
|
||||
p_index = datalist_index_get_ptr (data);
|
||||
|
||||
#if G_ENABLE_DEBUG
|
||||
g_assert (!p_index || *p_index);
|
||||
#endif
|
||||
|
||||
return G_UNLIKELY (p_index) ? *p_index : NULL;
|
||||
}
|
||||
|
||||
static guint
|
||||
_datalist_index_hash (gconstpointer key)
|
||||
{
|
||||
const GQuark *ptr = key;
|
||||
|
||||
G_STATIC_ASSERT (G_STRUCT_OFFSET (GDataElt, key) == 0);
|
||||
|
||||
return *ptr;
|
||||
}
|
||||
|
||||
static gboolean
|
||||
_datalist_index_equal (gconstpointer a, gconstpointer b)
|
||||
{
|
||||
const GQuark *ptr_a = a;
|
||||
const GQuark *ptr_b = b;
|
||||
|
||||
return *ptr_a == *ptr_b;
|
||||
}
|
||||
|
||||
G_ALWAYS_INLINE static inline GHashTable *
|
||||
datalist_index_new (void)
|
||||
{
|
||||
return g_hash_table_new (_datalist_index_hash, _datalist_index_equal);
|
||||
}
|
||||
|
||||
static GData *
|
||||
@ -170,8 +237,12 @@ datalist_realloc (GData *data, guint32 alloc, gboolean *out_reallocated)
|
||||
{
|
||||
guintptr data_old;
|
||||
gboolean reallocated;
|
||||
GHashTable *index;
|
||||
GHashTable **p_index;
|
||||
guint32 i;
|
||||
|
||||
data_old = (guintptr) ((gpointer) data);
|
||||
index = datalist_index_get (data);
|
||||
|
||||
data = g_realloc (data, datalist_alloc_size (alloc));
|
||||
|
||||
@ -186,12 +257,47 @@ datalist_realloc (GData *data, guint32 alloc, gboolean *out_reallocated)
|
||||
if (out_reallocated)
|
||||
*out_reallocated = reallocated;
|
||||
|
||||
/* Note that if data was @reallocated, then @index contains only dangling pointers.
|
||||
* We can only destroy/remove-all, which we rely on not following those pointers. */
|
||||
|
||||
p_index = datalist_index_get_ptr (data);
|
||||
|
||||
if (G_LIKELY (!p_index))
|
||||
{
|
||||
if (G_UNLIKELY (index))
|
||||
g_hash_table_unref (index);
|
||||
}
|
||||
else if (!reallocated && index)
|
||||
{
|
||||
/* The index is still fine and the pointers are all still valid. We
|
||||
* can keep it. */
|
||||
*p_index = index;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (G_UNLIKELY (index))
|
||||
{
|
||||
/* Note that the GHashTable's keys are now all dangling pointers!
|
||||
* We rely on remove-all to not following them. */
|
||||
g_hash_table_remove_all (index);
|
||||
}
|
||||
else
|
||||
index = datalist_index_new ();
|
||||
|
||||
*p_index = index;
|
||||
|
||||
for (i = 0; i < data->len; i++)
|
||||
g_hash_table_add (index, &data->data[i]);
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
static gboolean
|
||||
datalist_append (GData **data, GQuark key_id, gpointer new_data, GDestroyNotify destroy_func)
|
||||
{
|
||||
GDataElt *data_elt;
|
||||
GHashTable *index;
|
||||
gboolean reallocated;
|
||||
GData *d;
|
||||
|
||||
@ -201,6 +307,10 @@ datalist_append (GData **data, GQuark key_id, gpointer new_data, GDestroyNotify
|
||||
d = g_malloc (datalist_alloc_size (2u));
|
||||
d->len = 0;
|
||||
d->alloc = 2u;
|
||||
|
||||
if (2u >= ALLOC_THRESHOLD_INDEX)
|
||||
*(datalist_index_get_ptr (d)) = datalist_index_new ();
|
||||
|
||||
*data = d;
|
||||
reallocated = TRUE;
|
||||
}
|
||||
@ -223,19 +333,26 @@ datalist_append (GData **data, GQuark key_id, gpointer new_data, GDestroyNotify
|
||||
else
|
||||
reallocated = FALSE;
|
||||
|
||||
d->data[d->len] = (GDataElt){
|
||||
data_elt = &d->data[d->len];
|
||||
*data_elt = (GDataElt){
|
||||
.key = key_id,
|
||||
.data = new_data,
|
||||
.destroy = destroy_func,
|
||||
};
|
||||
d->len++;
|
||||
|
||||
index = datalist_index_get (d);
|
||||
if (G_UNLIKELY (index))
|
||||
g_hash_table_add (index, data_elt);
|
||||
|
||||
return reallocated;
|
||||
}
|
||||
|
||||
static void
|
||||
datalist_remove (GData *data, guint32 idx)
|
||||
{
|
||||
GHashTable *index;
|
||||
|
||||
#if G_ENABLE_DEBUG
|
||||
g_assert (idx < data->len);
|
||||
#endif
|
||||
@ -245,10 +362,18 @@ datalist_remove (GData *data, guint32 idx)
|
||||
* to @idx are left unchanged, and the last entry is moved to position @idx.
|
||||
* */
|
||||
|
||||
index = datalist_index_get (data);
|
||||
if (G_UNLIKELY (index))
|
||||
g_hash_table_remove (index, &data->data[idx]);
|
||||
|
||||
data->len--;
|
||||
|
||||
if (idx != data->len)
|
||||
data->data[idx] = data->data[data->len];
|
||||
{
|
||||
data->data[idx] = data->data[data->len];
|
||||
if (G_UNLIKELY (index))
|
||||
g_hash_table_add (index, &data->data[idx]);
|
||||
}
|
||||
}
|
||||
|
||||
static gboolean
|
||||
@ -271,10 +396,17 @@ datalist_shrink (GData **data, GData **d_to_free)
|
||||
|
||||
if (d->len == 0)
|
||||
{
|
||||
GHashTable *index;
|
||||
|
||||
/* The list became empty. We drop the allocated memory altogether. */
|
||||
|
||||
/* The caller will free the buffer after releasing the lock, to minimize
|
||||
* the time we hold the lock. Transfer it out. */
|
||||
|
||||
index = datalist_index_get (d);
|
||||
if (G_UNLIKELY (index))
|
||||
g_hash_table_unref (index);
|
||||
|
||||
*d_to_free = d;
|
||||
*data = NULL;
|
||||
return TRUE;
|
||||
@ -308,10 +440,16 @@ datalist_shrink (GData **data, GData **d_to_free)
|
||||
static void
|
||||
datalist_destroy (GData *data)
|
||||
{
|
||||
GHashTable *index;
|
||||
guint32 i;
|
||||
|
||||
/* Must be called without lock. Will free @data and invoke the
|
||||
* destroy() notifications. */
|
||||
|
||||
index = datalist_index_get (data);
|
||||
if (G_UNLIKELY (index))
|
||||
g_hash_table_unref (index);
|
||||
|
||||
for (i = 0; i < data->len; i++)
|
||||
{
|
||||
if (data->data[i].destroy)
|
||||
@ -324,14 +462,21 @@ datalist_destroy (GData *data)
|
||||
static GDataElt *
|
||||
datalist_find (GData *data, GQuark key_id, guint32 *out_idx)
|
||||
{
|
||||
GDataElt *data_elt;
|
||||
GHashTable *index;
|
||||
guint32 i;
|
||||
|
||||
if (data)
|
||||
if (!data)
|
||||
goto out_not_found;
|
||||
|
||||
index = datalist_index_get (data);
|
||||
|
||||
if (G_LIKELY (!index))
|
||||
{
|
||||
/* We have no index. Do a linear search. */
|
||||
for (i = 0; i < data->len; i++)
|
||||
{
|
||||
GDataElt *data_elt = &data->data[i];
|
||||
|
||||
data_elt = &data->data[i];
|
||||
if (data_elt->key == key_id)
|
||||
{
|
||||
if (out_idx)
|
||||
@ -339,7 +484,23 @@ datalist_find (GData *data, GQuark key_id, guint32 *out_idx)
|
||||
return data_elt;
|
||||
}
|
||||
}
|
||||
|
||||
goto out_not_found;
|
||||
}
|
||||
|
||||
data_elt = g_hash_table_lookup (index, &key_id);
|
||||
if (!data_elt)
|
||||
goto out_not_found;
|
||||
|
||||
#if G_ENABLE_DEBUG
|
||||
g_assert (data_elt >= data->data && data_elt < &data->data[data->len]);
|
||||
#endif
|
||||
|
||||
if (out_idx)
|
||||
*out_idx = (data_elt - data->data);
|
||||
return data_elt;
|
||||
|
||||
out_not_found:
|
||||
if (out_idx)
|
||||
*out_idx = G_MAXUINT32;
|
||||
return NULL;
|
||||
|
Loading…
Reference in New Issue
Block a user