mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-04-17 04:48:49 +02:00
ghash: Significantly reduce peak memory use
When resizing, we were keeping both the old and new hash, key and value arrays around while we reinserted entries, resulting in a peak memory overhead of 50%. Using a temporary bookkeeping array with one bit per entry, we can now grow and shrink the main arrays in place using realloc() and an eviction scheme, reducing the overhead to 0.625% (assuming 64-bit keys and values). Tests show the CPU overhead is negligible.
This commit is contained in:
parent
dc983d74cc
commit
7eaf018b29
207
glib/ghash.c
207
glib/ghash.c
@ -374,6 +374,16 @@ g_hash_table_alloc_key_or_value_array (guint size, gboolean is_big G_GNUC_UNUSED
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline gpointer
|
||||||
|
g_hash_table_realloc_key_or_value_array (gpointer a, guint size, gboolean is_big G_GNUC_UNUSED)
|
||||||
|
{
|
||||||
|
#ifdef USE_SMALL_ARRAYS
|
||||||
|
return g_realloc (a, size * (is_big ? BIG_ENTRY_SIZE : SMALL_ENTRY_SIZE));
|
||||||
|
#else
|
||||||
|
return g_renew (gpointer, a, size);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
static inline gpointer
|
static inline gpointer
|
||||||
g_hash_table_fetch_key_or_value (gpointer a, guint index, gboolean is_big)
|
g_hash_table_fetch_key_or_value (gpointer a, guint index, gboolean is_big)
|
||||||
{
|
{
|
||||||
@ -395,6 +405,26 @@ g_hash_table_assign_key_or_value (gpointer a, guint index, gboolean is_big, gpoi
|
|||||||
*(((guint *) a) + index) = GPOINTER_TO_UINT (v);
|
*(((guint *) a) + index) = GPOINTER_TO_UINT (v);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline gpointer
|
||||||
|
g_hash_table_evict_key_or_value (gpointer a, guint index, gboolean is_big, gpointer v)
|
||||||
|
{
|
||||||
|
#ifndef USE_SMALL_ARRAYS
|
||||||
|
is_big = TRUE;
|
||||||
|
#endif
|
||||||
|
if (is_big)
|
||||||
|
{
|
||||||
|
gpointer r = *(((gpointer *) a) + index);
|
||||||
|
*(((gpointer *) a) + index) = v;
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
gpointer r = GUINT_TO_POINTER (*(((guint *) a) + index));
|
||||||
|
*(((guint *) a) + index) = GPOINTER_TO_UINT (v);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static inline guint
|
static inline guint
|
||||||
g_hash_table_hash_to_index (GHashTable *hash_table, guint hash)
|
g_hash_table_hash_to_index (GHashTable *hash_table, guint hash)
|
||||||
{
|
{
|
||||||
@ -642,6 +672,125 @@ g_hash_table_remove_all_nodes (GHashTable *hash_table,
|
|||||||
g_free (old_hashes);
|
g_free (old_hashes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
realloc_arrays (GHashTable *hash_table, gboolean is_a_set)
|
||||||
|
{
|
||||||
|
hash_table->hashes = g_renew (guint, hash_table->hashes, hash_table->size);
|
||||||
|
hash_table->keys = g_hash_table_realloc_key_or_value_array (hash_table->keys, hash_table->size, hash_table->have_big_keys);
|
||||||
|
|
||||||
|
if (is_a_set)
|
||||||
|
hash_table->values = hash_table->keys;
|
||||||
|
else
|
||||||
|
hash_table->values = g_hash_table_realloc_key_or_value_array (hash_table->values, hash_table->size, hash_table->have_big_values);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* When resizing the table in place, we use a temporary bit array to keep
|
||||||
|
* track of which entries have been assigned a proper location in the new
|
||||||
|
* table layout.
|
||||||
|
*
|
||||||
|
* Each bit corresponds to a bucket. A bit is set if an entry was assigned
|
||||||
|
* its corresponding location during the resize and thus should not be
|
||||||
|
* evicted. The array starts out cleared to zero. */
|
||||||
|
|
||||||
|
static inline gboolean
|
||||||
|
get_status_bit (const guint32 *bitmap, guint index)
|
||||||
|
{
|
||||||
|
return (bitmap[index / 32] >> (index % 32)) & 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
set_status_bit (guint32 *bitmap, guint index)
|
||||||
|
{
|
||||||
|
bitmap[index / 32] |= 1 << (index % 32);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* By calling dedicated resize functions for sets and maps, we avoid 2x
|
||||||
|
* test-and-branch per key in the inner loop. This yields a small
|
||||||
|
* performance improvement at the cost of a bit of macro gunk. */
|
||||||
|
|
||||||
|
/* Expands to the definition of an in-place rehash function named @fname.
 *
 * The generated function visits the first @old_size buckets of @hash_table
 * and moves every live entry to the bucket dictated by the table's current
 * mask. Entries found in a destination bucket are evicted and re-placed in
 * turn. @reallocated_buckets_bitmap records which buckets already hold an
 * entry in its final position.
 *
 * ASSIGN_KEYVAL and EVICT_KEYVAL must be defined at the point of expansion;
 * they decide whether only keys, or keys and values, are moved. */
#define DEFINE_RESIZE_FUNC(fname) \
static void fname (GHashTable *hash_table, guint old_size, guint32 *reallocated_buckets_bitmap) \
{ \
  guint bucket; \
  \
  for (bucket = 0; bucket < old_size; bucket++) \
    { \
      guint pending_hash = hash_table->hashes[bucket]; \
      gpointer pending_key, pending_value G_GNUC_UNUSED; \
      \
      /* Anything that is not a real hash is reset to unused; this also \
       * clears tombstones. */ \
      if (!HASH_IS_REAL (pending_hash)) \
        { \
          hash_table->hashes[bucket] = UNUSED_HASH_VALUE; \
          continue; \
        } \
      \
      /* This bucket already received its final entry through eviction. */ \
      if (get_status_bit (reallocated_buckets_bitmap, bucket)) \
        continue; \
      \
      /* Lift the entry out of its old bucket, leaving the bucket empty. */ \
      hash_table->hashes[bucket] = UNUSED_HASH_VALUE; \
      EVICT_KEYVAL (hash_table, bucket, NULL, NULL, pending_key, pending_value); \
      \
      /* Place the pending entry; if that displaces another live entry, \
       * the displaced one becomes pending and the loop repeats. */ \
      for (;;) \
        { \
          guint target = g_hash_table_hash_to_index (hash_table, pending_hash); \
          guint probe = 0; \
          guint displaced_hash; \
          \
          /* Probe past buckets that already hold a finally-placed entry. */ \
          while (get_status_bit (reallocated_buckets_bitmap, target)) \
            { \
              probe++; \
              target = (target + probe) & hash_table->mask; \
            } \
          \
          set_status_bit (reallocated_buckets_bitmap, target); \
          \
          displaced_hash = hash_table->hashes[target]; \
          hash_table->hashes[target] = pending_hash; \
          \
          if (!HASH_IS_REAL (displaced_hash)) \
            { \
              /* Target held no live entry: store and finish this chain. */ \
              ASSIGN_KEYVAL (hash_table, target, pending_key, pending_value); \
              break; \
            } \
          \
          /* Swap the pending entry with the one occupying the target. */ \
          pending_hash = displaced_hash; \
          EVICT_KEYVAL (hash_table, target, pending_key, pending_value, pending_key, pending_value); \
        } \
    } \
}
|
||||||
|
|
||||||
|
#define ASSIGN_KEYVAL(ht, index, key, value) G_STMT_START{ \
|
||||||
|
g_hash_table_assign_key_or_value ((ht)->keys, (index), (ht)->have_big_keys, (key)); \
|
||||||
|
g_hash_table_assign_key_or_value ((ht)->values, (index), (ht)->have_big_values, (value)); \
|
||||||
|
}G_STMT_END
|
||||||
|
|
||||||
|
#define EVICT_KEYVAL(ht, index, key, value, outkey, outvalue) G_STMT_START{ \
|
||||||
|
(outkey) = g_hash_table_evict_key_or_value ((ht)->keys, (index), (ht)->have_big_keys, (key)); \
|
||||||
|
(outvalue) = g_hash_table_evict_key_or_value ((ht)->values, (index), (ht)->have_big_values, (value)); \
|
||||||
|
}G_STMT_END
|
||||||
|
|
||||||
|
DEFINE_RESIZE_FUNC (resize_map)
|
||||||
|
|
||||||
|
#undef ASSIGN_KEYVAL
|
||||||
|
#undef EVICT_KEYVAL
|
||||||
|
|
||||||
|
#define ASSIGN_KEYVAL(ht, index, key, value) G_STMT_START{ \
|
||||||
|
g_hash_table_assign_key_or_value ((ht)->keys, (index), (ht)->have_big_keys, (key)); \
|
||||||
|
}G_STMT_END
|
||||||
|
|
||||||
|
#define EVICT_KEYVAL(ht, index, key, value, outkey, outvalue) G_STMT_START{ \
|
||||||
|
(outkey) = g_hash_table_evict_key_or_value ((ht)->keys, (index), (ht)->have_big_keys, (key)); \
|
||||||
|
}G_STMT_END
|
||||||
|
|
||||||
|
DEFINE_RESIZE_FUNC (resize_set)
|
||||||
|
|
||||||
|
#undef ASSIGN_KEYVAL
|
||||||
|
#undef EVICT_KEYVAL
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* g_hash_table_resize:
|
* g_hash_table_resize:
|
||||||
* @hash_table: our #GHashTable
|
* @hash_table: our #GHashTable
|
||||||
@ -658,54 +807,36 @@ g_hash_table_remove_all_nodes (GHashTable *hash_table,
|
|||||||
static void
|
static void
|
||||||
g_hash_table_resize (GHashTable *hash_table)
|
g_hash_table_resize (GHashTable *hash_table)
|
||||||
{
|
{
|
||||||
gpointer new_keys;
|
guint32 *reallocated_buckets_bitmap;
|
||||||
gpointer new_values;
|
guint old_size;
|
||||||
guint *new_hashes;
|
gboolean is_a_set;
|
||||||
gint old_size;
|
|
||||||
gint i;
|
|
||||||
|
|
||||||
old_size = hash_table->size;
|
old_size = hash_table->size;
|
||||||
|
is_a_set = hash_table->keys == hash_table->values;
|
||||||
|
|
||||||
g_hash_table_set_shift_from_size (hash_table, hash_table->nnodes * 2);
|
g_hash_table_set_shift_from_size (hash_table, hash_table->nnodes * 2);
|
||||||
|
|
||||||
new_keys = g_hash_table_alloc_key_or_value_array (hash_table->size, hash_table->have_big_keys);
|
if (hash_table->size > old_size)
|
||||||
if (hash_table->keys == hash_table->values)
|
{
|
||||||
new_values = new_keys;
|
realloc_arrays (hash_table, is_a_set);
|
||||||
|
memset (&hash_table->hashes[old_size], 0, (hash_table->size - old_size) * sizeof (guint));
|
||||||
|
|
||||||
|
reallocated_buckets_bitmap = g_new0 (guint32, (hash_table->size + 31) / 32);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
new_values = g_hash_table_alloc_key_or_value_array (hash_table->size, hash_table->have_big_values);
|
|
||||||
new_hashes = g_new0 (guint, hash_table->size);
|
|
||||||
|
|
||||||
for (i = 0; i < old_size; i++)
|
|
||||||
{
|
{
|
||||||
guint node_hash = hash_table->hashes[i];
|
reallocated_buckets_bitmap = g_new0 (guint32, (old_size + 31) / 32);
|
||||||
guint hash_val;
|
|
||||||
guint step = 0;
|
|
||||||
|
|
||||||
if (!HASH_IS_REAL (node_hash))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
hash_val = g_hash_table_hash_to_index (hash_table, node_hash);
|
|
||||||
|
|
||||||
while (!HASH_IS_UNUSED (new_hashes[hash_val]))
|
|
||||||
{
|
|
||||||
step++;
|
|
||||||
hash_val += step;
|
|
||||||
hash_val &= hash_table->mask;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
new_hashes[hash_val] = hash_table->hashes[i];
|
if (is_a_set)
|
||||||
g_hash_table_assign_key_or_value (new_keys, hash_val, hash_table->have_big_keys, g_hash_table_fetch_key_or_value (hash_table->keys, i, hash_table->have_big_keys));
|
resize_set (hash_table, old_size, reallocated_buckets_bitmap);
|
||||||
g_hash_table_assign_key_or_value (new_values, hash_val, hash_table->have_big_values, g_hash_table_fetch_key_or_value (hash_table->values, i, hash_table->have_big_values));
|
else
|
||||||
}
|
resize_map (hash_table, old_size, reallocated_buckets_bitmap);
|
||||||
|
|
||||||
if (hash_table->keys != hash_table->values)
|
g_free (reallocated_buckets_bitmap);
|
||||||
g_free (hash_table->values);
|
|
||||||
|
|
||||||
g_free (hash_table->keys);
|
if (hash_table->size < old_size)
|
||||||
g_free (hash_table->hashes);
|
realloc_arrays (hash_table, is_a_set);
|
||||||
|
|
||||||
hash_table->keys = new_keys;
|
|
||||||
hash_table->values = new_values;
|
|
||||||
hash_table->hashes = new_hashes;
|
|
||||||
|
|
||||||
hash_table->noccupied = hash_table->nnodes;
|
hash_table->noccupied = hash_table->nnodes;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user