GHash: introduce a "set" mode

Make hash tables start out in a mode in which they don't store
values at all, until the first insertion of a non-identical
key-value pair.

This reduces memory requirements by 1/3 when using hash tables
to store sets.

Based on a patch by Morten Welinder,
https://bugzilla.gnome.org/show_bug.cgi?id=644437
This commit is contained in:
Matthias Clasen 2011-04-30 22:28:34 -04:00
parent 0ae6bc3aaa
commit be991170fa
2 changed files with 103 additions and 6 deletions

View File

@ -34,6 +34,7 @@
#include "ghash.h"
#include "gstrfuncs.h"
#include "gatomic.h"
#include "gtestutils.h"
@ -69,6 +70,9 @@
* To lookup a value corresponding to a given key, use
* g_hash_table_lookup() and g_hash_table_lookup_extended().
*
* g_hash_table_lookup_extended() can also be used to simply
* check if a key is present in the hash table.
*
* To remove a key and value, use g_hash_table_remove().
*
* To call a function for each key and value pair use
@ -76,7 +80,49 @@
* key/value pairs in the hash table, see #GHashTableIter.
*
* To destroy a #GHashTable use g_hash_table_destroy().
**/
*
* <example>
* <title>Using a GHashTable as a set</title>
* <para>
* A common use-case for hash tables is to store information about
* a set of keys, without associating any particular value with each
* key. GHashTable optimizes one way of doing so: If you store only
* key-value pairs where key == value, then GHashTable does not
* allocate memory to store the values, which can be a considerable
* space saving, if your set is large.
* </para>
* <programlisting>
* GHashTable *
* set_new (GHashFunc hash_func,
* GEqualFunc equal_func,
* GDestroyNotify destroy)
* {
* return g_hash_table_new_full (hash_func, equal_func, destroy);
* }
*
* void
* set_insert (GHashTable *set,
* gpointer element)
* {
* g_hash_table_insert (set, element, element);
* }
*
* gboolean
* set_contains (GHashTable *set,
* gpointer element)
* {
* return g_hash_table_lookup_extended (set, element, NULL, NULL);
* }
*
* gboolean
* set_remove (GHashTable *set,
* gpointer element)
* {
* return g_hash_table_remove (set, element);
* }
* </programlisting>
* </example>
*/
/**
* GHashTable:
@ -459,8 +505,11 @@ g_hash_table_resize (GHashTable *hash_table)
old_size = hash_table->size;
g_hash_table_set_shift_from_size (hash_table, hash_table->nnodes * 2);
new_keys = g_new0 (gpointer, hash_table->size);
new_values = g_new0 (gpointer, hash_table->size);
new_keys = g_new0 (gpointer, hash_table->size);
if (hash_table->keys == hash_table->values)
new_values = new_keys;
else
new_values = g_new0 (gpointer, hash_table->size);
new_hashes = g_new0 (guint, hash_table->size);
for (i = 0; i < old_size; i++)
@ -486,8 +535,10 @@ g_hash_table_resize (GHashTable *hash_table)
new_values[hash_val] = hash_table->values[i];
}
if (hash_table->keys != hash_table->values)
g_free (hash_table->values);
g_free (hash_table->keys);
g_free (hash_table->values);
g_free (hash_table->hashes);
hash_table->keys = new_keys;
@ -582,7 +633,7 @@ g_hash_table_new_full (GHashFunc hash_func,
hash_table->key_destroy_func = key_destroy_func;
hash_table->value_destroy_func = value_destroy_func;
hash_table->keys = g_new0 (gpointer, hash_table->size);
hash_table->values = g_new0 (gpointer, hash_table->size);
hash_table->values = hash_table->keys;
hash_table->hashes = g_new0 (guint, hash_table->size);
return hash_table;
@ -793,8 +844,9 @@ g_hash_table_unref (GHashTable *hash_table)
if (g_atomic_int_exchange_and_add (&hash_table->ref_count, -1) - 1 == 0)
{
g_hash_table_remove_all_nodes (hash_table, TRUE);
if (hash_table->keys != hash_table->values)
g_free (hash_table->values);
g_free (hash_table->keys);
g_free (hash_table->values);
g_free (hash_table->hashes);
g_slice_free (GHashTable, hash_table);
}
@ -921,6 +973,9 @@ g_hash_table_insert_internal (GHashTable *hash_table,
g_return_if_fail (hash_table != NULL);
g_return_if_fail (hash_table->ref_count > 0);
if (G_UNLIKELY (hash_table->keys == hash_table->values && key != value))
hash_table->values = g_memdup (hash_table->keys, sizeof (gpointer) * hash_table->size);
node_index = g_hash_table_lookup_node (hash_table, key, &key_hash);
old_hash = hash_table->hashes[node_index];

View File

@ -486,6 +486,47 @@ string_hash_test (void)
g_hash_table_destroy (h);
}
static void
set_check (gpointer key,
gpointer value,
gpointer user_data)
{
int *pi = user_data;
if (key != value)
g_assert_not_reached ();
g_assert_cmpint (atoi (key) % 7, ==, 2);
(*pi)++;
}
static void
set_hash_test (void)
{
GHashTable *hash_table =
g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);
int i;
for (i = 2; i < 5000; i += 7)
{
char *s = g_strdup_printf ("%d", i);
g_hash_table_insert (hash_table, s, s);
}
i = 0;
g_hash_table_foreach (hash_table, set_check, &i);
g_assert_cmpint (i, ==, g_hash_table_size (hash_table));
/* this will cause the hash table to loose set nature */
g_hash_table_insert (hash_table, g_strdup ("a"), "b");
g_assert_cmpstr (g_hash_table_lookup (hash_table, "2"), ==, "2");
g_assert_cmpstr (g_hash_table_lookup (hash_table, "a"), ==, "b");
g_hash_table_destroy (hash_table);
}
static void
test_hash_misc (void)
{
@ -706,6 +747,7 @@ main (int argc, char *argv[])
g_test_add_func ("/hash/int64", int64_hash_test);
g_test_add_func ("/hash/double", double_hash_test);
g_test_add_func ("/hash/string", string_hash_test);
g_test_add_func ("/hash/set", set_hash_test);
g_test_add_func ("/hash/ref", test_hash_ref);
g_test_add_func ("/hash/remove-all", test_remove_all);