Fix the implementation of interned refstrings

The global hash table we use for interned strings should not own a
reference on the strings themselves, as otherwise we'd leak them all
over the place.

Instead, it should keep a "weak" reference to them; once the last
strong reference goes away, we drop remove the weak reference from the
hash table.
This commit is contained in:
Emmanuele Bassi 2018-07-03 17:21:38 +01:00
parent 501a8e96e8
commit 4248b4b300

View File

@ -27,6 +27,60 @@
* instead of copying the string among callers; when the last reference on * instead of copying the string among callers; when the last reference on
* the string is released, the resources allocated for it are freed. * the string is released, the resources allocated for it are freed.
* *
* Typically, reference counted strings can be used when parsing data from
* files and storing them into data structures that are passed to various
* callers:
*
* |[<!-- language="C" -->
* PersonDetails *
* person_details_from_data (const char *data)
* {
* // Use g_autoptr() to simplify error cases
* g_autoptr(GRefString) full_name = NULL;
* g_autoptr(GRefString) address = NULL;
* g_autoptr(GRefString) city = NULL;
* g_autoptr(GRefString) state = NULL;
* g_autoptr(GRefString) zip_code = NULL;
*
* // parse_person_details() is defined elsewhere; returns refcounted strings
* if (!parse_person_details (data, &full_name, &address, &city, &state, &zip_code))
* return NULL;
*
* if (!validate_zip_code (zip_code))
* return NULL;
*
* // add_address_to_cache() and add_full_name_to_cache() are defined
* // elsewhere; they add strings to various caches, using refcounted
* // strings to avoid copying data over and over again
* add_address_to_cache (address, city, state, zip_code);
* add_full_name_to_cache (full_name);
*
* // person_details_new() is defined elsewhere; it takes a reference
* // on each string
* PersonDetails *res = person_details_new (full_name,
* address,
* city,
* state,
* zip_code);
*
* return res;
* }
* ]|
*
* In the example above, we have multiple functions taking the same strings
* for different uses; with typical C strings, we'd have to copy the strings
* every time the life time rules of the data differ from the life time of
* the string parsed from the original buffer. With reference counted strings,
* each caller can take a reference on the data, and keep it as long as it
* needs to own the string.
*
* Reference counted strings can also be "interned" inside a global table
* owned by GLib; while an interned string has at least a reference, creating
* a new interned reference counted string with the same contents will return
* a reference to the existing string instead of creating a new reference
* counted string instance. Once the string loses its last reference, it will
* be automatically removed from the global interned strings table.
*
* Since: 2.58 * Since: 2.58
*/ */
@ -41,6 +95,11 @@
#include <string.h> #include <string.h>
/* A global table of refcounted strings; the hash table does not own
* the strings, just a pointer to them. Strings are interned as long
* as they are alive; once their reference count drops to zero, they
* are removed from the table
*/
G_LOCK_DEFINE_STATIC (interned_ref_strings); G_LOCK_DEFINE_STATIC (interned_ref_strings);
static GHashTable *interned_ref_strings; static GHashTable *interned_ref_strings;
@ -71,6 +130,26 @@ g_ref_string_new (const char *str)
return res; return res;
} }
/* interned_str_equal: variant of g_str_equal() that compares
* pointers as well as contents; this avoids running strcmp()
* on arbitrarily long strings, as it's more likely to have
* g_ref_string_new_intern() being called on the same refcounted
* string instance, than on a different string with the same
* contents
*/
static gboolean
interned_str_equal (gconstpointer v1,
gconstpointer v2)
{
const char *str1 = v1;
const char *str2 = v2;
if (v1 == v2)
return TRUE;
return strcmp (str1, str2) == 0;
}
/** /**
* g_ref_string_new_intern: * g_ref_string_new_intern:
* @str: (not nullable): a NUL-terminated string * @str: (not nullable): a NUL-terminated string
@ -82,15 +161,14 @@ g_ref_string_new (const char *str)
* the same contents of @str, it will return a new reference, instead of * the same contents of @str, it will return a new reference, instead of
* creating a new string. * creating a new string.
* *
* Returns: (not nullable): the newly created reference counted string, or * Returns: (transfer full) (not nullable): the newly created reference
* a new reference to an existing string * counted string, or a new reference to an existing string
* *
* Since: 2.58 * Since: 2.58
*/ */
char * char *
g_ref_string_new_intern (const char *str) g_ref_string_new_intern (const char *str)
{ {
gsize len;
char *res; char *res;
g_return_val_if_fail (str != NULL && *str != '\0', NULL); g_return_val_if_fail (str != NULL && *str != '\0', NULL);
@ -98,23 +176,23 @@ g_ref_string_new_intern (const char *str)
G_LOCK (interned_ref_strings); G_LOCK (interned_ref_strings);
if (G_UNLIKELY (interned_ref_strings == NULL)) if (G_UNLIKELY (interned_ref_strings == NULL))
interned_ref_strings = g_hash_table_new_full (g_str_hash, g_str_equal, interned_ref_strings = g_hash_table_new (g_str_hash, interned_str_equal);
(GDestroyNotify) g_atomic_rc_box_release,
NULL);
res = g_hash_table_lookup (interned_ref_strings, str); res = g_hash_table_lookup (interned_ref_strings, str);
if (res != NULL) if (res != NULL)
{ {
/* We acquire the reference while holding the lock, to
* avoid a potential race between releasing the lock on
* the hash table and another thread releasing the reference
* on the same string
*/
g_atomic_rc_box_acquire (res);
G_UNLOCK (interned_ref_strings); G_UNLOCK (interned_ref_strings);
return g_atomic_rc_box_acquire (res); return res;
} }
len = strlen (str); res = g_ref_string_new (str);
res = (char *) g_atomic_rc_box_dup (sizeof (char) * len + 1, str); g_hash_table_add (interned_ref_strings, res);
res[len] = '\0';
g_hash_table_add (interned_ref_strings, g_atomic_rc_box_acquire (res));
G_UNLOCK (interned_ref_strings); G_UNLOCK (interned_ref_strings);
return res; return res;
@ -147,8 +225,7 @@ remove_if_interned (gpointer data)
if (G_LIKELY (interned_ref_strings != NULL)) if (G_LIKELY (interned_ref_strings != NULL))
{ {
if (g_hash_table_contains (interned_ref_strings, str)) g_hash_table_remove (interned_ref_strings, str);
g_hash_table_remove (interned_ref_strings, str);
if (g_hash_table_size (interned_ref_strings) == 0) if (g_hash_table_size (interned_ref_strings) == 0)
g_clear_pointer (&interned_ref_strings, g_hash_table_destroy); g_clear_pointer (&interned_ref_strings, g_hash_table_destroy);