diff --git a/gitypelib-internal.h b/gitypelib-internal.h index de2f131b6..227d6da29 100644 --- a/gitypelib-internal.h +++ b/gitypelib-internal.h @@ -1145,6 +1145,23 @@ gboolean g_typelib_validate (GITypelib *typelib, AttributeBlob *_attribute_blob_find_first (GIBaseInfo *info, guint32 blob_offset); +typedef struct _GITypelibHashBuilder GITypelibHashBuilder; + +GITypelibHashBuilder * _gi_typelib_hash_builder_new (void); + +void _gi_typelib_hash_builder_add_string (GITypelibHashBuilder *builder, const char *str, guint16 value); + +gboolean _gi_typelib_hash_builder_prepare (GITypelibHashBuilder *builder); + +guint32 _gi_typelib_hash_builder_get_buffer_size (GITypelibHashBuilder *builder); + +void _gi_typelib_hash_builder_pack (GITypelibHashBuilder *builder, guint8* mem, guint32 size); + +void _gi_typelib_hash_builder_destroy (GITypelibHashBuilder *builder); + +guint16 _gi_typelib_hash_search (guint8* memory, const char *str); + + G_END_DECLS #endif /* __G_TYPELIB_H__ */ diff --git a/gthash-test.c b/gthash-test.c new file mode 100644 index 000000000..a2e3cf82b --- /dev/null +++ b/gthash-test.c @@ -0,0 +1,65 @@ +/* GObject introspection: Test typelib hashing + * + * Copyright (C) 2010 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#include +#include "gitypelib-internal.h" + +static void +test_build_retrieve (void) +{ + GITypelibHashBuilder *builder; + guint32 bufsize; + guint8* buf; + + builder = _gi_typelib_hash_builder_new (); + + _gi_typelib_hash_builder_add_string (builder, "Action", 0); + _gi_typelib_hash_builder_add_string (builder, "ZLibDecompressor", 42); + _gi_typelib_hash_builder_add_string (builder, "VolumeMonitor", 9); + _gi_typelib_hash_builder_add_string (builder, "FileMonitorFlags", 31); + + if (!_gi_typelib_hash_builder_prepare (builder)) + g_assert_not_reached (); + + bufsize = _gi_typelib_hash_builder_get_buffer_size (builder); + + buf = g_malloc (bufsize); + + _gi_typelib_hash_builder_pack (builder, buf, bufsize); + + _gi_typelib_hash_builder_destroy (builder); + + g_assert (_gi_typelib_hash_search (buf, "Action") == 0); + g_assert (_gi_typelib_hash_search (buf, "ZLibDecompressor") == 42); + g_assert (_gi_typelib_hash_search (buf, "VolumeMonitor") == 9); + g_assert (_gi_typelib_hash_search (buf, "FileMonitorFlags") == 31); +} + +int +main(int argc, char **argv) +{ + g_type_init (); + g_test_init (&argc, &argv, NULL); + + g_test_add_func ("/gthash/build-retrieve", test_build_retrieve); + + return g_test_run (); +} + diff --git a/gthash.c b/gthash.c new file mode 100644 index 000000000..5042ebe3e --- /dev/null +++ b/gthash.c @@ -0,0 +1,210 @@ +/* GObject introspection: Typelib hashing + * + * Copyright (C) 2010 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#include +#include +#include + +#include "cmph/cmph.h" +#include "gitypelib-internal.h" + +#define ALIGN_VALUE(this, boundary) \ + (( ((unsigned long)(this)) + (((unsigned long)(boundary)) -1)) & (~(((unsigned long)(boundary))-1))) + +/** + * String hashing in the typelib. We have a set of static (fixed) strings, + * and given one, we need to find its index number. This problem is perfect + * hashing: http://en.wikipedia.org/wiki/Perfect_hashing + * + * I chose CMPH (http://cmph.sourceforge.net/) as it seemed high + * quality, well documented, and easy to embed. + * + * CMPH provides a number of algorithms; I chose BDZ, because while CHD + * appears to be the "best", the simplicitly of BDZ appealed, and really, + * we're only talking about thousands of strings here, not millions, so + * a few microseconds is no big deal. + * + * In memory, the format is: + * INT32 mph_size + * MPH (mph_size bytes) + * (padding for alignment to uint32 if necessary) + * INDEX (array of guint16) + * + * Because BDZ is not order preserving, we need a lookaside table which + * maps the hash value into the directory index. + */ + +struct _GITypelibHashBuilder { + gboolean prepared; + gboolean buildable; + cmph_t *c; + GHashTable *strings; + guint32 dirmap_offset; + guint32 packed_size; +}; + +GITypelibHashBuilder * +_gi_typelib_hash_builder_new (void) +{ + GITypelibHashBuilder *builder = g_slice_new0 (GITypelibHashBuilder); + builder->c = NULL; + builder->strings = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL); + return builder; +} + +void +_gi_typelib_hash_builder_add_string (GITypelibHashBuilder *builder, + const char *str, + guint16 value) +{ + g_return_if_fail (builder->c == NULL); + g_hash_table_insert (builder->strings, g_strdup (str), GUINT_TO_POINTER ((guint) value)); +} + +gboolean +_gi_typelib_hash_builder_prepare (GITypelibHashBuilder *builder) +{ + char **strs; + GHashTableIter hashiter; + gpointer key, value; + cmph_io_adapter_t *io; + cmph_config_t *config; + guint32 num_elts; + guint32 offset; + guint i; + + if (builder->prepared) + return builder->buildable; + g_assert (builder->c == NULL); + + num_elts = g_hash_table_size (builder->strings); + g_assert (num_elts <= 65536); + + strs = (char**) g_new (char *, num_elts + 1); + + i = 0; + g_hash_table_iter_init (&hashiter, builder->strings); + while (g_hash_table_iter_next (&hashiter, &key, &value)) + { + const char *str = key; + + strs[i++] = g_strdup (str); + } + strs[i++] = NULL; + + io = cmph_io_vector_adapter (strs, num_elts); + config = cmph_config_new (io); + cmph_config_set_algo (config, CMPH_BDZ); + + builder->c = cmph_new (config); + builder->prepared = TRUE; + if (!builder->c) + { + builder->buildable = FALSE; + goto out; + } + builder->buildable = TRUE; + g_assert (cmph_size (builder->c) == num_elts); + + /* Pack a size counter at front */ + offset = sizeof(guint32) + cmph_packed_size (builder->c); + builder->dirmap_offset = ALIGN_VALUE (offset, 4); + builder->packed_size = builder->dirmap_offset + (num_elts * sizeof(guint16)); + out: + return builder->buildable; +} + +guint32 +_gi_typelib_hash_builder_get_buffer_size (GITypelibHashBuilder *builder) +{ + g_return_val_if_fail (builder != NULL, 0); + g_return_val_if_fail (builder->prepared, 0); + g_return_val_if_fail (builder->buildable, 0 ); + + return builder->packed_size; +} + +void +_gi_typelib_hash_builder_pack (GITypelibHashBuilder *builder, guint8* mem, guint32 len) +{ + guint16 *table; + GHashTableIter hashiter; + gpointer key, value; + guint32 num_elts; + guint8 *packed_mem; + + g_return_if_fail (builder != NULL); + g_return_if_fail (builder->prepared); + g_return_if_fail (builder->buildable); + + g_assert (len >= builder->packed_size); + g_assert ((((unsigned long)mem) & 0x3) == 0); + + *((guint32*) mem) = builder->dirmap_offset; + packed_mem = (guint8*)(mem + sizeof(guint32)); + cmph_pack (builder->c, packed_mem); + + table = (guint16*) (mem + builder->dirmap_offset); + + num_elts = g_hash_table_size (builder->strings); + g_hash_table_iter_init (&hashiter, builder->strings); + while (g_hash_table_iter_next (&hashiter, &key, &value)) + { + const char *str = key; + guint16 strval = (guint16)GPOINTER_TO_UINT(value); + guint32 hashv; + + hashv = cmph_search_packed (packed_mem, str, strlen (str)); + g_assert (hashv >= 0 && hashv < num_elts); + table[hashv] = strval; + } +} + +void +_gi_typelib_hash_builder_destroy (GITypelibHashBuilder *builder) +{ + if (builder->c) + { + cmph_destroy (builder->c); + builder->c = NULL; + } + g_hash_table_destroy (builder->strings); + g_slice_free (GITypelibHashBuilder, builder); +} + +guint16 +_gi_typelib_hash_search (guint8* memory, const char *str) +{ + guint32 *mph; + guint16 *table; + guint32 dirmap_offset; + guint32 offset; + + g_assert ((((unsigned long)memory) & 0x3) == 0); + mph = ((guint32*)memory)+1; + + offset = cmph_search_packed (mph, str, strlen (str)); + + dirmap_offset = *((guint32*)memory); + table = (guint16*) (memory + dirmap_offset); + + return table[offset]; +} +