/*
 * Copyright © 2010 Codethink Limited
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 *
 * Author: Ryan Lortie <desrt@desrt.ca>
 */
#include "gvdb-reader.h"
#include "gvdb-format.h"
#include <string.h>
struct _GvdbTable {
GBytes *bytes;
2010-04-16 03:26:34 +02:00
const gchar *data;
gsize size;
gboolean byteswapped;
gboolean trusted;
const guint32_le *bloom_words;
2010-04-16 03:26:34 +02:00
guint32 n_bloom_words;
guint bloom_shift;
const guint32_le *hash_buckets;
2010-04-16 03:26:34 +02:00
guint32 n_buckets;
struct gvdb_hash_item *hash_items;
guint32 n_hash_items;
};
/* Locates the key fragment recorded for @item.
 *
 * @size is always written with the fragment length taken from the item.
 * The return value points into the image (it is not NUL-terminated by
 * this function), or is NULL when the fragment would wrap around or run
 * past the end of the image.
 */
static const gchar *
gvdb_table_item_get_key (GvdbTable                   *file,
                         const struct gvdb_hash_item *item,
                         gsize                       *size)
{
  const guint32 offset = guint32_from_le (item->key_start);
  guint32 limit;

  *size = guint16_from_le (item->key_size);

  /* 32-bit arithmetic on purpose: a wrap shows up as limit < offset */
  limit = offset + *size;

  if G_UNLIKELY (limit < offset || limit > file->size)
    return NULL;

  return &file->data[offset];
}
/* Turns an on-disk (start, end) pointer into an address inside the image.
 *
 * Returns NULL, leaving @size untouched, when the range is inverted,
 * extends past the end of the image, or when its start offset has any
 * of the low bits selected by (@alignment - 1) set.  On success @size
 * receives the length of the range.
 */
static gconstpointer
gvdb_table_dereference (GvdbTable                 *file,
                        const struct gvdb_pointer *pointer,
                        gint                       alignment,
                        gsize                     *size)
{
  const guint32 first = guint32_from_le (pointer->start);
  const guint32 past = guint32_from_le (pointer->end);

  if G_UNLIKELY (first > past)
    return NULL;

  if G_UNLIKELY (past > file->size)
    return NULL;

  if G_UNLIKELY ((first & (alignment - 1)) != 0)
    return NULL;

  *size = past - first;

  return file->data + first;
}
/* Parses the hash table that @pointer refers to and records where its
 * bloom words, buckets and items live.
 *
 * The region is consumed front to back: header, bloom words, buckets,
 * then hash items.  Each part is only recorded in @file once it has
 * been shown to fit, and the function returns silently at the first
 * part that does not.  Fields belonging to later parts then keep
 * whatever value they already had.
 */
static void
gvdb_table_setup_root (GvdbTable                 *file,
                       const struct gvdb_pointer *pointer)
{
  const struct gvdb_hash_header *header;
  guint32 bloom_count;
  guint32 bucket_count;
  gsize bloom_bytes;
  gsize bucket_bytes;
  gsize remaining;

  header = gvdb_table_dereference (file, pointer, 4, &remaining);

  if G_UNLIKELY (header == NULL || remaining < sizeof *header)
    return;

  remaining -= sizeof *header;

  /* Only the low 27 bits are used as the bloom word count.
   * NOTE(review): the upper five bits are discarded here and nothing
   * stores them in file->bloom_shift -- confirm against the format.
   */
  bloom_count = guint32_from_le (header->n_bloom_words) & ((1u << 27) - 1);
  bucket_count = guint32_from_le (header->n_buckets);

  bloom_bytes = bloom_count * sizeof (guint32_le);

  if G_UNLIKELY (bloom_bytes > remaining)
    return;

  file->bloom_words = (gpointer) (header + 1);
  file->n_bloom_words = bloom_count;
  remaining -= bloom_bytes;

  /* reject counts whose byte size could overflow before comparing */
  if G_UNLIKELY (bucket_count > G_MAXUINT / sizeof (guint32_le))
    return;

  bucket_bytes = bucket_count * sizeof (guint32_le);

  if G_UNLIKELY (bucket_bytes > remaining)
    return;

  file->hash_buckets = file->bloom_words + file->n_bloom_words;
  file->n_buckets = bucket_count;
  remaining -= bucket_bytes;

  /* whatever is left must be a whole number of hash items */
  if G_UNLIKELY (remaining % sizeof (struct gvdb_hash_item) != 0)
    return;

  file->hash_items = (gpointer) (file->hash_buckets + bucket_count);
  file->n_hash_items = remaining / sizeof (struct gvdb_hash_item);
}
/**
 * gvdb_table_new_from_bytes:
 * @bytes: the #GBytes with the data
 * @trusted: if the contents of @bytes are trusted
 * @error: %NULL, or a pointer to a %NULL #GError
 *
 * Creates a new #GvdbTable from the contents of @bytes.  The table
 * takes its own reference on @bytes.
 *
 * This call can fail if the header contained in @bytes is invalid
 * (too short, unknown signature, or a version other than 0) or if
 * @bytes is empty; if so, %G_FILE_ERROR_INVAL will be returned.
 *
 * Only the file header is validated here.  If the root hash table it
 * points to is malformed, a table is still returned; it simply has
 * fewer (or no) entries to look up.
 *
 * You should call gvdb_table_free() on the return result when you no
 * longer require it.
 *
 * Returns: a new #GvdbTable, or %NULL with @error set
 **/
GvdbTable *
gvdb_table_new_from_bytes (GBytes    *bytes,
                           gboolean   trusted,
                           GError   **error)
{
  const struct gvdb_header *header;
  GvdbTable *file;

  file = g_slice_new0 (GvdbTable);
  file->bytes = g_bytes_ref (bytes);
  file->data = g_bytes_get_data (bytes, &file->size);
  file->trusted = trusted;

  if (file->size < sizeof (struct gvdb_header))
    goto invalid;

  header = (gpointer) file->data;

  /* version 0 is the only one accepted, whatever the byte order */
  if (guint32_from_le (header->version) != 0)
    goto invalid;

  if (header->signature[0] == GVDB_SIGNATURE0 &&
      header->signature[1] == GVDB_SIGNATURE1)
    file->byteswapped = FALSE;

  else if (header->signature[0] == GVDB_SWAPPED_SIGNATURE0 &&
           header->signature[1] == GVDB_SWAPPED_SIGNATURE1)
    file->byteswapped = TRUE;

  else
    goto invalid;

  gvdb_table_setup_root (file, &header->root);

  return file;

invalid:
  g_set_error_literal (error, G_FILE_ERROR, G_FILE_ERROR_INVAL, "invalid gvdb header");

  g_bytes_unref (file->bytes);

  g_slice_free (GvdbTable, file);

  return NULL;
}
/**
 * gvdb_table_new:
 * @filename: a filename
 * @trusted: if the contents of the file are trusted
 * @error: %NULL, or a pointer to a %NULL #GError
 *
 * Creates a new #GvdbTable using the #GMappedFile for @filename as the
 * #GBytes.
 *
 * This function will fail if the file cannot be opened.
 * In that case, the #GError that is returned will be an error from
 * g_mapped_file_new().
 *
 * An empty or corrupt file will result in %G_FILE_ERROR_INVAL; the
 * message of that error is prefixed with "@filename: ".
 *
 * You should call gvdb_table_free() on the return result when you no
 * longer require it.
 *
 * Returns: a new #GvdbTable, or %NULL with @error set
 **/
GvdbTable *
gvdb_table_new (const gchar  *filename,
                gboolean      trusted,
                GError      **error)
{
  GMappedFile *mapped;
  GvdbTable *table;
  GBytes *bytes;

  mapped = g_mapped_file_new (filename, FALSE, error);
  if (!mapped)
    return NULL;

  bytes = g_mapped_file_get_bytes (mapped);
  table = gvdb_table_new_from_bytes (bytes, trusted, error);

  /* the table holds its own reference on 'bytes' if it was created */
  g_mapped_file_unref (mapped);
  g_bytes_unref (bytes);

  /* does nothing unless gvdb_table_new_from_bytes() set an error */
  g_prefix_error (error, "%s: ", filename);

  return table;
}
/* Quick membership pre-check for @hash_value.
 *
 * Returns FALSE only when the bloom filter proves that no item with
 * this hash can be present.  A table without bloom words always
 * answers TRUE.
 */
static gboolean
gvdb_table_bloom_filter (GvdbTable *file,
                         guint32    hash_value)
{
  guint32 word, mask;

  if (file->n_bloom_words == 0)
    return TRUE;

  word = (hash_value / 32) % file->n_bloom_words;

  /* Shift an unsigned 1: '1 << 31' on a signed int is undefined. */
  mask = ((guint32) 1) << (hash_value & 31);
  mask |= ((guint32) 1) << ((hash_value >> file->bloom_shift) & 31);

  return (guint32_from_le (file->bloom_words[word]) & mask) == mask;
}
/* Checks that the full name of @item is exactly the first @key_length
 * bytes of @key.
 *
 * An item stores only the last fragment of its name; the rest comes
 * from its chain of parents.  The key is therefore matched from the
 * end towards the start, one fragment per item, walking the parent
 * links.  The walk is a loop rather than recursion so that stack use
 * does not depend on how many parent links the file contains.
 */
static gboolean
gvdb_table_check_name (GvdbTable             *file,
                       struct gvdb_hash_item *item,
                       const gchar           *key,
                       guint                  key_length)
{
  for (;;)
    {
      const gchar *this_key;
      gsize this_size;
      guint32 parent;

      this_key = gvdb_table_item_get_key (file, item, &this_size);

      if G_UNLIKELY (this_key == NULL || this_size > key_length)
        return FALSE;

      key_length -= this_size;

      if G_UNLIKELY (memcmp (this_key, key + key_length, this_size) != 0)
        return FALSE;

      parent = guint32_from_le (item->parent);

      /* whole key consumed and no parent left: full match */
      if (key_length == 0 && parent == 0xffffffffu)
        return TRUE;

      /* An empty fragment consumes nothing, so following its parent
       * could loop forever on a self-referential file; treat it (and
       * an out-of-range parent index) as a mismatch.
       */
      if G_UNLIKELY (parent >= file->n_hash_items || this_size == 0)
        return FALSE;

      item = &file->hash_items[parent];
    }
}
static const struct gvdb_hash_item *
2010-04-16 03:26:34 +02:00
gvdb_table_lookup (GvdbTable *file,
2010-04-28 19:39:02 +02:00
const gchar *key,
gchar type)
2010-04-16 03:26:34 +02:00
{
guint32 hash_value = 5381;
guint key_length;
guint32 bucket;
guint32 lastno;
guint32 itemno;
if G_UNLIKELY (file->n_buckets == 0 || file->n_hash_items == 0)
return NULL;
for (key_length = 0; key[key_length]; key_length++)
hash_value = (hash_value * 33) + ((signed char *) key)[key_length];
2010-04-16 03:26:34 +02:00
if (!gvdb_table_bloom_filter (file, hash_value))
return NULL;
bucket = hash_value % file->n_buckets;
itemno = guint32_from_le (file->hash_buckets[bucket]);
2010-04-16 03:26:34 +02:00
if (bucket == file->n_buckets - 1 ||
(lastno = guint32_from_le(file->hash_buckets[bucket + 1])) > file->n_hash_items)
2010-04-16 03:26:34 +02:00
lastno = file->n_hash_items;
while G_LIKELY (itemno < lastno)
{
struct gvdb_hash_item *item = &file->hash_items[itemno];
if (hash_value == guint32_from_le (item->hash_value))
if G_LIKELY (gvdb_table_check_name (file, item, key, key_length))
if G_LIKELY (item->type == type)
return item;
itemno++;
}
return NULL;
}
2010-04-28 19:39:02 +02:00
/* Resolves the value of @item as an array of 32-bit words.
 *
 * On success returns TRUE with @list pointing into the image and
 * @length set to the number of words.  Returns FALSE when the value
 * pointer cannot be dereferenced with 4-byte alignment or when its
 * size is not a multiple of 4; @length is not written in that case.
 */
static gboolean
gvdb_table_list_from_item (GvdbTable                    *table,
                           const struct gvdb_hash_item  *item,
                           const guint32_le            **list,
                           guint                        *length)
{
  gsize size;

  *list = gvdb_table_dereference (table, &item->value.pointer, 4, &size);

  /* failure is the unexpected path, so hint it as unlikely */
  if G_UNLIKELY (*list == NULL || size % 4)
    return FALSE;

  *length = size / 4;

  return TRUE;
}
/**
 * gvdb_table_get_names:
 * @table: a #GvdbTable
 * @length: (optional): the number of items returned, or %NULL
 *
 * Gets a list of all names contained in @table.
 *
 * No call to gvdb_table_get_table(), gvdb_table_list() or
 * gvdb_table_get_value() will succeed unless it is for one of the
 * names returned by this function.
 *
 * Note that some names that are returned may still fail for all of the
 * above calls in the case of a corrupted file.  Note also that the
 * returned strings may not be utf8.
 *
 * Returns: (array length=length): a %NULL-terminated list of strings, of length @length
 **/
gchar **
gvdb_table_get_names (GvdbTable *table,
                      gsize     *length)
{
  const guint n_items = table->n_hash_items;
  gchar **names;
  guint n_names;
  guint n_resolved;
  guint progress;

  /* An item stores only the last fragment of its name; the full name
   * is its parent's full name followed by that fragment.  Rather than
   * chasing each item's parent chain (which would need recursion and
   * protection against items that refer to themselves), the item
   * array is swept repeatedly:
   *
   *  - a root item (no parent) can always be named;
   *  - any other item can be named once its parent has been, whether
   *    in an earlier sweep or earlier in the current one.
   *
   * Sweeping stops when everything is named or when a sweep names
   * nothing new.  Items caught in a parent cycle never get a name.
   * The cost is O(n*m) for a tree of depth m, which is fine for any
   * sane file and keeps the code simple.
   *
   * 'names' starts out all-NULL with one spare slot for the
   * terminator, 'progress' counts the names produced by the current
   * sweep and 'n_resolved' those produced overall.
   */
  names = g_new0 (gchar *, n_items + 1);
  n_resolved = 0;

  do
    {
      guint idx;

      progress = 0;

      for (idx = 0; idx < n_items; idx++)
        {
          const struct gvdb_hash_item *item;
          const gchar *prefix;
          const gchar *fragment;
          gsize fragment_len;
          guint32 parent;

          /* named by an earlier sweep */
          if (names[idx] != NULL)
            continue;

          item = &table->hash_items[idx];
          parent = guint32_from_le (item->parent);

          if (parent == 0xffffffffu)
            prefix = NULL;                  /* root item */
          else if (parent < n_items && names[parent] != NULL)
            prefix = names[parent];         /* parent already named */
          else
            continue;                       /* not yet, or never */

          fragment = gvdb_table_item_get_key (table, item, &fragment_len);
          if (fragment == NULL)
            continue;

          if (prefix == NULL)
            {
              names[idx] = g_strndup (fragment, fragment_len);
            }
          else
            {
              gsize prefix_len = strlen (prefix);
              gchar *joined = g_malloc (prefix_len + fragment_len + 1);

              memcpy (joined, prefix, prefix_len);
              memcpy (joined + prefix_len, fragment, fragment_len);
              joined[prefix_len + fragment_len] = '\0';

              names[idx] = joined;
            }

          progress++;
        }

      n_resolved += progress;
    }
  while (progress != 0 && n_resolved < n_items);

  n_names = n_items;

  /* A corrupted table leaves holes in 'names'; squeeze them out so
   * that the first NULL really is the terminator.
   */
  if G_UNLIKELY (n_resolved != n_items)
    {
      GPtrArray *packed;
      guint idx;

      packed = g_ptr_array_sized_new (n_items + 1 /* NULL terminator */);

      for (idx = 0; idx < n_items; idx++)
        if (names[idx] != NULL)
          g_ptr_array_add (packed, names[idx]);

      g_free (names);

      n_names = packed->len;
      g_ptr_array_add (packed, NULL);
      names = (gchar **) g_ptr_array_free (packed, FALSE);
    }

  if (length != NULL)
    {
      G_STATIC_ASSERT (sizeof (*length) >= sizeof (n_names));
      *length = n_names;
    }

  return names;
}
/**
 * gvdb_table_list:
 * @file: a #GvdbTable
 * @key: a string
 *
 * List all of the keys that appear below @key.  The nesting of keys
 * within the hash file is defined by the program that created the hash
 * file.  One thing is constant: each item in the returned array can be
 * concatenated to @key to obtain the full name of that key.
 *
 * It is not possible to tell from this function if a given key is
 * itself a path, a value, or another hash table; you are expected to
 * know this for yourself.
 *
 * If an entry of the list refers to an item that does not exist, or
 * whose key cannot be read, an empty string is returned in its place.
 *
 * You should call g_strfreev() on the return result when you no longer
 * require it.
 *
 * Returns: a %NULL-terminated string array, or %NULL if @key is not
 *   present as a list or the list itself cannot be read
 **/
gchar **
gvdb_table_list (GvdbTable   *file,
                 const gchar *key)
{
  const struct gvdb_hash_item *item;
  const guint32_le *list;
  gchar **strv;
  guint length;
  guint i;

  if ((item = gvdb_table_lookup (file, key, 'L')) == NULL)
    return NULL;

  if (!gvdb_table_list_from_item (file, item, &list, &length))
    return NULL;

  strv = g_new (gchar *, length + 1);

  for (i = 0; i < length; i++)
    {
      guint32 itemno = guint32_from_le (list[i]);
      const gchar *string = NULL;
      gsize strsize = 0;

      if (itemno < file->n_hash_items)
        {
          const struct gvdb_hash_item *child = file->hash_items + itemno;

          string = gvdb_table_item_get_key (file, child, &strsize);
        }

      /* keep the array dense: a bad entry becomes an empty string */
      if (string != NULL)
        strv[i] = g_strndup (string, strsize);
      else
        strv[i] = g_malloc0 (1);
    }

  strv[i] = NULL;

  return strv;
}
/**
 * gvdb_table_has_value:
 * @file: a #GvdbTable
 * @key: a string
 *
 * Checks for a value named @key in @file.
 *
 * Note: this function does not consider non-value nodes (other hash
 * tables, for example).
 *
 * The value itself is not parsed; it only needs to be locatable inside
 * the file with the alignment required for values.
 *
 * Returns: %TRUE if @key is in the table
 **/
gboolean
gvdb_table_has_value (GvdbTable    *file,
                      const gchar  *key)
{
  /* A plain automatic variable: a 'static' here would share the
   * lookup result between all callers for no benefit.
   */
  const struct gvdb_hash_item *item;
  gsize size;

  item = gvdb_table_lookup (file, key, 'v');

  if (item == NULL)
    return FALSE;

  return gvdb_table_dereference (file, &item->value.pointer, 8, &size) != NULL;
}
/* Builds the GVariant stored as the value of @item.
 *
 * The stored data is wrapped as a variant of type 'v' backed by a
 * sub-range of the table's GBytes, and the variant it contains is
 * returned.  No byteswapping is done here.
 *
 * Returns NULL if the value pointer cannot be dereferenced with 8-byte
 * alignment; otherwise a reference the caller must release with
 * g_variant_unref().
 */
static GVariant *
gvdb_table_value_from_item (GvdbTable                   *table,
                            const struct gvdb_hash_item *item)
{
  GVariant *variant, *value;
  gconstpointer data;
  GBytes *bytes;
  gsize size;

  data = gvdb_table_dereference (table, &item->value.pointer, 8, &size);

  if G_UNLIKELY (data == NULL)
    return NULL;

  bytes = g_bytes_new_from_bytes (table->bytes, ((const gchar *) data) - table->data, size);
  variant = g_variant_new_from_bytes (G_VARIANT_TYPE_VARIANT, bytes, table->trusted);
  value = g_variant_get_variant (variant);

  /* only the inner value is handed out; drop the wrapper and our
   * reference on the byte range
   */
  g_variant_unref (variant);
  g_bytes_unref (bytes);

  return value;
}
/**
* gvdb_table_get_value:
* @file: a #GvdbTable
* @key: a string
*
* Looks up a value named @key in @file.
*
* If the value is not found then %NULL is returned. Otherwise, a new
* #GVariant instance is returned. The #GVariant does not depend on the
* continued existence of @file.
*
* You should call g_variant_unref() on the return result when you no
* longer require it.
*
* Returns: a #GVariant, or %NULL
2010-04-16 03:26:34 +02:00
**/
GVariant *
gvdb_table_get_value (GvdbTable *file,
2010-06-10 19:34:56 +02:00
const gchar *key)
2010-04-16 03:26:34 +02:00
{
const struct gvdb_hash_item *item;
2010-06-10 21:27:46 +02:00
GVariant *value;
2010-04-16 03:26:34 +02:00
if ((item = gvdb_table_lookup (file, key, 'v')) == NULL)
return NULL;
2010-06-10 21:27:46 +02:00
value = gvdb_table_value_from_item (file, item);
if (value && file->byteswapped)
{
GVariant *tmp;
tmp = g_variant_byteswap (value);
g_variant_unref (value);
value = tmp;
}
return value;
2010-04-16 03:26:34 +02:00
}
/**
 * gvdb_table_get_raw_value:
 * @table: a #GvdbTable
 * @key: a string
 *
 * Looks up a value named @key in @table.
 *
 * This call is equivalent to gvdb_table_get_value() except that it
 * never byteswaps the value.
 *
 * You should call g_variant_unref() on the return result when you no
 * longer require it.
 *
 * Returns: a #GVariant, or %NULL
 **/
GVariant *
gvdb_table_get_raw_value (GvdbTable   *table,
                          const gchar *key)
{
  const struct gvdb_hash_item *item;

  if ((item = gvdb_table_lookup (table, key, 'v')) == NULL)
    return NULL;

  return gvdb_table_value_from_item (table, item);
}
/**
 * gvdb_table_get_table:
 * @file: a #GvdbTable
 * @key: a string
 *
 * Looks up the hash table named @key in @file.
 *
 * The toplevel hash table in a #GvdbTable can contain reference to
 * child hash tables (and those can contain further references...).
 *
 * If @key is not found in @file then %NULL is returned.  Otherwise, a
 * new #GvdbTable is returned, referring to the child hashtable as
 * contained in the file.  This newly-created #GvdbTable does not depend
 * on the continued existence of @file.
 *
 * If @key is found but the child hash table it points to is malformed,
 * a table is still returned; it simply has fewer (or no) entries to
 * look up.
 *
 * You should call gvdb_table_free() on the return result when you no
 * longer require it.
 *
 * Returns: a new #GvdbTable, or %NULL
 **/
GvdbTable *
gvdb_table_get_table (GvdbTable   *file,
                      const gchar *key)
{
  const struct gvdb_hash_item *item;
  GvdbTable *new;

  item = gvdb_table_lookup (file, key, 'H');

  if (item == NULL)
    return NULL;

  /* the child shares the parent's image and holds its own reference */
  new = g_slice_new0 (GvdbTable);
  new->bytes = g_bytes_ref (file->bytes);
  new->byteswapped = file->byteswapped;
  new->trusted = file->trusted;
  new->data = file->data;
  new->size = file->size;

  gvdb_table_setup_root (new, &item->value.pointer);

  return new;
}
/**
 * gvdb_table_free:
 * @file: (nullable): a #GvdbTable
 *
 * Frees @file, dropping its reference on the underlying #GBytes.
 *
 * Passing %NULL is allowed and does nothing.
 **/
void
gvdb_table_free (GvdbTable *file)
{
  if (file == NULL)
    return;

  g_bytes_unref (file->bytes);
  g_slice_free (GvdbTable, file);
}
/**
 * gvdb_table_is_valid:
 * @table: a #GvdbTable
 *
 * Checks if the table is still valid.
 *
 * An on-disk GVDB can be marked as invalid.  This happens when the file
 * has been replaced.  The appropriate action is typically to reopen the
 * file.
 *
 * The check looks at the first byte of the data: the table is reported
 * as valid as long as that byte is non-zero.
 *
 * Returns: %TRUE if @table is still valid
 **/
gboolean
gvdb_table_is_valid (GvdbTable *table)
{
  const gchar first_byte = table->data[0];

  return first_byte != 0;
}