From a4bcff5ba36115495994e9f9ba66074471de76ab Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 26 Oct 2012 03:24:03 +0200 Subject: [PATCH] journal: introduce entry array chain cache When traversing entry array chains for a bisection or for retrieving an item by index we previously always started at the beginning of the chain. Since we tend to look at the same chains repeatedly, let's cache where we have been the last time, and maybe we can skip ahead with this the next time. This turns most bisections and index lookups from O(log(n)*log(n)) into O(log(n)). More importantly however, we seek around on disk much less, which is good to reduce buffer cache and seek times on rotational disks. --- .gitignore | 1 + Makefile.am | 9 ++++ src/journal/journal-file.c | 109 +++++++++++++++++++++++++++++++++++++--- src/journal/journal-file.h | 3 ++ src/journal/test-journal-enum.c | 53 +++++++++++++++++++ src/shared/hashmap.c | 19 +++++++ src/shared/hashmap.h | 3 ++ 7 files changed, 189 insertions(+), 8 deletions(-) create mode 100644 src/journal/test-journal-enum.c diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index edf8e7d..6c9deac 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -65,6 +65,9 @@ /* n_data was the first entry we added after the initial file format design */ #define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data)) +/* How many entries to keep in the entry array chain cache at max */ +#define CHAIN_CACHE_MAX 20 + void journal_file_close(JournalFile *f) { assert(f); @@ -97,6 +100,8 @@ void journal_file_close(JournalFile *f) { if (f->mmap) mmap_cache_unref(f->mmap); + hashmap_free_free(f->chain_cache); + #ifdef HAVE_XZ free(f->compress_buffer); #endif @@ -1307,37 +1312,89 @@ int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const st return r; } +typedef struct ChainCacheItem { + uint64_t first; /* the array at the begin of the chain */ + uint64_t array; /* the cached array */ + 
uint64_t begin; /* the first item in the cached array */ + uint64_t total; /* the total number of items in all arrays before this one in the chain */ +} ChainCacheItem; + +static void chain_cache_put( + Hashmap *h, + ChainCacheItem *ci, + uint64_t first, + uint64_t array, + uint64_t begin, + uint64_t total) { + + if (!ci) { + if (hashmap_size(h) >= CHAIN_CACHE_MAX) + ci = hashmap_steal_first(h); + else { + ci = new(ChainCacheItem, 1); + if (!ci) + return; + } + + ci->first = first; + + if (hashmap_put(h, &ci->first, ci) < 0) { + free(ci); + return; + } + } else + assert(ci->first == first); + + ci->array = array; + ci->begin = begin; + ci->total = total; +} + static int generic_array_get(JournalFile *f, uint64_t first, uint64_t i, Object **ret, uint64_t *offset) { Object *o; - uint64_t p = 0, a; + uint64_t p = 0, a, t = 0; int r; + ChainCacheItem *ci; assert(f); a = first; + + /* Try the chain cache first */ + ci = hashmap_get(f->chain_cache, &first); + if (ci && i > ci->total) { + a = ci->array; + i -= ci->total; + t = ci->total; + } + while (a > 0) { - uint64_t n; + uint64_t k; r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o); if (r < 0) return r; - n = journal_file_entry_array_n_items(o); - if (i < n) { + k = journal_file_entry_array_n_items(o); + if (i < k) { p = le64toh(o->entry_array.items[i]); - break; + goto found; } - i -= n; + i -= k; + t += k; a = le64toh(o->entry_array.next_entry_array_offset); } - if (a <= 0 || p <= 0) - return 0; + return 0; + +found: + /* Let's cache this item for the next invocation */ + chain_cache_put(f->chain_cache, ci, first, a, o->entry_array.items[0], t); r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o); if (r < 0) @@ -1401,11 +1458,38 @@ static int generic_array_bisect(JournalFile *f, bool subtract_one = false; Object *o, *array = NULL; int r; + ChainCacheItem *ci; assert(f); assert(test_object); + /* Start with the first array in the chain */ a = first; + + ci = hashmap_get(f->chain_cache, &first); + if 
(ci && n > ci->total) { + /* Ah, we have iterated this bisection array chain + * previously! Let's see if we can skip ahead in the + * chain, as far as the last time. But we can't jump + * backwards in the chain, so let's check that + * first. */ + + r = test_object(f, ci->begin, needle); + if (r < 0) + return r; + + if (r == TEST_LEFT) { + /* OK, what we are looking for is right of the + * beginning of this EntryArray, so let's jump + * straight to the previously cached array in the + * chain */ + + a = ci->array; + n -= ci->total; + t = ci->total; + } + } + while (a > 0) { uint64_t left, right, k, lp; @@ -1486,6 +1570,9 @@ found: if (subtract_one && t == 0 && i == 0) return 0; + /* Let's cache this item for the next invocation */ + chain_cache_put(f->chain_cache, ci, first, a, array->entry_array.items[0], t); + if (subtract_one && i == 0) p = last_p; else if (subtract_one) @@ -2265,6 +2352,12 @@ int journal_file_open( goto fail; } + f->chain_cache = hashmap_new(uint64_hash_func, uint64_compare_func); + if (!f->chain_cache) { + r = -ENOMEM; + goto fail; + } + f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode); if (f->fd < 0) { r = -errno; diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h index d87cbe4..cdbc8e4 100644 --- a/src/journal/journal-file.h +++ b/src/journal/journal-file.h @@ -33,6 +33,7 @@ #include "journal-def.h" #include "util.h" #include "mmap-cache.h" +#include "hashmap.h" typedef struct JournalMetrics { uint64_t max_use; @@ -64,6 +65,8 @@ typedef struct JournalFile { JournalMetrics metrics; MMapCache *mmap; + Hashmap *chain_cache; + #ifdef HAVE_XZ void *compress_buffer; uint64_t compress_buffer_size; diff --git a/src/shared/hashmap.c b/src/shared/hashmap.c index ef78070..dcfbb67 100644 --- a/src/shared/hashmap.c +++ b/src/shared/hashmap.c @@ -147,6 +147,25 @@ int trivial_compare_func(const void *a, const void *b) { return a < b ? -1 : (a > b ?
1 : 0); } +unsigned uint64_hash_func(const void *p) { + uint64_t u; + + assert_cc(sizeof(uint64_t) == 2*sizeof(unsigned)); + + u = *(const uint64_t*) p; + + return (unsigned) ((u >> 32) ^ u); +} + +int uint64_compare_func(const void *_a, const void *_b) { + uint64_t a, b; + + a = *(const uint64_t*) _a; + b = *(const uint64_t*) _b; + + return a < b ? -1 : (a > b ? 1 : 0); +} + Hashmap *hashmap_new(hash_func_t hash_func, compare_func_t compare_func) { bool b; Hashmap *h; diff --git a/src/shared/hashmap.h b/src/shared/hashmap.h index 55dea0a..6fd71cf 100644 --- a/src/shared/hashmap.h +++ b/src/shared/hashmap.h @@ -44,6 +44,9 @@ int string_compare_func(const void *a, const void *b); unsigned trivial_hash_func(const void *p); int trivial_compare_func(const void *a, const void *b); +unsigned uint64_hash_func(const void *p); +int uint64_compare_func(const void *a, const void *b); + Hashmap *hashmap_new(hash_func_t hash_func, compare_func_t compare_func); void hashmap_free(Hashmap *h); void hashmap_free_free(Hashmap *h); -- 1.8.1.1 From 34741aa3e2ee1e67a4cc735b7492aec13f0d822c Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 26 Oct 2012 20:25:36 +0200 Subject: [PATCH] journal: special case the trivial chain cache entry --- src/journal/journal-file.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 6c9deac..3df099d 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -1328,6 +1328,11 @@ static void chain_cache_put( uint64_t total) { if (!ci) { + /* If the chain item to cache for this chain is the + * first one it's not worth caching anything */ + if (array == first) + return; + if (hashmap_size(h) >= CHAIN_CACHE_MAX) ci = hashmap_steal_first(h); else { -- 1.8.1.1