SHA256
1
0
forked from pool/apptainer
apptainer/70.patch
Christian Goll 879ec698d0 Accepting request 1003468 from home:mslacken:pr
- Updated to version 1.1.0-rc3 with following changes:
  * added squashfuse-0.1.105.tar.gz and 70.patch for the build of squashfuse_ll
    which will be removed as soon as the multithread patch is incorporated
  * Change squash mounts to prefer to use squashfuse_ll instead of squashfuse,
    if available, for improved performance. squashfuse_ll is not available
    in factory.
  * Also, for even better parallel performance, include a patched
    multithreaded version of squashfuse_ll in
    ${prefix}/libexec/apptainer/bin.
  * Imply adding ${prefix}/libexec/apptainer/bin to the binary path in
    apptainer.conf, which is used for searching for helper executables. It is
    implied as the first directory of $PATH if present (which is at the
    beginning of binary path by default) or just as the first directory if
    $PATH is not included in binary path.
  * Add --unsquash action flag to temporarily convert a SIF file to a sandbox
    before running. In previous versions this was the default when running a
    SIF file without setuid or with fakeroot, but now the default is to instead
    mount with squashfuse.
  * Add --sparse flag to overlay create command to allow generation of a sparse
    ext3 overlay image.
  * Support for a custom hashbang in the %test section of an Apptainer recipe
    (akin to the runscript and start sections).
  * When using fakeroot in setuid mode, have the image drivers first enter the
    container's user namespace to avoid write errors with overlays.
  * Skip trying to use kernel overlayfs when using writable overlay and the
    lower layer is FUSE, because of a kernel bug introduced in kernel 5.15.
  * Add additional hidden options to the action command for testing different
    fakeroot modes with --fakeroot: --ignore-subuid, --ignore-fakeroot-command,
    and --ignore-userns.

OBS-URL: https://build.opensuse.org/request/show/1003468
OBS-URL: https://build.opensuse.org/package/show/network:cluster/apptainer?expand=0&rev=19
2022-09-14 08:31:59 +00:00

1785 lines
53 KiB
Diff

From 2ff837b814e84eedc25574014da9ed24ee44b7b4 Mon Sep 17 00:00:00 2001
From: Kevin Vigor <kvigor@gmail.com>
Date: Thu, 6 Feb 2020 09:59:56 -0800
Subject: [PATCH 1/3] Add locking to cache API in anticipation of multithreaded
decompression.
The existing cache API has effectively no locking and relies on the single-
threaded nature of the code to prevent contention on cache entries.
As a first step to multithreading the driver, refactor the cache API to allow
alternative implementations. In this changeset the API is changed but the
internal cache implementation is not made thread-safe, so it is still suitable
only for single-threaded usage.
Specific changes:
* sqfs_cache type is made opaque.
* previously, newly allocated cache entries were assumed valid. This meant that
on any failure path following a cache entry allocation, one had to be careful
to call sqfs_cache_invalidate(). The assumption is now reversed, cache
entries are invalid until explicitly marked valid with
sqfs_cache_entry_valid(). This simplifies error handling.
* block cache code was intermixed with generic cache code, relocate to fs.c
* cache eviction led to object destruction in block cache. The only thing
preventing cache eviction while a block object is in use is the single-threaded
nature of the code. Instead use a refcounting mechanism on the block entries
so that we can independently manage block lifetime.
---
Makefile.am | 6 +-
cache.c | 142 +++++++++++++++++++++++++++++-----------------
cache.h | 40 ++++++-------
common.h | 26 ++++++++-
file.c | 23 ++++----
file.h | 4 --
fs.c | 61 ++++++++++++++++----
fs.h | 5 ++
table.c | 2 +-
tests/cachetest.c | 107 ++++++++++++++++++++++++++++++++++
10 files changed, 314 insertions(+), 102 deletions(-)
create mode 100644 tests/cachetest.c
diff --git a/Makefile.am b/Makefile.am
index 5659cd22..eaf7ac97 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -105,9 +105,11 @@ endif
TESTS =
if SQ_FUSE_TESTS
TESTS += tests/ll-smoke.sh
-check_PROGRAMS = endiantest
+check_PROGRAMS = cachetest endiantest
+cachetest_SOURCES=tests/cachetest.c
+cachetest_LDADD=libsquashfuse.la $(COMPRESSION_LIBS)
endiantest_SOURCES = tests/endiantest.c
-TESTS += endiantest
+TESTS += cachetest endiantest
endif
if SQ_DEMO_TESTS
TESTS += tests/ls.sh
diff --git a/cache.c b/cache.c
index 0deacfca..36d02234 100644
--- a/cache.c
+++ b/cache.c
@@ -22,85 +22,121 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+
+#include "config.h"
#include "cache.h"
#include "fs.h"
+#include <assert.h>
#include <stdlib.h>
+typedef struct sqfs_cache_internal {
+ uint8_t *buf;
+
+ sqfs_cache_dispose dispose;
+
+ size_t size, count;
+ size_t next; /* next block to evict */
+} sqfs_cache_internal;
+
+typedef struct {
+ int valid;
+ sqfs_cache_idx idx;
+} sqfs_cache_entry_hdr;
+
sqfs_err sqfs_cache_init(sqfs_cache *cache, size_t size, size_t count,
- sqfs_cache_dispose dispose) {
- cache->size = size;
- cache->count = count;
- cache->dispose = dispose;
- cache->next = 0;
-
- cache->idxs = calloc(count, sizeof(sqfs_cache_idx));
- cache->buf = calloc(count, size);
- if (cache->idxs && cache->buf)
+ sqfs_cache_dispose dispose) {
+
+ sqfs_cache_internal *c = malloc(sizeof(sqfs_cache_internal));
+ if (!c) {
+ return SQFS_ERR;
+ }
+
+ c->size = size + sizeof(sqfs_cache_entry_hdr);
+ c->count = count;
+ c->dispose = dispose;
+ c->next = 0;
+
+ c->buf = calloc(count, c->size);
+
+ if (c->buf) {
+ *cache = c;
return SQFS_OK;
-
- sqfs_cache_destroy(cache);
+ }
+
+ sqfs_cache_destroy(&c);
return SQFS_ERR;
}
-static void *sqfs_cache_entry(sqfs_cache *cache, size_t i) {
- return cache->buf + i * cache->size;
+static sqfs_cache_entry_hdr *sqfs_cache_entry_header(
+ sqfs_cache_internal* cache,
+ size_t i) {
+ return (sqfs_cache_entry_hdr *)(cache->buf + i * cache->size);
+}
+
+static void* sqfs_cache_entry(sqfs_cache_internal* cache, size_t i) {
+ return (void *)(sqfs_cache_entry_header(cache, i) + 1);
}
void sqfs_cache_destroy(sqfs_cache *cache) {
- if (cache->buf && cache->idxs) {
- size_t i;
- for (i = 0; i < cache->count; ++i) {
- if (cache->idxs[i] != SQFS_CACHE_IDX_INVALID)
- cache->dispose(sqfs_cache_entry(cache, i));
+ if (cache && *cache) {
+ sqfs_cache_internal *c = *cache;
+ if (c->buf) {
+ size_t i;
+ for (i = 0; i < c->count; ++i) {
+ sqfs_cache_entry_hdr *hdr =
+ sqfs_cache_entry_header(c, i);
+ if (hdr->valid) {
+ c->dispose((void *)(hdr + 1));
+ }
+ }
}
+ free(c->buf);
+ free(c);
+ *cache = NULL;
}
- free(cache->buf);
- free(cache->idxs);
}
void *sqfs_cache_get(sqfs_cache *cache, sqfs_cache_idx idx) {
size_t i;
- for (i = 0; i < cache->count; ++i) {
- if (cache->idxs[i] == idx)
- return sqfs_cache_entry(cache, i);
+ sqfs_cache_internal *c = *cache;
+ sqfs_cache_entry_hdr *hdr;
+
+ for (i = 0; i < c->count; ++i) {
+ hdr = sqfs_cache_entry_header(c, i);
+ if (hdr->idx == idx) {
+ assert(hdr->valid);
+ return sqfs_cache_entry(c, i);
+ }
}
- return NULL;
-}
-void *sqfs_cache_add(sqfs_cache *cache, sqfs_cache_idx idx) {
- size_t i = (cache->next++);
- cache->next %= cache->count;
-
- if (cache->idxs[i] != SQFS_CACHE_IDX_INVALID)
- cache->dispose(sqfs_cache_entry(cache, i));
-
- cache->idxs[i] = idx;
- return sqfs_cache_entry(cache, i);
-}
+ /* No existing entry; free one if necessary, allocate a new one. */
+ i = (c->next++);
+ c->next %= c->count;
-/* sqfs_cache_add can be called but the caller can fail to fill it (IO
- * error, etc). sqfs_cache_invalidate invalidates the cache entry.
- * It does not call dispose; it merely marks the entry as reusable
- * since it is never fully initialized.
- */
-void sqfs_cache_invalidate(sqfs_cache *cache, sqfs_cache_idx idx) {
- size_t i;
- for (i = 0; i < cache->count; ++i) {
- if (cache->idxs[i] == idx) {
- cache->idxs[i] = SQFS_CACHE_IDX_INVALID;
- return;
- }
+ hdr = sqfs_cache_entry_header(c, i);
+ if (hdr->valid) {
+ /* evict */
+ c->dispose((void *)(hdr + 1));
+ hdr->valid = 0;
}
+
+ hdr->idx = idx;
+ return (void *)(hdr + 1);
+}
+
+int sqfs_cache_entry_valid(const sqfs_cache *cache, const void *e) {
+ sqfs_cache_entry_hdr *hdr = ((sqfs_cache_entry_hdr *)e) - 1;
+ return hdr->valid;
}
-static void sqfs_block_cache_dispose(void *data) {
- sqfs_block_cache_entry *entry = (sqfs_block_cache_entry*)data;
- sqfs_block_dispose(entry->block);
+void sqfs_cache_entry_mark_valid(sqfs_cache *cache, void *e) {
+ sqfs_cache_entry_hdr *hdr = ((sqfs_cache_entry_hdr *)e) - 1;
+ assert(hdr->valid == 0);
+ hdr->valid = 1;
}
-sqfs_err sqfs_block_cache_init(sqfs_cache *cache, size_t count) {
- return sqfs_cache_init(cache, sizeof(sqfs_block_cache_entry), count,
- &sqfs_block_cache_dispose);
+void sqfs_cache_put(const sqfs_cache *cache, const void *e) {
+ // nada, we have no locking in single-threaded implementation.
}
diff --git a/cache.h b/cache.h
index b78c524d..da471352 100644
--- a/cache.h
+++ b/cache.h
@@ -33,35 +33,37 @@
* - No thread safety
* - Misses are caller's responsibility
*/
-#define SQFS_CACHE_IDX_INVALID 0
typedef uint64_t sqfs_cache_idx;
typedef void (*sqfs_cache_dispose)(void* data);
-typedef struct {
- sqfs_cache_idx *idxs;
- uint8_t *buf;
-
- sqfs_cache_dispose dispose;
-
- size_t size, count;
- size_t next; /* next block to evict */
-} sqfs_cache;
+struct sqfs_cache_internal;
+typedef struct sqfs_cache_internal *sqfs_cache;
sqfs_err sqfs_cache_init(sqfs_cache *cache, size_t size, size_t count,
sqfs_cache_dispose dispose);
void sqfs_cache_destroy(sqfs_cache *cache);
+/* Get an entry for the given index.
+ *
+ * This will always succeed (evicting if necessary). The caller must then
+ * call sqfs_cache_entry_valid() to determine if the entry is valid. If not
+ * valid, the entry is newly allocated and the caller is responsible for
+ * initializing it and then calling sqfs_cache_entry_mark_valid().
+ *
+ * This call may block in multithreaded case.
+ *
+ * In multithreaded case, the cache is locked on return (no entries can
+ * be added or removed). Caller must call sqfs_cache_put() when it is safe
+ * to evict the returned cache entry.
+ */
void *sqfs_cache_get(sqfs_cache *cache, sqfs_cache_idx idx);
-void *sqfs_cache_add(sqfs_cache *cache, sqfs_cache_idx idx);
-void sqfs_cache_invalidate(sqfs_cache *cache, sqfs_cache_idx idx);
-
-
-typedef struct {
- sqfs_block *block;
- size_t data_size;
-} sqfs_block_cache_entry;
+/* inform cache it is now safe to evict this entry. */
+void sqfs_cache_put(const sqfs_cache *cache, const void *e);
-sqfs_err sqfs_block_cache_init(sqfs_cache *cache, size_t count);
+/* Determine if cache entry contains valid contents. */
+int sqfs_cache_entry_valid(const sqfs_cache *cache, const void *e);
+/* Mark cache entry as containing valid contents. */
+void sqfs_cache_entry_mark_valid(sqfs_cache *cache, void *e);
#endif
diff --git a/common.h b/common.h
index aeac5c67..9d50e006 100644
--- a/common.h
+++ b/common.h
@@ -32,12 +32,23 @@
#include <sys/types.h>
#ifdef _WIN32
- #include <win32.h>
+# include <win32.h>
+# include <intrin.h>
+# define atomic_inc_relaxed(ptr) \
+ _InterlockedIncrement(ptr)
+# define atomic_dec_acqrel(ptr) \
+ _InterlockedDecrement(ptr)
#else
typedef mode_t sqfs_mode_t;
typedef uid_t sqfs_id_t;
typedef off_t sqfs_off_t;
typedef int sqfs_fd_t;
+
+# define atomic_inc_relaxed(ptr) \
+ __atomic_add_fetch(&block->refcount, 1, __ATOMIC_RELAXED)
+# define atomic_dec_acqrel(ptr) \
+ __atomic_sub_fetch(&block->refcount, 1, __ATOMIC_ACQ_REL)
+
#endif
typedef enum {
@@ -59,6 +70,7 @@ typedef struct sqfs_inode sqfs_inode;
typedef struct {
size_t size;
void *data;
+ long refcount;
} sqfs_block;
typedef struct {
@@ -66,4 +78,16 @@ typedef struct {
size_t offset;
} sqfs_md_cursor;
+/* Increment the refcount on the block. */
+static inline void sqfs_block_ref(sqfs_block *block) {
+ atomic_inc_relaxed(&block->refcount);
+}
+
+/* decrement the refcount on the block, return non-zero if we held the last
+ * reference.
+ */
+static inline int sqfs_block_deref(sqfs_block *block) {
+ return atomic_dec_acqrel(&block->refcount) == 0;
+}
+
#endif
diff --git a/file.c b/file.c
index a4d894eb..d09f2a7d 100644
--- a/file.c
+++ b/file.c
@@ -177,7 +177,7 @@ sqfs_err sqfs_read_range(sqfs *fs, sqfs_inode *inode, sqfs_off_t start,
take = (size_t)(*size);
if (block) {
memcpy(buf, (char*)block->data + data_off + read_off, take);
- /* BLOCK CACHED, DON'T DISPOSE */
+ sqfs_block_dispose(block);
} else {
memset(buf, 0, take);
}
@@ -226,7 +226,7 @@ sqfs_err sqfs_blockidx_init(sqfs_cache *cache) {
}
sqfs_err sqfs_blockidx_add(sqfs *fs, sqfs_inode *inode,
- sqfs_blockidx_entry **out) {
+ sqfs_blockidx_entry **out, sqfs_blockidx_entry **cachep) {
size_t blocks; /* Number of blocks in the file */
size_t md_size; /* Amount of metadata necessary to hold the blocksizes */
size_t count; /* Number of block-index entries necessary */
@@ -234,10 +234,6 @@ sqfs_err sqfs_blockidx_add(sqfs *fs, sqfs_inode *inode,
sqfs_blockidx_entry *blockidx;
sqfs_blocklist bl;
- /* For the cache */
- sqfs_cache_idx idx;
- sqfs_blockidx_entry **cachep;
-
size_t i = 0;
bool first = true;
@@ -270,8 +266,6 @@ sqfs_err sqfs_blockidx_add(sqfs *fs, sqfs_inode *inode,
}
}
- idx = inode->base.inode_number + 1; /* zero means invalid */
- cachep = sqfs_cache_add(&fs->blockidx, idx);
*out = *cachep = blockidx;
return SQFS_OK;
}
@@ -299,12 +293,16 @@ sqfs_err sqfs_blockidx_blocklist(sqfs *fs, sqfs_inode *inode,
/* Get the index, creating it if necessary */
idx = inode->base.inode_number + 1; /* zero means invalid index */
- if ((bp = sqfs_cache_get(&fs->blockidx, idx))) {
+ bp = sqfs_cache_get(&fs->blockidx, idx);
+ if (sqfs_cache_entry_valid(&fs->blockidx, bp)) {
blockidx = *bp;
} else {
- sqfs_err err = sqfs_blockidx_add(fs, inode, &blockidx);
- if (err)
+ sqfs_err err = sqfs_blockidx_add(fs, inode, &blockidx, bp);
+ if (err) {
+ sqfs_cache_put(&fs->blockidx, bp);
return err;
+ }
+ sqfs_cache_entry_mark_valid(&fs->blockidx, bp);
}
skipped = (metablock * SQUASHFS_METADATA_SIZE / sizeof(sqfs_blocklist_entry))
@@ -316,6 +314,9 @@ sqfs_err sqfs_blockidx_blocklist(sqfs *fs, sqfs_inode *inode,
bl->remain -= skipped;
bl->pos = (uint64_t)skipped * fs->sb.block_size;
bl->block = blockidx->data_block;
+
+ sqfs_cache_put(&fs->blockidx, bp);
+
return SQFS_OK;
}
diff --git a/file.h b/file.h
index 249c6413..e3d2b028 100644
--- a/file.h
+++ b/file.h
@@ -71,10 +71,6 @@ typedef struct {
sqfs_err sqfs_blockidx_init(sqfs_cache *cache);
-/* Fill *out with all the block-index entries for this file */
-sqfs_err sqfs_blockidx_add(sqfs *fs, sqfs_inode *inode,
- sqfs_blockidx_entry **out);
-
/* Get a blocklist fast-forwarded to the correct location */
sqfs_err sqfs_blockidx_blocklist(sqfs *fs, sqfs_inode *inode,
sqfs_blocklist *bl, sqfs_off_t start);
diff --git a/fs.c b/fs.c
index d69bb681..1838c5ca 100644
--- a/fs.c
+++ b/fs.c
@@ -124,6 +124,8 @@ sqfs_err sqfs_block_read(sqfs *fs, sqfs_off_t pos, bool compressed,
sqfs_err err = SQFS_ERR;
if (!(*block = malloc(sizeof(**block))))
return SQFS_ERR;
+ /* start with refcount one, so dispose on failure path works as expected. */
+ (*block)->refcount = 1;
if (!((*block)->data = malloc(size)))
goto error;
@@ -188,44 +190,81 @@ sqfs_err sqfs_data_block_read(sqfs *fs, sqfs_off_t pos, uint32_t hdr,
}
sqfs_err sqfs_md_cache(sqfs *fs, sqfs_off_t *pos, sqfs_block **block) {
- sqfs_block_cache_entry *entry = sqfs_cache_get(
- &fs->md_cache, *pos);
- if (!entry) {
+ sqfs_block_cache_entry *entry = sqfs_cache_get(&fs->md_cache, *pos);
+ if (!sqfs_cache_entry_valid(&fs->md_cache, entry)) {
sqfs_err err = SQFS_OK;
- entry = sqfs_cache_add(&fs->md_cache, *pos);
/* fprintf(stderr, "MD BLOCK: %12llx\n", (long long)*pos); */
err = sqfs_md_block_read(fs, *pos,
&entry->data_size, &entry->block);
if (err) {
- sqfs_cache_invalidate(&fs->md_cache, *pos);
+ sqfs_cache_put(&fs->md_cache, entry);
return err;
}
+ sqfs_cache_entry_mark_valid(&fs->md_cache, entry);
}
+ /* block is created with refcount 1, which accounts for presence in the
+ * cache (will be decremented on eviction).
+ *
+ * We increment it here as a convienience for the caller, who will
+ * obviously want one. Therefore all callers must eventually call deref
+ * by means of calling sqfs_block_dispose().
+ */
*block = entry->block;
*pos += entry->data_size;
+
+ sqfs_block_ref(entry->block);
+ /* it is now safe to evict the entry from the cache, we have a
+ * reference to the block so eviction will not destroy it.
+ */
+ sqfs_cache_put(&fs->md_cache, entry);
+
return SQFS_OK;
}
sqfs_err sqfs_data_cache(sqfs *fs, sqfs_cache *cache, sqfs_off_t pos,
uint32_t hdr, sqfs_block **block) {
sqfs_block_cache_entry *entry = sqfs_cache_get(cache, pos);
- if (!entry) {
+ if (!sqfs_cache_entry_valid(cache, entry)) {
sqfs_err err = SQFS_OK;
- entry = sqfs_cache_add(cache, pos);
err = sqfs_data_block_read(fs, pos, hdr,
&entry->block);
if (err) {
- sqfs_cache_invalidate(cache, pos);
+ sqfs_cache_put(cache, entry);
return err;
}
+ sqfs_cache_entry_mark_valid(cache, entry);
}
+ /* block is created with refcount 1, which accounts for presence in the
+ * cache (will be decremented on eviction).
+ *
+ * We increment it here as a convenience for the caller, who will
+ * obviously want one. Therefore all callers must eventually call deref
+ * by means of calling sqfs_block_dispose().
+ */
*block = entry->block;
+ sqfs_block_ref(*block);
+ /* it is now safe to evict the entry from the cache, we have a
+ * reference to the block so eviction will not destroy it.
+ */
+ sqfs_cache_put(cache, entry);
return SQFS_OK;
}
void sqfs_block_dispose(sqfs_block *block) {
- free(block->data);
- free(block);
+ if (sqfs_block_deref(block)) {
+ free(block->data);
+ free(block);
+ }
+}
+
+static void sqfs_block_cache_dispose(void *data) {
+ sqfs_block_cache_entry *entry = (sqfs_block_cache_entry*)data;
+ sqfs_block_dispose(entry->block);
+}
+
+sqfs_err sqfs_block_cache_init(sqfs_cache *cache, size_t count) {
+ return sqfs_cache_init(cache, sizeof(sqfs_block_cache_entry), count,
+ &sqfs_block_cache_dispose);
}
void sqfs_md_cursor_inode(sqfs_md_cursor *cur, sqfs_inode_id id, sqfs_off_t base) {
@@ -247,7 +286,6 @@ sqfs_err sqfs_md_read(sqfs *fs, sqfs_md_cursor *cur, void *buf, size_t size) {
take = size;
if (buf)
memcpy(buf, (char*)block->data + cur->offset, take);
- /* BLOCK CACHED, DON'T DISPOSE */
if (buf)
buf = (char*)buf + take;
@@ -257,6 +295,7 @@ sqfs_err sqfs_md_read(sqfs *fs, sqfs_md_cursor *cur, void *buf, size_t size) {
cur->block = pos;
cur->offset = 0;
}
+ sqfs_block_dispose(block);
}
return SQFS_OK;
}
diff --git a/fs.h b/fs.h
index d300a3bb..1d475ce0 100644
--- a/fs.h
+++ b/fs.h
@@ -97,6 +97,11 @@ sqfs_compression_type sqfs_compression(sqfs *fs);
void sqfs_md_header(uint16_t hdr, bool *compressed, uint16_t *size);
void sqfs_data_header(uint32_t hdr, bool *compressed, uint32_t *size);
+typedef struct {
+ sqfs_block *block;
+ size_t data_size;
+} sqfs_block_cache_entry;
+sqfs_err sqfs_block_cache_init(sqfs_cache *cache, size_t count);
sqfs_err sqfs_block_read(sqfs *fs, sqfs_off_t pos, bool compressed, uint32_t size,
size_t outsize, sqfs_block **block);
void sqfs_block_dispose(sqfs_block *block);
diff --git a/table.c b/table.c
index c035398f..02a5442c 100644
--- a/table.c
+++ b/table.c
@@ -76,6 +76,6 @@ sqfs_err sqfs_table_get(sqfs_table *table, sqfs *fs, size_t idx, void *buf) {
return SQFS_ERR;
memcpy(buf, (char*)(block->data) + off, table->each);
- /* BLOCK CACHED, DON'T DISPOSE */
+ sqfs_block_dispose(block);
return SQFS_OK;
}
diff --git a/tests/cachetest.c b/tests/cachetest.c
new file mode 100644
index 00000000..8a2c2363
--- /dev/null
+++ b/tests/cachetest.c
@@ -0,0 +1,107 @@
+#include "cache.h"
+#include <stdio.h>
+
+typedef struct {
+ int x;
+ int y;
+} TestStruct;
+
+static void TestStructDispose(void *t) {
+ // nada.
+}
+
+#define EXPECT_EQ(exp1, exp2) \
+ do { if ((exp1) != (exp2)) { \
+ printf("Test failure: expected " #exp1 " to equal " #exp2 \
+ " at " __FILE__ ":%d\n", __LINE__); \
+ ++errors; \
+ } \
+ } while (0)
+
+#define EXPECT_NE(exp1, exp2) \
+ do { if ((exp1) == (exp2)) { \
+ printf("Test failure: expected " #exp1 " to !equal " #exp2 \
+ " at " __FILE__ ":%d\n", __LINE__); \
+ ++errors; \
+ } \
+ } while (0)
+
+
+int test_cache_miss(void) {
+ int errors = 0;
+ sqfs_cache cache;
+ TestStruct *entry;
+
+ EXPECT_EQ(sqfs_cache_init(&cache, sizeof(TestStruct), 16,
+ TestStructDispose), SQFS_OK);
+ entry = (TestStruct *)sqfs_cache_get(&cache, 1);
+ EXPECT_EQ(sqfs_cache_entry_valid(&cache, entry), 0);
+ sqfs_cache_destroy(&cache);
+
+ return errors == 0;
+}
+
+int test_mark_valid_and_lookup(void) {
+ int errors = 0;
+ sqfs_cache cache;
+ TestStruct *entry;
+
+ EXPECT_EQ(sqfs_cache_init(&cache, sizeof(TestStruct), 16,
+ TestStructDispose), SQFS_OK);
+ entry = (TestStruct *)sqfs_cache_get(&cache, 1);
+ entry->x = 666;
+ entry->y = 777;
+ sqfs_cache_entry_mark_valid(&cache, entry);
+ sqfs_cache_put(&cache, entry);
+ EXPECT_NE(sqfs_cache_entry_valid(&cache, entry), 0);
+ entry = (TestStruct *)sqfs_cache_get(&cache, 1);
+ EXPECT_NE(sqfs_cache_entry_valid(&cache, entry), 0);
+ EXPECT_EQ(entry->x, 666);
+ EXPECT_EQ(entry->y, 777);
+ sqfs_cache_put(&cache, entry);
+
+ sqfs_cache_destroy(&cache);
+ return errors == 0;
+}
+
+int test_two_entries(void) {
+ int errors = 0;
+ sqfs_cache cache;
+ TestStruct *entry1, *entry2;
+
+ EXPECT_EQ(sqfs_cache_init(&cache, sizeof(TestStruct), 16,
+ TestStructDispose), SQFS_OK);
+
+ entry1 = (TestStruct *)sqfs_cache_get(&cache, 1);
+ entry1->x = 1;
+ entry1->y = 2;
+ sqfs_cache_entry_mark_valid(&cache, entry1);
+ sqfs_cache_put(&cache, entry1);
+
+ entry2 = (TestStruct *)sqfs_cache_get(&cache, 666);
+ entry2->x = 3;
+ entry2->y = 4;
+ sqfs_cache_entry_mark_valid(&cache, entry2);
+ sqfs_cache_put(&cache, entry2);
+
+ entry1 = (TestStruct *)sqfs_cache_get(&cache, 1);
+ sqfs_cache_put(&cache, entry1);
+ entry2 = (TestStruct *)sqfs_cache_get(&cache, 666);
+ sqfs_cache_put(&cache, entry2);
+ EXPECT_NE(sqfs_cache_entry_valid(&cache, entry1), 0);
+ EXPECT_NE(sqfs_cache_entry_valid(&cache, entry2), 0);
+ EXPECT_EQ(entry1->x, 1);
+ EXPECT_EQ(entry1->y, 2);
+ EXPECT_EQ(entry2->x, 3);
+ EXPECT_EQ(entry2->y, 4);
+
+ sqfs_cache_destroy(&cache);
+
+ return errors == 0;
+}
+
+int main(void) {
+ return test_cache_miss() &&
+ test_mark_valid_and_lookup() &&
+ test_two_entries() ? 0 : 1;
+}
From 379c8507c15ef43b641c1024b43372f2de9fb480 Mon Sep 17 00:00:00 2001
From: Kevin Vigor <kvigor@gmail.com>
Date: Thu, 6 Feb 2020 12:20:19 -0800
Subject: [PATCH 2/3] Implement multi-threaded squashfuse_ll, allowing parallel
decompression.
A simple thread-safe cache implementation is added and squashfuse_ll init
is altered to use fuse_session_loop_mt().
Multithreading must be explicitly enabled at configure time with the
--enable-multithreading option. If enabled, the resulting squashfuse_ll
will be multithreaded by default, but this may be disabled at runtime
with the '-s' FUSE commandline option.
---
Makefile.am | 8 +-
cache.c | 4 +
cache_mt.c | 169 +++++++++++++++++++++++++++++++
configure.ac | 11 +-
fs.c | 9 +-
ll.c | 77 ++++++++++----
ll_main.c | 20 +++-
m4/squashfuse_c.m4 | 33 +-----
squashfs_fs.h | 6 +-
tests/cachetest.c | 1 +
tests/ll-smoke-singlethreaded.sh | 10 ++
tests/ll-smoke.sh | 141 ++++++++++++++++++++++++++
tests/ll-smoke.sh.in | 6 +-
13 files changed, 432 insertions(+), 63 deletions(-)
create mode 100644 cache_mt.c
create mode 100755 tests/ll-smoke-singlethreaded.sh
create mode 100755 tests/ll-smoke.sh
diff --git a/Makefile.am b/Makefile.am
index eaf7ac97..17b01be4 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -26,7 +26,7 @@ pkgconfig_DATA = squashfuse.pc
noinst_LTLIBRARIES += libsquashfuse_convenience.la
libsquashfuse_convenience_la_SOURCES = swap.c cache.c table.c dir.c file.c fs.c \
decompress.c xattr.c hash.c stack.c traverse.c util.c \
- nonstd-pread.c nonstd-stat.c \
+ nonstd-pread.c nonstd-stat.c cache_mt.c \
squashfs_fs.h common.h nonstd-internal.h nonstd.h swap.h cache.h table.h \
dir.h file.h decompress.h xattr.h squashfuse.h hash.h stack.h traverse.h \
util.h fs.h
@@ -105,6 +105,12 @@ endif
TESTS =
if SQ_FUSE_TESTS
TESTS += tests/ll-smoke.sh
+if MULTITHREADED
+# I know this test looks backwards, but the default smoke test is multithreaded
+# when threading is enabled. So we additionally run a singlethreaded test in
+# that case.
+TESTS += tests/ll-smoke-singlethreaded.sh
+endif
check_PROGRAMS = cachetest endiantest
cachetest_SOURCES=tests/cachetest.c
cachetest_LDADD=libsquashfuse.la $(COMPRESSION_LIBS)
diff --git a/cache.c b/cache.c
index 36d02234..45408f24 100644
--- a/cache.c
+++ b/cache.c
@@ -24,6 +24,9 @@
*/
#include "config.h"
+
+#ifndef SQFS_MULTITHREADED
+
#include "cache.h"
#include "fs.h"
@@ -140,3 +143,4 @@ void sqfs_cache_entry_mark_valid(sqfs_cache *cache, void *e) {
void sqfs_cache_put(const sqfs_cache *cache, const void *e) {
// nada, we have no locking in single-threaded implementation.
}
+#endif /* SQFS_MULTITHREADED */
diff --git a/cache_mt.c b/cache_mt.c
new file mode 100644
index 00000000..1b17fa5a
--- /dev/null
+++ b/cache_mt.c
@@ -0,0 +1,169 @@
+#include "config.h"
+
+#ifdef SQFS_MULTITHREADED
+
+/* Thread-safe cache implementation.
+ *
+ * Simple implementation: basic hash table, each individual entry is
+ * protected by a mutex, any collision is handled by eviction.
+ */
+
+#include "cache.h"
+#include "fs.h"
+
+#include <assert.h>
+#include <pthread.h>
+#include <stdlib.h>
+
+typedef struct sqfs_cache_internal {
+ uint8_t *buf;
+ sqfs_cache_dispose dispose;
+ size_t entry_size, count;
+} sqfs_cache_internal;
+
+typedef struct {
+ enum { EMPTY, FULL } state;
+ sqfs_cache_idx idx;
+ pthread_mutex_t lock;
+} sqfs_cache_entry_hdr;
+
+// MurmurHash64A performance-optimized for hash of uint64_t keys
+const static uint64_t kMurmur2Seed = 4193360111ul;
+static uint64_t MurmurRehash64A(uint64_t key) {
+ const uint64_t m = 0xc6a4a7935bd1e995;
+ const int r = 47;
+
+ uint64_t h = (uint64_t)kMurmur2Seed ^ (sizeof(uint64_t) * m);
+
+ key *= m;
+ key ^= key >> r;
+ key *= m;
+
+ h ^= key;
+ h *= m;
+
+ h ^= h >> r;
+ h *= m;
+ h ^= h >> r;
+
+ return h;
+}
+
+static sqfs_cache_entry_hdr *sqfs_cache_entry_header(
+ sqfs_cache_internal* cache,
+ size_t i) {
+ assert(i < cache->count);
+ return (sqfs_cache_entry_hdr *)(cache->buf + i * cache->entry_size);
+}
+
+sqfs_err sqfs_cache_init(sqfs_cache *cache, size_t entry_size, size_t count,
+ sqfs_cache_dispose dispose) {
+ size_t i;
+ pthread_mutexattr_t attr;
+ sqfs_cache_internal *c = malloc(sizeof(sqfs_cache_internal));
+
+ if (!c) {
+ return SQFS_ERR;
+ }
+
+ c->entry_size = entry_size + sizeof(sqfs_cache_entry_hdr);
+ c->count = count;
+ c->dispose = dispose;
+
+ pthread_mutexattr_init(&attr);
+#if defined(_GNU_SOURCE) && !defined(NDEBUG)
+ pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
+#endif
+
+ c->buf = calloc(c->count, c->entry_size);
+ if (!c->buf) {
+ goto err_out;
+ }
+
+ for (i = 0; i < c->count; ++i) {
+ sqfs_cache_entry_hdr *hdr = sqfs_cache_entry_header(c, i);
+ hdr->state = EMPTY;
+ if (pthread_mutex_init(&hdr->lock, &attr)) {
+ goto err_out;
+ }
+ }
+
+ pthread_mutexattr_destroy(&attr);
+
+ *cache = c;
+ return SQFS_OK;
+
+err_out:
+ sqfs_cache_destroy(&c);
+ return SQFS_ERR;
+}
+
+void sqfs_cache_destroy(sqfs_cache *cache) {
+ if (cache && *cache) {
+ sqfs_cache_internal *c = *cache;
+ if (c->buf) {
+ size_t i;
+ for (i = 0; i < c->count; ++i) {
+ sqfs_cache_entry_hdr *hdr =
+ sqfs_cache_entry_header(c, i);
+ if (hdr->state == FULL) {
+ c->dispose((void *)(hdr + 1));
+ }
+ if (pthread_mutex_destroy(&hdr->lock)) {
+ assert(0);
+ }
+ }
+ }
+ free(c->buf);
+ free(c);
+ *cache = NULL;
+ }
+}
+
+void *sqfs_cache_get(sqfs_cache *cache, sqfs_cache_idx idx) {
+ sqfs_cache_internal *c = *cache;
+ sqfs_cache_entry_hdr *hdr;
+ void *entry;
+
+ uint64_t key = MurmurRehash64A(idx) % c->count;
+
+ hdr = sqfs_cache_entry_header(c, key);
+ if (pthread_mutex_lock(&hdr->lock)) { assert(0); }
+ /* matching unlock is in sqfs_cache_put() */
+ entry = (void *)(hdr + 1);
+
+ if (hdr->state == EMPTY) {
+ hdr->idx = idx;
+ return entry;
+ }
+
+ /* There's a valid entry: it's either a cache hit or a collision. */
+ assert(hdr->state == FULL);
+ if (hdr->idx == idx) {
+ return entry;
+ }
+
+ /* Collision. */
+ c->dispose((void *)(hdr + 1));
+ hdr->state = EMPTY;
+ hdr->idx = idx;
+ return entry;
+}
+
+int sqfs_cache_entry_valid(const sqfs_cache *cache, const void *e) {
+ sqfs_cache_entry_hdr *hdr = ((sqfs_cache_entry_hdr *)e) - 1;
+ return hdr->state == FULL;
+}
+
+void sqfs_cache_entry_mark_valid(sqfs_cache *cache, void *e) {
+ sqfs_cache_entry_hdr *hdr = ((sqfs_cache_entry_hdr *)e) - 1;
+ assert(hdr->state == EMPTY);
+ hdr->state = FULL;
+}
+
+void sqfs_cache_put(const sqfs_cache *cache, const void *e) {
+ sqfs_cache_entry_hdr *hdr = ((sqfs_cache_entry_hdr *)e) - 1;
+ if (pthread_mutex_unlock(&hdr->lock)) { assert(0); }
+}
+
+#endif /* SQFS_MULTITHREADED */
diff --git a/configure.ac b/configure.ac
index 762766e9..3869075a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -10,6 +10,7 @@ AH_BOTTOM([#endif])
AC_CANONICAL_BUILD
AC_CANONICAL_TARGET
AM_INIT_AUTOMAKE([foreign -Wall subdir-objects])
+AC_USE_SYSTEM_EXTENSIONS
AM_SILENT_RULES(yes)
AM_PROG_AR
LT_INIT
@@ -23,10 +24,8 @@ AC_PROG_SED
AC_PROG_CPP
AC_SYS_LARGEFILE
AM_PROG_CC_C_O
-SQ_PROG_CPP_POSIX_2001
SQ_PROG_CC_WALL
-AC_DEFINE([_POSIX_C_SOURCE], [200112L], [POSIX 2001 compatibility])
# Non-POSIX declarations
SQ_CHECK_DECL_MAKEDEV
@@ -97,6 +96,14 @@ AC_CONFIG_FILES([tests/ll-smoke.sh],[chmod +x tests/ll-smoke.sh])
AS_IF([test "x$sq_high_level$sq_low_level$sq_demo" = xnonono],
AC_MSG_FAILURE([Nothing left to build]))
+AC_ARG_ENABLE([multithreading],
+ AS_HELP_STRING([--enable-multithreading], [enable multi-threaded low-level FUSE driver]),
+ [
+ AC_CHECK_LIB([pthread], [pthread_mutex_lock], [], AC_MSG_ERROR([libpthread is required for multithreaded build]))
+ AC_DEFINE(SQFS_MULTITHREADED, 1, [Enable multi-threaded low-level FUSE driver])
+ ])
+AM_CONDITIONAL([MULTITHREADED], [test x$enable_multithreading = xyes])
+
AC_SUBST([sq_decompressors])
AC_SUBST([sq_high_level])
AC_SUBST([sq_low_level])
diff --git a/fs.c b/fs.c
index 1838c5ca..ab854b0f 100644
--- a/fs.c
+++ b/fs.c
@@ -34,8 +34,13 @@
#include <sys/stat.h>
-#define DATA_CACHED_BLKS 1
-#define FRAG_CACHED_BLKS 3
+#ifdef SQFS_MULTITHREADED
+# define DATA_CACHED_BLKS 48
+# define FRAG_CACHED_BLKS 48
+#else
+# define DATA_CACHED_BLKS 1
+# define FRAG_CACHED_BLKS 3
+#endif
void sqfs_version_supported(int *min_major, int *min_minor, int *max_major,
int *max_minor) {
diff --git a/ll.c b/ll.c
index 4d17ba5b..596c8bf1 100644
--- a/ll.c
+++ b/ll.c
@@ -52,11 +52,49 @@ static sig_atomic_t open_refcount = 0;
/* same as lib/fuse_signals.c */
static struct fuse_session *fuse_instance = NULL;
+static void update_access_time(void) {
+#ifdef SQFS_MULTITHREADED
+ /* We only need to track access time if we have an idle timeout,
+ * don't bother with expensive operations if idle_timeout is 0.
+ */
+ if (idle_timeout_secs) {
+ time_t now = time(NULL);
+ __atomic_store_n(&last_access, now, __ATOMIC_RELEASE);
+ }
+#else
+ last_access = time(NULL);
+#endif
+}
+
+static void update_open_refcount(int delta) {
+#ifdef SQFS_MULTITHREADED
+ __atomic_fetch_add(&open_refcount, delta, __ATOMIC_RELEASE);
+#else
+ open_refcount += delta;
+#endif
+}
+
+static inline time_t get_access_time(void) {
+#ifdef SQFS_MULTITHREADED
+ return __atomic_load_n(&last_access, __ATOMIC_ACQUIRE);
+#else
+ return last_access;
+#endif
+}
+
+static inline sig_atomic_t get_open_refcount(void) {
+#ifdef SQFS_MULTITHREADED
+ return __atomic_load_n(&open_refcount, __ATOMIC_ACQUIRE);
+#else
+ return open_refcount;
+#endif
+}
+
void sqfs_ll_op_getattr(fuse_req_t req, fuse_ino_t ino,
struct fuse_file_info *fi) {
sqfs_ll_i lli;
struct stat st;
- last_access = time(NULL);
+ update_access_time();
if (sqfs_ll_iget(req, &lli, ino))
return;
@@ -71,7 +109,7 @@ void sqfs_ll_op_getattr(fuse_req_t req, fuse_ino_t ino,
void sqfs_ll_op_opendir(fuse_req_t req, fuse_ino_t ino,
struct fuse_file_info *fi) {
sqfs_ll_i *lli;
- last_access = time(NULL);
+ update_access_time();
fi->fh = (intptr_t)NULL;
@@ -86,7 +124,7 @@ void sqfs_ll_op_opendir(fuse_req_t req, fuse_ino_t ino,
fuse_reply_err(req, ENOTDIR);
} else {
fi->fh = (intptr_t)lli;
- ++open_refcount;
+ update_open_refcount(1);
fuse_reply_open(req, fi);
return;
}
@@ -96,14 +134,14 @@ void sqfs_ll_op_opendir(fuse_req_t req, fuse_ino_t ino,
void sqfs_ll_op_create(fuse_req_t req, fuse_ino_t parent, const char *name,
mode_t mode, struct fuse_file_info *fi) {
- last_access = time(NULL);
+ update_access_time();
fuse_reply_err(req, EROFS);
}
void sqfs_ll_op_releasedir(fuse_req_t req, fuse_ino_t ino,
struct fuse_file_info *fi) {
- last_access = time(NULL);
- --open_refcount;
+ update_access_time();
+ update_open_refcount(-1);
free((sqfs_ll_i*)(intptr_t)fi->fh);
fuse_reply_err(req, 0); /* yes, this is necessary */
}
@@ -132,7 +170,7 @@ void sqfs_ll_op_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
sqfs_ll_i *lli = (sqfs_ll_i*)(intptr_t)fi->fh;
int err = 0;
- last_access = time(NULL);
+ update_access_time();
if (sqfs_dir_open(&lli->ll->fs, &lli->inode, &dir, off))
err = EINVAL;
if (!err && !(bufpos = buf = malloc(size)))
@@ -173,7 +211,7 @@ void sqfs_ll_op_lookup(fuse_req_t req, fuse_ino_t parent,
bool found;
sqfs_inode inode;
- last_access = time(NULL);
+ update_access_time();
if (sqfs_ll_iget(req, &lli, parent))
return;
@@ -223,7 +261,7 @@ void sqfs_ll_op_open(fuse_req_t req, fuse_ino_t ino,
sqfs_inode *inode;
sqfs_ll *ll;
- last_access = time(NULL);
+ update_access_time();
if (fi->flags & (O_WRONLY | O_RDWR)) {
fuse_reply_err(req, EROFS);
return;
@@ -243,7 +281,7 @@ void sqfs_ll_op_open(fuse_req_t req, fuse_ino_t ino,
} else {
fi->fh = (intptr_t)inode;
fi->keep_cache = 1;
- ++open_refcount;
+ update_open_refcount(1);
fuse_reply_open(req, fi);
return;
}
@@ -254,8 +292,8 @@ void sqfs_ll_op_release(fuse_req_t req, fuse_ino_t ino,
struct fuse_file_info *fi) {
free((sqfs_inode*)(intptr_t)fi->fh);
fi->fh = 0;
- last_access = time(NULL);
- --open_refcount;
+ update_access_time();
+ update_open_refcount(-1);
fuse_reply_err(req, 0);
}
@@ -272,7 +310,7 @@ void sqfs_ll_op_read(fuse_req_t req, fuse_ino_t ino,
return;
}
- last_access = time(NULL);
+ update_access_time();
osize = size;
err = sqfs_read_range(&ll->fs, inode, off, &osize, buf);
if (err) {
@@ -289,7 +327,7 @@ void sqfs_ll_op_readlink(fuse_req_t req, fuse_ino_t ino) {
char *dst;
size_t size;
sqfs_ll_i lli;
- last_access = time(NULL);
+ update_access_time();
if (sqfs_ll_iget(req, &lli, ino))
return;
@@ -313,7 +351,7 @@ void sqfs_ll_op_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) {
char *buf;
int ferr;
- last_access = time(NULL);
+ update_access_time();
if (sqfs_ll_iget(req, &lli, ino))
return;
@@ -351,7 +389,7 @@ void sqfs_ll_op_getxattr(fuse_req_t req, fuse_ino_t ino,
}
#endif
- last_access = time(NULL);
+ update_access_time();
if (sqfs_ll_iget(req, &lli, ino))
return;
@@ -373,7 +411,7 @@ void sqfs_ll_op_getxattr(fuse_req_t req, fuse_ino_t ino,
void sqfs_ll_op_forget(fuse_req_t req, fuse_ino_t ino,
unsigned long nlookup) {
sqfs_ll_i lli;
- last_access = time(NULL);
+ update_access_time();
sqfs_ll_iget(req, &lli, SQFS_FUSE_INODE_NONE);
lli.ll->ino_forget(lli.ll, ino, nlookup);
fuse_reply_none(req);
@@ -489,7 +527,8 @@ void alarm_tick(int sig) {
return;
}
- if (open_refcount == 0 && time(NULL) - last_access > idle_timeout_secs) {
+ if (get_open_refcount() == 0 &&
+ time(NULL) - get_access_time() > idle_timeout_secs) {
/* Safely shutting down fuse in a cross-platform way is a dark art!
But just about any platform should stop on SIGINT, so do that */
kill(getpid(), SIGINT);
@@ -499,8 +538,8 @@ void alarm_tick(int sig) {
}
void setup_idle_timeout(struct fuse_session *se, unsigned int timeout_secs) {
- last_access = time(NULL);
idle_timeout_secs = timeout_secs;
+ update_access_time();
struct sigaction sa;
memset(&sa, 0, sizeof(struct sigaction));
diff --git a/ll_main.c b/ll_main.c
index aca76935..22302085 100644
--- a/ll_main.c
+++ b/ll_main.c
@@ -142,8 +142,22 @@ int main(int argc, char *argv[]) {
if (opts.idle_timeout_secs) {
setup_idle_timeout(ch.session, opts.idle_timeout_secs);
}
- /* FIXME: multithreading */
- err = fuse_session_loop(ch.session);
+#ifdef SQFS_MULTITHREADED
+# if FUSE_USE_VERSION >= 30
+ if (!fuse_cmdline_opts.singlethread) {
+ struct fuse_loop_config config;
+ config.clone_fd = 1;
+ config.max_idle_threads = 10;
+ err = fuse_session_loop_mt(ch.session, &config);
+ }
+# else /* FUSE_USE_VERSION < 30 */
+ if (fuse_cmdline_opts.mt) {
+ err = fuse_session_loop_mt(ch.session);
+ }
+# endif /* FUSE_USE_VERSION */
+ else
+#endif
+ err = fuse_session_loop(ch.session);
teardown_idle_timeout();
fuse_remove_signal_handlers(ch.session);
}
@@ -157,4 +171,4 @@ int main(int argc, char *argv[]) {
free(fuse_cmdline_opts.mountpoint);
return -err;
-}
\ No newline at end of file
+}
diff --git a/m4/squashfuse_c.m4 b/m4/squashfuse_c.m4
index f29a90b1..c4039c42 100644
--- a/m4/squashfuse_c.m4
+++ b/m4/squashfuse_c.m4
@@ -21,37 +21,6 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-# SQ_PROG_CPP_POSIX_2001
-#
-# Check if a preprocessor flag is needed for POSIX-2001 headers.
-# Needed at least on Solaris and derivatives.
-AC_DEFUN([SQ_PROG_CPP_POSIX_2001],[
-AC_CACHE_CHECK([for option for POSIX-2001 preprocessor],
- [sq_cv_prog_cpp_posix2001],
-[
- sq_cv_prog_cpp_posix2001=unknown
- sq_save_CPPFLAGS=$CPPFLAGS
- for sq_flags in none -std=gnu99 -xc99=all
- do
- AS_IF([test "x$sq_flags" = xnone],,
- [CPPFLAGS="$save_CPPFLAGS $sq_flags"])
- AC_PREPROC_IFELSE([AC_LANG_PROGRAM([
- #define _POSIX_C_SOURCE 200112L
- #include <sys/types.h>
- ])],[
- sq_cv_prog_cpp_posix2001=$sq_flags
- break
- ])
- done
- CPPFLAGS=$sq_save_CPPFLAGS
-])
-AS_IF([test "x$sq_cv_prog_cpp_posix2001" = xunknown],
- [AC_MSG_FAILURE([can't preprocess for POSIX-2001])],
- [AS_IF([test "x$sq_cv_prog_cpp_posix2001" = xnone],,
- CPPFLAGS="$CPPFLAGS $sq_cv_prog_cpp_posix2001")
-])
-])
-
# SQ_PROG_CC_WALL
#
# Check if -Wall is supported
@@ -67,4 +36,4 @@ AC_CACHE_CHECK([how to enable all compiler warnings],
])
AS_IF([test "x$sq_cv_prog_cc_wall" = xunknown],,
[AC_SUBST([AM_CFLAGS],["$AM_CFLAGS $sq_cv_prog_cc_wall"])])
-])
\ No newline at end of file
+])
diff --git a/squashfs_fs.h b/squashfs_fs.h
index e0ab1f4e..a85b7606 100644
--- a/squashfs_fs.h
+++ b/squashfs_fs.h
@@ -105,7 +105,11 @@
/* cached data constants for filesystem */
-#define SQUASHFS_CACHED_BLKS 8
+#ifdef SQFS_MULTITHREADED
+# define SQUASHFS_CACHED_BLKS 128
+#else
+# define SQUASHFS_CACHED_BLKS 8
+#endif
#define SQUASHFS_MAX_FILE_SIZE_LOG 64
diff --git a/tests/cachetest.c b/tests/cachetest.c
index 8a2c2363..c515fcdf 100644
--- a/tests/cachetest.c
+++ b/tests/cachetest.c
@@ -36,6 +36,7 @@ int test_cache_miss(void) {
TestStructDispose), SQFS_OK);
entry = (TestStruct *)sqfs_cache_get(&cache, 1);
EXPECT_EQ(sqfs_cache_entry_valid(&cache, entry), 0);
+ sqfs_cache_put(&cache, entry);
sqfs_cache_destroy(&cache);
return errors == 0;
diff --git a/tests/ll-smoke-singlethreaded.sh b/tests/ll-smoke-singlethreaded.sh
new file mode 100755
index 00000000..c7cbfc38
--- /dev/null
+++ b/tests/ll-smoke-singlethreaded.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# Singlethreaded ll-smoke test.
+#
+# When multithreading is enabled at build time, it is the default
+# behavior of squashfuse_ll, but can be disabled at runtime with
+# the FUSE '-s' commandline option.
+#
+# So we just re-run the normal ll-smoke test with the '-s' option.
+SFLL_EXTRA_ARGS="-s" "$(dirname -- "$0")/ll-smoke.sh" "$@"
diff --git a/tests/ll-smoke.sh b/tests/ll-smoke.sh
new file mode 100755
index 00000000..6b9f4641
--- /dev/null
+++ b/tests/ll-smoke.sh
@@ -0,0 +1,141 @@
+#!/bin/sh
+
+. "tests/lib.sh"
+
+# Very simple smoke test for squashfuse_ll. Make some random files.
+# assemble a squashfs image, mount it, compare the files.
+
+SFLL=${1:-./squashfuse_ll} # The squashfuse_ll binary.
+
+IDLE_TIMEOUT=5
+
+trap cleanup EXIT
+set -e
+
+WORKDIR=$(mktemp -d)
+
+# Unmount the filesystem mounted at $1.
+sq_umount() {
+	# The case word is the literal "linux-gnu" (presumably substituted at
+	# configure time), so the linux branch is the one taken here.
+	# "$1" is quoted so a mountpoint containing spaces stays one argument.
+	case linux-gnu in
+		linux*) fusermount3 -u "$1" ;;
+		*) umount "$1" ;;
+	esac
+}
+
+sq_is_mountpoint() {
+	mount | grep -qF -- "$1"  # -F: match the path literally, not as a regex
+}
+
+cleanup() {
+	# Best-effort teardown: errors in here are deliberately ignored.
+	set +e
+	[ -n "$WORKDIR" ] || return 0
+	if [ -n "$SQ_SAVE_LOGS" ]; then
+		cp "$WORKDIR/squashfs_ll.log" "$SQ_SAVE_LOGS" || true
+	fi
+	if sq_is_mountpoint "$WORKDIR/mount"; then
+		sq_umount "$WORKDIR/mount"
+	fi
+	rm -rf "$WORKDIR"
+}
+
+find_compressors
+
+echo "Generating random test files..."
+mkdir -p "$WORKDIR/source"
+head -c 64000000 /dev/urandom >"$WORKDIR/source/rand1"
+head -c 17000 /dev/urandom >"$WORKDIR/source/rand2"
+head -c 100000000 /dev/urandom >"$WORKDIR/source/rand3"
+head -c 87 /dev/zero >"$WORKDIR/source/z1 with spaces"
+
+for comp in $compressors; do
+ echo "Building $comp squashfs image..."
+ mksquashfs "$WORKDIR/source" "$WORKDIR/squashfs.image" -comp $comp -no-progress
+
+ mkdir -p "$WORKDIR/mount"
+
+ echo "Mounting squashfs image..."
+ $SFLL -f $SFLL_EXTRA_ARGS "$WORKDIR/squashfs.image" "$WORKDIR/mount" >"$WORKDIR/squashfs_ll.log" 2>&1 &
+ # Wait up to 5 seconds to be mounted. TSAN builds can take some time to mount.
+ for _ in $(seq 5); do
+ if sq_is_mountpoint "$WORKDIR/mount"; then
+ break
+ fi
+ sleep 1
+ done
+
+ if ! sq_is_mountpoint "$WORKDIR/mount"; then
+ echo "Image did not mount after 5 seconds."
+ cp "$WORKDIR/squashfs_ll.log" /tmp/squashfs_ll.smoke.log
+ echo "There may be clues in /tmp/squashfs_ll.smoke.log"
+ exit 1
+ fi
+
+ if command -v fio >/dev/null; then
+ echo "FIO tests..."
+ fio --filename="$WORKDIR/mount/rand1" --direct=1 --rw=randread --ioengine=libaio --bs=512 --iodepth=16 --numjobs=4 --name=j1 --minimal --output=/dev/null --runtime 30
+ fio --filename="$WORKDIR/mount/rand2" --rw=randread --ioengine=libaio --bs=4k --iodepth=16 --numjobs=4 --name=j2 --minimal --output=/dev/null --runtime 30
+ fio --filename="$WORKDIR/mount/rand3" --rw=randread --ioengine=psync --bs=128k --name=j3 --minimal --output=/dev/null --runtime 30
+ else
+ echo "Consider installing fio for better test coverage."
+ fi
+
+ echo "Comparing files..."
+ cmp "$WORKDIR/source/rand1" "$WORKDIR/mount/rand1"
+ cmp "$WORKDIR/source/rand2" "$WORKDIR/mount/rand2"
+ cmp "$WORKDIR/source/rand3" "$WORKDIR/mount/rand3"
+ cmp "$WORKDIR/source/z1 with spaces" "$WORKDIR/mount/z1 with spaces"
+
+ echo "Parallel md5sum..."
+ md5sum "$WORKDIR"/mount/* >"$WORKDIR/md5sums"
+ split -l1 "$WORKDIR/md5sums" "$WORKDIR/sumpiece"
+ echo "$WORKDIR"/sumpiece* | xargs -P4 -n1 md5sum -c
+
+ echo "Lookup tests..."
+ # Look for non-existent files to exercise failed lookup path.
+ if [ -e "$WORKDIR/mount/bogus" ]; then
+ echo "Bogus existence test"
+ exit 1
+ fi
+ # Twice so we hit cache path.
+ if [ -e "$WORKDIR/mount/bogus" ]; then
+ echo "Bogus existence test #2"
+ exit 1
+ fi
+
+ SRCSZ=$(wc -c < "$WORKDIR/source/rand1")
+ MNTSZ=$(wc -c < "$WORKDIR/mount/rand1")
+ if [ "$SRCSZ" != "$MNTSZ" ]; then
+ echo "Bogus size $MNTSZ != $SRCSZ"
+ exit 1
+ fi
+
+ echo "Unmounting..."
+ sq_umount "$WORKDIR/mount"
+
+ # Only test timeouts once, it takes a long time
+ if [ -z "$did_timeout" ]; then
+ echo "Remounting with idle unmount option..."
+ $SFLL $SFLL_EXTRA_ARGS -otimeout=$IDLE_TIMEOUT "$WORKDIR/squashfs.image" "$WORKDIR/mount"
+ if ! sq_is_mountpoint "$WORKDIR/mount"; then
+ echo "Not mounted?"
+ exit 1
+ fi
+ echo "Waiting up to $(( IDLE_TIMEOUT + 10 )) seconds for idle unmount..."
+ sleep $(( IDLE_TIMEOUT + 10 ))
+ if sq_is_mountpoint "$WORKDIR/mount"; then
+ echo "FS did not idle unmount in timely way."
+ exit 1
+ fi
+
+ did_timeout=yes
+ fi
+
+ rm -f "$WORKDIR/squashfs.image"
+done
+
+echo "Success."
+exit 0
diff --git a/tests/ll-smoke.sh.in b/tests/ll-smoke.sh.in
index 84256267..d7ddd8a1 100755
--- a/tests/ll-smoke.sh.in
+++ b/tests/ll-smoke.sh.in
@@ -52,13 +52,13 @@ head -c 100000000 /dev/urandom >"$WORKDIR/source/rand3"
head -c 87 /dev/zero >"$WORKDIR/source/z1 with spaces"
for comp in $compressors; do
- echo "Building $comp squashfs image,.,"
+ echo "Building $comp squashfs image..."
mksquashfs "$WORKDIR/source" "$WORKDIR/squashfs.image" -comp $comp -no-progress
mkdir -p "$WORKDIR/mount"
echo "Mounting squashfs image..."
- $SFLL -f "$WORKDIR/squashfs.image" "$WORKDIR/mount" >"$WORKDIR/squashfs_ll.log" 2>&1 &
+ $SFLL -f $SFLL_EXTRA_ARGS "$WORKDIR/squashfs.image" "$WORKDIR/mount" >"$WORKDIR/squashfs_ll.log" 2>&1 &
# Wait up to 5 seconds to be mounted. TSAN builds can take some time to mount.
for _ in $(seq 5); do
if sq_is_mountpoint "$WORKDIR/mount"; then
@@ -119,7 +119,7 @@ for comp in $compressors; do
# Only test timeouts once, it takes a long time
if [ -z "$did_timeout" ]; then
echo "Remounting with idle unmount option..."
- $SFLL -otimeout=$IDLE_TIMEOUT "$WORKDIR/squashfs.image" "$WORKDIR/mount"
+ $SFLL $SFLL_EXTRA_ARGS -otimeout=$IDLE_TIMEOUT "$WORKDIR/squashfs.image" "$WORKDIR/mount"
if ! sq_is_mountpoint "$WORKDIR/mount"; then
echo "Not mounted?"
exit 1
From 069e8f802481af0636fabef46bec75de1992b220 Mon Sep 17 00:00:00 2001
From: Kevin Vigor <kvigor@gmail.com>
Date: Mon, 23 May 2022 14:55:25 -0600
Subject: [PATCH 3/3] Enable lazy umount on SIGTERM.
libfuse sets SIGTERM signal handler to exit immediately. This is very
unfortunate if any other processes are still using the filesystem.
Teach squashfuse_ll to respond to SIGTERM with lazy umount. We cannot
directly call umount2() API from the signal handler, since it is not
signal safe, but we can fork/exec fusermount3 (yay posix?). This is
also a win because fusermount is suid, enabling non-privileged users
to umount. Note that normal libfuse umount uses same strategy when
running as non-root.
Note that this must be explicitly enabled at configure time with
--enable-sigterm-handler, and it is only tested on linux.
---
Makefile.am | 3 ++
configure.ac | 9 +++++
ll_main.c | 87 +++++++++++++++++++++++++++++++++++++++++
tests/umount-test.sh.in | 85 ++++++++++++++++++++++++++++++++++++++++
4 files changed, 184 insertions(+)
create mode 100755 tests/umount-test.sh.in
diff --git a/Makefile.am b/Makefile.am
index 17b01be4..67c17cde 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -111,6 +111,9 @@ if MULTITHREADED
# that case.
TESTS += tests/ll-smoke-singlethreaded.sh
endif
+if SIGTERM_HANDLER
+TESTS += tests/umount-test.sh
+endif
check_PROGRAMS = cachetest endiantest
cachetest_SOURCES=tests/cachetest.c
cachetest_LDADD=libsquashfuse.la $(COMPRESSION_LIBS)
diff --git a/configure.ac b/configure.ac
index 3869075a..7cd4db77 100644
--- a/configure.ac
+++ b/configure.ac
@@ -91,6 +91,7 @@ AS_IF([test "x$sq_tests" = x], [sq_tests=" none"])
AC_SUBST([sq_mksquashfs_compressors])
AC_CONFIG_FILES([tests/ll-smoke.sh],[chmod +x tests/ll-smoke.sh])
+AC_CONFIG_FILES([tests/umount-test.sh],[chmod +x tests/umount-test.sh])
AS_IF([test "x$sq_high_level$sq_low_level$sq_demo" = xnonono],
@@ -104,6 +105,14 @@ AC_ARG_ENABLE([multithreading],
])
AM_CONDITIONAL([MULTITHREADED], [test x$enable_multithreading = xyes])
+AC_ARG_ENABLE([sigterm-handler],
+  [AS_HELP_STRING([--enable-sigterm-handler], [enable lazy umount on SIGTERM in low-level FUSE driver])],
+  [AS_IF([test "x$enableval" = xyes], [
+    AC_CHECK_HEADER([linux/version.h], [], [AC_MSG_ERROR([linux host required for sigterm-handler.])])
+    AC_DEFINE([SQFS_SIGTERM_HANDLER], [1], [Enable lazy umount on SIGTERM in low-level FUSE driver])
+  ])])
+AM_CONDITIONAL([SIGTERM_HANDLER], [test x$enable_sigterm_handler = xyes])
+
AC_SUBST([sq_decompressors])
AC_SUBST([sq_high_level])
AC_SUBST([sq_low_level])
diff --git a/ll_main.c b/ll_main.c
index 22302085..0f956b5d 100644
--- a/ll_main.c
+++ b/ll_main.c
@@ -37,6 +37,90 @@
#include <signal.h>
#include <unistd.h>
+
+#if defined(SQFS_SIGTERM_HANDLER)
+#include <sys/utsname.h>
+#include <linux/version.h>
+/* True iff uname() reports a kernel release >= the given version;
+ * false if the release cannot be read or parsed as "major.minor.micro". */
+static bool kernel_version_at_least(unsigned required_major,
+                                    unsigned required_minor,
+                                    unsigned required_micro) {
+	struct utsname un;
+	unsigned rel_major, rel_minor, rel_micro;
+	if (uname(&un) < 0)
+		return false;
+	if (sscanf(un.release, "%u.%u.%u",
+	           &rel_major, &rel_minor, &rel_micro) != 3)
+		return false;
+	return KERNEL_VERSION(rel_major, rel_minor, rel_micro) >=
+	       KERNEL_VERSION(required_major, required_minor, required_micro);
+}
+
+/* libfuse's default SIGTERM handler (set up in fuse_set_signal_handlers())
+ * immediately calls fuse_session_exit(), which shuts down the filesystem
+ * even if there are active users. This leads to nastiness if other processes
+ * still depend on the filesystem.
+ *
+ * So: we respond to SIGTERM by starting a lazy unmount. This is done
+ * by exec'ing fusermount3, which works properly for unprivileged
+ * users (we cannot use umount2() syscall because it is not signal safe;
+ * fork() and exec(), amazingly, are).
+ *
+ * If we fail to start the lazy umount, we signal ourself with SIGINT,
+ * which falls back to the old behavior of exiting ASAP.
+ */
+static const char *g_mount_point = NULL;
+/* Lazy umount of an in-use fuse filesystem triggers a kernel bug on
+ * kernels < 5.2 (fixed by kernel commit
+ * e8f3bd773d22f488724dffb886a1618da85c2966 in 5.2). The version check uses
+ * sscanf(), which is not async-signal-safe, so it runs once in
+ * set_sigterm_handler() and the handler only reads this cached result.
+ */
+static volatile sig_atomic_t g_lazy_umount_ok = 0;
+static void sigterm_handler(int signum) {
+	if (g_mount_point && g_lazy_umount_ok) {
+		pid_t pid = fork();
+		if (pid == 0) {
+			/* child process: disassociate ourself from parent so we
+			 * do not become zombie (as parent does not waitpid()). */
+			pid_t parent = getppid();
+			setsid();
+			execl("/bin/fusermount3", "fusermount3",
+			      "-u", "-q", "-z", "--", g_mount_point, NULL);
+			execlp("fusermount3", "fusermount3",
+			       "-u", "-q", "-z", "--", g_mount_point, NULL);
+			/* if we get here, we can't run fusermount,
+			 * kill the original process with a harshness. */
+			kill(parent, SIGINT);
+			_exit(0);
+		} else if (pid > 0) {
+			/* parent process: nothing to do, murderous child will
+			 * do us in one way or another. */
+			return;
+		}
+	}
+	/* If we get here, we have failed to lazy unmount for whatever reason,
+	 * kill ourself more brutally. */
+	kill(getpid(), SIGINT);
+}
+
+static void set_sigterm_handler(const char *mountpoint) {
+	struct sigaction sa;
+
+	g_mount_point = mountpoint;
+	g_lazy_umount_ok = kernel_version_at_least(5, 2, 0);
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_handler = sigterm_handler;
+	sigemptyset(&(sa.sa_mask));
+	sa.sa_flags = SA_RESTART;
+
+	if (sigaction(SIGTERM, &sa, NULL) == -1) {
+		perror("sigaction(SIGTERM)");
+	}
+}
+#endif /* SQFS_SIGTERM_HANDLER */
+
int main(int argc, char *argv[]) {
struct fuse_args args;
sqfs_opts opts;
@@ -139,6 +223,9 @@ int main(int argc, char *argv[]) {
ll) == SQFS_OK) {
if (sqfs_ll_daemonize(fuse_cmdline_opts.foreground) != -1) {
if (fuse_set_signal_handlers(ch.session) != -1) {
+#if defined(SQFS_SIGTERM_HANDLER)
+ set_sigterm_handler(fuse_cmdline_opts.mountpoint);
+#endif
if (opts.idle_timeout_secs) {
setup_idle_timeout(ch.session, opts.idle_timeout_secs);
}
diff --git a/tests/umount-test.sh.in b/tests/umount-test.sh.in
new file mode 100755
index 00000000..06fed533
--- /dev/null
+++ b/tests/umount-test.sh.in
@@ -0,0 +1,85 @@
+#!/bin/bash
+
+. "tests/lib.sh"
+
+SFLL=${1:-./squashfuse_ll} # The squashfuse_ll binary.
+TIMEOUT=20
+
+case @build_os@ in
+ linux*)
+ ;;
+ *)
+ echo "This test is only enabled on linux hosts."
+ exit 0
+ ;;
+esac
+
+# EXIT trap: stop the background tail (if still running), unmount the
+# test mountpoint and remove the work directory.
+cleanup() {
+	set +e  # best effort; keep going if a step fails
+	[[ -z "$TAIL_PID" ]] || kill "$TAIL_PID"
+	@sq_fusermount@ -u "$MNTDIR" >/dev/null 2>&1
+	rm -rf "$WORKDIR"
+}
+
+set -e
+WORKDIR=$(mktemp -d)
+MNTDIR="$WORKDIR/mountpoint"
+mkdir -p "$MNTDIR"
+mkdir -p "$WORKDIR/source"
+trap cleanup EXIT
+
+# Make a tiny squashfs filesystem.
+echo "Hello world" >"$WORKDIR/source/hello"
+mksquashfs "$WORKDIR/source" "$WORKDIR/squashfs.image" -comp zstd -no-progress >& /dev/null
+
+# Mount it.
+$SFLL "$WORKDIR/squashfs.image" "$MNTDIR"
+# "|| true": let the check below report a missing process instead of 'set -e' aborting silently.
+SFPID=$(pgrep -f "squashfuse_ll.*$MNTDIR" || true)
+if [[ -z "$SFPID" || ! -d /proc/$SFPID ]]; then
+ echo "squashfuse process missing"
+ exit 1
+fi
+if ! grep -q "$MNTDIR" /proc/mounts; then
+ echo "mount missing."
+ exit 1
+fi
+
+# background a task to hold a file open from the image.
+tail -f "${MNTDIR}/hello" >/dev/null &
+TAIL_PID=$!
+
+# SIGTERM the squashfuse process.
+kill -15 "$SFPID"
+
+# Now we expect the mountpoint to disappear due to lazy umount.
+if ! timeout $TIMEOUT bash -c \
+ "while grep -q $MNTDIR /proc/mounts; do \
+ sleep 1;
+ done"; then
+ echo "$MNTDIR did not dismount in response to SIGTERM."
+ exit 1
+fi
+
+# but the process should remain alive, because of the background task.
+if ! [[ -d /proc/$SFPID ]]; then
+ echo "squashfuse process missing"
+ exit 1
+fi
+
+# Now kill the background process.
+kill $TAIL_PID
+TAIL_PID=
+
+# Now we expect the process to die.
+if ! timeout $TIMEOUT bash -c \
+ "while [[ -d /proc/$SFPID ]]; do \
+ sleep 1;
+ done"; then
+ echo "squashfuse process did not die once filesystem was released."
+ exit 1
+fi
+
+echo "Success."