add editorconfig

Add a .editorconfig file for qemu. Specifies the indent and tab style for various files (C code and Makefiles for starters). Most popular editors support this either natively or via plugin. Check http://editorconfig.org/ for details. Signed-off-by: Gerd Hoffmann <kraxel@redhat.com> Message-id: 20170717101547.22295-1-kraxel@redhat.com
add scripts/git.orderfile
2017-07-20 09:56:56 +02:00 · 2017-07-20 09:56:41 +02:00 · 2017-07-19 20:45:37 +01:00 · 2017-07-19 17:51:36 +01:00 · 2017-07-19 16:31:08 +01:00 · 2017-07-19 14:42:18 +01:00
598 changed files with 24820 additions and 6423 deletions
--- a/.editorconfig
+++ b/.editorconfig
@@ -0,0 +1,15 @@
+# http://editorconfig.org
+root = true
+
+[*]
+end_of_line = lf
+insert_final_newline = true
+charset = utf-8
+
+[Makefile*]
+indent_style = tab
+indent_size = 8
+
+[*.{c,h}]
+indent_style = space
+indent_size = 4
--- a/.travis.yml
+++ b/.travis.yml
@@ -86,6 +86,9 @@ matrix:
    - env: CONFIG="--enable-trace-backends=ust"
           TEST_CMD=""
      compiler: gcc
+    - env: CONFIG="--disable-tcg"
+           TEST_CMD=""
+      compiler: gcc
    - env: CONFIG=""
      os: osx
      compiler: clang
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -84,14 +84,10 @@ M: Paolo Bonzini <pbonzini@redhat.com>
 M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
 M: Richard Henderson <rth@twiddle.net>
 S: Maintained
-F: cpu-exec.c
-F: cpu-exec-common.c
 F: cpus.c
-F: cputlb.c
 F: exec.c
 F: softmmu_template.h
-F: translate-all.*
-F: translate-common.c
+F: accel/tcg/
 F: include/exec/cpu*.h
 F: include/exec/exec-all.h
 F: include/exec/helper*.h
@@ -277,8 +273,8 @@ Overall
 M: Paolo Bonzini <pbonzini@redhat.com>
 L: kvm@vger.kernel.org
 S: Supported
-F: kvm-*
 F: */kvm.*
+F: accel/kvm/
 F: include/sysemu/kvm*.h

 ARM
@@ -327,7 +323,6 @@ M: Stefano Stabellini <sstabellini@kernel.org>
 M: Anthony Perard <anthony.perard@citrix.com>
 L: xen-devel@lists.xenproject.org
 S: Supported
-F: xen-*
 F: */xen*
 F: hw/9pfs/xen-9p-backend.c
 F: hw/char/xen_console.c
@@ -380,7 +375,7 @@ F: hw/*/allwinner*
 F: include/hw/*/allwinner*
 F: hw/arm/cubieboard.c

-ARM PrimeCell
+ARM PrimeCell and CMSDK devices
 M: Peter Maydell <peter.maydell@linaro.org>
 L: qemu-arm@nongnu.org
 S: Maintained
@@ -394,6 +389,10 @@ F: hw/intc/pl190.c
 F: hw/sd/pl181.c
 F: hw/timer/pl031.c
 F: include/hw/arm/primecell.h
+F: hw/timer/cmsdk-apb-timer.c
+F: include/hw/timer/cmsdk-apb-timer.h
+F: hw/char/cmsdk-apb-uart.c
+F: include/hw/char/cmsdk-apb-uart.h

 ARM cores
 M: Peter Maydell <peter.maydell@linaro.org>
@@ -455,6 +454,14 @@ S: Maintained
 F: hw/arm/integratorcp.c
 F: hw/misc/arm_integrator_debug.c

+MPS2
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/arm/mps2.c
+F: hw/misc/mps2-scc.c
+F: include/hw/misc/mps2-scc.h
+
 Musicpal
 M: Jan Kiszka <jan.kiszka@web.de>
 L: qemu-arm@nongnu.org
@@ -1160,6 +1167,13 @@ F: docs/specs/vmgenid.txt
 F: tests/vmgenid-test.c
 F: stubs/vmgenid.c

+Unimplemented device
+M: Peter Maydell <peter.maydell@linaro.org>
+R: Philippe Mathieu-Daudé <f4bug@amsat.org>
+S: Maintained
+F: include/hw/misc/unimp.h
+F: hw/misc/unimp.c
+
 Subsystems
 ----------
 Audio
@@ -1650,7 +1664,7 @@ TCI target
 M: Stefan Weil <sw@weilnetz.de>
 S: Maintained
 F: tcg/tci/
-F: tci.c
+F: tcg/tci.c
 F: disas/tci.c

 Block drivers
--- a/Makefile
+++ b/Makefile
@@ -553,7 +553,7 @@ efi-e1000e.rom efi-vmxnet3.rom \
 qemu-icon.bmp qemu_logo_no_text.svg \
 bamboo.dtb petalogix-s3adsp1800.dtb petalogix-ml605.dtb \
 multiboot.bin linuxboot.bin linuxboot_dma.bin kvmvapic.bin \
-s390-ccw.img \
+s390-ccw.img s390-netboot.img \
 spapr-rtas.bin slof.bin skiboot.lid \
 palcode-clipper \
 u-boot.e500 \
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -280,6 +280,7 @@ struct tb_desc {
    CPUArchState *env;
    tb_page_addr_t phys_page1;
    uint32_t flags;
+    uint32_t trace_vcpu_dstate;
 };

 static bool tb_cmp(const void *p, const void *d)
@@ -291,6 +292,7 @@ static bool tb_cmp(const void *p, const void *d)
        tb->page_addr[0] == desc->phys_page1 &&
        tb->cs_base == desc->cs_base &&
        tb->flags == desc->flags &&
+        tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
        !atomic_read(&tb->invalid)) {
        /* check next page if needed */
        if (tb->page_addr[1] == -1) {
@@ -319,10 +321,11 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
    desc.env = (CPUArchState *)cpu->env_ptr;
    desc.cs_base = cs_base;
    desc.flags = flags;
+    desc.trace_vcpu_dstate = *cpu->trace_dstate;
    desc.pc = pc;
    phys_pc = get_page_addr_code(desc.env, pc);
    desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
-    h = tb_hash_func(phys_pc, pc, flags);
+    h = tb_hash_func(phys_pc, pc, flags, *cpu->trace_dstate);
    return qht_lookup(&tcg_ctx.tb_ctx.htable, tb_cmp, &desc, h);
 }

@@ -342,7 +345,8 @@ static inline TranslationBlock *tb_find(CPUState *cpu,
    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
    tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]);
    if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base ||
-                 tb->flags != flags)) {
+                 tb->flags != flags ||
+                 tb->trace_vcpu_dstate != *cpu->trace_dstate)) {
        tb = tb_htable_lookup(cpu, pc, cs_base, flags);
        if (!tb) {

--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -54,6 +54,7 @@
 #include "exec/tb-hash.h"
 #include "translate-all.h"
 #include "qemu/bitmap.h"
+#include "qemu/error-report.h"
 #include "qemu/timer.h"
 #include "qemu/main-loop.h"
 #include "exec/log.h"
@@ -112,6 +113,11 @@ typedef struct PageDesc {
 #define V_L2_BITS 10
 #define V_L2_SIZE (1 << V_L2_BITS)

+/* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
+QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
+                  sizeof(((TranslationBlock *)0)->trace_vcpu_dstate)
+                  * BITS_PER_BYTE);
+
 /*
 * L1 Mapping properties
 */
@@ -1071,7 +1077,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)

    /* remove the TB from the hash list */
    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
-    h = tb_hash_func(phys_pc, tb->pc, tb->flags);
+    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->trace_vcpu_dstate);
    qht_remove(&tcg_ctx.tb_ctx.htable, tb, h);

    /* remove the TB from the page list */
@@ -1216,7 +1222,7 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
    }

    /* add in the hash table */
-    h = tb_hash_func(phys_pc, tb->pc, tb->flags);
+    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->trace_vcpu_dstate);
    qht_insert(&tcg_ctx.tb_ctx.htable, tb, h);

 #ifdef DEBUG_TB_CHECK
@@ -1262,6 +1268,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
    tb->cs_base = cs_base;
    tb->flags = flags;
    tb->cflags = cflags;
+    tb->trace_vcpu_dstate = *cpu->trace_dstate;
    tb->invalid = false;

 #ifdef CONFIG_PROFILER
@@ -1851,11 +1858,6 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)

    tb_lock();

-    if (!tcg_enabled()) {
-        cpu_fprintf(f, "TCG not enabled\n");
-        return;
-    }
-
    target_code_size = 0;
    max_target_code_size = 0;
    cross_page = 0;
--- a/audio/rate_template.h
+++ b/audio/rate_template.h
@@ -71,6 +71,12 @@ void NAME (void *opaque, struct st_sample *ibuf, struct st_sample *obuf,
        while (rate->ipos <= (rate->opos >> 32)) {
            ilast = *ibuf++;
            rate->ipos++;
+
+            /* if ipos overflows, there is an infinite loop */
+            if (rate->ipos == 0xffffffff) {
+                rate->ipos = 1;
+                rate->opos = rate->opos & 0xffffffff;
+            }
            /* See if we finished the input buffer yet */
            if (ibuf >= iend) {
                goto the_end;
--- a/backends/hostmem-ram.c
+++ b/backends/hostmem-ram.c
@@ -28,7 +28,7 @@ ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
    }

    path = object_get_canonical_path_component(OBJECT(backend));
-    memory_region_init_ram(&backend->mr, OBJECT(backend), path,
+    memory_region_init_ram_nomigrate(&backend->mr, OBJECT(backend), path,
                           backend->size, errp);
    g_free(path);
 }
--- a/backends/rng-egd.c
+++ b/backends/rng-egd.c
@@ -106,7 +106,7 @@ static void rng_egd_opened(RngBackend *b, Error **errp)

    /* FIXME we should resubmit pending requests when the CDS reconnects. */
    qemu_chr_fe_set_handlers(&s->chr, rng_egd_chr_can_read,
-                             rng_egd_chr_read, NULL, s, NULL, true);
+                             rng_egd_chr_read, NULL, NULL, s, NULL, true);
 }

 static void rng_egd_set_chardev(Object *obj, const char *value, Error **errp)
--- a/block.c
+++ b/block.c
@@ -2185,6 +2185,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
        ret = -EINVAL;
        goto free_exit;
    }
+    bdrv_set_aio_context(backing_hd, bdrv_get_aio_context(bs));

    /* Hook up the backing file link; drop our reference, bs owns the
     * backing_hd reference now */
@@ -2573,15 +2574,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
        goto close_and_fail;
    }

-    if (!bdrv_key_required(bs)) {
-        bdrv_parent_cb_change_media(bs, true);
-    } else if (!runstate_check(RUN_STATE_PRELAUNCH)
-               && !runstate_check(RUN_STATE_INMIGRATE)
-               && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
-        error_setg(errp,
-                   "Guest must be stopped for opening of encrypted image");
-        goto close_and_fail;
-    }
+    bdrv_parent_cb_change_media(bs, true);

    QDECREF(options);

@@ -2989,24 +2982,45 @@ error:
 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
 {
    BlockDriver *drv;
+    BlockDriverState *bs;
+    bool old_can_write, new_can_write;

    assert(reopen_state != NULL);
-    drv = reopen_state->bs->drv;
+    bs = reopen_state->bs;
+    drv = bs->drv;
    assert(drv != NULL);

+    old_can_write =
+        !bdrv_is_read_only(bs) && !(bdrv_get_flags(bs) & BDRV_O_INACTIVE);
+
    /* If there are any driver level actions to take */
    if (drv->bdrv_reopen_commit) {
        drv->bdrv_reopen_commit(reopen_state);
    }

    /* set BDS specific flags now */
-    QDECREF(reopen_state->bs->explicit_options);
+    QDECREF(bs->explicit_options);

-    reopen_state->bs->explicit_options   = reopen_state->explicit_options;
-    reopen_state->bs->open_flags         = reopen_state->flags;
-    reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
+    bs->explicit_options   = reopen_state->explicit_options;
+    bs->open_flags         = reopen_state->flags;
+    bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);

-    bdrv_refresh_limits(reopen_state->bs, NULL);
+    bdrv_refresh_limits(bs, NULL);
+
+    new_can_write =
+        !bdrv_is_read_only(bs) && !(bdrv_get_flags(bs) & BDRV_O_INACTIVE);
+    if (!old_can_write && new_can_write && drv->bdrv_reopen_bitmaps_rw) {
+        Error *local_err = NULL;
+        if (drv->bdrv_reopen_bitmaps_rw(bs, &local_err) < 0) {
+            /* This is not fatal: the bitmaps are just left read-only, so all
+             * following writes will fail. The user can remove the read-only
+             * bitmaps to unblock writes.
+             */
+            error_reportf_err(local_err,
+                              "%s: Failed to make dirty bitmaps writable: ",
+                              bdrv_get_node_name(bs));
+        }
+    }
 }

 /*
@@ -3040,9 +3054,6 @@ static void bdrv_close(BlockDriverState *bs)
    bdrv_flush(bs);
    bdrv_drain(bs); /* in case flush left pending I/O */

-    bdrv_release_named_dirty_bitmaps(bs);
-    assert(QLIST_EMPTY(&bs->dirty_bitmaps));
-
    if (bs->drv) {
        BdrvChild *child, *next;

@@ -3072,7 +3083,6 @@ static void bdrv_close(BlockDriverState *bs)
        bs->backing_format[0] = '\0';
        bs->total_sectors = 0;
        bs->encrypted = false;
-        bs->valid_key = false;
        bs->sg = false;
        QDECREF(bs->options);
        QDECREF(bs->explicit_options);
@@ -3081,6 +3091,9 @@ static void bdrv_close(BlockDriverState *bs)
        bs->full_open_options = NULL;
    }

+    bdrv_release_named_dirty_bitmaps(bs);
+    assert(QLIST_EMPTY(&bs->dirty_bitmaps));
+
    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
        g_free(ban);
    }
@@ -3398,7 +3411,8 @@ exit:
 /**
 * Truncate file to 'offset' bytes (needed only for file protocols)
 */
-int bdrv_truncate(BdrvChild *child, int64_t offset, Error **errp)
+int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc,
+                  Error **errp)
 {
    BlockDriverState *bs = child->bs;
    BlockDriver *drv = bs->drv;
@@ -3421,7 +3435,7 @@ int bdrv_truncate(BdrvChild *child, int64_t offset, Error **errp)

    assert(!(bs->open_flags & BDRV_O_INACTIVE));

-    ret = drv->bdrv_truncate(bs, offset, errp);
+    ret = drv->bdrv_truncate(bs, offset, prealloc, errp);
    if (ret == 0) {
        ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
        bdrv_dirty_bitmap_truncate(bs);
@@ -3450,6 +3464,41 @@ int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
    return -ENOTSUP;
 }

+/*
+ * bdrv_measure:
+ * @drv: Format driver
+ * @opts: Creation options for new image
+ * @in_bs: Existing image containing data for new image (may be NULL)
+ * @errp: Error object
+ * Returns: A #BlockMeasureInfo (free using qapi_free_BlockMeasureInfo())
+ *          or NULL on error
+ *
+ * Calculate file size required to create a new image.
+ *
+ * If @in_bs is given then space for allocated clusters and zero clusters
+ * from that image are included in the calculation.  If @opts contains a
+ * backing file that is shared by @in_bs then backing clusters may be omitted
+ * from the calculation.
+ *
+ * If @in_bs is NULL then the calculation includes no allocated clusters
+ * unless a preallocation option is given in @opts.
+ *
+ * Note that @in_bs may use a different BlockDriver from @drv.
+ *
+ * If an error occurs the @errp pointer is set.
+ */
+BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
+                               BlockDriverState *in_bs, Error **errp)
+{
+    if (!drv->bdrv_measure) {
+        error_setg(errp, "Block driver '%s' does not support size measurement",
+                   drv->format_name);
+        return NULL;
+    }
+
+    return drv->bdrv_measure(opts, in_bs, errp);
+}
+
 /**
 * Return number of sectors on success, -errno on error.
 */
@@ -3502,72 +3551,6 @@ bool bdrv_is_encrypted(BlockDriverState *bs)
    return bs->encrypted;
 }

-bool bdrv_key_required(BlockDriverState *bs)
-{
-    BdrvChild *backing = bs->backing;
-
-    if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
-        return true;
-    }
-    return (bs->encrypted && !bs->valid_key);
-}
-
-int bdrv_set_key(BlockDriverState *bs, const char *key)
-{
-    int ret;
-    if (bs->backing && bs->backing->bs->encrypted) {
-        ret = bdrv_set_key(bs->backing->bs, key);
-        if (ret < 0)
-            return ret;
-        if (!bs->encrypted)
-            return 0;
-    }
-    if (!bs->encrypted) {
-        return -EINVAL;
-    } else if (!bs->drv || !bs->drv->bdrv_set_key) {
-        return -ENOMEDIUM;
-    }
-    ret = bs->drv->bdrv_set_key(bs, key);
-    if (ret < 0) {
-        bs->valid_key = false;
-    } else if (!bs->valid_key) {
-        /* call the change callback now, we skipped it on open */
-        bs->valid_key = true;
-        bdrv_parent_cb_change_media(bs, true);
-    }
-    return ret;
-}
-
-/*
- * Provide an encryption key for @bs.
- * If @key is non-null:
- *     If @bs is not encrypted, fail.
- *     Else if the key is invalid, fail.
- *     Else set @bs's key to @key, replacing the existing key, if any.
- * If @key is null:
- *     If @bs is encrypted and still lacks a key, fail.
- *     Else do nothing.
- * On failure, store an error object through @errp if non-null.
- */
-void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
-{
-    if (key) {
-        if (!bdrv_is_encrypted(bs)) {
-            error_setg(errp, "Node '%s' is not encrypted",
-                      bdrv_get_device_or_node_name(bs));
-        } else if (bdrv_set_key(bs, key) < 0) {
-            error_setg(errp, QERR_INVALID_PASSWORD);
-        }
-    } else {
-        if (bdrv_key_required(bs)) {
-            error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
-                      "'%s' (%s) is encrypted",
-                      bdrv_get_device_or_node_name(bs),
-                      bdrv_get_encrypted_filename(bs));
-        }
-    }
-}
-
 const char *bdrv_get_format_name(BlockDriverState *bs)
 {
    return bs->drv ? bs->drv->format_name : NULL;
@@ -4135,6 +4118,10 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs,
        }
    }

+    /* At this point persistent bitmaps should already have been stored by the
+     * format driver */
+    bdrv_release_persistent_dirty_bitmaps(bs);
+
    return 0;
 }

@@ -4267,11 +4254,9 @@ bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
    if (!QLIST_EMPTY(&bs->op_blockers[op])) {
        blocker = QLIST_FIRST(&bs->op_blockers[op]);
-        if (errp) {
-            *errp = error_copy(blocker->reason);
-            error_prepend(errp, "Node '%s' is busy: ",
-                          bdrv_get_device_or_node_name(bs));
-        }
+        error_propagate(errp, error_copy(blocker->reason));
+        error_prepend(errp, "Node '%s' is busy: ",
+                      bdrv_get_device_or_node_name(bs));
        return true;
    }
    return false;
@@ -4411,55 +4396,65 @@ void bdrv_img_create(const char *filename, const char *fmt,

    backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);

-    // The size for the image must always be specified, with one exception:
-    // If we are using a backing file, we can obtain the size from there
+    /* The size for the image must always be specified, unless we have a backing
+     * file and we have not been forbidden from opening it. */
    size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
-    if (size == -1) {
-        if (backing_file) {
-            BlockDriverState *bs;
-            char *full_backing = g_new0(char, PATH_MAX);
-            int64_t size;
-            int back_flags;
-            QDict *backing_options = NULL;
+    if (backing_file && !(flags & BDRV_O_NO_BACKING)) {
+        BlockDriverState *bs;
+        char *full_backing = g_new0(char, PATH_MAX);
+        int back_flags;
+        QDict *backing_options = NULL;

-            bdrv_get_full_backing_filename_from_filename(filename, backing_file,
-                                                         full_backing, PATH_MAX,
-                                                         &local_err);
-            if (local_err) {
-                g_free(full_backing);
-                goto out;
-            }
-
-            /* backing files always opened read-only */
-            back_flags = flags;
-            back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
-
-            if (backing_fmt) {
-                backing_options = qdict_new();
-                qdict_put_str(backing_options, "driver", backing_fmt);
-            }
-
-            bs = bdrv_open(full_backing, NULL, backing_options, back_flags,
-                           &local_err);
+        bdrv_get_full_backing_filename_from_filename(filename, backing_file,
+                                                     full_backing, PATH_MAX,
+                                                     &local_err);
+        if (local_err) {
            g_free(full_backing);
-            if (!bs) {
-                goto out;
-            }
-            size = bdrv_getlength(bs);
-            if (size < 0) {
-                error_setg_errno(errp, -size, "Could not get size of '%s'",
-                                 backing_file);
-                bdrv_unref(bs);
-                goto out;
-            }
-
-            qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
-
-            bdrv_unref(bs);
-        } else {
-            error_setg(errp, "Image creation needs a size parameter");
            goto out;
        }
+
+        /* backing files always opened read-only */
+        back_flags = flags;
+        back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
+
+        if (backing_fmt) {
+            backing_options = qdict_new();
+            qdict_put_str(backing_options, "driver", backing_fmt);
+        }
+
+        bs = bdrv_open(full_backing, NULL, backing_options, back_flags,
+                       &local_err);
+        g_free(full_backing);
+        if (!bs && size != -1) {
+            /* Couldn't open BS, but we have a size, so it's nonfatal */
+            warn_reportf_err(local_err,
+                            "Could not verify backing image. "
+                            "This may become an error in future versions.\n");
+            local_err = NULL;
+        } else if (!bs) {
+            /* Couldn't open bs, do not have size */
+            error_append_hint(&local_err,
+                              "Could not open backing image to determine size.\n");
+            goto out;
+        } else {
+            if (size == -1) {
+                /* Opened BS, have no size */
+                size = bdrv_getlength(bs);
+                if (size < 0) {
+                    error_setg_errno(errp, -size, "Could not get size of '%s'",
+                                     backing_file);
+                    bdrv_unref(bs);
+                    goto out;
+                }
+                qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
+            }
+            bdrv_unref(bs);
+        }
+    } /* (backing_file && !(flags & BDRV_O_NO_BACKING)) */
+
+    if (size == -1) {
+        error_setg(errp, "Image creation needs a size parameter");
+        goto out;
    }

    if (!quiet) {
@@ -4933,3 +4928,25 @@ void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp)

    parent_bs->drv->bdrv_del_child(parent_bs, child, errp);
 }
+
+bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
+                                     uint32_t granularity, Error **errp)
+{
+    BlockDriver *drv = bs->drv;
+
+    if (!drv) {
+        error_setg_errno(errp, ENOMEDIUM,
+                         "Can't store persistent bitmaps to %s",
+                         bdrv_get_device_or_node_name(bs));
+        return false;
+    }
+
+    if (!drv->bdrv_can_store_new_dirty_bitmap) {
+        error_setg_errno(errp, ENOTSUP,
+                         "Can't store persistent bitmaps to %s",
+                         bdrv_get_device_or_node_name(bs));
+        return false;
+    }
+
+    return drv->bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp);
+}
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -1,5 +1,5 @@
 block-obj-y += raw-format.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o dmg.o
-block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
+block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o qcow2-bitmap.o
 block-obj-y += qed.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
 block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o
--- a/block/backup.c
+++ b/block/backup.c
@@ -639,12 +639,12 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
    ret = bdrv_get_info(target, &bdi);
    if (ret == -ENOTSUP && !target->backing) {
        /* Cluster size is not defined */
-        error_report("WARNING: The target block device doesn't provide "
-                     "information about the block size and it doesn't have a "
-                     "backing file. The default block size of %u bytes is "
-                     "used. If the actual block size of the target exceeds "
-                     "this default, the backup may be unusable",
-                     BACKUP_CLUSTER_SIZE_DEFAULT);
+        warn_report("The target block device doesn't provide "
+                    "information about the block size and it doesn't have a "
+                    "backing file. The default block size of %u bytes is "
+                    "used. If the actual block size of the target exceeds "
+                    "this default, the backup may be unusable",
+                    BACKUP_CLUSTER_SIZE_DEFAULT);
        job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
    } else if (ret < 0 && !target->backing) {
        error_setg_errno(errp, -ret,
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -821,9 +821,10 @@ static int64_t blkdebug_getlength(BlockDriverState *bs)
    return bdrv_getlength(bs->file->bs);
 }

-static int blkdebug_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
+static int blkdebug_truncate(BlockDriverState *bs, int64_t offset,
+                             PreallocMode prealloc, Error **errp)
 {
-    return bdrv_truncate(bs->file, offset, errp);
+    return bdrv_truncate(bs->file, offset, prealloc, errp);
 }

 static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -83,7 +83,6 @@ static const AIOCBInfo block_backend_aiocb_info = {

 static void drive_info_del(DriveInfo *dinfo);
 static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
-static char *blk_get_attached_dev_id(BlockBackend *blk);

 /* All BlockBackends */
 static QTAILQ_HEAD(, BlockBackend) block_backends =
@@ -343,7 +342,7 @@ void blk_unref(BlockBackend *blk)
 * Behaves similarly to blk_next() but iterates over all BlockBackends, even the
 * ones which are hidden (i.e. are not referenced by the monitor).
 */
-static BlockBackend *blk_all_next(BlockBackend *blk)
+BlockBackend *blk_all_next(BlockBackend *blk)
 {
    return blk ? QTAILQ_NEXT(blk, link)
               : QTAILQ_FIRST(&block_backends);
@@ -726,7 +725,7 @@ void *blk_get_attached_dev(BlockBackend *blk)

 /* Return the qdev ID, or if no ID is assigned the QOM path, of the block
 * device attached to the BlockBackend. */
-static char *blk_get_attached_dev_id(BlockBackend *blk)
+char *blk_get_attached_dev_id(BlockBackend *blk)
 {
    DeviceState *dev;

@@ -1773,14 +1772,15 @@ int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
                   BDRV_REQ_WRITE_COMPRESSED);
 }

-int blk_truncate(BlockBackend *blk, int64_t offset, Error **errp)
+int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc,
+                 Error **errp)
 {
    if (!blk_is_available(blk)) {
        error_setg(errp, "No medium inserted");
        return -ENOMEDIUM;
    }

-    return bdrv_truncate(blk->root, offset, errp);
+    return bdrv_truncate(blk->root, offset, prealloc, errp);
 }

 static void blk_pdiscard_entry(void *opaque)
--- a/block/commit.c
+++ b/block/commit.c
@@ -90,7 +90,9 @@ static void commit_complete(BlockJob *job, void *opaque)

    /* Make sure overlay_bs and top stay around until bdrv_set_backing_hd() */
    bdrv_ref(top);
-    bdrv_ref(overlay_bs);
+    if (overlay_bs) {
+        bdrv_ref(overlay_bs);
+    }

    /* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before
     * the normal backing chain can be restored. */
@@ -163,7 +165,7 @@ static void coroutine_fn commit_run(void *opaque)
    }

    if (base_len < s->common.len) {
-        ret = blk_truncate(s->base, s->common.len, NULL);
+        ret = blk_truncate(s->base, s->common.len, PREALLOC_MODE_OFF, NULL);
        if (ret) {
            goto out;
        }
@@ -521,7 +523,7 @@ int bdrv_commit(BlockDriverState *bs)
     * grow the backing file image if possible.  If not possible,
     * we must return an error */
    if (length > backing_length) {
-        ret = blk_truncate(backing, length, &local_err);
+        ret = blk_truncate(backing, length, PREALLOC_MODE_OFF, &local_err);
        if (ret < 0) {
            error_report_err(local_err);
            goto ro_cleanup;
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -24,16 +24,10 @@
 #include "sysemu/block-backend.h"
 #include "crypto/block.h"
 #include "qapi/opts-visitor.h"
+#include "qapi/qobject-input-visitor.h"
 #include "qapi-visit.h"
 #include "qapi/error.h"
-
-#define BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET "key-secret"
-#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG "cipher-alg"
-#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE "cipher-mode"
-#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG "ivgen-alg"
-#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG "ivgen-hash-alg"
-#define BLOCK_CRYPTO_OPT_LUKS_HASH_ALG "hash-alg"
-#define BLOCK_CRYPTO_OPT_LUKS_ITER_TIME "iter-time"
+#include "block/crypto.h"

 typedef struct BlockCrypto BlockCrypto;

@@ -135,11 +129,7 @@ static QemuOptsList block_crypto_runtime_opts_luks = {
    .name = "crypto",
    .head = QTAILQ_HEAD_INITIALIZER(block_crypto_runtime_opts_luks.head),
    .desc = {
-        {
-            .name = BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET,
-            .type = QEMU_OPT_STRING,
-            .help = "ID of the secret that provides the encryption key",
-        },
+        BLOCK_CRYPTO_OPT_DEF_LUKS_KEY_SECRET(""),
        { /* end of list */ }
    },
 };
@@ -154,49 +144,21 @@ static QemuOptsList block_crypto_create_opts_luks = {
            .type = QEMU_OPT_SIZE,
            .help = "Virtual disk size"
        },
-        {
-            .name = BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET,
-            .type = QEMU_OPT_STRING,
-            .help = "ID of the secret that provides the encryption key",
-        },
-        {
-            .name = BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG,
-            .type = QEMU_OPT_STRING,
-            .help = "Name of encryption cipher algorithm",
-        },
-        {
-            .name = BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE,
-            .type = QEMU_OPT_STRING,
-            .help = "Name of encryption cipher mode",
-        },
-        {
-            .name = BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG,
-            .type = QEMU_OPT_STRING,
-            .help = "Name of IV generator algorithm",
-        },
-        {
-            .name = BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG,
-            .type = QEMU_OPT_STRING,
-            .help = "Name of IV generator hash algorithm",
-        },
-        {
-            .name = BLOCK_CRYPTO_OPT_LUKS_HASH_ALG,
-            .type = QEMU_OPT_STRING,
-            .help = "Name of encryption hash algorithm",
-        },
-        {
-            .name = BLOCK_CRYPTO_OPT_LUKS_ITER_TIME,
-            .type = QEMU_OPT_NUMBER,
-            .help = "Time to spend in PBKDF in milliseconds",
-        },
+        BLOCK_CRYPTO_OPT_DEF_LUKS_KEY_SECRET(""),
+        BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG(""),
+        BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE(""),
+        BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG(""),
+        BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG(""),
+        BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG(""),
+        BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME(""),
        { /* end of list */ }
    },
 };


-static QCryptoBlockOpenOptions *
+QCryptoBlockOpenOptions *
 block_crypto_open_opts_init(QCryptoBlockFormat format,
-                            QemuOpts *opts,
+                            QDict *opts,
                            Error **errp)
 {
    Visitor *v;
@@ -206,7 +168,7 @@ block_crypto_open_opts_init(QCryptoBlockFormat format,
    ret = g_new0(QCryptoBlockOpenOptions, 1);
    ret->format = format;

-    v = opts_visitor_new(opts);
+    v = qobject_input_visitor_new_keyval(QOBJECT(opts));

    visit_start_struct(v, NULL, NULL, 0, &local_err);
    if (local_err) {
@@ -219,6 +181,11 @@ block_crypto_open_opts_init(QCryptoBlockFormat format,
            v, &ret->u.luks, &local_err);
        break;

+    case Q_CRYPTO_BLOCK_FORMAT_QCOW:
+        visit_type_QCryptoBlockOptionsQCow_members(
+            v, &ret->u.qcow, &local_err);
+        break;
+
    default:
        error_setg(&local_err, "Unsupported block format %d", format);
        break;
@@ -240,9 +207,9 @@ block_crypto_open_opts_init(QCryptoBlockFormat format,
 }


-static QCryptoBlockCreateOptions *
+QCryptoBlockCreateOptions *
 block_crypto_create_opts_init(QCryptoBlockFormat format,
-                              QemuOpts *opts,
+                              QDict *opts,
                              Error **errp)
 {
    Visitor *v;
@@ -252,7 +219,7 @@ block_crypto_create_opts_init(QCryptoBlockFormat format,
    ret = g_new0(QCryptoBlockCreateOptions, 1);
    ret->format = format;

-    v = opts_visitor_new(opts);
+    v = qobject_input_visitor_new_keyval(QOBJECT(opts));

    visit_start_struct(v, NULL, NULL, 0, &local_err);
    if (local_err) {
@@ -265,6 +232,11 @@ block_crypto_create_opts_init(QCryptoBlockFormat format,
            v, &ret->u.luks, &local_err);
        break;

+    case Q_CRYPTO_BLOCK_FORMAT_QCOW:
+        visit_type_QCryptoBlockOptionsQCow_members(
+            v, &ret->u.qcow, &local_err);
+        break;
+
    default:
        error_setg(&local_err, "Unsupported block format %d", format);
        break;
@@ -299,6 +271,7 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
    int ret = -EINVAL;
    QCryptoBlockOpenOptions *open_opts = NULL;
    unsigned int cflags = 0;
+    QDict *cryptoopts = NULL;

    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
                               false, errp);
@@ -313,7 +286,9 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
        goto cleanup;
    }

-    open_opts = block_crypto_open_opts_init(format, opts, errp);
+    cryptoopts = qemu_opts_to_qdict(opts, NULL);
+
+    open_opts = block_crypto_open_opts_init(format, cryptoopts, errp);
    if (!open_opts) {
        goto cleanup;
    }
@@ -321,7 +296,7 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
    if (flags & BDRV_O_NO_IO) {
        cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
    }
-    crypto->block = qcrypto_block_open(open_opts,
+    crypto->block = qcrypto_block_open(open_opts, NULL,
                                       block_crypto_read_func,
                                       bs,
                                       cflags,
@@ -333,10 +308,10 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
    }

    bs->encrypted = true;
-    bs->valid_key = true;

    ret = 0;
 cleanup:
+    QDECREF(cryptoopts);
    qapi_free_QCryptoBlockOpenOptions(open_opts);
    return ret;
 }
@@ -356,13 +331,16 @@ static int block_crypto_create_generic(QCryptoBlockFormat format,
        .opts = opts,
        .filename = filename,
    };
+    QDict *cryptoopts;

-    create_opts = block_crypto_create_opts_init(format, opts, errp);
+    cryptoopts = qemu_opts_to_qdict(opts, NULL);
+
+    create_opts = block_crypto_create_opts_init(format, cryptoopts, errp);
    if (!create_opts) {
        return -1;
    }

-    crypto = qcrypto_block_create(create_opts,
+    crypto = qcrypto_block_create(create_opts, NULL,
                                  block_crypto_init_func,
                                  block_crypto_write_func,
                                  &data,
@@ -375,6 +353,7 @@ static int block_crypto_create_generic(QCryptoBlockFormat format,

    ret = 0;
 cleanup:
+    QDECREF(cryptoopts);
    qcrypto_block_free(crypto);
    blk_unref(data.blk);
    qapi_free_QCryptoBlockCreateOptions(create_opts);
@@ -382,7 +361,7 @@ static int block_crypto_create_generic(QCryptoBlockFormat format,
 }

 static int block_crypto_truncate(BlockDriverState *bs, int64_t offset,
-                                 Error **errp)
+                                 PreallocMode prealloc, Error **errp)
 {
    BlockCrypto *crypto = bs->opaque;
    size_t payload_offset =
@@ -390,7 +369,7 @@ static int block_crypto_truncate(BlockDriverState *bs, int64_t offset,

    offset += payload_offset;

-    return bdrv_truncate(bs->file, offset, errp);
+    return bdrv_truncate(bs->file, offset, prealloc, errp);
 }

 static void block_crypto_close(BlockDriverState *bs)
--- a/block/crypto.h
+++ b/block/crypto.h
@@ -0,0 +1,101 @@
+/*
+ * QEMU block full disk encryption
+ *
+ * Copyright (c) 2015-2017 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef BLOCK_CRYPTO_H__
+#define BLOCK_CRYPTO_H__
+
+/*
+ * The BLOCK_CRYPTO_OPT_DEF_* macros below each expand to one brace-enclosed
+ * initializer with .name, .type and .help members, meant to be used as an
+ * element of an option description list.
+ *
+ * @prefix must be a string literal (possibly ""): it is pasted in front of
+ * the option name by string literal concatenation.
+ */
+
+/*
+ * Generic "key-secret" string option with a caller supplied help text.
+ * Note that the name is taken from BLOCK_CRYPTO_OPT_QCOW_KEY_SECRET, which is
+ * only #defined further down; this works because the macro body is expanded
+ * at the point of use.
+ */
+#define BLOCK_CRYPTO_OPT_DEF_KEY_SECRET(prefix, helpstr)                \
+    {                                                                   \
+        .name = prefix BLOCK_CRYPTO_OPT_QCOW_KEY_SECRET,                \
+        .type = QEMU_OPT_STRING,                                        \
+        .help = helpstr,                                                \
+    }
+
+/* Option name used for the qcow AES key secret */
+#define BLOCK_CRYPTO_OPT_QCOW_KEY_SECRET "key-secret"
+
+/* "key-secret" option entry with the qcow AES help text */
+#define BLOCK_CRYPTO_OPT_DEF_QCOW_KEY_SECRET(prefix)                    \
+    BLOCK_CRYPTO_OPT_DEF_KEY_SECRET(prefix,                             \
+        "ID of the secret that provides the AES encryption key")
+
+/* Option names used for the LUKS format */
+#define BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET "key-secret"
+#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG "cipher-alg"
+#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE "cipher-mode"
+#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG "ivgen-alg"
+#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG "ivgen-hash-alg"
+#define BLOCK_CRYPTO_OPT_LUKS_HASH_ALG "hash-alg"
+#define BLOCK_CRYPTO_OPT_LUKS_ITER_TIME "iter-time"
+
+/* "key-secret" option entry with the LUKS keyslot passphrase help text */
+#define BLOCK_CRYPTO_OPT_DEF_LUKS_KEY_SECRET(prefix)                    \
+    BLOCK_CRYPTO_OPT_DEF_KEY_SECRET(prefix,                             \
+        "ID of the secret that provides the keyslot passphrase")
+
+/* String option: "cipher-alg" */
+#define BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG(prefix)       \
+    {                                                      \
+        .name = prefix BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG,   \
+        .type = QEMU_OPT_STRING,                           \
+        .help = "Name of encryption cipher algorithm",     \
+    }
+
+/* String option: "cipher-mode" */
+#define BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE(prefix)      \
+    {                                                      \
+        .name = prefix BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE,  \
+        .type = QEMU_OPT_STRING,                           \
+        .help = "Name of encryption cipher mode",          \
+    }
+
+/* String option: "ivgen-alg" */
+#define BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG(prefix)     \
+    {                                                   \
+        .name = prefix BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG, \
+        .type = QEMU_OPT_STRING,                        \
+        .help = "Name of IV generator algorithm",       \
+    }
+
+/* String option: "ivgen-hash-alg" */
+#define BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG(prefix)        \
+    {                                                           \
+        .name = prefix BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG,    \
+        .type = QEMU_OPT_STRING,                                \
+        .help = "Name of IV generator hash algorithm",          \
+    }
+
+/* String option: "hash-alg" */
+#define BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG(prefix)       \
+    {                                                    \
+        .name = prefix BLOCK_CRYPTO_OPT_LUKS_HASH_ALG,   \
+        .type = QEMU_OPT_STRING,                         \
+        .help = "Name of encryption hash algorithm",     \
+    }
+
+/* Numeric option: "iter-time" (milliseconds, per the help text) */
+#define BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME(prefix)           \
+    {                                                         \
+        .name = prefix BLOCK_CRYPTO_OPT_LUKS_ITER_TIME,       \
+        .type = QEMU_OPT_NUMBER,                              \
+        .help = "Time to spend in PBKDF in milliseconds",     \
+    }
+
+/*
+ * Build the creation options for @format from the key/value pairs in @opts.
+ * NOTE(review): presumably returns NULL and sets @errp on failure, and the
+ * caller owns the result (callers free it with
+ * qapi_free_QCryptoBlockCreateOptions()) -- confirm against block/crypto.c.
+ */
+QCryptoBlockCreateOptions *
+block_crypto_create_opts_init(QCryptoBlockFormat format,
+                              QDict *opts,
+                              Error **errp);
+
+/*
+ * Build the open options for @format from the key/value pairs in @opts.
+ * NOTE(review): presumably returns NULL and sets @errp on failure, and the
+ * caller owns the result (callers free it with
+ * qapi_free_QCryptoBlockOpenOptions()) -- confirm against block/crypto.c.
+ */
+QCryptoBlockOpenOptions *
+block_crypto_open_opts_init(QCryptoBlockFormat format,
+                            QDict *opts,
+                            Error **errp);
+
+#endif /* BLOCK_CRYPTO_H__ */
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -43,8 +43,18 @@ struct BdrvDirtyBitmap {
    BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
    char *name;                 /* Optional non-empty unique ID */
    int64_t size;               /* Size of the bitmap (Number of sectors) */
-    bool disabled;              /* Bitmap is read-only */
+    bool disabled;              /* Bitmap is disabled. It ignores all writes to
+                                   the device */
    int active_iterators;       /* How many iterators are active */
+    bool readonly;              /* Bitmap is read-only. This field also
+                                   prevents the respective image from being
+                                   modified (i.e. blocks writes and discards).
+                                   Such operations must fail and both the image
+                                   and this bitmap must remain unchanged while
+                                   this flag is set. */
+    bool autoload;              /* For persistent bitmaps: bitmap must be
+                                   autoloaded on image opening */
+    bool persistent;            /* bitmap must be saved to owner disk image */
    QLIST_ENTRY(BdrvDirtyBitmap) list;
 };

@@ -93,6 +103,8 @@ void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
    assert(!bdrv_dirty_bitmap_frozen(bitmap));
    g_free(bitmap->name);
    bitmap->name = NULL;
+    bitmap->persistent = false;
+    bitmap->autoload = false;
 }

 /* Called with BQL taken.  */
@@ -289,6 +301,10 @@ BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
    bitmap->name = NULL;
    successor->name = name;
    bitmap->successor = NULL;
+    successor->persistent = bitmap->persistent;
+    bitmap->persistent = false;
+    successor->autoload = bitmap->autoload;
+    bitmap->autoload = false;
    bdrv_release_dirty_bitmap(bs, bitmap);

    return successor;
@@ -340,15 +356,20 @@ void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
    bdrv_dirty_bitmaps_unlock(bs);
 }

+/* Predicate: true when bdrv_dirty_bitmap_name() is non-NULL for @bitmap. */
+static bool bdrv_dirty_bitmap_has_name(BdrvDirtyBitmap *bitmap)
+{
+    const char *name = bdrv_dirty_bitmap_name(bitmap);
+
+    return name != NULL;
+}
+
 /* Called with BQL taken.  */
-static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
-                                                  BdrvDirtyBitmap *bitmap,
-                                                  bool only_named)
+static void bdrv_do_release_matching_dirty_bitmap(
+    BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+    bool (*cond)(BdrvDirtyBitmap *bitmap))
 {
    BdrvDirtyBitmap *bm, *next;
    bdrv_dirty_bitmaps_lock(bs);
    QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
-        if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) {
+        if ((!bitmap || bm == bitmap) && (!cond || cond(bm))) {
            assert(!bm->active_iterators);
            assert(!bdrv_dirty_bitmap_frozen(bm));
            assert(!bm->meta);
@@ -373,17 +394,47 @@ out:
 /* Called with BQL taken.  */
 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
 {
-    bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false);
+    bdrv_do_release_matching_dirty_bitmap(bs, bitmap, NULL);
 }

 /**
 * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()).
 * There must not be any frozen bitmaps attached.
+ * This function does not remove persistent bitmaps from the storage.
 * Called with BQL taken.
 */
 void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
 {
-    bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
+    bdrv_do_release_matching_dirty_bitmap(bs, NULL, bdrv_dirty_bitmap_has_name);
+}
+
+/**
+ * Release all persistent dirty bitmaps attached to a BDS (for use in
+ * bdrv_inactivate_recurse()).
+ * Every bitmap attached to @bs for which bdrv_dirty_bitmap_get_persistance()
+ * returns true is released; other bitmaps are left alone.
+ * There must not be any frozen bitmaps attached.
+ * This function does not remove persistent bitmaps from the storage.
+ * Called with BQL taken (as required by
+ * bdrv_do_release_matching_dirty_bitmap()).
+ */
+void bdrv_release_persistent_dirty_bitmaps(BlockDriverState *bs)
+{
+    bdrv_do_release_matching_dirty_bitmap(bs, NULL,
+                                          bdrv_dirty_bitmap_get_persistance);
+}
+
+/**
+ * Remove the persistent dirty bitmap called @name from the storage if it
+ * exists.
+ * The absence of a stored bitmap is not an error: a BdrvDirtyBitmap can have
+ * .persistent = true without having been saved yet, in which case there is no
+ * stored version and bdrv_remove_persistent_dirty_bitmap() should not fail.
+ * If @bs has no driver, or its driver does not implement the
+ * bdrv_remove_persistent_dirty_bitmap callback, nothing is done and @errp is
+ * left untouched; otherwise the work (and any error reporting through @errp)
+ * is delegated to the driver callback.
+ * This function doesn't release the corresponding BdrvDirtyBitmap.
+ */
+void bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs,
+                                         const char *name,
+                                         Error **errp)
+{
+    if (bs->drv && bs->drv->bdrv_remove_persistent_dirty_bitmap) {
+        bs->drv->bdrv_remove_persistent_dirty_bitmap(bs, name, errp);
+    }
 }

 /* Called with BQL taken.  */
@@ -455,7 +506,7 @@ uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
    return granularity;
 }

-uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
+uint32_t bdrv_dirty_bitmap_granularity(const BdrvDirtyBitmap *bitmap)
 {
    return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
 }
@@ -504,6 +555,7 @@ void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
                                  int64_t cur_sector, int64_t nr_sectors)
 {
    assert(bdrv_dirty_bitmap_enabled(bitmap));
+    assert(!bdrv_dirty_bitmap_readonly(bitmap));
    hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
 }

@@ -520,6 +572,7 @@ void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
                                    int64_t cur_sector, int64_t nr_sectors)
 {
    assert(bdrv_dirty_bitmap_enabled(bitmap));
+    assert(!bdrv_dirty_bitmap_readonly(bitmap));
    hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
 }

@@ -534,6 +587,7 @@ void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
 {
    assert(bdrv_dirty_bitmap_enabled(bitmap));
+    assert(!bdrv_dirty_bitmap_readonly(bitmap));
    bdrv_dirty_bitmap_lock(bitmap);
    if (!out) {
        hbitmap_reset_all(bitmap->bitmap);
@@ -550,6 +604,7 @@ void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
 {
    HBitmap *tmp = bitmap->bitmap;
    assert(bdrv_dirty_bitmap_enabled(bitmap));
+    assert(!bdrv_dirty_bitmap_readonly(bitmap));
    bitmap->bitmap = in;
    hbitmap_free(tmp);
 }
@@ -586,6 +641,13 @@ void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap,
    hbitmap_deserialize_zeroes(bitmap->bitmap, start, count, finish);
 }

+/*
+ * Thin wrapper: forwards @start, @count and @finish to
+ * hbitmap_deserialize_ones() on the HBitmap backing @bitmap.
+ * NOTE(review): presumably the all-ones counterpart of
+ * bdrv_dirty_bitmap_deserialize_zeroes() -- confirm against hbitmap.
+ */
+void bdrv_dirty_bitmap_deserialize_ones(BdrvDirtyBitmap *bitmap,
+                                        uint64_t start, uint64_t count,
+                                        bool finish)
+{
+    hbitmap_deserialize_ones(bitmap->bitmap, start, count, finish);
+}
+
 void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap)
 {
    hbitmap_deserialize_finish(bitmap->bitmap);
@@ -605,6 +667,7 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
        if (!bdrv_dirty_bitmap_enabled(bitmap)) {
            continue;
        }
+        assert(!bdrv_dirty_bitmap_readonly(bitmap));
        hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
    }
    bdrv_dirty_bitmaps_unlock(bs);
@@ -627,3 +690,78 @@ int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap)
 {
    return hbitmap_count(bitmap->meta);
 }
+
+/* Returns the current value of @bitmap's readonly flag (read without lock). */
+bool bdrv_dirty_bitmap_readonly(const BdrvDirtyBitmap *bitmap)
+{
+    return bitmap->readonly;
+}
+
+/*
+ * Sets @bitmap's readonly flag to @value while holding the bitmap's mutex.
+ * Called with BQL taken.
+ */
+void bdrv_dirty_bitmap_set_readonly(BdrvDirtyBitmap *bitmap, bool value)
+{
+    qemu_mutex_lock(bitmap->mutex);
+    bitmap->readonly = value;
+    qemu_mutex_unlock(bitmap->mutex);
+}
+
+/*
+ * Returns true if at least one dirty bitmap attached to @bs has its readonly
+ * flag set, false otherwise (including when no bitmaps are attached).
+ */
+bool bdrv_has_readonly_bitmaps(BlockDriverState *bs)
+{
+    BdrvDirtyBitmap *cur;
+    bool found = false;
+
+    QLIST_FOREACH(cur, &bs->dirty_bitmaps, list) {
+        if (cur->readonly) {
+            found = true;
+            break;
+        }
+    }
+
+    return found;
+}
+
+/*
+ * Sets @bitmap's autoload flag to @autoload while holding the bitmap's mutex.
+ * Called with BQL taken.
+ */
+void bdrv_dirty_bitmap_set_autoload(BdrvDirtyBitmap *bitmap, bool autoload)
+{
+    qemu_mutex_lock(bitmap->mutex);
+    bitmap->autoload = autoload;
+    qemu_mutex_unlock(bitmap->mutex);
+}
+
+/* Returns the current value of @bitmap's autoload flag (read without lock). */
+bool bdrv_dirty_bitmap_get_autoload(const BdrvDirtyBitmap *bitmap)
+{
+    return bitmap->autoload;
+}
+
+/*
+ * Sets @bitmap's persistent flag to @persistent while holding the bitmap's
+ * mutex.
+ * Called with BQL taken.
+ */
+void bdrv_dirty_bitmap_set_persistance(BdrvDirtyBitmap *bitmap, bool persistent)
+{
+    qemu_mutex_lock(bitmap->mutex);
+    bitmap->persistent = persistent;
+    qemu_mutex_unlock(bitmap->mutex);
+}
+
+/*
+ * Returns the current value of @bitmap's persistent flag (read without lock).
+ * The parameter is deliberately not const-qualified: this function is also
+ * passed as the condition callback of bdrv_do_release_matching_dirty_bitmap(),
+ * whose callback type takes a plain BdrvDirtyBitmap pointer.
+ */
+bool bdrv_dirty_bitmap_get_persistance(BdrvDirtyBitmap *bitmap)
+{
+    return bitmap->persistent;
+}
+
+/*
+ * Returns true if at least one dirty bitmap attached to @bs is persistent and
+ * not read-only, false otherwise.
+ */
+bool bdrv_has_changed_persistent_bitmaps(BlockDriverState *bs)
+{
+    BdrvDirtyBitmap *cur;
+    bool found = false;
+
+    QLIST_FOREACH(cur, &bs->dirty_bitmaps, list) {
+        if (!cur->persistent || cur->readonly) {
+            continue;
+        }
+        found = true;
+        break;
+    }
+
+    return found;
+}
+
+/*
+ * Iteration helper: with @bitmap == NULL returns the first entry of
+ * @bs->dirty_bitmaps, otherwise the list element following @bitmap.
+ */
+BdrvDirtyBitmap *bdrv_dirty_bitmap_next(BlockDriverState *bs,
+                                        BdrvDirtyBitmap *bitmap)
+{
+    if (bitmap) {
+        return QLIST_NEXT(bitmap, list);
+    }
+
+    return QLIST_FIRST(&bs->dirty_bitmaps);
+}
+
+/*
+ * Thin wrapper: returns whatever hbitmap_sha256() produces for the HBitmap
+ * backing @bitmap, passing @errp through.
+ * NOTE(review): presumably a heap-allocated checksum string owned by the
+ * caller, NULL on error -- confirm against hbitmap_sha256().
+ */
+char *bdrv_dirty_bitmap_sha256(const BdrvDirtyBitmap *bitmap, Error **errp)
+{
+    return hbitmap_sha256(bitmap->bitmap, errp);
+}
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -1624,7 +1624,122 @@ static void raw_close(BlockDriverState *bs)
    }
 }

-static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
+/**
+ * Truncates the given regular file @fd to @offset and, when growing, fills the
+ * new space according to @prealloc.
+ *
+ * @fd: descriptor of a regular file
+ * @offset: new file length in bytes
+ * @prealloc: how newly added space is allocated; anything other than
+ *            PREALLOC_MODE_OFF is rejected with -ENOTSUP when shrinking
+ * @errp: set on failure
+ *
+ * If preallocation fails part-way, an attempt is made to restore the previous
+ * file length (a failure to do so is only reported, not returned).
+ *
+ * Returns: 0 on success, -errno on failure.
+ */
+static int raw_regular_truncate(int fd, int64_t offset, PreallocMode prealloc,
+                                Error **errp)
+{
+    int result = 0;
+    int64_t current_length = 0;
+    char *buf = NULL;
+    struct stat st;
+
+    if (fstat(fd, &st) < 0) {
+        result = -errno;
+        error_setg_errno(errp, -result, "Could not stat file");
+        return result;
+    }
+
+    current_length = st.st_size;
+    if (current_length > offset && prealloc != PREALLOC_MODE_OFF) {
+        error_setg(errp, "Cannot use preallocation for shrinking files");
+        return -ENOTSUP;
+    }
+
+    switch (prealloc) {
+#ifdef CONFIG_POSIX_FALLOCATE
+    case PREALLOC_MODE_FALLOC:
+        /*
+         * Truncating before posix_fallocate() makes it about twice slower on
+         * file systems that do not support fallocate(), trying to check if a
+         * block is allocated before allocating it, so don't do that here.
+         *
+         * posix_fallocate() fails with EINVAL for a zero length, so a resize
+         * to the current length must not call it at all.
+         */
+        if (offset != current_length) {
+            result = -posix_fallocate(fd, current_length,
+                                      offset - current_length);
+            if (result != 0) {
+                /* posix_fallocate() doesn't set errno. */
+                error_setg_errno(errp, -result,
+                                 "Could not preallocate new data");
+            }
+        }
+        goto out;
+#endif
+    case PREALLOC_MODE_FULL:
+    {
+        int64_t num = 0, left = offset - current_length;
+        /*
+         * lseek() returns the resulting file offset, which does not fit into
+         * an int once the old end of file is at or beyond 2 GiB.  Keep it in
+         * a 64 bit variable so that a large offset is not mistaken for an
+         * error.
+         */
+        int64_t seek_result;
+
+        /*
+         * Knowing the final size from the beginning could allow the file
+         * system driver to do less allocations and possibly avoid
+         * fragmentation of the file.
+         */
+        if (ftruncate(fd, offset) != 0) {
+            result = -errno;
+            error_setg_errno(errp, -result, "Could not resize file");
+            goto out;
+        }
+
+        buf = g_malloc0(65536);
+
+        seek_result = lseek(fd, current_length, SEEK_SET);
+        if (seek_result < 0) {
+            result = -errno;
+            error_setg_errno(errp, -result,
+                             "Failed to seek to the old end of file");
+            goto out;
+        }
+
+        while (left > 0) {
+            num = MIN(left, 65536);
+            result = write(fd, buf, num);
+            if (result < 0) {
+                result = -errno;
+                error_setg_errno(errp, -result,
+                                 "Could not write zeros for preallocation");
+                goto out;
+            }
+            left -= result;
+        }
+
+        /* All write errors jumped to out above, so flushing is always due. */
+        result = fsync(fd);
+        if (result < 0) {
+            result = -errno;
+            error_setg_errno(errp, -result,
+                             "Could not flush file to disk");
+            goto out;
+        }
+        goto out;
+    }
+    case PREALLOC_MODE_OFF:
+        if (ftruncate(fd, offset) != 0) {
+            result = -errno;
+            error_setg_errno(errp, -result, "Could not resize file");
+        }
+        return result;
+    default:
+        result = -ENOTSUP;
+        error_setg(errp, "Unsupported preallocation mode: %s",
+                   PreallocMode_lookup[prealloc]);
+        return result;
+    }
+
+out:
+    if (result < 0) {
+        /* Best effort: undo any partial growth of the file. */
+        if (ftruncate(fd, current_length) < 0) {
+            error_report("Failed to restore old file length: %s",
+                         strerror(errno));
+        }
+    }
+
+    g_free(buf);
+    return result;
+}
+
+static int raw_truncate(BlockDriverState *bs, int64_t offset,
+                        PreallocMode prealloc, Error **errp)
 {
    BDRVRawState *s = bs->opaque;
    struct stat st;
@@ -1637,12 +1752,16 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
    }

    if (S_ISREG(st.st_mode)) {
-        if (ftruncate(s->fd, offset) < 0) {
-            ret = -errno;
-            error_setg_errno(errp, -ret, "Failed to resize the file");
-            return ret;
-        }
-    } else if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
+        return raw_regular_truncate(s->fd, offset, prealloc, errp);
+    }
+
+    if (prealloc != PREALLOC_MODE_OFF) {
+        error_setg(errp, "Preallocation mode '%s' unsupported for this "
+                   "non-regular file", PreallocMode_lookup[prealloc]);
+        return -ENOTSUP;
+    }
+
+    if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
        if (offset > raw_getlength(bs)) {
            error_setg(errp, "Cannot grow device files");
            return -EINVAL;
@@ -1885,71 +2004,9 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
 #endif
    }

-    switch (prealloc) {
-#ifdef CONFIG_POSIX_FALLOCATE
-    case PREALLOC_MODE_FALLOC:
-        /*
-         * Truncating before posix_fallocate() makes it about twice slower on
-         * file systems that do not support fallocate(), trying to check if a
-         * block is allocated before allocating it, so don't do that here.
-         */
-        result = -posix_fallocate(fd, 0, total_size);
-        if (result != 0) {
-            /* posix_fallocate() doesn't set errno. */
-            error_setg_errno(errp, -result,
-                             "Could not preallocate data for the new file");
-        }
-        break;
-#endif
-    case PREALLOC_MODE_FULL:
-    {
-        /*
-         * Knowing the final size from the beginning could allow the file
-         * system driver to do less allocations and possibly avoid
-         * fragmentation of the file.
-         */
-        if (ftruncate(fd, total_size) != 0) {
-            result = -errno;
-            error_setg_errno(errp, -result, "Could not resize file");
-            goto out_close;
-        }
-
-        int64_t num = 0, left = total_size;
-        buf = g_malloc0(65536);
-
-        while (left > 0) {
-            num = MIN(left, 65536);
-            result = write(fd, buf, num);
-            if (result < 0) {
-                result = -errno;
-                error_setg_errno(errp, -result,
-                                 "Could not write to the new file");
-                break;
-            }
-            left -= result;
-        }
-        if (result >= 0) {
-            result = fsync(fd);
-            if (result < 0) {
-                result = -errno;
-                error_setg_errno(errp, -result,
-                                 "Could not flush new file to disk");
-            }
-        }
-        g_free(buf);
-        break;
-    }
-    case PREALLOC_MODE_OFF:
-        if (ftruncate(fd, total_size) != 0) {
-            result = -errno;
-            error_setg_errno(errp, -result, "Could not resize file");
-        }
-        break;
-    default:
-        result = -EINVAL;
-        error_setg(errp, "Unsupported preallocation mode: %s",
-                   PreallocMode_lookup[prealloc]);
-        break;
+    result = raw_regular_truncate(fd, total_size, prealloc, errp);
+    if (result < 0) {
+        goto out_close;
    }

 out_close:
--- a/block/file-win32.c
+++ b/block/file-win32.c
@@ -461,12 +461,19 @@ static void raw_close(BlockDriverState *bs)
    }
 }

-static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
+static int raw_truncate(BlockDriverState *bs, int64_t offset,
+                        PreallocMode prealloc, Error **errp)
 {
    BDRVRawState *s = bs->opaque;
    LONG low, high;
    DWORD dwPtrLow;

+    if (prealloc != PREALLOC_MODE_OFF) {
+        error_setg(errp, "Unsupported preallocation mode '%s'",
+                   PreallocMode_lookup[prealloc]);
+        return -ENOTSUP;
+    }
+
    low = offset;
    high = offset >> 32;

--- a/block/gluster.c
+++ b/block/gluster.c
@@ -345,8 +345,7 @@ static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf,
        is_unix = true;
    } else if (!strcmp(uri->scheme, "gluster+rdma")) {
        gsconf->type = SOCKET_ADDRESS_TYPE_INET;
-        error_report("Warning: rdma feature is not supported, falling "
-                     "back to tcp");
+        warn_report("rdma feature is not supported, falling back to tcp");
    } else {
        ret = -EINVAL;
        goto out;
@@ -1096,11 +1095,17 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
 }

 static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset,
-                                 Error **errp)
+                                 PreallocMode prealloc, Error **errp)
 {
    int ret;
    BDRVGlusterState *s = bs->opaque;

+    if (prealloc != PREALLOC_MODE_OFF) {
+        error_setg(errp, "Unsupported preallocation mode '%s'",
+                   PreallocMode_lookup[prealloc]);
+        return -ENOTSUP;
+    }
+
    ret = glfs_ftruncate(s->fd, offset);
    if (ret < 0) {
        ret = -errno;
--- a/block/io.c
+++ b/block/io.c
@@ -149,6 +149,37 @@ bool bdrv_requests_pending(BlockDriverState *bs)
    return false;
 }

+/*
+ * State shared between a drain helper (coroutine or bottom half) and the
+ * code waiting for it to finish.
+ */
+typedef struct {
+    Coroutine *co;          /* coroutine associated with the request */
+    BlockDriverState *bs;   /* node being drained */
+    bool done;              /* set once the helper has finished */
+} BdrvCoDrainData;
+
+/*
+ * Coroutine entry point used by bdrv_drain_invoke(): runs the driver's
+ * bdrv_co_drain callback for data->bs, then flags completion and wakes up
+ * the waiter.  @opaque is a BdrvCoDrainData.
+ *
+ * bs->drv and its bdrv_co_drain callback are used unconditionally here;
+ * bdrv_drain_invoke() checks both before creating the coroutine.
+ */
+static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
+{
+    BdrvCoDrainData *data = opaque;
+    BlockDriverState *bs = data->bs;
+
+    bs->drv->bdrv_co_drain(bs);
+
+    /* Set data->done before reading bs->wakeup.  */
+    atomic_mb_set(&data->done, true);
+    bdrv_wakeup(bs);
+}
+
+/*
+ * If the driver of @bs implements bdrv_co_drain, run that callback in a new
+ * coroutine and poll until the coroutine reports completion.  Does nothing
+ * when @bs has no driver or the driver lacks the callback.
+ */
+static void bdrv_drain_invoke(BlockDriverState *bs)
+{
+    BdrvCoDrainData drain = { .bs = bs, .done = false };
+
+    if (bs->drv && bs->drv->bdrv_co_drain) {
+        /* drain lives on this stack frame, so wait here until it is done */
+        drain.co = qemu_coroutine_create(bdrv_drain_invoke_entry, &drain);
+        bdrv_coroutine_enter(bs, drain.co);
+        BDRV_POLL_WHILE(bs, !drain.done);
+    }
+}
+
 static bool bdrv_drain_recurse(BlockDriverState *bs)
 {
    BdrvChild *child, *tmp;
@@ -156,9 +187,8 @@ static bool bdrv_drain_recurse(BlockDriverState *bs)

    waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);

-    if (bs->drv && bs->drv->bdrv_drain) {
-        bs->drv->bdrv_drain(bs);
-    }
+    /* Ensure any pending metadata writes are submitted to bs->file.  */
+    bdrv_drain_invoke(bs);

    QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
        BlockDriverState *bs = child->bs;
@@ -184,12 +214,6 @@ static bool bdrv_drain_recurse(BlockDriverState *bs)
    return waited;
 }

-typedef struct {
-    Coroutine *co;
-    BlockDriverState *bs;
-    bool done;
-} BdrvCoDrainData;
-
 static void bdrv_co_drain_bh_cb(void *opaque)
 {
    BdrvCoDrainData *data = opaque;
@@ -1315,6 +1339,10 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
    uint64_t bytes_remaining = bytes;
    int max_transfer;

+    if (bdrv_has_readonly_bitmaps(bs)) {
+        return -EPERM;
+    }
+
    assert(is_power_of_2(align));
    assert((offset & (align - 1)) == 0);
    assert((bytes & (align - 1)) == 0);
@@ -2287,6 +2315,10 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
        return -ENOMEDIUM;
    }

+    if (bdrv_has_readonly_bitmaps(bs)) {
+        return -EPERM;
+    }
+
    ret = bdrv_check_byte_request(bs, offset, bytes);
    if (ret < 0) {
        return ret;
@@ -2327,7 +2359,6 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
    assert(max_pdiscard >= bs->bl.request_alignment);

    while (bytes > 0) {
-        int ret;
        int num = bytes;

        if (head) {
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1761,9 +1761,9 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
     * filename encoded options */
    filename = qdict_get_try_str(options, "filename");
    if (filename) {
-        error_report("Warning: 'filename' option specified. "
-                      "This is an unsupported option, and may be deprecated "
-                      "in the future");
+        warn_report("'filename' option specified. "
+                    "This is an unsupported option, and may be deprecated "
+                    "in the future");
        iscsi_parse_filename(filename, options, &local_err);
        if (local_err) {
            ret = -EINVAL;
@@ -2079,11 +2079,18 @@ static void iscsi_reopen_commit(BDRVReopenState *reopen_state)
    }
 }

-static int iscsi_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
+static int iscsi_truncate(BlockDriverState *bs, int64_t offset,
+                          PreallocMode prealloc, Error **errp)
 {
    IscsiLun *iscsilun = bs->opaque;
    Error *local_err = NULL;

+    if (prealloc != PREALLOC_MODE_OFF) {
+        error_setg(errp, "Unsupported preallocation mode '%s'",
+                   PreallocMode_lookup[prealloc]);
+        return -ENOTSUP;
+    }
+
    if (iscsilun->type != TYPE_DISK) {
        error_setg(errp, "Cannot resize non-disk iSCSI devices");
        return -ENOTSUP;
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -739,7 +739,8 @@ static void coroutine_fn mirror_run(void *opaque)
        }

        if (s->bdev_length > base_length) {
-            ret = blk_truncate(s->target, s->bdev_length, NULL);
+            ret = blk_truncate(s->target, s->bdev_length, PREALLOC_MODE_OFF,
+                               NULL);
            if (ret < 0) {
                goto immediate_exit;
            }
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -242,7 +242,7 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
    ssize_t ret;

    if (flags & BDRV_REQ_FUA) {
-        assert(client->nbdflags & NBD_FLAG_SEND_FUA);
+        assert(client->info.flags & NBD_FLAG_SEND_FUA);
        request.flags |= NBD_CMD_FLAG_FUA;
    }

@@ -270,12 +270,12 @@ int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
    };
    NBDReply reply;

-    if (!(client->nbdflags & NBD_FLAG_SEND_WRITE_ZEROES)) {
+    if (!(client->info.flags & NBD_FLAG_SEND_WRITE_ZEROES)) {
        return -ENOTSUP;
    }

    if (flags & BDRV_REQ_FUA) {
-        assert(client->nbdflags & NBD_FLAG_SEND_FUA);
+        assert(client->info.flags & NBD_FLAG_SEND_FUA);
        request.flags |= NBD_CMD_FLAG_FUA;
    }
    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
@@ -299,7 +299,7 @@ int nbd_client_co_flush(BlockDriverState *bs)
    NBDReply reply;
    ssize_t ret;

-    if (!(client->nbdflags & NBD_FLAG_SEND_FLUSH)) {
+    if (!(client->info.flags & NBD_FLAG_SEND_FLUSH)) {
        return 0;
    }

@@ -327,7 +327,7 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
    NBDReply reply;
    ssize_t ret;

-    if (!(client->nbdflags & NBD_FLAG_SEND_TRIM)) {
+    if (!(client->info.flags & NBD_FLAG_SEND_TRIM)) {
        return 0;
    }

@@ -384,22 +384,24 @@ int nbd_client_init(BlockDriverState *bs,
    logout("session init %s\n", export);
    qio_channel_set_blocking(QIO_CHANNEL(sioc), true, NULL);

+    client->info.request_sizes = true;
    ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), export,
-                                &client->nbdflags,
                                tlscreds, hostname,
-                                &client->ioc,
-                                &client->size, errp);
+                                &client->ioc, &client->info, errp);
    if (ret < 0) {
        logout("Failed to negotiate with the NBD server\n");
        return ret;
    }
-    if (client->nbdflags & NBD_FLAG_SEND_FUA) {
+    if (client->info.flags & NBD_FLAG_SEND_FUA) {
        bs->supported_write_flags = BDRV_REQ_FUA;
        bs->supported_zero_flags |= BDRV_REQ_FUA;
    }
-    if (client->nbdflags & NBD_FLAG_SEND_WRITE_ZEROES) {
+    if (client->info.flags & NBD_FLAG_SEND_WRITE_ZEROES) {
        bs->supported_zero_flags |= BDRV_REQ_MAY_UNMAP;
    }
+    if (client->info.min_block > bs->bl.request_alignment) {
+        bs->bl.request_alignment = client->info.min_block;
+    }

    qemu_co_mutex_init(&client->send_mutex);
    qemu_co_queue_init(&client->free_sema);
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -20,8 +20,7 @@
 typedef struct NBDClientSession {
    QIOChannelSocket *sioc; /* The master data channel */
    QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
-    uint16_t nbdflags;
-    off_t size;
+    NBDExportInfo info;

    CoMutex send_mutex;
    CoQueue free_sema;
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -472,9 +472,17 @@ static int nbd_co_flush(BlockDriverState *bs)

 static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
 {
-    bs->bl.max_pdiscard = NBD_MAX_BUFFER_SIZE;
-    bs->bl.max_pwrite_zeroes = NBD_MAX_BUFFER_SIZE;
-    bs->bl.max_transfer = NBD_MAX_BUFFER_SIZE;
+    NBDClientSession *s = nbd_get_client_session(bs);
+    uint32_t max = MIN_NON_ZERO(NBD_MAX_BUFFER_SIZE, s->info.max_block);
+
+    bs->bl.max_pdiscard = max;
+    bs->bl.max_pwrite_zeroes = max;
+    bs->bl.max_transfer = max;
+
+    if (s->info.opt_block &&
+        s->info.opt_block > bs->bl.opt_transfer) {
+        bs->bl.opt_transfer = s->info.opt_block;
+    }
 }

 static void nbd_close(BlockDriverState *bs)
@@ -492,7 +500,7 @@ static int64_t nbd_getlength(BlockDriverState *bs)
 {
    BDRVNBDState *s = bs->opaque;

-    return s->client.size;
+    return s->client.info.size;
 }

 static void nbd_detach_aio_context(BlockDriverState *bs)
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -558,8 +558,8 @@ static int64_t nfs_client_open(NFSClient *client, QDict *options,
        }
        client->readahead = qemu_opt_get_number(opts, "readahead-size", 0);
        if (client->readahead > QEMU_NFS_MAX_READAHEAD_SIZE) {
-            error_report("NFS Warning: Truncating NFS readahead "
-                         "size to %d", QEMU_NFS_MAX_READAHEAD_SIZE);
+            warn_report("Truncating NFS readahead size to %d",
+                        QEMU_NFS_MAX_READAHEAD_SIZE);
            client->readahead = QEMU_NFS_MAX_READAHEAD_SIZE;
        }
        nfs_set_readahead(client->context, client->readahead);
@@ -579,8 +579,8 @@ static int64_t nfs_client_open(NFSClient *client, QDict *options,
        }
        client->pagecache = qemu_opt_get_number(opts, "page-cache-size", 0);
        if (client->pagecache > QEMU_NFS_MAX_PAGECACHE_SIZE) {
-            error_report("NFS Warning: Truncating NFS pagecache "
-                         "size to %d pages", QEMU_NFS_MAX_PAGECACHE_SIZE);
+            warn_report("Truncating NFS pagecache size to %d pages",
+                        QEMU_NFS_MAX_PAGECACHE_SIZE);
            client->pagecache = QEMU_NFS_MAX_PAGECACHE_SIZE;
        }
        nfs_set_pagecache(client->context, client->pagecache);
@@ -595,8 +595,8 @@ static int64_t nfs_client_open(NFSClient *client, QDict *options,
        /* limit the maximum debug level to avoid potential flooding
         * of our log files. */
        if (client->debug > QEMU_NFS_MAX_DEBUG_LEVEL) {
-            error_report("NFS Warning: Limiting NFS debug level "
-                         "to %d", QEMU_NFS_MAX_DEBUG_LEVEL);
+            warn_report("Limiting NFS debug level to %d",
+                        QEMU_NFS_MAX_DEBUG_LEVEL);
            client->debug = QEMU_NFS_MAX_DEBUG_LEVEL;
        }
        nfs_set_debug(client->context, client->debug);
@@ -759,11 +759,18 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
    return (task.ret < 0 ? task.ret : st.st_blocks * 512);
 }

-static int nfs_file_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
+static int nfs_file_truncate(BlockDriverState *bs, int64_t offset,
+                             PreallocMode prealloc, Error **errp)
 {
    NFSClient *client = bs->opaque;
    int ret;

+    if (prealloc != PREALLOC_MODE_OFF) {
+        error_setg(errp, "Unsupported preallocation mode '%s'",
+                   PreallocMode_lookup[prealloc]);
+        return -ENOTSUP;
+    }
+
    ret = nfs_ftruncate(client->context, client->fh, offset);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Failed to truncate file");
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -224,7 +224,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
        } else {
            ret = bdrv_truncate(bs->file,
                                (s->data_end + space) << BDRV_SECTOR_BITS,
-                                NULL);
+                                PREALLOC_MODE_OFF, NULL);
        }
        if (ret < 0) {
            return ret;
@@ -458,7 +458,8 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
        res->leaks += count;
        if (fix & BDRV_FIX_LEAKS) {
            Error *local_err = NULL;
-            ret = bdrv_truncate(bs->file, res->image_end_offset, &local_err);
+            ret = bdrv_truncate(bs->file, res->image_end_offset,
+                                PREALLOC_MODE_OFF, &local_err);
            if (ret < 0) {
                error_report_err(local_err);
                res->check_errors++;
@@ -507,7 +508,7 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)

    blk_set_allow_write_beyond_eof(file, true);

-    ret = blk_truncate(file, 0, errp);
+    ret = blk_truncate(file, 0, PREALLOC_MODE_OFF, errp);
    if (ret < 0) {
        goto exit;
    }
@@ -699,7 +700,8 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
    }

    if (!(flags & BDRV_O_RESIZE) || !bdrv_has_zero_init(bs->file->bs) ||
-            bdrv_truncate(bs->file, bdrv_getlength(bs->file->bs), NULL) != 0) {
+            bdrv_truncate(bs->file, bdrv_getlength(bs->file->bs),
+                          PREALLOC_MODE_OFF, NULL) != 0) {
        s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
    }

@@ -742,7 +744,8 @@ static void parallels_close(BlockDriverState *bs)
    }

    if (bs->open_flags & BDRV_O_RDWR) {
-        bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS, NULL);
+        bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS,
+                      PREALLOC_MODE_OFF, NULL);
    }

    g_free(s->bat_dirty_bmap);
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -45,7 +45,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
    info->ro                     = bs->read_only;
    info->drv                    = g_strdup(bs->drv->format_name);
    info->encrypted              = bs->encrypted;
-    info->encryption_key_missing = bdrv_key_required(bs);
+    info->encryption_key_missing = false;

    info->cache = g_new(BlockdevCacheInfo, 1);
    *info->cache = (BlockdevCacheInfo) {
@@ -322,11 +322,21 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
 {
    BlockInfo *info = g_malloc0(sizeof(*info));
    BlockDriverState *bs = blk_bs(blk);
+    char *qdev;
+
    info->device = g_strdup(blk_name(blk));
    info->type = g_strdup("unknown");
    info->locked = blk_dev_is_medium_locked(blk);
    info->removable = blk_dev_has_removable_media(blk);

+    qdev = blk_get_attached_dev_id(blk);
+    if (qdev && *qdev) {
+        info->has_qdev = true;
+        info->qdev = qdev;
+    } else {
+        g_free(qdev);
+    }
+
    if (blk_dev_has_tray(blk)) {
        info->has_tray_open = true;
        info->tray_open = blk_dev_is_tray_open(blk);
@@ -462,8 +472,14 @@ BlockInfoList *qmp_query_block(Error **errp)
    BlockBackend *blk;
    Error *local_err = NULL;

-    for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
-        BlockInfoList *info = g_malloc0(sizeof(*info));
+    for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) {
+        BlockInfoList *info;
+
+        if (!*blk_name(blk) && !blk_get_attached_dev(blk)) {
+            continue;
+        }
+
+        info = g_malloc0(sizeof(*info));
        bdrv_query_info(blk, &info->value, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -31,8 +31,10 @@
 #include "qemu/bswap.h"
 #include <zlib.h>
 #include "qapi/qmp/qerror.h"
-#include "crypto/cipher.h"
+#include "qapi/qmp/qstring.h"
+#include "crypto/block.h"
 #include "migration/blocker.h"
+#include "block/crypto.h"

 /**************************************************************/
 /* QEMU COW block driver with compression and encryption support */
@@ -77,7 +79,7 @@ typedef struct BDRVQcowState {
    uint8_t *cluster_cache;
    uint8_t *cluster_data;
    uint64_t cluster_cache_offset;
-    QCryptoCipher *cipher; /* NULL if no key yet */
+    QCryptoBlock *crypto; /* Disk encryption format driver */
    uint32_t crypt_method_header;
    CoMutex lock;
    Error *migration_blocker;
@@ -97,6 +99,15 @@ static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
        return 0;
 }

+static QemuOptsList qcow_runtime_opts = {
+    .name = "qcow",
+    .head = QTAILQ_HEAD_INITIALIZER(qcow_runtime_opts.head),
+    .desc = {
+        BLOCK_CRYPTO_OPT_DEF_QCOW_KEY_SECRET("encrypt."),
+        { /* end of list */ }
+    },
+};
+
 static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
                     Error **errp)
 {
@@ -105,11 +116,19 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    int ret;
    QCowHeader header;
    Error *local_err = NULL;
+    QCryptoBlockOpenOptions *crypto_opts = NULL;
+    unsigned int cflags = 0;
+    QDict *encryptopts = NULL;
+    const char *encryptfmt;
+
+    qdict_extract_subqdict(options, &encryptopts, "encrypt.");
+    encryptfmt = qdict_get_try_str(encryptopts, "format");

    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
                               false, errp);
    if (!bs->file) {
-        return -EINVAL;
+        ret = -EINVAL;
+        goto fail;
    }

    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
@@ -155,17 +174,6 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
        goto fail;
    }

-    if (header.crypt_method > QCOW_CRYPT_AES) {
-        error_setg(errp, "invalid encryption method in qcow header");
-        ret = -EINVAL;
-        goto fail;
-    }
-    if (!qcrypto_cipher_supports(QCRYPTO_CIPHER_ALG_AES_128,
-                                 QCRYPTO_CIPHER_MODE_CBC)) {
-        error_setg(errp, "AES cipher not available");
-        ret = -EINVAL;
-        goto fail;
-    }
    s->crypt_method_header = header.crypt_method;
    if (s->crypt_method_header) {
        if (bdrv_uses_whitelist() &&
@@ -181,8 +189,44 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
            ret = -ENOSYS;
            goto fail;
        }
+        if (s->crypt_method_header == QCOW_CRYPT_AES) {
+            if (encryptfmt && !g_str_equal(encryptfmt, "aes")) {
+                error_setg(errp,
+                           "Header reported 'aes' encryption format but "
+                           "options specify '%s'", encryptfmt);
+                ret = -EINVAL;
+                goto fail;
+            }
+            qdict_del(encryptopts, "format");
+            crypto_opts = block_crypto_open_opts_init(
+                Q_CRYPTO_BLOCK_FORMAT_QCOW, encryptopts, errp);
+            if (!crypto_opts) {
+                ret = -EINVAL;
+                goto fail;
+            }

+            if (flags & BDRV_O_NO_IO) {
+                cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
+            }
+            s->crypto = qcrypto_block_open(crypto_opts, "encrypt.",
+                                           NULL, NULL, cflags, errp);
+            if (!s->crypto) {
+                ret = -EINVAL;
+                goto fail;
+            }
+        } else {
+            error_setg(errp, "invalid encryption method in qcow header");
+            ret = -EINVAL;
+            goto fail;
+        }
        bs->encrypted = true;
+    } else {
+        if (encryptfmt) {
+            error_setg(errp, "No encryption in image header, but options "
+                       "specified format '%s'", encryptfmt);
+            ret = -EINVAL;
+            goto fail;
+        }
    }
    s->cluster_bits = header.cluster_bits;
    s->cluster_size = 1 << s->cluster_bits;
@@ -266,6 +310,8 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
        goto fail;
    }

+    QDECREF(encryptopts);
+    qapi_free_QCryptoBlockOpenOptions(crypto_opts);
    qemu_co_mutex_init(&s->lock);
    return 0;

@@ -274,6 +320,9 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    qemu_vfree(s->l2_cache);
    g_free(s->cluster_cache);
    g_free(s->cluster_data);
+    qcrypto_block_free(s->crypto);
+    QDECREF(encryptopts);
+    qapi_free_QCryptoBlockOpenOptions(crypto_opts);
    return ret;
 }

@@ -286,85 +335,6 @@ static int qcow_reopen_prepare(BDRVReopenState *state,
    return 0;
 }

-static int qcow_set_key(BlockDriverState *bs, const char *key)
-{
-    BDRVQcowState *s = bs->opaque;
-    uint8_t keybuf[16];
-    int len, i;
-    Error *err;
-
-    memset(keybuf, 0, 16);
-    len = strlen(key);
-    if (len > 16)
-        len = 16;
-    /* XXX: we could compress the chars to 7 bits to increase
-       entropy */
-    for(i = 0;i < len;i++) {
-        keybuf[i] = key[i];
-    }
-    assert(bs->encrypted);
-
-    qcrypto_cipher_free(s->cipher);
-    s->cipher = qcrypto_cipher_new(
-        QCRYPTO_CIPHER_ALG_AES_128,
-        QCRYPTO_CIPHER_MODE_CBC,
-        keybuf, G_N_ELEMENTS(keybuf),
-        &err);
-
-    if (!s->cipher) {
-        /* XXX would be nice if errors in this method could
-         * be properly propagate to the caller. Would need
-         * the bdrv_set_key() API signature to be fixed. */
-        error_free(err);
-        return -1;
-    }
-    return 0;
-}
-
-/* The crypt function is compatible with the linux cryptoloop
-   algorithm for < 4 GB images. NOTE: out_buf == in_buf is
-   supported */
-static int encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
-                           uint8_t *out_buf, const uint8_t *in_buf,
-                           int nb_sectors, bool enc, Error **errp)
-{
-    union {
-        uint64_t ll[2];
-        uint8_t b[16];
-    } ivec;
-    int i;
-    int ret;
-
-    for(i = 0; i < nb_sectors; i++) {
-        ivec.ll[0] = cpu_to_le64(sector_num);
-        ivec.ll[1] = 0;
-        if (qcrypto_cipher_setiv(s->cipher,
-                                 ivec.b, G_N_ELEMENTS(ivec.b),
-                                 errp) < 0) {
-            return -1;
-        }
-        if (enc) {
-            ret = qcrypto_cipher_encrypt(s->cipher,
-                                         in_buf,
-                                         out_buf,
-                                         512,
-                                         errp);
-        } else {
-            ret = qcrypto_cipher_decrypt(s->cipher,
-                                         in_buf,
-                                         out_buf,
-                                         512,
-                                         errp);
-        }
-        if (ret < 0) {
-            return -1;
-        }
-        sector_num++;
-        in_buf += 512;
-        out_buf += 512;
-    }
-    return 0;
-}

 /* 'allocate' is:
 *
@@ -473,22 +443,23 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
                /* round to cluster size */
                cluster_offset = (cluster_offset + s->cluster_size - 1) &
                    ~(s->cluster_size - 1);
-                bdrv_truncate(bs->file, cluster_offset + s->cluster_size, NULL);
+                bdrv_truncate(bs->file, cluster_offset + s->cluster_size,
+                              PREALLOC_MODE_OFF, NULL);
                /* if encrypted, we must initialize the cluster
                   content which won't be written */
                if (bs->encrypted &&
                    (n_end - n_start) < s->cluster_sectors) {
                    uint64_t start_sect;
-                    assert(s->cipher);
+                    assert(s->crypto);
                    start_sect = (offset & ~(s->cluster_size - 1)) >> 9;
-                    memset(s->cluster_data + 512, 0x00, 512);
                    for(i = 0; i < s->cluster_sectors; i++) {
                        if (i < n_start || i >= n_end) {
                            Error *err = NULL;
-                            if (encrypt_sectors(s, start_sect + i,
-                                                s->cluster_data,
-                                                s->cluster_data + 512, 1,
-                                                true, &err) < 0) {
+                            memset(s->cluster_data, 0x00, 512);
+                            if (qcrypto_block_encrypt(s->crypto, start_sect + i,
+                                                      s->cluster_data,
+                                                      BDRV_SECTOR_SIZE,
+                                                      &err) < 0) {
                                error_free(err);
                                errno = EIO;
                                return -1;
@@ -533,7 +504,7 @@ static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs,
    if (!cluster_offset) {
        return 0;
    }
-    if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->cipher) {
+    if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->crypto) {
        return BDRV_BLOCK_DATA;
    }
    cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS);
@@ -664,9 +635,9 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
                break;
            }
            if (bs->encrypted) {
-                assert(s->cipher);
-                if (encrypt_sectors(s, sector_num, buf, buf,
-                                    n, false, &err) < 0) {
+                assert(s->crypto);
+                if (qcrypto_block_decrypt(s->crypto, sector_num, buf,
+                                          n * BDRV_SECTOR_SIZE, &err) < 0) {
                    goto fail;
                }
            }
@@ -700,9 +671,7 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
    BDRVQcowState *s = bs->opaque;
    int index_in_cluster;
    uint64_t cluster_offset;
-    const uint8_t *src_buf;
    int ret = 0, n;
-    uint8_t *cluster_data = NULL;
    struct iovec hd_iov;
    QEMUIOVector hd_qiov;
    uint8_t *buf;
@@ -710,7 +679,9 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,

    s->cluster_cache_offset = -1; /* disable compressed cache */

-    if (qiov->niov > 1) {
+    /* We must always copy the iov when encrypting, so we
+     * don't modify the original data buffer during encryption */
+    if (bs->encrypted || qiov->niov > 1) {
        buf = orig_buf = qemu_try_blockalign(bs, qiov->size);
        if (buf == NULL) {
            return -ENOMEM;
@@ -739,22 +710,16 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
        }
        if (bs->encrypted) {
            Error *err = NULL;
-            assert(s->cipher);
-            if (!cluster_data) {
-                cluster_data = g_malloc0(s->cluster_size);
-            }
-            if (encrypt_sectors(s, sector_num, cluster_data, buf,
-                                n, true, &err) < 0) {
+            assert(s->crypto);
+            if (qcrypto_block_encrypt(s->crypto, sector_num, buf,
+                                      n * BDRV_SECTOR_SIZE, &err) < 0) {
                error_free(err);
                ret = -EIO;
                break;
            }
-            src_buf = cluster_data;
-        } else {
-            src_buf = buf;
        }

-        hd_iov.iov_base = (void *)src_buf;
+        hd_iov.iov_base = (void *)buf;
        hd_iov.iov_len = n * 512;
        qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
        qemu_co_mutex_unlock(&s->lock);
@@ -773,10 +738,7 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
    }
    qemu_co_mutex_unlock(&s->lock);

-    if (qiov->niov > 1) {
-        qemu_vfree(orig_buf);
-    }
-    g_free(cluster_data);
+    qemu_vfree(orig_buf);

    return ret;
 }
@@ -785,8 +747,8 @@ static void qcow_close(BlockDriverState *bs)
 {
    BDRVQcowState *s = bs->opaque;

-    qcrypto_cipher_free(s->cipher);
-    s->cipher = NULL;
+    qcrypto_block_free(s->crypto);
+    s->crypto = NULL;
    g_free(s->l1_table);
    qemu_vfree(s->l2_cache);
    g_free(s->cluster_cache);
@@ -803,17 +765,35 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
    uint8_t *tmp;
    int64_t total_size = 0;
    char *backing_file = NULL;
-    int flags = 0;
    Error *local_err = NULL;
    int ret;
    BlockBackend *qcow_blk;
+    const char *encryptfmt = NULL;
+    QDict *options;
+    QDict *encryptopts = NULL;
+    QCryptoBlockCreateOptions *crypto_opts = NULL;
+    QCryptoBlock *crypto = NULL;

    /* Read out options */
    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
                          BDRV_SECTOR_SIZE);
+    if (total_size == 0) {
+        error_setg(errp, "Image size is too small, cannot be zero length");
+        ret = -EINVAL;
+        goto cleanup;
+    }
+
    backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
-    if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ENCRYPT, false)) {
-        flags |= BLOCK_FLAG_ENCRYPT;
+    encryptfmt = qemu_opt_get_del(opts, BLOCK_OPT_ENCRYPT_FORMAT);
+    if (encryptfmt) {
+        if (qemu_opt_get(opts, BLOCK_OPT_ENCRYPT)) {
+            error_setg(errp, "Options " BLOCK_OPT_ENCRYPT " and "
+                       BLOCK_OPT_ENCRYPT_FORMAT " are mutually exclusive");
+            ret = -EINVAL;
+            goto cleanup;
+        }
+    } else if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ENCRYPT, false)) {
+        encryptfmt = "aes";
    }

    ret = bdrv_create_file(filename, opts, &local_err);
@@ -833,7 +813,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)

    blk_set_allow_write_beyond_eof(qcow_blk, true);

-    ret = blk_truncate(qcow_blk, 0, errp);
+    ret = blk_truncate(qcow_blk, 0, PREALLOC_MODE_OFF, errp);
    if (ret < 0) {
        goto exit;
    }
@@ -867,8 +847,32 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
    l1_size = (total_size + (1LL << shift) - 1) >> shift;

    header.l1_table_offset = cpu_to_be64(header_size);
-    if (flags & BLOCK_FLAG_ENCRYPT) {
+
+    options = qemu_opts_to_qdict(opts, NULL);
+    qdict_extract_subqdict(options, &encryptopts, "encrypt.");
+    QDECREF(options);
+    if (encryptfmt) {
+        if (!g_str_equal(encryptfmt, "aes")) {
+            error_setg(errp, "Unknown encryption format '%s', expected 'aes'",
+                       encryptfmt);
+            ret = -EINVAL;
+            goto exit;
+        }
        header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
+
+        crypto_opts = block_crypto_create_opts_init(
+            Q_CRYPTO_BLOCK_FORMAT_QCOW, encryptopts, errp);
+        if (!crypto_opts) {
+            ret = -EINVAL;
+            goto exit;
+        }
+
+        crypto = qcrypto_block_create(crypto_opts, "encrypt.",
+                                      NULL, NULL, NULL, errp);
+        if (!crypto) {
+            ret = -EINVAL;
+            goto exit;
+        }
    } else {
        header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
    }
@@ -903,6 +907,9 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
 exit:
    blk_unref(qcow_blk);
 cleanup:
+    QDECREF(encryptopts);
+    qcrypto_block_free(crypto);
+    qapi_free_QCryptoBlockCreateOptions(crypto_opts);
    g_free(backing_file);
    return ret;
 }
@@ -917,7 +924,8 @@ static int qcow_make_empty(BlockDriverState *bs)
    if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
            l1_length) < 0)
        return -1;
-    ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length, NULL);
+    ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length,
+                        PREALLOC_MODE_OFF, NULL);
    if (ret < 0)
        return ret;

@@ -1041,9 +1049,15 @@ static QemuOptsList qcow_create_opts = {
        {
            .name = BLOCK_OPT_ENCRYPT,
            .type = QEMU_OPT_BOOL,
-            .help = "Encrypt the image",
-            .def_value_str = "off"
+            .help = "Encrypt the image with format 'aes'. (Deprecated "
+                    "in favor of " BLOCK_OPT_ENCRYPT_FORMAT "=aes)",
        },
+        {
+            .name = BLOCK_OPT_ENCRYPT_FORMAT,
+            .type = QEMU_OPT_STRING,
+            .help = "Encrypt the image, format choices: 'aes'",
+        },
+        BLOCK_CRYPTO_OPT_DEF_QCOW_KEY_SECRET("encrypt."),
        { /* end of list */ }
    }
 };
@@ -1064,7 +1078,6 @@ static BlockDriver bdrv_qcow = {
    .bdrv_co_writev         = qcow_co_writev,
    .bdrv_co_get_block_status   = qcow_co_get_block_status,

-    .bdrv_set_key           = qcow_set_key,
    .bdrv_make_empty        = qcow_make_empty,
    .bdrv_co_pwritev_compressed = qcow_co_pwritev_compressed,
    .bdrv_get_info          = qcow_get_info,
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -357,52 +357,6 @@ static int count_contiguous_clusters_unallocated(int nb_clusters,
    return i;
 }

-/* The crypt function is compatible with the linux cryptoloop
-   algorithm for < 4 GB images. NOTE: out_buf == in_buf is
-   supported */
-int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
-                          uint8_t *out_buf, const uint8_t *in_buf,
-                          int nb_sectors, bool enc,
-                          Error **errp)
-{
-    union {
-        uint64_t ll[2];
-        uint8_t b[16];
-    } ivec;
-    int i;
-    int ret;
-
-    for(i = 0; i < nb_sectors; i++) {
-        ivec.ll[0] = cpu_to_le64(sector_num);
-        ivec.ll[1] = 0;
-        if (qcrypto_cipher_setiv(s->cipher,
-                                 ivec.b, G_N_ELEMENTS(ivec.b),
-                                 errp) < 0) {
-            return -1;
-        }
-        if (enc) {
-            ret = qcrypto_cipher_encrypt(s->cipher,
-                                         in_buf,
-                                         out_buf,
-                                         512,
-                                         errp);
-        } else {
-            ret = qcrypto_cipher_decrypt(s->cipher,
-                                         in_buf,
-                                         out_buf,
-                                         512,
-                                         errp);
-        }
-        if (ret < 0) {
-            return -1;
-        }
-        sector_num++;
-        in_buf += 512;
-        out_buf += 512;
-    }
-    return 0;
-}
-
 static int coroutine_fn do_perform_cow_read(BlockDriverState *bs,
                                            uint64_t src_cluster_offset,
                                            unsigned offset_in_cluster,
@@ -435,19 +389,22 @@ static int coroutine_fn do_perform_cow_read(BlockDriverState *bs,

 static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs,
                                                uint64_t src_cluster_offset,
+                                                uint64_t cluster_offset,
                                                unsigned offset_in_cluster,
                                                uint8_t *buffer,
                                                unsigned bytes)
 {
    if (bytes && bs->encrypted) {
        BDRVQcow2State *s = bs->opaque;
-        int64_t sector = (src_cluster_offset + offset_in_cluster)
+        int64_t sector = (s->crypt_physical_offset ?
+                          (cluster_offset + offset_in_cluster) :
+                          (src_cluster_offset + offset_in_cluster))
                         >> BDRV_SECTOR_BITS;
-        assert(s->cipher);
        assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0);
        assert((bytes & ~BDRV_SECTOR_MASK) == 0);
-        if (qcow2_encrypt_sectors(s, sector, buffer, buffer,
-                                  bytes >> BDRV_SECTOR_BITS, true, NULL) < 0) {
+        assert(s->crypto);
+        if (qcrypto_block_encrypt(s->crypto, sector, buffer,
+                                  bytes, NULL) < 0) {
            return false;
        }
    }
@@ -834,10 +791,11 @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m)

    /* Encrypt the data if necessary before writing it */
    if (bs->encrypted) {
-        if (!do_perform_cow_encrypt(bs, m->offset, start->offset,
-                                    start_buffer, start->nb_bytes) ||
-            !do_perform_cow_encrypt(bs, m->offset, end->offset,
-                                    end_buffer, end->nb_bytes)) {
+        if (!do_perform_cow_encrypt(bs, m->offset, m->alloc_offset,
+                                    start->offset, start_buffer,
+                                    start->nb_bytes) ||
+            !do_perform_cow_encrypt(bs, m->offset, m->alloc_offset,
+                                    end->offset, end_buffer, end->nb_bytes)) {
            ret = -EIO;
            goto fail;
        }
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -281,25 +281,6 @@ int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index,
    return 0;
 }

-/*
- * Rounds the refcount table size up to avoid growing the table for each single
- * refcount block that is allocated.
- */
-static unsigned int next_refcount_table_size(BDRVQcow2State *s,
-    unsigned int min_size)
-{
-    unsigned int min_clusters = (min_size >> (s->cluster_bits - 3)) + 1;
-    unsigned int refcount_table_clusters =
-        MAX(1, s->refcount_table_size >> (s->cluster_bits - 3));
-
-    while (min_clusters > refcount_table_clusters) {
-        refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2;
-    }
-
-    return refcount_table_clusters << (s->cluster_bits - 3);
-}
-
-
 /* Checks if two offsets are described by the same refcount block */
 static int in_same_refcount_block(BDRVQcow2State *s, uint64_t offset_a,
    uint64_t offset_b)
@@ -321,7 +302,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
 {
    BDRVQcow2State *s = bs->opaque;
    unsigned int refcount_table_index;
-    int ret;
+    int64_t ret;

    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);

@@ -396,7 +377,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
        ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
                                    refcount_block);
        if (ret < 0) {
-            goto fail_block;
+            goto fail;
        }

        memset(*refcount_block, 0, s->cluster_size);
@@ -411,12 +392,12 @@ static int alloc_refcount_block(BlockDriverState *bs,
        ret = update_refcount(bs, new_block, s->cluster_size, 1, false,
                              QCOW2_DISCARD_NEVER);
        if (ret < 0) {
-            goto fail_block;
+            goto fail;
        }

        ret = qcow2_cache_flush(bs, s->refcount_block_cache);
        if (ret < 0) {
-            goto fail_block;
+            goto fail;
        }

        /* Initialize the new refcount block only after updating its refcount,
@@ -424,7 +405,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
        ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
                                    refcount_block);
        if (ret < 0) {
-            goto fail_block;
+            goto fail;
        }

        memset(*refcount_block, 0, s->cluster_size);
@@ -435,7 +416,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
    qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, *refcount_block);
    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
    if (ret < 0) {
-        goto fail_block;
+        goto fail;
    }

    /* If the refcount table is big enough, just hook the block up there */
@@ -446,7 +427,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
            s->refcount_table_offset + refcount_table_index * sizeof(uint64_t),
            &data64, sizeof(data64));
        if (ret < 0) {
-            goto fail_block;
+            goto fail;
        }

        s->refcount_table[refcount_table_index] = new_block;
@@ -490,74 +471,201 @@ static int alloc_refcount_block(BlockDriverState *bs,
                                            (new_block >> s->cluster_bits) + 1),
                                        s->refcount_block_size);

-    if (blocks_used > QCOW_MAX_REFTABLE_SIZE / sizeof(uint64_t)) {
-        return -EFBIG;
-    }
-
-    /* And now we need at least one block more for the new metadata */
-    uint64_t table_size = next_refcount_table_size(s, blocks_used + 1);
-    uint64_t last_table_size;
-    uint64_t blocks_clusters;
-    do {
-        uint64_t table_clusters =
-            size_to_clusters(s, table_size * sizeof(uint64_t));
-        blocks_clusters = 1 +
-            DIV_ROUND_UP(table_clusters, s->refcount_block_size);
-        uint64_t meta_clusters = table_clusters + blocks_clusters;
-
-        last_table_size = table_size;
-        table_size = next_refcount_table_size(s, blocks_used +
-            DIV_ROUND_UP(meta_clusters, s->refcount_block_size));
-
-    } while (last_table_size != table_size);
-
-#ifdef DEBUG_ALLOC2
-    fprintf(stderr, "qcow2: Grow refcount table %" PRId32 " => %" PRId64 "\n",
-        s->refcount_table_size, table_size);
-#endif
-
    /* Create the new refcount table and blocks */
    uint64_t meta_offset = (blocks_used * s->refcount_block_size) *
        s->cluster_size;
-    uint64_t table_offset = meta_offset + blocks_clusters * s->cluster_size;
-    uint64_t *new_table = g_try_new0(uint64_t, table_size);
-    void *new_blocks = g_try_malloc0(blocks_clusters * s->cluster_size);

-    assert(table_size > 0 && blocks_clusters > 0);
-    if (new_table == NULL || new_blocks == NULL) {
+    ret = qcow2_refcount_area(bs, meta_offset, 0, false,
+                              refcount_table_index, new_block);
+    if (ret < 0) {
+        return ret;
+    }
+
+    ret = load_refcount_block(bs, new_block, refcount_block);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* If we were trying to do the initial refcount update for some cluster
+     * allocation, we might have used the same clusters to store newly
+     * allocated metadata. Make the caller search some new space. */
+    return -EAGAIN;
+
+fail:
+    if (*refcount_block != NULL) {
+        qcow2_cache_put(bs, s->refcount_block_cache, refcount_block);
+    }
+    return ret;
+}
+
+/*
+ * Starting at @start_offset, this function creates new self-covering refcount
+ * structures: A new refcount table and refcount blocks which cover all of
+ * themselves, and a number of @additional_clusters beyond their end.
+ * @start_offset must be at the end of the image file, that is, there must be
+ * only empty space beyond it.
+ * If @exact_size is false, the refcount table will have 50 % more entries than
+ * necessary so it will not need to grow again soon.
+ * If @new_refblock_offset is not zero, it contains the offset of a refcount
+ * block that should be entered into the new refcount table at index
+ * @new_refblock_index.
+ *
+ * Returns: The offset after the new refcount structures (i.e. where the
+ *          @additional_clusters may be placed) on success, -errno on error.
+ */
+int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t start_offset,
+                            uint64_t additional_clusters, bool exact_size,
+                            int new_refblock_index,
+                            uint64_t new_refblock_offset)
+{
+    BDRVQcow2State *s = bs->opaque;
+    uint64_t total_refblock_count_u64, additional_refblock_count;
+    int total_refblock_count, table_size, area_reftable_index, table_clusters;
+    int i;
+    uint64_t table_offset, block_offset, end_offset;
+    int ret;
+    uint64_t *new_table;
+
+    assert(!(start_offset % s->cluster_size));
+
+    qcow2_refcount_metadata_size(start_offset / s->cluster_size +
+                                 additional_clusters,
+                                 s->cluster_size, s->refcount_order,
+                                 !exact_size, &total_refblock_count_u64);
+    if (total_refblock_count_u64 > QCOW_MAX_REFTABLE_SIZE) {
+        return -EFBIG;
+    }
+    total_refblock_count = total_refblock_count_u64;
+
+    /* Index in the refcount table of the first refcount block to cover the area
+     * of refcount structures we are about to create; we know that
+     * @total_refblock_count can cover @start_offset, so this will definitely
+     * fit into an int. */
+    area_reftable_index = (start_offset / s->cluster_size) /
+                          s->refcount_block_size;
+
+    if (exact_size) {
+        table_size = total_refblock_count;
+    } else {
+        table_size = total_refblock_count +
+                     DIV_ROUND_UP(total_refblock_count, 2);
+    }
+    /* The qcow2 file can only store the reftable size in number of clusters */
+    table_size = ROUND_UP(table_size, s->cluster_size / sizeof(uint64_t));
+    table_clusters = (table_size * sizeof(uint64_t)) / s->cluster_size;
+
+    if (table_size > QCOW_MAX_REFTABLE_SIZE) {
+        return -EFBIG;
+    }
+
+    new_table = g_try_new0(uint64_t, table_size);
+
+    assert(table_size > 0);
+    if (new_table == NULL) {
        ret = -ENOMEM;
-        goto fail_table;
+        goto fail;
    }

    /* Fill the new refcount table */
-    memcpy(new_table, s->refcount_table,
-        s->refcount_table_size * sizeof(uint64_t));
-    new_table[refcount_table_index] = new_block;
-
-    int i;
-    for (i = 0; i < blocks_clusters; i++) {
-        new_table[blocks_used + i] = meta_offset + (i * s->cluster_size);
+    if (table_size > s->max_refcount_table_index) {
+        /* We're actually growing the reftable */
+        memcpy(new_table, s->refcount_table,
+               (s->max_refcount_table_index + 1) * sizeof(uint64_t));
+    } else {
+        /* Improbable case: We're shrinking the reftable. However, the caller
+         * has assured us that there is only empty space beyond @start_offset,
+         * so we can simply drop all of the refblocks that won't fit into the
+         * new reftable. */
+        memcpy(new_table, s->refcount_table, table_size * sizeof(uint64_t));
    }

-    /* Fill the refcount blocks */
-    uint64_t table_clusters = size_to_clusters(s, table_size * sizeof(uint64_t));
-    int block = 0;
-    for (i = 0; i < table_clusters + blocks_clusters; i++) {
-        s->set_refcount(new_blocks, block++, 1);
+    if (new_refblock_offset) {
+        assert(new_refblock_index < total_refblock_count);
+        new_table[new_refblock_index] = new_refblock_offset;
    }

+    /* Count how many new refblocks we have to create */
+    additional_refblock_count = 0;
+    for (i = area_reftable_index; i < total_refblock_count; i++) {
+        if (!new_table[i]) {
+            additional_refblock_count++;
+        }
+    }
+
+    table_offset = start_offset + additional_refblock_count * s->cluster_size;
+    end_offset = table_offset + table_clusters * s->cluster_size;
+
+    /* Fill the refcount blocks, and create new ones, if necessary */
+    block_offset = start_offset;
+    for (i = area_reftable_index; i < total_refblock_count; i++) {
+        void *refblock_data;
+        uint64_t first_offset_covered;
+
+        /* Reuse an existing refblock if possible, create a new one otherwise */
+        if (new_table[i]) {
+            ret = qcow2_cache_get(bs, s->refcount_block_cache, new_table[i],
+                                  &refblock_data);
+            if (ret < 0) {
+                goto fail;
+            }
+        } else {
+            ret = qcow2_cache_get_empty(bs, s->refcount_block_cache,
+                                        block_offset, &refblock_data);
+            if (ret < 0) {
+                goto fail;
+            }
+            memset(refblock_data, 0, s->cluster_size);
+            qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache,
+                                         refblock_data);
+
+            new_table[i] = block_offset;
+            block_offset += s->cluster_size;
+        }
+
+        /* First host offset covered by this refblock */
+        first_offset_covered = (uint64_t)i * s->refcount_block_size *
+                               s->cluster_size;
+        if (first_offset_covered < end_offset) {
+            int j, end_index;
+
+            /* Set the refcount of all of the new refcount structures to 1 */
+
+            if (first_offset_covered < start_offset) {
+                assert(i == area_reftable_index);
+                j = (start_offset - first_offset_covered) / s->cluster_size;
+                assert(j < s->refcount_block_size);
+            } else {
+                j = 0;
+            }
+
+            end_index = MIN((end_offset - first_offset_covered) /
+                            s->cluster_size,
+                            s->refcount_block_size);
+
+            for (; j < end_index; j++) {
+                /* The caller guaranteed us this space would be empty */
+                assert(s->get_refcount(refblock_data, j) == 0);
+                s->set_refcount(refblock_data, j, 1);
+            }
+
+            qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache,
+                                         refblock_data);
+        }
+
+        qcow2_cache_put(bs, s->refcount_block_cache, &refblock_data);
+    }
+
+    assert(block_offset == table_offset);
+
    /* Write refcount blocks to disk */
    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS);
-    ret = bdrv_pwrite_sync(bs->file, meta_offset, new_blocks,
-        blocks_clusters * s->cluster_size);
-    g_free(new_blocks);
-    new_blocks = NULL;
+    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
    if (ret < 0) {
-        goto fail_table;
+        goto fail;
    }

    /* Write refcount table to disk */
-    for(i = 0; i < table_size; i++) {
+    for (i = 0; i < total_refblock_count; i++) {
        cpu_to_be64s(&new_table[i]);
    }

@@ -565,10 +673,10 @@ static int alloc_refcount_block(BlockDriverState *bs,
    ret = bdrv_pwrite_sync(bs->file, table_offset, new_table,
        table_size * sizeof(uint64_t));
    if (ret < 0) {
-        goto fail_table;
+        goto fail;
    }

-    for(i = 0; i < table_size; i++) {
+    for (i = 0; i < total_refblock_count; i++) {
        be64_to_cpus(&new_table[i]);
    }

@@ -584,7 +692,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
                           offsetof(QCowHeader, refcount_table_offset),
                           &data, sizeof(data));
    if (ret < 0) {
-        goto fail_table;
+        goto fail;
    }

    /* And switch it in memory */
@@ -601,23 +709,10 @@ static int alloc_refcount_block(BlockDriverState *bs,
    qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t),
                        QCOW2_DISCARD_OTHER);

-    ret = load_refcount_block(bs, new_block, refcount_block);
-    if (ret < 0) {
-        return ret;
-    }
+    return end_offset;

-    /* If we were trying to do the initial refcount update for some cluster
-     * allocation, we might have used the same clusters to store newly
-     * allocated metadata. Make the caller search some new space. */
-    return -EAGAIN;
-
-fail_table:
-    g_free(new_blocks);
+fail:
    g_free(new_table);
-fail_block:
-    if (*refcount_block != NULL) {
-        qcow2_cache_put(bs, s->refcount_block_cache, refcount_block);
-    }
    return ret;
 }

@@ -1323,11 +1418,10 @@ static int realloc_refcount_array(BDRVQcow2State *s, void **array,
 *
 * Modifies the number of errors in res.
 */
-static int inc_refcounts(BlockDriverState *bs,
-                         BdrvCheckResult *res,
-                         void **refcount_table,
-                         int64_t *refcount_table_size,
-                         int64_t offset, int64_t size)
+int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
+                             void **refcount_table,
+                             int64_t *refcount_table_size,
+                             int64_t offset, int64_t size)
 {
    BDRVQcow2State *s = bs->opaque;
    uint64_t start, last, cluster_offset, k, refcount;
@@ -1420,8 +1514,9 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
            nb_csectors = ((l2_entry >> s->csize_shift) &
                           s->csize_mask) + 1;
            l2_entry &= s->cluster_offset_mask;
-            ret = inc_refcounts(bs, res, refcount_table, refcount_table_size,
-                                l2_entry & ~511, nb_csectors * 512);
+            ret = qcow2_inc_refcounts_imrt(bs, res,
+                                           refcount_table, refcount_table_size,
+                                           l2_entry & ~511, nb_csectors * 512);
            if (ret < 0) {
                goto fail;
            }
@@ -1454,8 +1549,9 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
            }

            /* Mark cluster as used */
-            ret = inc_refcounts(bs, res, refcount_table, refcount_table_size,
-                                offset, s->cluster_size);
+            ret = qcow2_inc_refcounts_imrt(bs, res,
+                                           refcount_table, refcount_table_size,
+                                           offset, s->cluster_size);
            if (ret < 0) {
                goto fail;
            }
@@ -1508,8 +1604,8 @@ static int check_refcounts_l1(BlockDriverState *bs,
    l1_size2 = l1_size * sizeof(uint64_t);

    /* Mark L1 table as used */
-    ret = inc_refcounts(bs, res, refcount_table, refcount_table_size,
-                        l1_table_offset, l1_size2);
+    ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, refcount_table_size,
+                                   l1_table_offset, l1_size2);
    if (ret < 0) {
        goto fail;
    }
@@ -1538,8 +1634,9 @@ static int check_refcounts_l1(BlockDriverState *bs,
        if (l2_offset) {
            /* Mark L2 table as used */
            l2_offset &= L1E_OFFSET_MASK;
-            ret = inc_refcounts(bs, res, refcount_table, refcount_table_size,
-                                l2_offset, s->cluster_size);
+            ret = qcow2_inc_refcounts_imrt(bs, res,
+                                           refcount_table, refcount_table_size,
+                                           l2_offset, s->cluster_size);
            if (ret < 0) {
                goto fail;
            }
@@ -1730,7 +1827,7 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
                }

                ret = bdrv_truncate(bs->file, offset + s->cluster_size,
-                                    &local_err);
+                                    PREALLOC_MODE_OFF, &local_err);
                if (ret < 0) {
                    error_report_err(local_err);
                    goto resize_fail;
@@ -1757,14 +1854,15 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
                }

                res->corruptions_fixed++;
-                ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
-                                    offset, s->cluster_size);
+                ret = qcow2_inc_refcounts_imrt(bs, res,
+                                               refcount_table, nb_clusters,
+                                               offset, s->cluster_size);
                if (ret < 0) {
                    return ret;
                }
                /* No need to check whether the refcount is now greater than 1:
                 * This area was just allocated and zeroed, so it can only be
-                 * exactly 1 after inc_refcounts() */
+                 * exactly 1 after qcow2_inc_refcounts_imrt() */
                continue;

 resize_fail:
@@ -1779,8 +1877,8 @@ resize_fail:
        }

        if (offset != 0) {
-            ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
-                                offset, s->cluster_size);
+            ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
+                                           offset, s->cluster_size);
            if (ret < 0) {
                return ret;
            }
@@ -1820,8 +1918,8 @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
    }

    /* header */
-    ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
-                        0, s->cluster_size);
+    ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
+                                   0, s->cluster_size);
    if (ret < 0) {
        return ret;
    }
@@ -1842,16 +1940,32 @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
            return ret;
        }
    }
-    ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
-                        s->snapshots_offset, s->snapshots_size);
+    ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
+                                   s->snapshots_offset, s->snapshots_size);
    if (ret < 0) {
        return ret;
    }

    /* refcount data */
-    ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
-                        s->refcount_table_offset,
-                        s->refcount_table_size * sizeof(uint64_t));
+    ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
+                                   s->refcount_table_offset,
+                                   s->refcount_table_size * sizeof(uint64_t));
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* encryption */
+    if (s->crypto_header.length) {
+        ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
+                                       s->crypto_header.offset,
+                                       s->crypto_header.length);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    /* bitmaps */
+    ret = qcow2_check_bitmaps_refcounts(bs, res, refcount_table, nb_clusters);
    if (ret < 0) {
        return ret;
    }
--- a/block/qcow2.c
+++ b/block/qcow2.c
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -25,7 +25,7 @@
 #ifndef BLOCK_QCOW2_H
 #define BLOCK_QCOW2_H

-#include "crypto/cipher.h"
+#include "crypto/block.h"
 #include "qemu/coroutine.h"

 //#define DEBUG_ALLOC
@@ -36,6 +36,7 @@

 #define QCOW_CRYPT_NONE 0
 #define QCOW_CRYPT_AES  1
+#define QCOW_CRYPT_LUKS 2

 #define QCOW_MAX_CRYPT_CLUSTERS 32
 #define QCOW_MAX_SNAPSHOTS 65536
@@ -52,6 +53,10 @@
 * space for snapshot names and IDs */
 #define QCOW_MAX_SNAPSHOTS_SIZE (1024 * QCOW_MAX_SNAPSHOTS)

+/* Bitmap header extension constraints */
+#define QCOW2_MAX_BITMAPS 65535
+#define QCOW2_MAX_BITMAP_DIRECTORY_SIZE (1024 * QCOW2_MAX_BITMAPS)
+
 /* indicate that the refcount of the referenced cluster is exactly one. */
 #define QCOW_OFLAG_COPIED     (1ULL << 63)
 /* indicate that the cluster is compressed (they never have the copied flag) */
@@ -163,6 +168,11 @@ typedef struct QCowSnapshot {
 struct Qcow2Cache;
 typedef struct Qcow2Cache Qcow2Cache;

+typedef struct Qcow2CryptoHeaderExtension { /* crypto header location */
+    uint64_t offset; /* start of the header area in the image file */
+    uint64_t length; /* in bytes; refcount check skips it when 0 */
+} QEMU_PACKED Qcow2CryptoHeaderExtension;
+
 typedef struct Qcow2UnknownHeaderExtension {
    uint32_t magic;
    uint32_t len;
@@ -195,6 +205,14 @@ enum {
    QCOW2_COMPAT_FEAT_MASK            = QCOW2_COMPAT_LAZY_REFCOUNTS,
 };

+/* Autoclear feature bits: *_BITNR is the bit index, the bare name its mask */
+enum {
+    QCOW2_AUTOCLEAR_BITMAPS_BITNR = 0,
+    QCOW2_AUTOCLEAR_BITMAPS       = 1 << QCOW2_AUTOCLEAR_BITMAPS_BITNR,
+
+    QCOW2_AUTOCLEAR_MASK          = QCOW2_AUTOCLEAR_BITMAPS, /* known bits */
+};
+
 enum qcow2_discard_type {
    QCOW2_DISCARD_NEVER = 0,
    QCOW2_DISCARD_ALWAYS,
@@ -222,6 +240,13 @@ typedef uint64_t Qcow2GetRefcountFunc(const void *refcount_array,
 typedef void Qcow2SetRefcountFunc(void *refcount_array,
                                  uint64_t index, uint64_t value);

+typedef struct Qcow2BitmapHeaderExt { /* bitmaps header extension layout */
+    uint32_t nb_bitmaps; /* presumably limited by QCOW2_MAX_BITMAPS */
+    uint32_t reserved32; /* keeps the 64-bit fields below at offset 8 */
+    uint64_t bitmap_directory_size; /* presumably in bytes - TODO confirm */
+    uint64_t bitmap_directory_offset;
+} QEMU_PACKED Qcow2BitmapHeaderExt;
+
 typedef struct BDRVQcow2State {
    int cluster_bits;
    int cluster_size;
@@ -257,13 +282,21 @@ typedef struct BDRVQcow2State {

    CoMutex lock;

-    QCryptoCipher *cipher; /* current cipher, NULL if no key yet */
+    Qcow2CryptoHeaderExtension crypto_header; /* QCow2 header extension */
+    QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */
+    QCryptoBlock *crypto; /* Disk encryption format driver */
+    bool crypt_physical_offset; /* Whether to use virtual or physical offset
+                                   for encryption initialization vector tweak */
    uint32_t crypt_method_header;
    uint64_t snapshots_offset;
    int snapshots_size;
    unsigned int nb_snapshots;
    QCowSnapshot *snapshots;

+    uint32_t nb_bitmaps;
+    uint64_t bitmap_directory_size;
+    uint64_t bitmap_directory_offset;
+
    int flags;
    int qcow_version;
    bool use_lazy_refcounts;
@@ -492,6 +525,10 @@ static inline uint64_t refcount_diff(uint64_t r1, uint64_t r2)
 int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
                  int64_t sector_num, int nb_sectors);

+int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size,
+                                     int refcount_order, bool generous_increase,
+                                     uint64_t *refblock_count);
+
 int qcow2_mark_dirty(BlockDriverState *bs);
 int qcow2_mark_corrupt(BlockDriverState *bs);
 int qcow2_mark_consistent(BlockDriverState *bs);
@@ -512,6 +549,11 @@ int qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index,
                                  uint64_t addend, bool decrease,
                                  enum qcow2_discard_type type);

+int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t offset,
+                            uint64_t additional_clusters, bool exact_size,
+                            int new_refblock_index,
+                            uint64_t new_refblock_offset);
+
 int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size);
 int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
                                int64_t nb_clusters);
@@ -534,6 +576,10 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
                                 int64_t size);
 int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
                                  int64_t size);
+int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
+                             void **refcount_table,
+                             int64_t *refcount_table_size,
+                             int64_t offset, int64_t size);

 int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
                                BlockDriverAmendStatusCB *status_cb,
@@ -545,8 +591,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
 int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index);
 int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
 int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
-                          uint8_t *out_buf, const uint8_t *in_buf,
-                          int nb_sectors, bool enc, Error **errp);
+                          uint8_t *buf, int nb_sectors, bool enc, Error **errp);

 int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
                             unsigned int *bytes, uint64_t *cluster_offset);
@@ -605,4 +650,20 @@ int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
    void **table);
 void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table);

+/* qcow2-bitmap.c functions */
+int qcow2_check_bitmaps_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
+                                  void **refcount_table,
+                                  int64_t *refcount_table_size);
+bool qcow2_load_autoloading_dirty_bitmaps(BlockDriverState *bs, Error **errp);
+int qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp);
+void qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs, Error **errp);
+int qcow2_reopen_bitmaps_ro(BlockDriverState *bs, Error **errp);
+bool qcow2_can_store_new_dirty_bitmap(BlockDriverState *bs,
+                                      const char *name,
+                                      uint32_t granularity,
+                                      Error **errp);
+void qcow2_remove_persistent_dirty_bitmap(BlockDriverState *bs,
+                                          const char *name,
+                                          Error **errp);
+
 #endif
--- a/block/qed-cluster.c
+++ b/block/qed-cluster.c
@@ -85,6 +85,8 @@ static unsigned int qed_count_contiguous_clusters(BDRVQEDState *s,
 *
 * On failure QED_CLUSTER_L2 or QED_CLUSTER_L1 is returned for missing L2 or L1
 * table offset, respectively. len is number of contiguous unallocated bytes.
+ *
+ * Called with table_lock held.
 */
 int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request,
                                  uint64_t pos, size_t *len,
@@ -112,7 +114,6 @@ int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request,
    }

    ret = qed_read_l2_table(s, request, l2_offset);
-    qed_acquire(s);
    if (ret) {
        goto out;
    }
@@ -137,6 +138,5 @@ int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request,

 out:
    *img_offset = offset;
-    qed_release(s);
    return ret;
 }
--- a/block/qed-l2-cache.c
+++ b/block/qed-l2-cache.c
@@ -101,6 +101,8 @@ CachedL2Table *qed_alloc_l2_cache_entry(L2TableCache *l2_cache)
 /**
 * Decrease an entry's reference count and free if necessary when the reference
 * count drops to zero.
+ *
+ * Called with table_lock held.
 */
 void qed_unref_l2_cache_entry(CachedL2Table *entry)
 {
@@ -122,6 +124,8 @@ void qed_unref_l2_cache_entry(CachedL2Table *entry)
 *
 * For a cached entry, this function increases the reference count and returns
 * the entry.
+ *
+ * Called with table_lock held.
 */
 CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset)
 {
@@ -150,6 +154,8 @@ CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset)
 * N.B. This function steals a reference to the l2_table from the caller so the
 * caller must obtain a new reference by issuing a call to
 * qed_find_l2_cache_entry().
+ *
+ * Called with table_lock held.
 */
 void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table)
 {
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -18,6 +18,7 @@
 #include "qed.h"
 #include "qemu/bswap.h"

+/* Called either from qed_check or with table_lock held.  */
 static int qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table)
 {
    QEMUIOVector qiov;
@@ -32,18 +33,22 @@ static int qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table)

    trace_qed_read_table(s, offset, table);

+    if (qemu_in_coroutine()) {
+        qemu_co_mutex_unlock(&s->table_lock);
+    }
    ret = bdrv_preadv(s->bs->file, offset, &qiov);
+    if (qemu_in_coroutine()) {
+        qemu_co_mutex_lock(&s->table_lock);
+    }
    if (ret < 0) {
        goto out;
    }

    /* Byteswap offsets */
-    qed_acquire(s);
    noffsets = qiov.size / sizeof(uint64_t);
    for (i = 0; i < noffsets; i++) {
        table->offsets[i] = le64_to_cpu(table->offsets[i]);
    }
-    qed_release(s);

    ret = 0;
 out:
@@ -61,6 +66,8 @@ out:
 * @index:      Index of first element
 * @n:          Number of elements
 * @flush:      Whether or not to sync to disk
+ *
+ * Called either from qed_check or with table_lock held.
 */
 static int qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
                           unsigned int index, unsigned int n, bool flush)
@@ -97,16 +104,20 @@ static int qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
    /* Adjust for offset into table */
    offset += start * sizeof(uint64_t);

+    if (qemu_in_coroutine()) {
+        qemu_co_mutex_unlock(&s->table_lock);
+    }
    ret = bdrv_pwritev(s->bs->file, offset, &qiov);
+    if (qemu_in_coroutine()) {
+        qemu_co_mutex_lock(&s->table_lock);
+    }
    trace_qed_write_table_cb(s, table, flush, ret);
    if (ret < 0) {
        goto out;
    }

    if (flush) {
-        qed_acquire(s);
        ret = bdrv_flush(s->bs);
-        qed_release(s);
        if (ret < 0) {
            goto out;
        }
@@ -123,6 +134,7 @@ int qed_read_l1_table_sync(BDRVQEDState *s)
    return qed_read_table(s, s->header.l1_table_offset, s->l1_table);
 }

+/* Called either from qed_check or with table_lock held.  */
 int qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n)
 {
    BLKDBG_EVENT(s->bs->file, BLKDBG_L1_UPDATE);
@@ -136,6 +148,7 @@ int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
    return qed_write_l1_table(s, index, n);
 }

+/* Called either from qed_check or with table_lock held.  */
 int qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
 {
    int ret;
@@ -154,7 +167,6 @@ int qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
    BLKDBG_EVENT(s->bs->file, BLKDBG_L2_LOAD);
    ret = qed_read_table(s, offset, request->l2_table->table);

-    qed_acquire(s);
    if (ret) {
        /* can't trust loaded L2 table anymore */
        qed_unref_l2_cache_entry(request->l2_table);
@@ -170,7 +182,6 @@ int qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
        request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, offset);
        assert(request->l2_table != NULL);
    }
-    qed_release(s);

    return ret;
 }
@@ -180,6 +191,7 @@ int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset
    return qed_read_l2_table(s, request, offset);
 }

+/* Called either from qed_check or with table_lock held.  */
 int qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
                       unsigned int index, unsigned int n, bool flush)
 {
--- a/block/qed.c
+++ b/block/qed.c
@@ -93,6 +93,8 @@ int qed_write_header_sync(BDRVQEDState *s)
 *
 * This function only updates known header fields in-place and does not affect
 * extra data after the QED header.
+ *
+ * No new allocating reqs can start while this function runs.
 */
 static int coroutine_fn qed_write_header(BDRVQEDState *s)
 {
@@ -109,6 +111,8 @@ static int coroutine_fn qed_write_header(BDRVQEDState *s)
    QEMUIOVector qiov;
    int ret;

+    assert(s->allocating_acb || s->allocating_write_reqs_plugged);
+
    buf = qemu_blockalign(s->bs, len);
    iov = (struct iovec) {
        .iov_base = buf,
@@ -219,6 +223,8 @@ static int qed_read_string(BdrvChild *file, uint64_t offset, size_t n,
 * This function only produces the offset where the new clusters should be
 * written.  It updates BDRVQEDState but does not make any changes to the image
 * file.
+ *
+ * Called with table_lock held.
 */
 static uint64_t qed_alloc_clusters(BDRVQEDState *s, unsigned int n)
 {
@@ -236,6 +242,8 @@ QEDTable *qed_alloc_table(BDRVQEDState *s)

 /**
 * Allocate a new zeroed L2 table
+ *
+ * Called with table_lock held.
 */
 static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
 {
@@ -249,19 +257,32 @@ static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
    return l2_table;
 }

-static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
+static bool qed_plug_allocating_write_reqs(BDRVQEDState *s)
 {
+    qemu_co_mutex_lock(&s->table_lock);
+
+    /* No reentrancy is allowed.  */
    assert(!s->allocating_write_reqs_plugged);
+    if (s->allocating_acb != NULL) {
+        /* Another allocating write came concurrently.  This cannot happen
+         * from bdrv_qed_co_drain, but it can happen when the timer runs.
+         */
+        qemu_co_mutex_unlock(&s->table_lock);
+        return false;
+    }

    s->allocating_write_reqs_plugged = true;
+    qemu_co_mutex_unlock(&s->table_lock);
+    return true;
 }

 static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
 {
+    qemu_co_mutex_lock(&s->table_lock);
    assert(s->allocating_write_reqs_plugged);
-
    s->allocating_write_reqs_plugged = false;
-    qemu_co_enter_next(&s->allocating_write_reqs);
+    qemu_co_queue_next(&s->allocating_write_reqs);
+    qemu_co_mutex_unlock(&s->table_lock);
 }

 static void coroutine_fn qed_need_check_timer_entry(void *opaque)
@@ -269,17 +290,14 @@ static void coroutine_fn qed_need_check_timer_entry(void *opaque)
    BDRVQEDState *s = opaque;
    int ret;

-    /* The timer should only fire when allocating writes have drained */
-    assert(!s->allocating_acb);
-
    trace_qed_need_check_timer_cb(s);

-    qed_acquire(s);
-    qed_plug_allocating_write_reqs(s);
+    if (!qed_plug_allocating_write_reqs(s)) {
+        return;
+    }

    /* Ensure writes are on disk before clearing flag */
    ret = bdrv_co_flush(s->bs->file->bs);
-    qed_release(s);
    if (ret < 0) {
        qed_unplug_allocating_write_reqs(s);
        return;
@@ -301,16 +319,6 @@ static void qed_need_check_timer_cb(void *opaque)
    qemu_coroutine_enter(co);
 }

-void qed_acquire(BDRVQEDState *s)
-{
-    aio_context_acquire(bdrv_get_aio_context(s->bs));
-}
-
-void qed_release(BDRVQEDState *s)
-{
-    aio_context_release(bdrv_get_aio_context(s->bs));
-}
-
 static void qed_start_need_check_timer(BDRVQEDState *s)
 {
    trace_qed_start_need_check_timer(s);
@@ -350,7 +358,7 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs,
    }
 }

-static void bdrv_qed_drain(BlockDriverState *bs)
+static void coroutine_fn bdrv_qed_co_drain(BlockDriverState *bs)
 {
    BDRVQEDState *s = bs->opaque;

@@ -359,10 +367,20 @@ static void bdrv_qed_drain(BlockDriverState *bs)
     */
    if (s->need_check_timer && timer_pending(s->need_check_timer)) {
        qed_cancel_need_check_timer(s);
-        qed_need_check_timer_cb(s);
+        qed_need_check_timer_entry(s);
    }
 }

+static void bdrv_qed_init_state(BlockDriverState *bs)
+{
+    BDRVQEDState *s = bs->opaque;
+
+    memset(s, 0, sizeof(BDRVQEDState));
+    s->bs = bs;
+    qemu_co_mutex_init(&s->table_lock);
+    qemu_co_queue_init(&s->allocating_write_reqs);
+}
+
 static int bdrv_qed_do_open(BlockDriverState *bs, QDict *options, int flags,
                            Error **errp)
 {
@@ -371,9 +389,6 @@ static int bdrv_qed_do_open(BlockDriverState *bs, QDict *options, int flags,
    int64_t file_size;
    int ret;

-    s->bs = bs;
-    qemu_co_queue_init(&s->allocating_write_reqs);
-
    ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header));
    if (ret < 0) {
        return ret;
@@ -507,6 +522,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
        return -EINVAL;
    }

+    bdrv_qed_init_state(bs);
    return bdrv_qed_do_open(bs, options, flags, errp);
 }

@@ -583,7 +599,7 @@ static int qed_create(const char *filename, uint32_t cluster_size,
    blk_set_allow_write_beyond_eof(blk, true);

    /* File must start empty and grow, check truncate is supported */
-    ret = blk_truncate(blk, 0, errp);
+    ret = blk_truncate(blk, 0, PREALLOC_MODE_OFF, errp);
    if (ret < 0) {
        goto out;
    }
@@ -681,6 +697,7 @@ typedef struct {
    BlockDriverState **file;
 } QEDIsAllocatedCB;

+/* Called with table_lock held.  */
 static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len)
 {
    QEDIsAllocatedCB *cb = opaque;
@@ -728,6 +745,7 @@ static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
    uint64_t offset;
    int ret;

+    qemu_co_mutex_lock(&s->table_lock);
    ret = qed_find_cluster(s, &request, cb.pos, &len, &offset);
    qed_is_allocated_cb(&cb, ret, offset, len);

@@ -735,6 +753,7 @@ static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
    assert(cb.status != BDRV_BLOCK_OFFSET_MASK);

    qed_unref_l2_cache_entry(request.l2_table);
+    qemu_co_mutex_unlock(&s->table_lock);

    return cb.status;
 }
@@ -865,6 +884,8 @@ out:
 *
 * The cluster offset may be an allocated byte offset in the image file, the
 * zero cluster marker, or the unallocated cluster marker.
+ *
+ * Called with table_lock held.
 */
 static void coroutine_fn qed_update_l2_table(BDRVQEDState *s, QEDTable *table,
                                             int index, unsigned int n,
@@ -880,6 +901,7 @@ static void coroutine_fn qed_update_l2_table(BDRVQEDState *s, QEDTable *table,
    }
 }

+/* Called with table_lock held.  */
 static void coroutine_fn qed_aio_complete(QEDAIOCB *acb)
 {
    BDRVQEDState *s = acb_to_s(acb);
@@ -903,7 +925,7 @@ static void coroutine_fn qed_aio_complete(QEDAIOCB *acb)
    if (acb == s->allocating_acb) {
        s->allocating_acb = NULL;
        if (!qemu_co_queue_empty(&s->allocating_write_reqs)) {
-            qemu_co_enter_next(&s->allocating_write_reqs);
+            qemu_co_queue_next(&s->allocating_write_reqs);
        } else if (s->header.features & QED_F_NEED_CHECK) {
            qed_start_need_check_timer(s);
        }
@@ -912,6 +934,8 @@ static void coroutine_fn qed_aio_complete(QEDAIOCB *acb)

 /**
 * Update L1 table with new L2 table offset and write it out
+ *
+ * Called with table_lock held.
 */
 static int coroutine_fn qed_aio_write_l1_update(QEDAIOCB *acb)
 {
@@ -940,6 +964,8 @@ static int coroutine_fn qed_aio_write_l1_update(QEDAIOCB *acb)

 /**
 * Update L2 table with new cluster offsets and write them out
+ *
+ * Called with table_lock held.
 */
 static int coroutine_fn qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset)
 {
@@ -976,50 +1002,26 @@ static int coroutine_fn qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset)

 /**
 * Write data to the image file
+ *
+ * Called with table_lock *not* held.
 */
 static int coroutine_fn qed_aio_write_main(QEDAIOCB *acb)
 {
    BDRVQEDState *s = acb_to_s(acb);
    uint64_t offset = acb->cur_cluster +
                      qed_offset_into_cluster(s, acb->cur_pos);
-    int ret;

    trace_qed_aio_write_main(s, acb, 0, offset, acb->cur_qiov.size);

    BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
-    ret = bdrv_co_pwritev(s->bs->file, offset, acb->cur_qiov.size,
-                          &acb->cur_qiov, 0);
-    if (ret < 0) {
-        return ret;
-    }
-
-    if (acb->find_cluster_ret != QED_CLUSTER_FOUND) {
-        if (s->bs->backing) {
-            /*
-             * Flush new data clusters before updating the L2 table
-             *
-             * This flush is necessary when a backing file is in use.  A crash
-             * during an allocating write could result in empty clusters in the
-             * image.  If the write only touched a subregion of the cluster,
-             * then backing image sectors have been lost in the untouched
-             * region.  The solution is to flush after writing a new data
-             * cluster and before updating the L2 table.
-             */
-            ret = bdrv_co_flush(s->bs->file->bs);
-            if (ret < 0) {
-                return ret;
-            }
-        }
-        ret = qed_aio_write_l2_update(acb, acb->cur_cluster);
-        if (ret < 0) {
-            return ret;
-        }
-    }
-    return 0;
+    return bdrv_co_pwritev(s->bs->file, offset, acb->cur_qiov.size,
+                           &acb->cur_qiov, 0);
 }

 /**
 * Populate untouched regions of new data cluster
+ *
+ * Called with table_lock held; releases it internally, retakes it on return.
 */
 static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb)
 {
@@ -1027,6 +1029,8 @@ static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb)
    uint64_t start, len, offset;
    int ret;

+    qemu_co_mutex_unlock(&s->table_lock);
+
    /* Populate front untouched region of new data cluster */
    start = qed_start_of_cluster(s, acb->cur_pos);
    len = qed_offset_into_cluster(s, acb->cur_pos);
@@ -1034,7 +1038,7 @@ static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb)
    trace_qed_aio_write_prefill(s, acb, start, len, acb->cur_cluster);
    ret = qed_copy_from_backing_file(s, start, len, acb->cur_cluster);
    if (ret < 0) {
-        return ret;
+        goto out;
    }

    /* Populate back untouched region of new data cluster */
@@ -1047,10 +1051,31 @@ static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb)
    trace_qed_aio_write_postfill(s, acb, start, len, offset);
    ret = qed_copy_from_backing_file(s, start, len, offset);
    if (ret < 0) {
-        return ret;
+        goto out;
    }

-    return qed_aio_write_main(acb);
+    ret = qed_aio_write_main(acb);
+    if (ret < 0) {
+        goto out;
+    }
+
+    if (s->bs->backing) {
+        /*
+         * Flush new data clusters before updating the L2 table
+         *
+         * This flush is necessary when a backing file is in use.  A crash
+         * during an allocating write could result in empty clusters in the
+         * image.  If the write only touched a subregion of the cluster,
+         * then backing image sectors have been lost in the untouched
+         * region.  The solution is to flush after writing a new data
+         * cluster and before updating the L2 table.
+         */
+        ret = bdrv_co_flush(s->bs->file->bs);
+    }
+
+out:
+    qemu_co_mutex_lock(&s->table_lock);
+    return ret;
 }

 /**
@@ -1073,6 +1098,8 @@ static bool qed_should_set_need_check(BDRVQEDState *s)
 * @len:        Length in bytes
 *
 * This path is taken when writing to previously unallocated clusters.
+ *
+ * Called with table_lock held.
 */
 static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
 {
@@ -1087,7 +1114,7 @@ static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
    /* Freeze this request if another allocating write is in progress */
    if (s->allocating_acb != acb || s->allocating_write_reqs_plugged) {
        if (s->allocating_acb != NULL) {
-            qemu_co_queue_wait(&s->allocating_write_reqs, NULL);
+            qemu_co_queue_wait(&s->allocating_write_reqs, &s->table_lock);
            assert(s->allocating_acb == NULL);
        }
        s->allocating_acb = acb;
@@ -1103,6 +1130,7 @@ static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
        if (acb->find_cluster_ret == QED_CLUSTER_ZERO) {
            return 0;
        }
+        acb->cur_cluster = 1;
    } else {
        acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters);
    }
@@ -1115,15 +1143,14 @@ static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
        }
    }

-    if (acb->flags & QED_AIOCB_ZERO) {
-        ret = qed_aio_write_l2_update(acb, 1);
-    } else {
+    if (!(acb->flags & QED_AIOCB_ZERO)) {
        ret = qed_aio_write_cow(acb);
+        if (ret < 0) {
+            return ret;
+        }
    }
-    if (ret < 0) {
-        return ret;
-    }
-    return 0;
+
+    return qed_aio_write_l2_update(acb, acb->cur_cluster);
 }

 /**
@@ -1134,10 +1161,17 @@ static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
 * @len:        Length in bytes
 *
 * This path is taken when writing to already allocated clusters.
+ *
+ * Called with table_lock held; releases it internally, retakes it on return.
 */
 static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset,
                                              size_t len)
 {
+    BDRVQEDState *s = acb_to_s(acb);
+    int r;
+
+    qemu_co_mutex_unlock(&s->table_lock);
+
    /* Allocate buffer for zero writes */
    if (acb->flags & QED_AIOCB_ZERO) {
        struct iovec *iov = acb->qiov->iov;
@@ -1145,7 +1179,8 @@ static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset,
        if (!iov->iov_base) {
            iov->iov_base = qemu_try_blockalign(acb->bs, iov->iov_len);
            if (iov->iov_base == NULL) {
-                return -ENOMEM;
+                r = -ENOMEM;
+                goto out;
            }
            memset(iov->iov_base, 0, iov->iov_len);
        }
@@ -1155,8 +1190,11 @@ static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset,
    acb->cur_cluster = offset;
    qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);

-    /* Do the actual write */
-    return qed_aio_write_main(acb);
+    /* Do the actual write.  */
+    r = qed_aio_write_main(acb);
+out:
+    qemu_co_mutex_lock(&s->table_lock);
+    return r;
 }

 /**
@@ -1166,6 +1204,8 @@ static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset,
 * @ret:        QED_CLUSTER_FOUND, QED_CLUSTER_L2 or QED_CLUSTER_L1
 * @offset:     Cluster offset in bytes
 * @len:        Length in bytes
+ *
+ * Called with table_lock held.
 */
 static int coroutine_fn qed_aio_write_data(void *opaque, int ret,
                                           uint64_t offset, size_t len)
@@ -1197,6 +1237,8 @@ static int coroutine_fn qed_aio_write_data(void *opaque, int ret,
 * @ret:        QED_CLUSTER_FOUND, QED_CLUSTER_L2 or QED_CLUSTER_L1
 * @offset:     Cluster offset in bytes
 * @len:        Length in bytes
+ *
+ * Called with table_lock held; releases it internally, retakes it on return.
 */
 static int coroutine_fn qed_aio_read_data(void *opaque, int ret,
                                          uint64_t offset, size_t len)
@@ -1204,6 +1246,9 @@ static int coroutine_fn qed_aio_read_data(void *opaque, int ret,
    QEDAIOCB *acb = opaque;
    BDRVQEDState *s = acb_to_s(acb);
    BlockDriverState *bs = acb->bs;
+    int r;
+
+    qemu_co_mutex_unlock(&s->table_lock);

    /* Adjust offset into cluster */
    offset += qed_offset_into_cluster(s, acb->cur_pos);
@@ -1212,22 +1257,23 @@ static int coroutine_fn qed_aio_read_data(void *opaque, int ret,

    qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);

-    /* Handle zero cluster and backing file reads */
+    /* Handle zero cluster and backing file reads, otherwise read
+     * data cluster directly.
+     */
    if (ret == QED_CLUSTER_ZERO) {
        qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size);
-        return 0;
+        r = 0;
    } else if (ret != QED_CLUSTER_FOUND) {
-        return qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
-                                     &acb->backing_qiov);
+        r = qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
+                                  &acb->backing_qiov);
+    } else {
+        BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
+        r = bdrv_co_preadv(bs->file, offset, acb->cur_qiov.size,
+                           &acb->cur_qiov, 0);
    }

-    BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
-    ret = bdrv_co_preadv(bs->file, offset, acb->cur_qiov.size,
-                         &acb->cur_qiov, 0);
-    if (ret < 0) {
-        return ret;
-    }
-    return 0;
+    qemu_co_mutex_lock(&s->table_lock);
+    return r;
 }

 /**
@@ -1240,6 +1286,7 @@ static int coroutine_fn qed_aio_next_io(QEDAIOCB *acb)
    size_t len;
    int ret;

+    qemu_co_mutex_lock(&s->table_lock);
    while (1) {
        trace_qed_aio_next_io(s, acb, 0, acb->cur_pos + acb->cur_qiov.size);

@@ -1279,6 +1326,7 @@ static int coroutine_fn qed_aio_next_io(QEDAIOCB *acb)

    trace_qed_aio_complete(s, acb, ret);
    qed_aio_complete(acb);
+    qemu_co_mutex_unlock(&s->table_lock);
    return ret;
 }

@@ -1342,12 +1390,19 @@ static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs,
                          QED_AIOCB_WRITE | QED_AIOCB_ZERO);
 }

-static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
+static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset,
+                             PreallocMode prealloc, Error **errp)
 {
    BDRVQEDState *s = bs->opaque;
    uint64_t old_image_size;
    int ret;

+    if (prealloc != PREALLOC_MODE_OFF) {
+        error_setg(errp, "Unsupported preallocation mode '%s'",
+                   PreallocMode_lookup[prealloc]);
+        return -ENOTSUP;
+    }
+
    if (!qed_is_image_size_valid(offset, s->header.cluster_size,
                                 s->header.table_size)) {
        error_setg(errp, "Invalid image size specified");
@@ -1467,8 +1522,14 @@ static void bdrv_qed_invalidate_cache(BlockDriverState *bs, Error **errp)

    bdrv_qed_close(bs);

-    memset(s, 0, sizeof(BDRVQEDState));
+    bdrv_qed_init_state(bs);
+    if (qemu_in_coroutine()) {
+        qemu_co_mutex_lock(&s->table_lock);
+    }
    ret = bdrv_qed_do_open(bs, NULL, bs->open_flags, &local_err);
+    if (qemu_in_coroutine()) {
+        qemu_co_mutex_unlock(&s->table_lock);
+    }
    if (local_err) {
        error_propagate(errp, local_err);
        error_prepend(errp, "Could not reopen qed layer: ");
@@ -1547,7 +1608,7 @@ static BlockDriver bdrv_qed = {
    .bdrv_check               = bdrv_qed_check,
    .bdrv_detach_aio_context  = bdrv_qed_detach_aio_context,
    .bdrv_attach_aio_context  = bdrv_qed_attach_aio_context,
-    .bdrv_drain               = bdrv_qed_drain,
+    .bdrv_co_drain            = bdrv_qed_co_drain,
 };

 static void bdrv_qed_init(void)
--- a/block/qed.h
+++ b/block/qed.h
@@ -151,15 +151,21 @@ typedef struct QEDAIOCB {

 typedef struct {
    BlockDriverState *bs;           /* device */
-    uint64_t file_size;             /* length of image file, in bytes */

+    /* Written only by an allocating write or the timer handler (the latter
+     * while allocating reqs are plugged).
+     */
    QEDHeader header;               /* always cpu-endian */
+
+    /* Protected by table_lock.  */
+    CoMutex table_lock;
    QEDTable *l1_table;
    L2TableCache l2_cache;          /* l2 table cache */
    uint32_t table_nelems;
    uint32_t l1_shift;
    uint32_t l2_shift;
    uint32_t l2_mask;
+    uint64_t file_size;             /* length of image file, in bytes */

    /* Allocating write request queue */
    QEDAIOCB *allocating_acb;
@@ -177,9 +183,6 @@ enum {
    QED_CLUSTER_L1,            /* cluster missing in L1 */
 };

-void qed_acquire(BDRVQEDState *s);
-void qed_release(BDRVQEDState *s);
-
 /**
 * Header functions
 */
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -312,6 +312,31 @@ static int64_t raw_getlength(BlockDriverState *bs)
    return s->size;
 }

+static BlockMeasureInfo *raw_measure(QemuOpts *opts, BlockDriverState *in_bs,
+                                     Error **errp)
+{
+    BlockMeasureInfo *info;
+    int64_t required;
+
+    if (in_bs) {
+        required = bdrv_getlength(in_bs);
+        if (required < 0) {
+            error_setg_errno(errp, -required, "Unable to get image size");
+            return NULL;
+        }
+    } else {
+        required = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+                            BDRV_SECTOR_SIZE);
+    }
+
+    info = g_new(BlockMeasureInfo, 1);
+    info->required = required;
+
+    /* Unallocated sectors count towards the file size in raw images */
+    info->fully_allocated = info->required;
+    return info;
+}
+
 static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
    return bdrv_get_info(bs->file->bs, bdi);
@@ -327,7 +352,8 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
    }
 }

-static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
+static int raw_truncate(BlockDriverState *bs, int64_t offset,
+                        PreallocMode prealloc, Error **errp)
 {
    BDRVRawState *s = bs->opaque;

@@ -343,7 +369,7 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp)

    s->size = offset;
    offset += s->offset;
-    return bdrv_truncate(bs->file, offset, errp);
+    return bdrv_truncate(bs->file, offset, prealloc, errp);
 }

 static int raw_media_changed(BlockDriverState *bs)
@@ -479,6 +505,7 @@ BlockDriver bdrv_raw = {
    .bdrv_truncate        = &raw_truncate,
    .bdrv_getlength       = &raw_getlength,
    .has_variable_length  = true,
+    .bdrv_measure         = &raw_measure,
    .bdrv_get_info        = &raw_get_info,
    .bdrv_refresh_limits  = &raw_refresh_limits,
    .bdrv_probe_blocksizes = &raw_probe_blocksizes,
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -555,9 +555,9 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
     * filename encoded options */
    filename = qdict_get_try_str(options, "filename");
    if (filename) {
-        error_report("Warning: 'filename' option specified. "
-                      "This is an unsupported option, and may be deprecated "
-                      "in the future");
+        warn_report("'filename' option specified. "
+                    "This is an unsupported option, and may be deprecated "
+                    "in the future");
        qemu_rbd_parse_filename(filename, options, &local_err);
        if (local_err) {
            r = -EINVAL;
@@ -936,11 +936,18 @@ static int64_t qemu_rbd_getlength(BlockDriverState *bs)
    return info.size;
 }

-static int qemu_rbd_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
+static int qemu_rbd_truncate(BlockDriverState *bs, int64_t offset,
+                             PreallocMode prealloc, Error **errp)
 {
    BDRVRBDState *s = bs->opaque;
    int r;

+    if (prealloc != PREALLOC_MODE_OFF) {
+        error_setg(errp, "Unsupported preallocation mode '%s'",
+                   PreallocMode_lookup[prealloc]);
+        return -ENOTSUP;
+    }
+
    r = rbd_resize(s->image, offset);
    if (r < 0) {
        error_setg_errno(errp, -r, "Failed to resize file");
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -390,6 +390,7 @@ struct BDRVSheepdogState {
    QLIST_HEAD(inflight_aio_head, AIOReq) inflight_aio_head;
    QLIST_HEAD(failed_aio_head, AIOReq) failed_aio_head;

+    CoMutex queue_lock;
    CoQueue overlapping_queue;
    QLIST_HEAD(inflight_aiocb_head, SheepdogAIOCB) inflight_aiocb_head;
 };
@@ -488,7 +489,7 @@ static void wait_for_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *acb)
 retry:
    QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) {
        if (AIOCBOverlapping(acb, cb)) {
-            qemu_co_queue_wait(&s->overlapping_queue, NULL);
+            qemu_co_queue_wait(&s->overlapping_queue, &s->queue_lock);
            goto retry;
        }
    }
@@ -525,8 +526,10 @@ static void sd_aio_setup(SheepdogAIOCB *acb, BDRVSheepdogState *s,
        return;
    }

+    qemu_co_mutex_lock(&s->queue_lock);
    wait_for_overlapping_aiocb(s, acb);
    QLIST_INSERT_HEAD(&s->inflight_aiocb_head, acb, aiocb_siblings);
+    qemu_co_mutex_unlock(&s->queue_lock);
 }

 static SocketAddress *sd_socket_address(const char *path,
@@ -785,6 +788,7 @@ static coroutine_fn void reconnect_to_sdog(void *opaque)
     * have to move all the inflight requests to the failed queue before
     * resend_aioreq() is called.
     */
+    qemu_co_mutex_lock(&s->queue_lock);
    QLIST_FOREACH_SAFE(aio_req, &s->inflight_aio_head, aio_siblings, next) {
        QLIST_REMOVE(aio_req, aio_siblings);
        QLIST_INSERT_HEAD(&s->failed_aio_head, aio_req, aio_siblings);
@@ -794,8 +798,11 @@ static coroutine_fn void reconnect_to_sdog(void *opaque)
    while (!QLIST_EMPTY(&s->failed_aio_head)) {
        aio_req = QLIST_FIRST(&s->failed_aio_head);
        QLIST_REMOVE(aio_req, aio_siblings);
+        qemu_co_mutex_unlock(&s->queue_lock);
        resend_aioreq(s, aio_req);
+        qemu_co_mutex_lock(&s->queue_lock);
    }
+    qemu_co_mutex_unlock(&s->queue_lock);
 }

 /*
@@ -887,7 +894,10 @@ static void coroutine_fn aio_read_response(void *opaque)
    */
    s->co_recv = NULL;

+    qemu_co_mutex_lock(&s->queue_lock);
    QLIST_REMOVE(aio_req, aio_siblings);
+    qemu_co_mutex_unlock(&s->queue_lock);
+
    switch (rsp.result) {
    case SD_RES_SUCCESS:
        break;
@@ -1307,7 +1317,9 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
    uint64_t old_oid = aio_req->base_oid;
    bool create = aio_req->create;

+    qemu_co_mutex_lock(&s->queue_lock);
    QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
+    qemu_co_mutex_unlock(&s->queue_lock);

    if (!nr_copies) {
        error_report("bug");
@@ -1678,6 +1690,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
    bs->total_sectors = s->inode.vdi_size / BDRV_SECTOR_SIZE;
    pstrcpy(s->name, sizeof(s->name), vdi);
    qemu_co_mutex_init(&s->lock);
+    qemu_co_mutex_init(&s->queue_lock);
    qemu_co_queue_init(&s->overlapping_queue);
    qemu_opts_del(opts);
    g_free(buf);
@@ -2153,13 +2166,20 @@ static int64_t sd_getlength(BlockDriverState *bs)
    return s->inode.vdi_size;
 }

-static int sd_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
+static int sd_truncate(BlockDriverState *bs, int64_t offset,
+                       PreallocMode prealloc, Error **errp)
 {
    BDRVSheepdogState *s = bs->opaque;
    int ret, fd;
    unsigned int datalen;
    uint64_t max_vdi_size;

+    if (prealloc != PREALLOC_MODE_OFF) {
+        error_setg(errp, "Unsupported preallocation mode '%s'",
+                   PreallocMode_lookup[prealloc]);
+        return -ENOTSUP;
+    }
+
    max_vdi_size = (UINT64_C(1) << s->inode.block_size_shift) * MAX_DATA_OBJS;
    if (offset < s->inode.vdi_size) {
        error_setg(errp, "shrinking is not supported");
@@ -2431,12 +2451,16 @@ static void coroutine_fn sd_co_rw_vector(SheepdogAIOCB *acb)

 static void sd_aio_complete(SheepdogAIOCB *acb)
 {
+    BDRVSheepdogState *s;
    if (acb->aiocb_type == AIOCB_FLUSH_CACHE) {
        return;
    }

+    s = acb->s;
+    qemu_co_mutex_lock(&s->queue_lock);
    QLIST_REMOVE(acb, aiocb_siblings);
-    qemu_co_queue_restart_all(&acb->s->overlapping_queue);
+    qemu_co_queue_restart_all(&s->overlapping_queue);
+    qemu_co_mutex_unlock(&s->queue_lock);
 }

 static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
@@ -2448,7 +2472,7 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
    BDRVSheepdogState *s = bs->opaque;

    if (offset > s->inode.vdi_size) {
-        ret = sd_truncate(bs, offset, NULL);
+        ret = sd_truncate(bs, offset, PREALLOC_MODE_OFF, NULL);
        if (ret < 0) {
            return ret;
        }
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -888,13 +888,22 @@ static int ssh_has_zero_init(BlockDriverState *bs)
    return has_zero_init;
 }

+typedef struct BDRVSSHRestart {
+    BlockDriverState *bs;
+    Coroutine *co;
+} BDRVSSHRestart;
+
 static void restart_coroutine(void *opaque)
 {
-    Coroutine *co = opaque;
+    BDRVSSHRestart *restart = opaque;
+    BlockDriverState *bs = restart->bs;
+    BDRVSSHState *s = bs->opaque;
+    AioContext *ctx = bdrv_get_aio_context(bs);

-    DPRINTF("co=%p", co);
+    DPRINTF("co=%p", restart->co);
+    aio_set_fd_handler(ctx, s->sock, false, NULL, NULL, NULL, NULL);

-    aio_co_wake(co);
+    aio_co_wake(restart->co);
 }

 /* A non-blocking call returned EAGAIN, so yield, ensuring the
@@ -905,7 +914,10 @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
 {
    int r;
    IOHandler *rd_handler = NULL, *wr_handler = NULL;
-    Coroutine *co = qemu_coroutine_self();
+    BDRVSSHRestart restart = {
+        .bs = bs,
+        .co = qemu_coroutine_self()
+    };

    r = libssh2_session_block_directions(s->session);

@@ -920,11 +932,9 @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
            rd_handler, wr_handler);

    aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
-                       false, rd_handler, wr_handler, NULL, co);
+                       false, rd_handler, wr_handler, NULL, &restart);
    qemu_coroutine_yield();
    DPRINTF("s->sock=%d - back", s->sock);
-    aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, false,
-                       NULL, NULL, NULL, NULL);
 }

 /* SFTP has a function `libssh2_sftp_seek64' which seeks to a position
@@ -1114,8 +1124,8 @@ static coroutine_fn int ssh_co_writev(BlockDriverState *bs,
 static void unsafe_flush_warning(BDRVSSHState *s, const char *what)
 {
    if (!s->unsafe_flush_warning) {
-        error_report("warning: ssh server %s does not support fsync",
-                     s->inet->host);
+        warn_report("ssh server %s does not support fsync",
+                    s->inet->host);
        if (what) {
            error_report("to support fsync, you need %s", what);
        }
--- a/block/throttle-groups.c
+++ b/block/throttle-groups.c
@@ -61,6 +61,7 @@ typedef struct ThrottleGroup {
    QLIST_HEAD(, BlockBackendPublic) head;
    BlockBackend *tokens[2];
    bool any_timer_armed[2];
+    QEMUClockType clock_type;

    /* These two are protected by the global throttle_groups_lock */
    unsigned refcount;
@@ -98,6 +99,12 @@ ThrottleState *throttle_group_incref(const char *name)
    if (!tg) {
        tg = g_new0(ThrottleGroup, 1);
        tg->name = g_strdup(name);
+        tg->clock_type = QEMU_CLOCK_REALTIME;
+
+        if (qtest_enabled()) {
+            /* For testing block IO throttling only */
+            tg->clock_type = QEMU_CLOCK_VIRTUAL;
+        }
        qemu_mutex_init(&tg->lock);
        throttle_init(&tg->ts);
        QLIST_INIT(&tg->head);
@@ -310,7 +317,7 @@ static void schedule_next_request(BlockBackend *blk, bool is_write)
            token = blk;
        } else {
            ThrottleTimers *tt = &blk_get_public(token)->throttle_timers;
-            int64_t now = qemu_clock_get_ns(tt->clock_type);
+            int64_t now = qemu_clock_get_ns(tg->clock_type);
            timer_mod(tt->timers[is_write], now);
            tg->any_timer_armed[is_write] = true;
        }
@@ -419,18 +426,10 @@ void throttle_group_restart_blk(BlockBackend *blk)
 void throttle_group_config(BlockBackend *blk, ThrottleConfig *cfg)
 {
    BlockBackendPublic *blkp = blk_get_public(blk);
-    ThrottleTimers *tt = &blkp->throttle_timers;
    ThrottleState *ts = blkp->throttle_state;
    ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
    qemu_mutex_lock(&tg->lock);
-    /* throttle_config() cancels the timers */
-    if (timer_pending(tt->timers[0])) {
-        tg->any_timer_armed[0] = false;
-    }
-    if (timer_pending(tt->timers[1])) {
-        tg->any_timer_armed[1] = false;
-    }
-    throttle_config(ts, tt, cfg);
+    throttle_config(ts, tg->clock_type, cfg);
    qemu_mutex_unlock(&tg->lock);

    throttle_group_restart_blk(blk);
@@ -497,13 +496,6 @@ void throttle_group_register_blk(BlockBackend *blk, const char *groupname)
    BlockBackendPublic *blkp = blk_get_public(blk);
    ThrottleState *ts = throttle_group_incref(groupname);
    ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
-    int clock_type = QEMU_CLOCK_REALTIME;
-
-    if (qtest_enabled()) {
-        /* For testing block IO throttling only */
-        clock_type = QEMU_CLOCK_VIRTUAL;
-    }
-
    blkp->throttle_state = ts;

    qemu_mutex_lock(&tg->lock);
@@ -518,7 +510,7 @@ void throttle_group_register_blk(BlockBackend *blk, const char *groupname)

    throttle_timers_init(&blkp->throttle_timers,
                         blk_get_aio_context(blk),
-                         clock_type,
+                         tg->clock_type,
                         read_timer_cb,
                         write_timer_cb,
                         blk);
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -172,7 +172,7 @@ typedef struct {
    /* VDI header (converted to host endianness). */
    VdiHeader header;

-    CoMutex write_lock;
+    CoRwlock bmap_lock;

    Error *migration_blocker;
 } BDRVVdiState;
@@ -485,7 +485,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
        goto fail_free_bmap;
    }

-    qemu_co_mutex_init(&s->write_lock);
+    qemu_co_rwlock_init(&s->bmap_lock);

    return 0;

@@ -557,7 +557,9 @@ vdi_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
               n_bytes, offset);

        /* prepare next AIO request */
+        qemu_co_rwlock_rdlock(&s->bmap_lock);
        bmap_entry = le32_to_cpu(s->bmap[block_index]);
+        qemu_co_rwlock_unlock(&s->bmap_lock);
        if (!VDI_IS_ALLOCATED(bmap_entry)) {
            /* Block not allocated, return zeros, no need to wait. */
            qemu_iovec_memset(qiov, bytes_done, 0, n_bytes);
@@ -595,6 +597,7 @@ vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
    uint32_t block_index;
    uint32_t offset_in_block;
    uint32_t n_bytes;
+    uint64_t data_offset;
    uint32_t bmap_first = VDI_UNALLOCATED;
    uint32_t bmap_last = VDI_UNALLOCATED;
    uint8_t *block = NULL;
@@ -614,10 +617,19 @@ vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
               n_bytes, offset);

        /* prepare next AIO request */
+        qemu_co_rwlock_rdlock(&s->bmap_lock);
        bmap_entry = le32_to_cpu(s->bmap[block_index]);
        if (!VDI_IS_ALLOCATED(bmap_entry)) {
            /* Allocate new block and write to it. */
            uint64_t data_offset;
+            qemu_co_rwlock_upgrade(&s->bmap_lock);
+            bmap_entry = le32_to_cpu(s->bmap[block_index]);
+            if (VDI_IS_ALLOCATED(bmap_entry)) {
+                /* A concurrent allocation did the work for us.  */
+                qemu_co_rwlock_downgrade(&s->bmap_lock);
+                goto nonallocating_write;
+            }
+
            bmap_entry = s->header.blocks_allocated;
            s->bmap[block_index] = cpu_to_le32(bmap_entry);
            s->header.blocks_allocated++;
@@ -635,30 +647,18 @@ vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
            memset(block + offset_in_block + n_bytes, 0,
                   s->block_size - n_bytes - offset_in_block);

-            /* Note that this coroutine does not yield anywhere from reading the
-             * bmap entry until here, so in regards to all the coroutines trying
-             * to write to this cluster, the one doing the allocation will
-             * always be the first to try to acquire the lock.
-             * Therefore, it is also the first that will actually be able to
-             * acquire the lock and thus the padded cluster is written before
-             * the other coroutines can write to the affected area. */
-            qemu_co_mutex_lock(&s->write_lock);
+            /* Write the new block under CoRwlock write-side protection,
+             * so this full-cluster write does not overlap a partial write
+             * of the same cluster, issued from the "else" branch.
+             */
            ret = bdrv_pwrite(bs->file, data_offset, block, s->block_size);
-            qemu_co_mutex_unlock(&s->write_lock);
+            qemu_co_rwlock_unlock(&s->bmap_lock);
        } else {
-            uint64_t data_offset = s->header.offset_data +
-                                   (uint64_t)bmap_entry * s->block_size +
-                                   offset_in_block;
-            qemu_co_mutex_lock(&s->write_lock);
-            /* This lock is only used to make sure the following write operation
-             * is executed after the write issued by the coroutine allocating
-             * this cluster, therefore we do not need to keep it locked.
-             * As stated above, the allocating coroutine will always try to lock
-             * the mutex before all the other concurrent accesses to that
-             * cluster, therefore at this point we can be absolutely certain
-             * that that write operation has returned (there may be other writes
-             * in flight, but they do not concern this very operation). */
-            qemu_co_mutex_unlock(&s->write_lock);
+nonallocating_write:
+            data_offset = s->header.offset_data +
+                           (uint64_t)bmap_entry * s->block_size +
+                           offset_in_block;
+            qemu_co_rwlock_unlock(&s->bmap_lock);

            qemu_iovec_reset(&local_qiov);
            qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
@@ -832,7 +832,8 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
    }

    if (image_type == VDI_TYPE_STATIC) {
-        ret = blk_truncate(blk, offset + blocks * block_size, errp);
+        ret = blk_truncate(blk, offset + blocks * block_size,
+                           PREALLOC_MODE_OFF, errp);
        if (ret < 0) {
            error_prepend(errp, "Failed to statically allocate %s", filename);
            goto exit;
--- a/block/vhdx-log.c
+++ b/block/vhdx-log.c
@@ -548,7 +548,7 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
            if (new_file_size % (1024*1024)) {
                /* round up to nearest 1MB boundary */
                new_file_size = ((new_file_size >> 20) + 1) << 20;
-                bdrv_truncate(bs->file, new_file_size, NULL);
+                bdrv_truncate(bs->file, new_file_size, PREALLOC_MODE_OFF, NULL);
            }
        }
        qemu_vfree(desc_entries);
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -1171,7 +1171,8 @@ static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
    /* per the spec, the address for a block is in units of 1MB */
    *new_offset = ROUND_UP(*new_offset, 1024 * 1024);

-    return bdrv_truncate(bs->file, *new_offset + s->block_size, NULL);
+    return bdrv_truncate(bs->file, *new_offset + s->block_size,
+                         PREALLOC_MODE_OFF, NULL);
 }

 /*
@@ -1607,12 +1608,13 @@ static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
    if (type == VHDX_TYPE_DYNAMIC) {
        /* All zeroes, so we can just extend the file - the end of the BAT
         * is the furthest thing we have written yet */
-        ret = blk_truncate(blk, data_file_offset, errp);
+        ret = blk_truncate(blk, data_file_offset, PREALLOC_MODE_OFF, errp);
        if (ret < 0) {
            goto exit;
        }
    } else if (type == VHDX_TYPE_FIXED) {
-        ret = blk_truncate(blk, data_file_offset + image_size, errp);
+        ret = blk_truncate(blk, data_file_offset + image_size,
+                           PREALLOC_MODE_OFF, errp);
        if (ret < 0) {
            goto exit;
        }
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -242,10 +242,11 @@ static void vmdk_free_last_extent(BlockDriverState *bs)
    s->extents = g_renew(VmdkExtent, s->extents, s->num_extents);
 }

-static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
+/* Return negative errno on failure, or 0 on success with CID in *pcid. */
+static int vmdk_read_cid(BlockDriverState *bs, int parent, uint32_t *pcid)
 {
    char *desc;
-    uint32_t cid = 0xffffffff;
+    uint32_t cid;
    const char *p_name, *cid_str;
    size_t cid_str_size;
    BDRVVmdkState *s = bs->opaque;
@@ -254,8 +255,7 @@ static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
    desc = g_malloc0(DESC_SIZE);
    ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
    if (ret < 0) {
-        g_free(desc);
-        return 0;
+        goto out;
    }

    if (parent) {
@@ -268,13 +268,21 @@ static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)

    desc[DESC_SIZE - 1] = '\0';
    p_name = strstr(desc, cid_str);
-    if (p_name != NULL) {
-        p_name += cid_str_size;
-        sscanf(p_name, "%" SCNx32, &cid);
+    if (p_name == NULL) {
+        ret = -EINVAL;
+        goto out;
    }
+    p_name += cid_str_size;
+    if (sscanf(p_name, "%" SCNx32, &cid) != 1) {
+        ret = -EINVAL;
+        goto out;
+    }
+    *pcid = cid;
+    ret = 0;

+out:
    g_free(desc);
-    return cid;
+    return ret;
 }

 static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
@@ -322,7 +330,10 @@ static int vmdk_is_cid_valid(BlockDriverState *bs)
    if (!s->cid_checked && bs->backing) {
        BlockDriverState *p_bs = bs->backing->bs;

-        cur_pcid = vmdk_read_cid(p_bs, 0);
+        if (vmdk_read_cid(p_bs, 0, &cur_pcid) != 0) {
+            /* read failure: report as not valid */
+            return 0;
+        }
        if (s->parent_cid != cur_pcid) {
            /* CID not valid */
            return 0;
@@ -975,8 +986,14 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
    if (ret) {
        goto fail;
    }
-    s->cid = vmdk_read_cid(bs, 0);
-    s->parent_cid = vmdk_read_cid(bs, 1);
+    ret = vmdk_read_cid(bs, 0, &s->cid);
+    if (ret) {
+        goto fail;
+    }
+    ret = vmdk_read_cid(bs, 1, &s->parent_cid);
+    if (ret) {
+        goto fail;
+    }
    qemu_co_mutex_init(&s->lock);

    /* Disable migration when VMDK images are used */
@@ -1714,7 +1731,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
    blk_set_allow_write_beyond_eof(blk, true);

    if (flat) {
-        ret = blk_truncate(blk, filesize, errp);
+        ret = blk_truncate(blk, filesize, PREALLOC_MODE_OFF, errp);
        goto exit;
    }
    magic = cpu_to_be32(VMDK4_MAGIC);
@@ -1777,7 +1794,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
        goto exit;
    }

-    ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9, errp);
+    ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9,
+                       PREALLOC_MODE_OFF, errp);
    if (ret < 0) {
        goto exit;
    }
@@ -2007,8 +2025,11 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
            ret = -EINVAL;
            goto exit;
        }
-        parent_cid = vmdk_read_cid(blk_bs(blk), 0);
+        ret = vmdk_read_cid(blk_bs(blk), 0, &parent_cid);
        blk_unref(blk);
+        if (ret) {
+            goto exit;
+        }
        snprintf(parent_desc_line, BUF_SIZE,
                "parentFileNameHint=\"%s\"", backing_file);
    }
@@ -2086,7 +2107,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
    /* bdrv_pwrite write padding zeros to align to sector, we don't need that
     * for description file */
    if (desc_offset == 0) {
-        ret = blk_truncate(new_blk, desc_len, errp);
+        ret = blk_truncate(new_blk, desc_len, PREALLOC_MODE_OFF, errp);
    }
 exit:
    if (new_blk) {
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -460,17 +460,23 @@ static int vpc_reopen_prepare(BDRVReopenState *state,
 /*
 * Returns the absolute byte offset of the given sector in the image file.
 * If the sector is not allocated, -1 is returned instead.
+ * If an error occurred trying to write an updated block bitmap back to
+ * the file, -2 is returned, and the error value is written to *err.
+ * This can only happen for a write operation.
 *
 * The parameter write must be 1 if the offset will be used for a write
 * operation (the block bitmaps is updated then), 0 otherwise.
+ * If write is true then err must not be NULL.
 */
 static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,
-                                       bool write)
+                                       bool write, int *err)
 {
    BDRVVPCState *s = bs->opaque;
    uint64_t bitmap_offset, block_offset;
    uint32_t pagetable_index, offset_in_block;

+    assert(!(write && err == NULL));
+
    pagetable_index = offset / s->block_size;
    offset_in_block = offset % s->block_size;

@@ -487,21 +493,20 @@ static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,
       correctness. */
    if (write && (s->last_bitmap_offset != bitmap_offset)) {
        uint8_t bitmap[s->bitmap_size];
+        int r;

        s->last_bitmap_offset = bitmap_offset;
        memset(bitmap, 0xff, s->bitmap_size);
-        bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
+        r = bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
+        if (r < 0) {
+            *err = r;
+            return -2;
+        }
    }

    return block_offset;
 }

-static inline int64_t get_sector_offset(BlockDriverState *bs,
-                                        int64_t sector_num, bool write)
-{
-    return get_image_offset(bs, sector_num * BDRV_SECTOR_SIZE, write);
-}
-
 /*
 * Writes the footer to the end of the image file. This is needed when the
 * file grows as it overwrites the old footer
@@ -567,7 +572,7 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t offset)
    if (ret < 0)
        goto fail;

-    return get_image_offset(bs, offset, false);
+    return get_image_offset(bs, offset, false, NULL);

 fail:
    s->free_data_block_offset -= (s->block_size + s->bitmap_size);
@@ -607,7 +612,7 @@ vpc_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
    qemu_iovec_init(&local_qiov, qiov->niov);

    while (bytes > 0) {
-        image_offset = get_image_offset(bs, offset, false);
+        image_offset = get_image_offset(bs, offset, false, NULL);
        n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));

        if (image_offset == -1) {
@@ -656,7 +661,11 @@ vpc_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
    qemu_iovec_init(&local_qiov, qiov->niov);

    while (bytes > 0) {
-        image_offset = get_image_offset(bs, offset, true);
+        image_offset = get_image_offset(bs, offset, true, &ret);
+        if (image_offset == -2) {
+            /* Failed to write block bitmap: can't proceed with write */
+            goto fail;
+        }
        n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));

        if (image_offset == -1) {
@@ -696,6 +705,7 @@ static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
    VHDFooter *footer = (VHDFooter*) s->footer_buf;
    int64_t start, offset;
    bool allocated;
+    int64_t ret;
    int n;

    if (be32_to_cpu(footer->type) == VHD_FIXED) {
@@ -705,10 +715,13 @@ static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
               (sector_num << BDRV_SECTOR_BITS);
    }

-    offset = get_sector_offset(bs, sector_num, 0);
+    qemu_co_mutex_lock(&s->lock);
+
+    offset = get_image_offset(bs, sector_num << BDRV_SECTOR_BITS, false, NULL);
    start = offset;
    allocated = (offset != -1);
    *pnum = 0;
+    ret = 0;

    do {
        /* All sectors in a block are contiguous (without using the bitmap) */
@@ -723,15 +736,18 @@ static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
         * sectors since there is always a bitmap in between. */
        if (allocated) {
            *file = bs->file->bs;
-            return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
+            ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
+            break;
        }
        if (nb_sectors == 0) {
            break;
        }
-        offset = get_sector_offset(bs, sector_num, 0);
+        offset = get_image_offset(bs, sector_num << BDRV_SECTOR_BITS, false,
+                                  NULL);
    } while (offset == -1);

-    return 0;
+    qemu_co_mutex_unlock(&s->lock);
+    return ret;
 }

 /*
@@ -858,7 +874,7 @@ static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
    /* Add footer to total size */
    total_size += HEADER_SIZE;

-    ret = blk_truncate(blk, total_size, errp);
+    ret = blk_truncate(blk, total_size, PREALLOC_MODE_OFF, errp);
    if (ret < 0) {
        return ret;
    }
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -71,6 +71,17 @@ void nonono(const char* file, int line, const char* msg) {

 #endif

+/* Boot sector OEM name. See related compatibility problems at:
+ * https://jdebp.eu/FGA/volume-boot-block-oem-name-field.html
+ * http://seasip.info/Misc/oemid.html
+ */
+#define BOOTSECTOR_OEM_NAME "MSWIN4.1"
+
+#define DIR_DELETED 0xe5
+#define DIR_KANJI DIR_DELETED
+#define DIR_KANJI_FAKE 0x05
+#define DIR_FREE 0x00
+
 /* dynamic array functions */
 typedef struct array_t {
    char* pointer;
@@ -104,6 +115,7 @@ static inline int array_ensure_allocated(array_t* array, int index)
        array->pointer = g_realloc(array->pointer, new_size);
        if (!array->pointer)
            return -1;
+        memset(array->pointer + array->size, 0, new_size - array->size);
        array->size = new_size;
        array->next = index + 1;
    }
@@ -466,7 +478,7 @@ static direntry_t *create_long_filename(BDRVVVFATState *s, const char *filename)

 static char is_free(const direntry_t* direntry)
 {
-    return direntry->name[0]==0xe5 || direntry->name[0]==0x00;
+    return direntry->name[0] == DIR_DELETED || direntry->name[0] == DIR_FREE;
 }

 static char is_volume_label(const direntry_t* direntry)
@@ -487,7 +499,7 @@ static char is_short_name(const direntry_t* direntry)

 static char is_directory(const direntry_t* direntry)
 {
-    return direntry->attributes & 0x10 && direntry->name[0] != 0xe5;
+    return direntry->attributes & 0x10 && direntry->name[0] != DIR_DELETED;
 }

 static inline char is_dot(const direntry_t* direntry)
@@ -537,7 +549,7 @@ static direntry_t *create_short_filename(BDRVVVFATState *s,
    const gchar *p, *last_dot = NULL;
    gunichar c;
    bool lossy_conversion = false;
-    char tail[11];
+    char tail[8];

    if (!entry) {
        return NULL;
@@ -589,8 +601,8 @@ static direntry_t *create_short_filename(BDRVVVFATState *s,
        }
    }

-    if (entry->name[0] == 0xe5) {
-        entry->name[0] = 0x05;
+    if (entry->name[0] == DIR_KANJI) {
+        entry->name[0] = DIR_KANJI_FAKE;
    }

    /* numeric-tail generation */
@@ -602,7 +614,8 @@ static direntry_t *create_short_filename(BDRVVVFATState *s,
    for (i = lossy_conversion ? 1 : 0; i < 999999; i++) {
        direntry_t *entry1;
        if (i > 0) {
-            int len = sprintf(tail, "~%d", i);
+            int len = snprintf(tail, sizeof(tail), "~%u", (unsigned)i);
+            assert(len <= 7);
            memcpy(entry->name + MIN(j, 8 - len), tail, len);
        }
        for (entry1 = array_get(&(s->directory), directory_start);
@@ -1023,7 +1036,7 @@ static int init_directories(BDRVVVFATState* s,
    bootsector->jump[0]=0xeb;
    bootsector->jump[1]=0x3e;
    bootsector->jump[2]=0x90;
-    memcpy(bootsector->name, "MSWIN4.1", 8);
+    memcpy(bootsector->name, BOOTSECTOR_OEM_NAME, 8);
    bootsector->sector_size=cpu_to_le16(0x200);
    bootsector->sectors_per_cluster=s->sectors_per_cluster;
    bootsector->reserved_sectors=cpu_to_le16(1);
@@ -1658,6 +1671,7 @@ typedef struct {
     * filename length is 0x3f * 13 bytes.
     */
    unsigned char name[0x3f * 13 + 1];
+    gunichar2 name2[0x3f * 13 + 1];
    int checksum, len;
    int sequence_number;
 } long_file_name;
@@ -1679,16 +1693,21 @@ static int parse_long_name(long_file_name* lfn,
        return 1;

    if (pointer[0] & 0x40) {
+        /* first entry; do some initialization */
        lfn->sequence_number = pointer[0] & 0x3f;
        lfn->checksum = pointer[13];
        lfn->name[0] = 0;
        lfn->name[lfn->sequence_number * 13] = 0;
-    } else if ((pointer[0] & 0x3f) != --lfn->sequence_number)
+    } else if ((pointer[0] & 0x3f) != --lfn->sequence_number) {
+        /* not the expected sequence number */
        return -1;
-    else if (pointer[13] != lfn->checksum)
+    } else if (pointer[13] != lfn->checksum) {
+        /* not the expected checksum */
        return -2;
-    else if (pointer[12] || pointer[26] || pointer[27])
+    } else if (pointer[12] || pointer[26] || pointer[27]) {
+        /* invalid zero fields */
        return -3;
+    }

    offset = 13 * (lfn->sequence_number - 1);
    for (i = 0, j = 1; i < 13; i++, j+=2) {
@@ -1697,16 +1716,29 @@ static int parse_long_name(long_file_name* lfn,
        else if (j == 26)
            j = 28;

-        if (pointer[j+1] == 0)
-            lfn->name[offset + i] = pointer[j];
-        else if (pointer[j+1] != 0xff || (pointer[0] & 0x40) == 0)
-            return -4;
-        else
-            lfn->name[offset + i] = 0;
+        if (pointer[j] == 0 && pointer[j + 1] == 0) {
+            /* end of long file name */
+            break;
+        }
+        gunichar2 c = (pointer[j + 1] << 8) + pointer[j];
+        lfn->name2[offset + i] = c;
    }

-    if (pointer[0] & 0x40)
-        lfn->len = offset + strlen((char*)lfn->name + offset);
+    if (pointer[0] & 0x40) {
+        /* first entry; set len */
+        lfn->len = offset + i;
+    }
+    if ((pointer[0] & 0x3f) == 0x01) {
+        /* last entry; finalize entry */
+        glong olen;
+        gchar *utf8 = g_utf16_to_utf8(lfn->name2, lfn->len, NULL, &olen, NULL);
+        if (!utf8) {
+            return -4;
+        }
+        lfn->len = olen;
+        memcpy(lfn->name, utf8, olen + 1);
+        g_free(utf8);
+    }

    return 0;
 }
@@ -1722,12 +1754,14 @@ static int parse_short_name(BDRVVVFATState* s,

    for (j = 7; j >= 0 && direntry->name[j] == ' '; j--);
    for (i = 0; i <= j; i++) {
-        if (direntry->name[i] <= ' ' || direntry->name[i] > 0x7f)
+        uint8_t c = direntry->name[i];
+        if (c != to_valid_short_char(c)) {
            return -1;
-        else if (s->downcase_short_names)
+        } else if (s->downcase_short_names) {
            lfn->name[i] = qemu_tolower(direntry->name[i]);
-        else
+        } else {
            lfn->name[i] = direntry->name[i];
+        }
    }

    for (j = 2; j >= 0 && direntry->name[8 + j] == ' '; j--) {
@@ -1737,7 +1771,7 @@ static int parse_short_name(BDRVVVFATState* s,
        lfn->name[i + j + 1] = '\0';
        for (;j >= 0; j--) {
            uint8_t c = direntry->name[8 + j];
-            if (c <= ' ' || c > 0x7f) {
+            if (c != to_valid_short_char(c)) {
                return -2;
            } else if (s->downcase_short_names) {
                lfn->name[i + j] = qemu_tolower(c);
@@ -1748,8 +1782,8 @@ static int parse_short_name(BDRVVVFATState* s,
    } else
        lfn->name[i + j + 1] = '\0';

-    if (lfn->name[0] == 0x05) {
-        lfn->name[0] = 0xe5;
+    if (lfn->name[0] == DIR_KANJI_FAKE) {
+        lfn->name[0] = DIR_KANJI;
    }
    lfn->len = strlen((char*)lfn->name);

@@ -2955,7 +2989,6 @@ DLOG(checkpoint());
    /*
     * Some sanity checks:
     * - do not allow writing to the boot sector
-     * - do not allow to write non-ASCII filenames
     */

    if (sector_num < s->offset_to_fat)
@@ -2989,13 +3022,8 @@ DLOG(checkpoint());
                direntries = (direntry_t*)(buf + 0x200 * (begin - sector_num));

                for (k = 0; k < (end - begin) * 0x10; k++) {
-                    /* do not allow non-ASCII filenames */
-                    if (parse_long_name(&lfn, direntries + k) < 0) {
-                        fprintf(stderr, "Warning: non-ASCII filename\n");
-                        return -1;
-                    }
                    /* no access to the direntry of a read-only file */
-                    else if (is_short_name(direntries+k) &&
+                    if (is_short_name(direntries + k) &&
                            (direntries[k].attributes & 1)) {
                        if (memcmp(direntries + k,
                                    array_get(&(s->directory), dir_index + k),
@@ -3078,8 +3106,14 @@ static int coroutine_fn
 write_target_commit(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
                    QEMUIOVector *qiov, int flags)
 {
+    int ret;
+
    BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque);
-    return try_commit(s);
+    qemu_co_mutex_lock(&s->lock);
+    ret = try_commit(s);
+    qemu_co_mutex_unlock(&s->lock);
+
+    return ret;
 }

 static void write_target_close(BlockDriverState *bs) {
--- a/blockdev.c
+++ b/blockdev.c
@@ -593,10 +593,6 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,

        bs->detect_zeroes = detect_zeroes;

-        if (bdrv_key_required(bs)) {
-            autostart = 0;
-        }
-
        block_acct_setup(blk_get_stats(blk), account_invalid, account_failed);

        if (!parse_stats_intervals(blk_get_stats(blk), interval_list, errp)) {
@@ -914,7 +910,7 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
    copy_on_read = qemu_opt_get_bool(legacy_opts, "copy-on-read", false);

    if (read_only && copy_on_read) {
-        error_report("warning: disabling copy-on-read on read-only drive");
+        warn_report("disabling copy-on-read on read-only drive");
        copy_on_read = false;
    }

@@ -1714,7 +1710,8 @@ static void external_snapshot_prepare(BlkActionState *common,
        }

        flags = state->old_bs->open_flags;
-        flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
+        flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_COPY_ON_READ);
+        flags |= BDRV_O_NO_BACKING;

        /* create new image w/backing file */
        mode = s->has_mode ? s->mode : NEW_IMAGE_MODE_ABSOLUTE_PATHS;
@@ -1739,8 +1736,6 @@ static void external_snapshot_prepare(BlkActionState *common,
            qdict_put_str(options, "node-name", snapshot_node_name);
        }
        qdict_put_str(options, "driver", format);
-
-        flags |= BDRV_O_NO_BACKING;
    }

    state->new_bs = bdrv_open(new_image_file, snapshot_ref, options, flags,
@@ -1987,6 +1982,8 @@ static void block_dirty_bitmap_add_prepare(BlkActionState *common,
    /* AIO context taken and released within qmp_block_dirty_bitmap_add */
    qmp_block_dirty_bitmap_add(action->node, action->name,
                               action->has_granularity, action->granularity,
+                               action->has_persistent, action->persistent,
+                               action->has_autoload, action->autoload,
                               &local_err);

    if (!local_err) {
@@ -2037,6 +2034,9 @@ static void block_dirty_bitmap_clear_prepare(BlkActionState *common,
    } else if (!bdrv_dirty_bitmap_enabled(state->bitmap)) {
        error_setg(errp, "Cannot clear a disabled bitmap");
        return;
+    } else if (bdrv_dirty_bitmap_readonly(state->bitmap)) {
+        error_setg(errp, "Cannot clear a readonly bitmap");
+        return;
    }

    bdrv_clear_dirty_bitmap(state->bitmap, &state->backup);
@@ -2265,24 +2265,8 @@ void qmp_block_passwd(bool has_device, const char *device,
                      bool has_node_name, const char *node_name,
                      const char *password, Error **errp)
 {
-    Error *local_err = NULL;
-    BlockDriverState *bs;
-    AioContext *aio_context;
-
-    bs = bdrv_lookup_bs(has_device ? device : NULL,
-                        has_node_name ? node_name : NULL,
-                        &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        return;
-    }
-
-    aio_context = bdrv_get_aio_context(bs);
-    aio_context_acquire(aio_context);
-
-    bdrv_add_key(bs, password, errp);
-
-    aio_context_release(aio_context);
+    error_setg(errp,
+               "Setting block passwords directly is no longer supported");
 }

 /*
@@ -2591,12 +2575,6 @@ void qmp_blockdev_change_medium(bool has_device, const char *device,
        goto fail;
    }

-    bdrv_add_key(medium_bs, NULL, &err);
-    if (err) {
-        error_propagate(errp, err);
-        goto fail;
-    }
-
    rc = do_open_tray(has_device ? device : NULL,
                      has_id ? id : NULL,
                      false, &err);
@@ -2731,9 +2709,12 @@ out:

 void qmp_block_dirty_bitmap_add(const char *node, const char *name,
                                bool has_granularity, uint32_t granularity,
+                                bool has_persistent, bool persistent,
+                                bool has_autoload, bool autoload,
                                Error **errp)
 {
    BlockDriverState *bs;
+    BdrvDirtyBitmap *bitmap;

    if (!name || name[0] == '\0') {
        error_setg(errp, "Bitmap name cannot be empty");
@@ -2756,7 +2737,32 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name,
        granularity = bdrv_get_default_bitmap_granularity(bs);
    }

-    bdrv_create_dirty_bitmap(bs, granularity, name, errp);
+    if (!has_persistent) {
+        persistent = false;
+    }
+    if (!has_autoload) {
+        autoload = false;
+    }
+
+    if (has_autoload && !persistent) {
+        error_setg(errp, "Autoload flag must be used only for persistent "
+                         "bitmaps");
+        return;
+    }
+
+    if (persistent &&
+        !bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp))
+    {
+        return;
+    }
+
+    bitmap = bdrv_create_dirty_bitmap(bs, granularity, name, errp);
+    if (bitmap == NULL) {
+        return;
+    }
+
+    bdrv_dirty_bitmap_set_persistance(bitmap, persistent);
+    bdrv_dirty_bitmap_set_autoload(bitmap, autoload);
 }

 void qmp_block_dirty_bitmap_remove(const char *node, const char *name,
@@ -2764,6 +2770,7 @@ void qmp_block_dirty_bitmap_remove(const char *node, const char *name,
 {
    BlockDriverState *bs;
    BdrvDirtyBitmap *bitmap;
+    Error *local_err = NULL;

    bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp);
    if (!bitmap || !bs) {
@@ -2776,6 +2783,15 @@ void qmp_block_dirty_bitmap_remove(const char *node, const char *name,
                   name);
        return;
    }
+
+    if (bdrv_dirty_bitmap_get_persistance(bitmap)) {
+        bdrv_remove_persistent_dirty_bitmap(bs, name, &local_err);
+        if (local_err != NULL) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+
    bdrv_dirty_bitmap_make_anon(bitmap);
    bdrv_release_dirty_bitmap(bs, bitmap);
 }
@@ -2805,11 +2821,39 @@ void qmp_block_dirty_bitmap_clear(const char *node, const char *name,
                   "Bitmap '%s' is currently disabled and cannot be cleared",
                   name);
        return;
+    } else if (bdrv_dirty_bitmap_readonly(bitmap)) {
+        error_setg(errp, "Bitmap '%s' is readonly and cannot be cleared", name);
+        return;
    }

    bdrv_clear_dirty_bitmap(bitmap, NULL);
 }

+BlockDirtyBitmapSha256 *qmp_x_debug_block_dirty_bitmap_sha256(const char *node,
+                                                              const char *name,
+                                                              Error **errp)
+{
+    BdrvDirtyBitmap *bitmap;
+    BlockDriverState *bs;
+    BlockDirtyBitmapSha256 *ret = NULL;
+    char *sha256;
+
+    bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp);
+    if (!bitmap || !bs) {
+        return NULL;
+    }
+
+    sha256 = bdrv_dirty_bitmap_sha256(bitmap, errp);
+    if (sha256 == NULL) {
+        return NULL;
+    }
+
+    ret = g_new(BlockDirtyBitmapSha256, 1);
+    ret->sha256 = sha256;
+
+    return ret;
+}
+
 void hmp_drive_del(Monitor *mon, const QDict *qdict)
 {
    const char *id = qdict_get_str(qdict, "id");
@@ -2913,7 +2957,7 @@ void qmp_block_resize(bool has_device, const char *device,
    }

    bdrv_drained_begin(bs);
-    ret = blk_truncate(blk, size, errp);
+    ret = blk_truncate(blk, size, PREALLOC_MODE_OFF, errp);
    bdrv_drained_end(bs);

 out:
@@ -3503,6 +3547,9 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
        backing_mode = MIRROR_OPEN_BACKING_CHAIN;
    }

+    /* Don't open backing image in create() */
+    flags |= BDRV_O_NO_BACKING;
+
    if ((arg->sync == MIRROR_SYNC_MODE_FULL || !source)
        && arg->mode != NEW_IMAGE_MODE_EXISTING)
    {
@@ -3542,8 +3589,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
    /* Mirroring takes care of copy-on-write using the source's backing
     * file.
     */
-    target_bs = bdrv_open(arg->target, NULL, options,
-                          flags | BDRV_O_NO_BACKING, errp);
+    target_bs = bdrv_open(arg->target, NULL, options, flags, errp);
    if (!target_bs) {
        goto out;
    }
@@ -3866,13 +3912,6 @@ void qmp_blockdev_add(BlockdevOptions *options, Error **errp)

    QTAILQ_INSERT_TAIL(&monitor_bdrv_states, bs, monitor_list);

-    if (bs && bdrv_key_required(bs)) {
-        QTAILQ_REMOVE(&monitor_bdrv_states, bs, monitor_list);
-        bdrv_unref(bs);
-        error_setg(errp, "blockdev-add doesn't support encrypted devices");
-        goto fail;
-    }
-
 fail:
    visit_free(v);
 }
--- a/bsd-user/qemu.h
+++ b/bsd-user/qemu.h
@@ -85,6 +85,8 @@ struct emulated_sigtable {
 /* NOTE: we force a big alignment so that the stack stored after is
   aligned too */
 typedef struct TaskState {
+    pid_t ts_tid;     /* tid (or pid) of this task */
+
    struct TaskState *next;
    int used; /* non zero if used */
    struct image_info *info;
--- a/chardev/char-fe.c
+++ b/chardev/char-fe.c
@@ -179,9 +179,21 @@ void qemu_chr_fe_printf(CharBackend *be, const char *fmt, ...)

 Chardev *qemu_chr_fe_get_driver(CharBackend *be)
 {
+    /* this is unsafe for the users that support chardev hotswap */
+    assert(be->chr_be_change == NULL);
    return be->chr;
 }

+bool qemu_chr_fe_backend_connected(CharBackend *be)
+{
+    return !!be->chr;
+}
+
+bool qemu_chr_fe_backend_open(CharBackend *be)
+{
+    return be->chr && be->chr->be_open;
+}
+
 bool qemu_chr_fe_init(CharBackend *b, Chardev *s, Error **errp)
 {
    int tag = 0;
@@ -216,7 +228,7 @@ void qemu_chr_fe_deinit(CharBackend *b, bool del)
    assert(b);

    if (b->chr) {
-        qemu_chr_fe_set_handlers(b, NULL, NULL, NULL, NULL, NULL, true);
+        qemu_chr_fe_set_handlers(b, NULL, NULL, NULL, NULL, NULL, NULL, true);
        if (b->chr->be == b) {
            b->chr->be = NULL;
        }
@@ -235,6 +247,7 @@ void qemu_chr_fe_set_handlers(CharBackend *b,
                              IOCanReadHandler *fd_can_read,
                              IOReadHandler *fd_read,
                              IOEventHandler *fd_event,
+                              BackendChangeHandler *be_change,
                              void *opaque,
                              GMainContext *context,
                              bool set_open)
@@ -258,6 +271,7 @@ void qemu_chr_fe_set_handlers(CharBackend *b,
    b->chr_can_read = fd_can_read;
    b->chr_read = fd_read;
    b->chr_event = fd_event;
+    b->chr_be_change = be_change;
    b->opaque = opaque;
    if (cc->chr_update_read_handler) {
        cc->chr_update_read_handler(s, context);
--- a/chardev/char-mux.c
+++ b/chardev/char-mux.c
@@ -278,6 +278,7 @@ void mux_chr_set_handlers(Chardev *chr, GMainContext *context)
                             mux_chr_can_read,
                             mux_chr_read,
                             mux_chr_event,
+                             NULL,
                             chr,
                             context, true);
 }
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -454,7 +454,9 @@ static int tcp_chr_sync_read(Chardev *chr, const uint8_t *buf, int len)
        return 0;
    }

+    qio_channel_set_blocking(s->ioc, true, NULL);
    size = tcp_chr_recv(chr, (void *) buf, len);
+    qio_channel_set_blocking(s->ioc, false, NULL);
    if (size == 0) {
        /* connection closed */
        tcp_chr_disconnect(chr);
@@ -765,8 +767,8 @@ static int tcp_chr_wait_connected(Chardev *chr, Error **errp)
     * in TLS and telnet cases, only wait for an accepted socket */
    while (!s->ioc) {
        if (s->is_listen) {
-            error_report("QEMU waiting for connection on: %s",
-                         chr->filename);
+            info_report("QEMU waiting for connection on: %s",
+                        chr->filename);
            qio_channel_set_blocking(QIO_CHANNEL(s->listen_ioc), true, NULL);
            tcp_chr_accept(QIO_CHANNEL(s->listen_ioc), G_IO_IN, chr);
            qio_channel_set_blocking(QIO_CHANNEL(s->listen_ioc), false, NULL);
--- a/chardev/char.c
+++ b/chardev/char.c
@@ -556,17 +556,23 @@ help_string_append(const char *name, void *opaque)
    g_string_append_printf(str, "\n%s", name);
 }

-Chardev *qemu_chr_new_from_opts(QemuOpts *opts,
-                                Error **errp)
+static const char *chardev_alias_translate(const char *name)
+{
+    int i;
+    for (i = 0; i < (int)ARRAY_SIZE(chardev_alias_table); i++) {
+        if (g_strcmp0(chardev_alias_table[i].alias, name) == 0) {
+            return chardev_alias_table[i].typename;
+        }
+    }
+    return name;
+}
+
+ChardevBackend *qemu_chr_parse_opts(QemuOpts *opts, Error **errp)
 {
    Error *local_err = NULL;
    const ChardevClass *cc;
-    Chardev *chr;
-    int i;
    ChardevBackend *backend = NULL;
-    const char *name = qemu_opt_get(opts, "backend");
-    const char *id = qemu_opts_id(opts);
-    char *bid = NULL;
+    const char *name = chardev_alias_translate(qemu_opt_get(opts, "backend"));

    if (name == NULL) {
        error_setg(errp, "chardev: \"%s\" missing backend",
@@ -574,7 +580,40 @@ Chardev *qemu_chr_new_from_opts(QemuOpts *opts,
        return NULL;
    }

-    if (is_help_option(name)) {
+    cc = char_get_class(name, errp);
+    if (cc == NULL) {
+        return NULL;
+    }
+
+    backend = g_new0(ChardevBackend, 1);
+    backend->type = CHARDEV_BACKEND_KIND_NULL;
+
+    if (cc->parse) {
+        cc->parse(opts, backend, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            qapi_free_ChardevBackend(backend);
+            return NULL;
+        }
+    } else {
+        ChardevCommon *ccom = g_new0(ChardevCommon, 1);
+        qemu_chr_parse_common(opts, ccom);
+        backend->u.null.data = ccom; /* Any ChardevCommon member would work */
+    }
+
+    return backend;
+}
+
+Chardev *qemu_chr_new_from_opts(QemuOpts *opts, Error **errp)
+{
+    const ChardevClass *cc;
+    Chardev *chr = NULL;
+    ChardevBackend *backend = NULL;
+    const char *name = chardev_alias_translate(qemu_opt_get(opts, "backend"));
+    const char *id = qemu_opts_id(opts);
+    char *bid = NULL;
+
+    if (name && is_help_option(name)) {
        GString *str = g_string_new("");

        chardev_name_foreach(help_string_append, str);
@@ -589,38 +628,20 @@ Chardev *qemu_chr_new_from_opts(QemuOpts *opts,
        return NULL;
    }

-    for (i = 0; i < (int)ARRAY_SIZE(chardev_alias_table); i++) {
-        if (g_strcmp0(chardev_alias_table[i].alias, name) == 0) {
-            name = chardev_alias_table[i].typename;
-            break;
-        }
+    backend = qemu_chr_parse_opts(opts, errp);
+    if (backend == NULL) {
+        return NULL;
    }

    cc = char_get_class(name, errp);
    if (cc == NULL) {
-        return NULL;
+        goto out;
    }

-    backend = g_new0(ChardevBackend, 1);
-    backend->type = CHARDEV_BACKEND_KIND_NULL;
-
    if (qemu_opt_get_bool(opts, "mux", 0)) {
        bid = g_strdup_printf("%s-base", id);
    }

-    chr = NULL;
-    if (cc->parse) {
-        cc->parse(opts, backend, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            goto out;
-        }
-    } else {
-        ChardevCommon *ccom = g_new0(ChardevCommon, 1);
-        qemu_chr_parse_common(opts, ccom);
-        backend->u.null.data = ccom; /* Any ChardevCommon member would work */
-    }
-
    chr = qemu_chardev_new(bid ? bid : id,
                           object_class_get_name(OBJECT_CLASS(cc)),
                           backend, errp);
@@ -930,6 +951,89 @@ ChardevReturn *qmp_chardev_add(const char *id, ChardevBackend *backend,
    return ret;
 }

+ChardevReturn *qmp_chardev_change(const char *id, ChardevBackend *backend,
+                                  Error **errp)
+{
+    CharBackend *be;
+    const ChardevClass *cc;
+    Chardev *chr, *chr_new;
+    bool closed_sent = false;
+    ChardevReturn *ret;
+
+    chr = qemu_chr_find(id);
+    if (!chr) {
+        error_setg(errp, "Chardev '%s' does not exist", id);
+        return NULL;
+    }
+
+    if (CHARDEV_IS_MUX(chr)) {
+        error_setg(errp, "Mux device hotswap not supported yet");
+        return NULL;
+    }
+
+    if (qemu_chr_replay(chr)) {
+        error_setg(errp,
+            "Chardev '%s' cannot be changed in record/replay mode", id);
+        return NULL;
+    }
+
+    be = chr->be;
+    if (!be) {
+        /* easy case */
+        object_unparent(OBJECT(chr));
+        return qmp_chardev_add(id, backend, errp);
+    }
+
+    if (!be->chr_be_change) {
+        error_setg(errp, "Chardev user does not support chardev hotswap");
+        return NULL;
+    }
+
+    cc = char_get_class(ChardevBackendKind_lookup[backend->type], errp);
+    if (!cc) {
+        return NULL;
+    }
+
+    chr_new = qemu_chardev_new(NULL, object_class_get_name(OBJECT_CLASS(cc)),
+                               backend, errp);
+    if (!chr_new) {
+        return NULL;
+    }
+    chr_new->label = g_strdup(id);
+
+    if (chr->be_open && !chr_new->be_open) {
+        qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
+        closed_sent = true;
+    }
+
+    chr->be = NULL;
+    qemu_chr_fe_init(be, chr_new, &error_abort);
+
+    if (be->chr_be_change(be->opaque) < 0) {
+        error_setg(errp, "Chardev '%s' change failed", chr_new->label);
+        chr_new->be = NULL;
+        qemu_chr_fe_init(be, chr, &error_abort);
+        if (closed_sent) {
+            qemu_chr_be_event(chr, CHR_EVENT_OPENED);
+        }
+        object_unref(OBJECT(chr_new));
+        return NULL;
+    }
+
+    object_unparent(OBJECT(chr));
+    object_property_add_child(get_chardevs_root(), chr_new->label,
+                              OBJECT(chr_new), &error_abort);
+    object_unref(OBJECT(chr_new));
+
+    ret = g_new0(ChardevReturn, 1);
+    if (CHARDEV_IS_PTY(chr_new)) {
+        ret->pty = g_strdup(chr_new->filename + 4);
+        ret->has_pty = true;
+    }
+
+    return ret;
+}
+
 void qmp_chardev_remove(const char *id, Error **errp)
 {
    Chardev *chr;
--- a/86
+++ b/86
@@ -375,6 +375,7 @@ libnfs=""
 coroutine=""
 coroutine_pool=""
 debug_stack_usage="no"
+crypto_afalg="no"
 seccomp=""
 glusterfs=""
 glusterfs_xlator_opt="no"
@@ -1124,6 +1125,10 @@ for opt do
  ;;
  --enable-debug-stack-usage) debug_stack_usage="yes"
  ;;
+  --enable-crypto-afalg) crypto_afalg="yes"
+  ;;
+  --disable-crypto-afalg) crypto_afalg="no"
+  ;;
  --disable-docs) docs="no"
  ;;
  --enable-docs) docs="yes"
@@ -1518,6 +1523,7 @@ disabled with --disable-FEATURE, default is enabled if available:
  qom-cast-debug  cast debugging support
  tools           build qemu-io, qemu-nbd and qemu-image tools
  vxhs            Veritas HyperScale vDisk backend support
+  crypto-afalg    Linux AF_ALG crypto backend driver

 NOTE: The object files are built at the place where configure is launched
 EOF
@@ -1583,7 +1589,7 @@ gcc_flags="-Wold-style-declaration -Wold-style-definition -Wtype-limits"
 gcc_flags="-Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers $gcc_flags"
 gcc_flags="-Wno-missing-include-dirs -Wempty-body -Wnested-externs $gcc_flags"
 gcc_flags="-Wendif-labels -Wno-shift-negative-value $gcc_flags"
-gcc_flags="-Wno-initializer-overrides $gcc_flags"
+gcc_flags="-Wno-initializer-overrides -Wexpansion-to-defined $gcc_flags"
 gcc_flags="-Wno-string-plus-int $gcc_flags"
 # Note that we do not add -Werror to gcc_flags here, because that would
 # enable it for all configure tests. If a configure test failed due
@@ -2107,6 +2113,24 @@ EOF
    # Xen unstable
    elif
        cat > $TMPC <<EOF &&
+#undef XC_WANT_COMPAT_MAP_FOREIGN_API
+#include <xenforeignmemory.h>
+int main(void) {
+  xenforeignmemory_handle *xfmem;
+
+  xfmem = xenforeignmemory_open(0, 0);
+  xenforeignmemory_map2(xfmem, 0, 0, 0, 0, 0, 0, 0);
+
+  return 0;
+}
+EOF
+        compile_prog "" "$xen_libs -lxendevicemodel $xen_stable_libs"
+      then
+      xen_stable_libs="-lxendevicemodel $xen_stable_libs"
+      xen_ctrl_version=41000
+      xen=yes
+    elif
+        cat > $TMPC <<EOF &&
 #undef XC_WANT_COMPAT_DEVICEMODEL_API
 #define __XEN_TOOLS__
 #include <xendevicemodel.h>
@@ -2733,7 +2757,11 @@ int main( void ) { return SDL_Init (SDL_INIT_VIDEO); }
 EOF
  sdl_cflags=$($sdlconfig --cflags 2>/dev/null)
  if test "$static" = "yes" ; then
-    sdl_libs=$($sdlconfig --static-libs 2>/dev/null)
+    if $pkg_config $sdlname --exists; then
+      sdl_libs=$($pkg_config $sdlname --static --libs 2>/dev/null)
+    else
+      sdl_libs=$($sdlconfig --static-libs 2>/dev/null)
+    fi
  else
    sdl_libs=$($sdlconfig --libs 2>/dev/null)
  fi
@@ -4767,7 +4795,7 @@ fi
 if test "$fortify_source" != "no"; then
  if echo | $cc -dM -E - | grep __clang__ > /dev/null 2>&1 ; then
    fortify_source="no";
-  elif test -n "$cxx" &&
+  elif test -n "$cxx" && has $cxx &&
       echo | $cxx -dM -E - | grep __clang__ >/dev/null 2>&1 ; then
    fortify_source="no";
  else
@@ -4830,6 +4858,32 @@ if compile_prog "" "" ; then
    have_af_vsock=yes
 fi

+##########################################
+# check for usable AF_ALG environment
+have_afalg=no
+cat > $TMPC << EOF
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <linux/if_alg.h>
+int main(void) {
+    int sock;
+    sock = socket(AF_ALG, SOCK_SEQPACKET, 0);
+    return sock;
+}
+EOF
+if compile_prog "" "" ; then
+    have_afalg=yes
+fi
+if test "$crypto_afalg" = "yes"
+then
+    if test "$have_afalg" != "yes"
+    then
+        error_exit "AF_ALG requested but could not be detected"
+    fi
+fi
+
+
 #################################################
 # Sparc implicitly links with --relax, which is
 # incompatible with -r, so --no-relax should be
@@ -4910,6 +4964,21 @@ if compile_prog "" "" ; then
    have_static_assert=yes
 fi

+##########################################
+# check for utmpx.h, it is missing e.g. on OpenBSD
+
+have_utmpx=no
+cat > $TMPC << EOF
+#include <utmpx.h>
+struct utmpx user_info;
+int main(void) {
+    return 0;
+}
+EOF
+if compile_prog "" "" ; then
+    have_utmpx=yes
+fi
+
 ##########################################
 # End of CC checks
 # After here, no more $cc or $ld runs
@@ -5296,6 +5365,7 @@ echo "seccomp support   $seccomp"
 echo "coroutine backend $coroutine"
 echo "coroutine pool    $coroutine_pool"
 echo "debug stack usage $debug_stack_usage"
+echo "crypto afalg      $crypto_afalg"
 echo "GlusterFS support $glusterfs"
 echo "gcov              $gcov_tool"
 echo "gcov enabled      $gcov"
@@ -5807,6 +5877,10 @@ if test "$debug_stack_usage" = "yes" ; then
  echo "CONFIG_DEBUG_STACK_USAGE=y" >> $config_host_mak
 fi

+if test "$crypto_afalg" = "yes" ; then
+  echo "CONFIG_AF_ALG=y" >> $config_host_mak
+fi
+
 if test "$open_by_handle_at" = "yes" ; then
  echo "CONFIG_OPEN_BY_HANDLE=y" >> $config_host_mak
 fi
@@ -5955,6 +6029,10 @@ if test "$have_static_assert" = "yes" ; then
  echo "CONFIG_STATIC_ASSERT=y" >> $config_host_mak
 fi

+if test "$have_utmpx" = "yes" ; then
+  echo "HAVE_UTMPX=y" >> $config_host_mak
+fi
+
 # Hold two types of flag:
 #   CONFIG_THREAD_SETNAME_BYTHREAD  - we've got a way of setting the name on
 #                                     a thread we have a handle to
@@ -6227,7 +6305,7 @@ case "$target_name" in
    echo "TARGET_ABI32=y" >> $config_target_mak
  ;;
  s390x)
-    gdb_xml_files="s390x-core64.xml s390-acr.xml s390-fpr.xml s390-vx.xml s390-cr.xml s390-virt.xml"
+    gdb_xml_files="s390x-core64.xml s390-acr.xml s390-fpr.xml s390-vx.xml s390-cr.xml s390-virt.xml s390-gs.xml"
  ;;
  tilegx)
  ;;
--- a/cpus.c
+++ b/cpus.c
@@ -557,7 +557,7 @@ void qemu_start_warp_timer(void)
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
-            error_report("WARNING: icount sleep disabled and no active timers");
+            warn_report("icount sleep disabled and no active timers");
            notified = true;
        }
        return;
--- a/crypto/Makefile.objs
+++ b/crypto/Makefile.objs
@@ -10,6 +10,9 @@ crypto-obj-$(if $(CONFIG_NETTLE),n,$(if $(CONFIG_GCRYPT_HMAC),n,y)) += hmac-glib
 crypto-obj-y += aes.o
 crypto-obj-y += desrfb.o
 crypto-obj-y += cipher.o
+crypto-obj-$(CONFIG_AF_ALG) += afalg.o
+crypto-obj-$(CONFIG_AF_ALG) += cipher-afalg.o
+crypto-obj-$(CONFIG_AF_ALG) += hash-afalg.o
 crypto-obj-y += tlscreds.o
 crypto-obj-y += tlscredsanon.o
 crypto-obj-y += tlscredsx509.o
--- a/crypto/afalg.c
+++ b/crypto/afalg.c
@@ -0,0 +1,116 @@
+/*
+ * QEMU Crypto af_alg support
+ *
+ * Copyright (c) 2017 HUAWEI TECHNOLOGIES CO., LTD.
+ *
+ * Authors:
+ *    Longpeng(Mike) <longpeng2@huawei.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "qemu/sockets.h"
+#include "qapi/error.h"
+#include "afalgpriv.h"
+
+static bool
+qcrypto_afalg_build_saddr(const char *type, const char *name,
+                          struct sockaddr_alg *salg, Error **errp)
+{
+    salg->salg_family = AF_ALG;
+
+    if (strnlen(type, SALG_TYPE_LEN_MAX) >= SALG_TYPE_LEN_MAX) {
+        error_setg(errp, "Afalg type(%s) is larger than %d bytes",
+                   type, SALG_TYPE_LEN_MAX);
+        return false;
+    }
+
+    if (strnlen(name, SALG_NAME_LEN_MAX) >= SALG_NAME_LEN_MAX) {
+        error_setg(errp, "Afalg name(%s) is larger than %d bytes",
+                   name, SALG_NAME_LEN_MAX);
+        return false;
+    }
+
+    pstrcpy((char *)salg->salg_type, SALG_TYPE_LEN_MAX, type);
+    pstrcpy((char *)salg->salg_name, SALG_NAME_LEN_MAX, name);
+
+    return true;
+}
+
+static int
+qcrypto_afalg_socket_bind(const char *type, const char *name,
+                          Error **errp)
+{
+    int sbind;
+    struct sockaddr_alg salg = {0};
+
+    if (!qcrypto_afalg_build_saddr(type, name, &salg, errp)) {
+        return -1;
+    }
+
+    sbind = qemu_socket(AF_ALG, SOCK_SEQPACKET, 0);
+    if (sbind < 0) {
+        error_setg_errno(errp, errno, "Failed to create socket");
+        return -1;
+    }
+
+    if (bind(sbind, (const struct sockaddr *)&salg, sizeof(salg)) != 0) {
+        error_setg_errno(errp, errno, "Failed to bind socket");
+        closesocket(sbind);
+        return -1;
+    }
+
+    return sbind;
+}
+
+QCryptoAFAlg *
+qcrypto_afalg_comm_alloc(const char *type, const char *name,
+                         Error **errp)
+{
+    QCryptoAFAlg *afalg;
+
+    afalg = g_new0(QCryptoAFAlg, 1);
+    /* initialize crypto API socket */
+    afalg->opfd = -1;
+    afalg->tfmfd = qcrypto_afalg_socket_bind(type, name, errp);
+    if (afalg->tfmfd == -1) {
+        goto error;
+    }
+
+    afalg->opfd = qemu_accept(afalg->tfmfd, NULL, 0);
+    if (afalg->opfd == -1) {
+        error_setg_errno(errp, errno, "Failed to accept socket");
+        goto error;
+    }
+
+    return afalg;
+
+error:
+    qcrypto_afalg_comm_free(afalg);
+    return NULL;
+}
+
+void qcrypto_afalg_comm_free(QCryptoAFAlg *afalg)
+{
+    if (!afalg) {
+        return;
+    }
+
+    if (afalg->msg) {
+        g_free(afalg->msg->msg_control);
+        g_free(afalg->msg);
+    }
+
+    if (afalg->tfmfd != -1) {
+        closesocket(afalg->tfmfd);
+    }
+
+    if (afalg->opfd != -1) {
+        closesocket(afalg->opfd);
+    }
+
+    g_free(afalg);
+}
--- a/crypto/afalgpriv.h
+++ b/crypto/afalgpriv.h
@@ -0,0 +1,64 @@
+/*
+ * QEMU Crypto af_alg support
+ *
+ * Copyright (c) 2017 HUAWEI TECHNOLOGIES CO., LTD.
+ *
+ * Authors:
+ *    Longpeng(Mike) <longpeng2@huawei.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#ifndef QCRYPTO_AFALGPRIV_H
+#define QCRYPTO_AFALGPRIV_H
+
+#include <linux/if_alg.h>
+
+#define SALG_TYPE_LEN_MAX 14
+#define SALG_NAME_LEN_MAX 64
+
+#ifndef SOL_ALG
+#define SOL_ALG 279
+#endif
+
+#define AFALG_TYPE_CIPHER "skcipher"
+#define AFALG_TYPE_HASH "hash"
+
+#define ALG_OPTYPE_LEN 4
+#define ALG_MSGIV_LEN(len) (sizeof(struct af_alg_iv) + (len))
+
+typedef struct QCryptoAFAlg QCryptoAFAlg;
+
+struct QCryptoAFAlg {
+    int tfmfd;
+    int opfd;
+    struct msghdr *msg;
+    struct cmsghdr *cmsg;
+};
+
+/**
+ * qcrypto_afalg_comm_alloc:
+ * @type: the type of crypto operation
+ * @name: the name of crypto operation
+ *
+ * Allocate a QCryptoAFAlg object and bind it to
+ * an AF_ALG socket.
+ *
+ * Returns:
+ *  a new QCryptoAFAlg object, or NULL on error.
+ */
+QCryptoAFAlg *
+qcrypto_afalg_comm_alloc(const char *type, const char *name,
+                         Error **errp);
+
+/**
+ * qcrypto_afalg_comm_free:
+ * @afalg: the QCryptoAFAlg object
+ *
+ * Free the @afalg.
+ */
+void qcrypto_afalg_comm_free(QCryptoAFAlg *afalg);
+
+#endif
--- a/crypto/block-luks.c
+++ b/crypto/block-luks.c
@@ -638,6 +638,7 @@ qcrypto_block_luks_find_key(QCryptoBlock *block,
 static int
 qcrypto_block_luks_open(QCryptoBlock *block,
                        QCryptoBlockOpenOptions *options,
+                        const char *optprefix,
                        QCryptoBlockReadFunc readfunc,
                        void *opaque,
                        unsigned int flags,
@@ -661,7 +662,8 @@ qcrypto_block_luks_open(QCryptoBlock *block,

    if (!(flags & QCRYPTO_BLOCK_OPEN_NO_IO)) {
        if (!options->u.luks.key_secret) {
-            error_setg(errp, "Parameter 'key-secret' is required for cipher");
+            error_setg(errp, "Parameter '%skey-secret' is required for cipher",
+                       optprefix ? optprefix : "");
            return -1;
        }
        password = qcrypto_secret_lookup_as_utf8(
@@ -885,6 +887,7 @@ qcrypto_block_luks_uuid_gen(uint8_t *uuidstr)
 static int
 qcrypto_block_luks_create(QCryptoBlock *block,
                          QCryptoBlockCreateOptions *options,
+                          const char *optprefix,
                          QCryptoBlockInitFunc initfunc,
                          QCryptoBlockWriteFunc writefunc,
                          void *opaque,
@@ -937,7 +940,8 @@ qcrypto_block_luks_create(QCryptoBlock *block,
     * be silently ignored, for compatibility with dm-crypt */

    if (!options->u.luks.key_secret) {
-        error_setg(errp, "Parameter 'key-secret' is required for cipher");
+        error_setg(errp, "Parameter '%skey-secret' is required for cipher",
+                   optprefix ? optprefix : "");
        return -1;
    }
    password = qcrypto_secret_lookup_as_utf8(luks_opts.key_secret, errp);
--- a/crypto/block-qcow.c
+++ b/crypto/block-qcow.c
@@ -94,6 +94,7 @@ qcrypto_block_qcow_init(QCryptoBlock *block,
 static int
 qcrypto_block_qcow_open(QCryptoBlock *block,
                        QCryptoBlockOpenOptions *options,
+                        const char *optprefix,
                        QCryptoBlockReadFunc readfunc G_GNUC_UNUSED,
                        void *opaque G_GNUC_UNUSED,
                        unsigned int flags,
@@ -104,7 +105,8 @@ qcrypto_block_qcow_open(QCryptoBlock *block,
    } else {
        if (!options->u.qcow.key_secret) {
            error_setg(errp,
-                       "Parameter 'key-secret' is required for cipher");
+                       "Parameter '%skey-secret' is required for cipher",
+                       optprefix ? optprefix : "");
            return -1;
        }
        return qcrypto_block_qcow_init(block,
@@ -116,13 +118,15 @@ qcrypto_block_qcow_open(QCryptoBlock *block,
 static int
 qcrypto_block_qcow_create(QCryptoBlock *block,
                          QCryptoBlockCreateOptions *options,
+                          const char *optprefix,
                          QCryptoBlockInitFunc initfunc G_GNUC_UNUSED,
                          QCryptoBlockWriteFunc writefunc G_GNUC_UNUSED,
                          void *opaque G_GNUC_UNUSED,
                          Error **errp)
 {
    if (!options->u.qcow.key_secret) {
-        error_setg(errp, "Parameter 'key-secret' is required for cipher");
+        error_setg(errp, "Parameter '%skey-secret' is required for cipher",
+                   optprefix ? optprefix : "");
        return -1;
    }
    /* QCow2 has no special header, since everything is hardwired */
--- a/crypto/block.c
+++ b/crypto/block.c
@@ -48,6 +48,7 @@ bool qcrypto_block_has_format(QCryptoBlockFormat format,


 QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options,
+                                 const char *optprefix,
                                 QCryptoBlockReadFunc readfunc,
                                 void *opaque,
                                 unsigned int flags,
@@ -67,7 +68,7 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options,

    block->driver = qcrypto_block_drivers[options->format];

-    if (block->driver->open(block, options,
+    if (block->driver->open(block, options, optprefix,
                            readfunc, opaque, flags, errp) < 0) {
        g_free(block);
        return NULL;
@@ -78,6 +79,7 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options,


 QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options,
+                                   const char *optprefix,
                                   QCryptoBlockInitFunc initfunc,
                                   QCryptoBlockWriteFunc writefunc,
                                   void *opaque,
@@ -97,7 +99,7 @@ QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options,

    block->driver = qcrypto_block_drivers[options->format];

-    if (block->driver->create(block, options, initfunc,
+    if (block->driver->create(block, options, optprefix, initfunc,
                              writefunc, opaque, errp) < 0) {
        g_free(block);
        return NULL;
--- a/crypto/blockpriv.h
+++ b/crypto/blockpriv.h
@@ -41,6 +41,7 @@ struct QCryptoBlock {
 struct QCryptoBlockDriver {
    int (*open)(QCryptoBlock *block,
                QCryptoBlockOpenOptions *options,
+                const char *optprefix,
                QCryptoBlockReadFunc readfunc,
                void *opaque,
                unsigned int flags,
@@ -48,6 +49,7 @@ struct QCryptoBlockDriver {

    int (*create)(QCryptoBlock *block,
                  QCryptoBlockCreateOptions *options,
+                  const char *optprefix,
                  QCryptoBlockInitFunc initfunc,
                  QCryptoBlockWriteFunc writefunc,
                  void *opaque,
--- a/crypto/cipher-afalg.c
+++ b/crypto/cipher-afalg.c
@@ -0,0 +1,226 @@
+/*
+ * QEMU Crypto af_alg-backend cipher support
+ *
+ * Copyright (c) 2017 HUAWEI TECHNOLOGIES CO., LTD.
+ *
+ * Authors:
+ *    Longpeng(Mike) <longpeng2@huawei.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/sockets.h"
+#include "qemu-common.h"
+#include "qapi/error.h"
+#include "crypto/cipher.h"
+#include "cipherpriv.h"
+
+
+static char *
+qcrypto_afalg_cipher_format_name(QCryptoCipherAlgorithm alg,
+                                 QCryptoCipherMode mode,
+                                 Error **errp)
+{
+    char *name;
+    const char *alg_name;
+    const char *mode_name;
+
+    switch (alg) {
+    case QCRYPTO_CIPHER_ALG_AES_128:
+    case QCRYPTO_CIPHER_ALG_AES_192:
+    case QCRYPTO_CIPHER_ALG_AES_256:
+        alg_name = "aes";
+        break;
+    case QCRYPTO_CIPHER_ALG_CAST5_128:
+        alg_name = "cast5";
+        break;
+    case QCRYPTO_CIPHER_ALG_SERPENT_128:
+    case QCRYPTO_CIPHER_ALG_SERPENT_192:
+    case QCRYPTO_CIPHER_ALG_SERPENT_256:
+        alg_name = "serpent";
+        break;
+    case QCRYPTO_CIPHER_ALG_TWOFISH_128:
+    case QCRYPTO_CIPHER_ALG_TWOFISH_192:
+    case QCRYPTO_CIPHER_ALG_TWOFISH_256:
+        alg_name = "twofish";
+        break;
+
+    default:
+        error_setg(errp, "Unsupported cipher algorithm %d", alg);
+        return NULL;
+    }
+
+    mode_name = QCryptoCipherMode_lookup[mode];
+    name = g_strdup_printf("%s(%s)", mode_name, alg_name);
+
+    return name;
+}
+
+QCryptoAFAlg *
+qcrypto_afalg_cipher_ctx_new(QCryptoCipherAlgorithm alg,
+                             QCryptoCipherMode mode,
+                             const uint8_t *key,
+                             size_t nkey, Error **errp)
+{
+    QCryptoAFAlg *afalg;
+    size_t expect_niv;
+    char *name;
+
+    name = qcrypto_afalg_cipher_format_name(alg, mode, errp);
+    if (!name) {
+        return NULL;
+    }
+
+    afalg = qcrypto_afalg_comm_alloc(AFALG_TYPE_CIPHER, name, errp);
+    if (!afalg) {
+        g_free(name);
+        return NULL;
+    }
+
+    g_free(name);
+
+    /* setkey */
+    if (qemu_setsockopt(afalg->tfmfd, SOL_ALG, ALG_SET_KEY, key,
+                        nkey) != 0) {
+        error_setg_errno(errp, errno, "Set key failed");
+        qcrypto_afalg_comm_free(afalg);
+        return NULL;
+    }
+
+    /* prepare msg header */
+    afalg->msg = g_new0(struct msghdr, 1);
+    afalg->msg->msg_controllen += CMSG_SPACE(ALG_OPTYPE_LEN);
+    expect_niv = qcrypto_cipher_get_iv_len(alg, mode);
+    if (expect_niv) {
+        afalg->msg->msg_controllen += CMSG_SPACE(ALG_MSGIV_LEN(expect_niv));
+    }
+    afalg->msg->msg_control = g_new0(uint8_t, afalg->msg->msg_controllen);
+
+    /* We use 1st cmsghdr for crypto-info and 2nd cmsghdr for IV-info */
+    afalg->cmsg = CMSG_FIRSTHDR(afalg->msg);
+    afalg->cmsg->cmsg_type = ALG_SET_OP;
+    afalg->cmsg->cmsg_len = CMSG_SPACE(ALG_OPTYPE_LEN);
+    if (expect_niv) {
+        afalg->cmsg = CMSG_NXTHDR(afalg->msg, afalg->cmsg);
+        afalg->cmsg->cmsg_type = ALG_SET_IV;
+        afalg->cmsg->cmsg_len = CMSG_SPACE(ALG_MSGIV_LEN(expect_niv));
+    }
+    afalg->cmsg = CMSG_FIRSTHDR(afalg->msg);
+
+    return afalg;
+}
+
+static int
+qcrypto_afalg_cipher_setiv(QCryptoCipher *cipher,
+                           const uint8_t *iv,
+                           size_t niv, Error **errp)
+{
+    struct af_alg_iv *alg_iv;
+    size_t expect_niv;
+    QCryptoAFAlg *afalg = cipher->opaque;
+
+    expect_niv = qcrypto_cipher_get_iv_len(cipher->alg, cipher->mode);
+    if (niv != expect_niv) {
+        error_setg(errp, "Set IV len(%zu) not match expected(%zu)",
+                   niv, expect_niv);
+        return -1;
+    }
+
+    /* move ->cmsg to next cmsghdr, for IV-info */
+    afalg->cmsg = CMSG_NXTHDR(afalg->msg, afalg->cmsg);
+
+    /* build setiv msg */
+    afalg->cmsg->cmsg_level = SOL_ALG;
+    alg_iv = (struct af_alg_iv *)CMSG_DATA(afalg->cmsg);
+    alg_iv->ivlen = niv;
+    memcpy(alg_iv->iv, iv, niv);
+
+    return 0;
+}
+
+/* Run @len bytes from @in through the AF_ALG op socket into @out.  Returns 0,
+ * or -1 with @errp set; the control-header length is restored either way.
+ */
+static int
+qcrypto_afalg_cipher_op(QCryptoAFAlg *afalg,
+                        const void *in, void *out,
+                        size_t len, bool do_encrypt,
+                        Error **errp)
+{
+    uint32_t *type = NULL;
+    struct iovec iov;
+    size_t ret, rlen, done = 0;
+    uint32_t origin_controllen = afalg->msg->msg_controllen;
+    int rc = -1;
+
+    /* move ->cmsg to first header, for crypto-info */
+    afalg->cmsg = CMSG_FIRSTHDR(afalg->msg);
+
+    /* build encrypt msg */
+    afalg->cmsg->cmsg_level = SOL_ALG;
+    afalg->msg->msg_iov = &iov;
+    afalg->msg->msg_iovlen = 1;
+    type = (uint32_t *)CMSG_DATA(afalg->cmsg);
+    *type = do_encrypt ? ALG_OP_ENCRYPT : ALG_OP_DECRYPT;
+
+    do {
+        iov.iov_base = (void *)in + done;
+        iov.iov_len = len - done;
+
+        /* send info to AF_ALG core */
+        ret = sendmsg(afalg->opfd, afalg->msg, 0);
+        if (ret == -1) {
+            error_setg_errno(errp, errno, "Send data to AF_ALG core failed");
+            goto out;
+        }
+
+        /* fetch the result; each chunk lands right after the previous one */
+        rlen = read(afalg->opfd, out + done, ret);
+        if (rlen == -1) {
+            error_setg_errno(errp, errno, "Get result from AF_ALG core failed");
+            goto out;
+        }
+        assert(rlen == ret);
+
+        /* do not update IV for following chunks */
+        afalg->msg->msg_controllen = 0;
+        done += ret;
+    } while (done < len);
+    rc = 0;
+
+out:
+    afalg->msg->msg_controllen = origin_controllen;
+    return rc;
+}
+
+static int
+qcrypto_afalg_cipher_encrypt(QCryptoCipher *cipher,
+                             const void *in, void *out,
+                             size_t len, Error **errp)
+{
+    return qcrypto_afalg_cipher_op(cipher->opaque, in, out,
+                                   len, true, errp);
+}
+
+static int
+qcrypto_afalg_cipher_decrypt(QCryptoCipher *cipher,
+                             const void *in, void *out,
+                             size_t len, Error **errp)
+{
+    return qcrypto_afalg_cipher_op(cipher->opaque, in, out,
+                                   len, false, errp);
+}
+
+static void qcrypto_afalg_comm_ctx_free(QCryptoCipher *cipher)
+{
+    qcrypto_afalg_comm_free(cipher->opaque);
+}
+
+struct QCryptoCipherDriver qcrypto_cipher_afalg_driver = {
+    .cipher_encrypt = qcrypto_afalg_cipher_encrypt,
+    .cipher_decrypt = qcrypto_afalg_cipher_decrypt,
+    .cipher_setiv = qcrypto_afalg_cipher_setiv,
+    .cipher_free = qcrypto_afalg_comm_ctx_free,
+};
--- a/crypto/cipher-builtin.c
+++ b/crypto/cipher-builtin.c
@@ -22,6 +22,7 @@
 #include "crypto/aes.h"
 #include "crypto/desrfb.h"
 #include "crypto/xts.h"
+#include "cipherpriv.h"

 typedef struct QCryptoCipherBuiltinAESContext QCryptoCipherBuiltinAESContext;
 struct QCryptoCipherBuiltinAESContext {
@@ -235,23 +236,24 @@ static int qcrypto_cipher_setiv_aes(QCryptoCipher *cipher,



-static int qcrypto_cipher_init_aes(QCryptoCipher *cipher,
-                                   const uint8_t *key, size_t nkey,
-                                   Error **errp)
+static QCryptoCipherBuiltin *
+qcrypto_cipher_init_aes(QCryptoCipherMode mode,
+                        const uint8_t *key, size_t nkey,
+                        Error **errp)
 {
    QCryptoCipherBuiltin *ctxt;

-    if (cipher->mode != QCRYPTO_CIPHER_MODE_CBC &&
-        cipher->mode != QCRYPTO_CIPHER_MODE_ECB &&
-        cipher->mode != QCRYPTO_CIPHER_MODE_XTS) {
+    if (mode != QCRYPTO_CIPHER_MODE_CBC &&
+        mode != QCRYPTO_CIPHER_MODE_ECB &&
+        mode != QCRYPTO_CIPHER_MODE_XTS) {
        error_setg(errp, "Unsupported cipher mode %s",
-                   QCryptoCipherMode_lookup[cipher->mode]);
-        return -1;
+                   QCryptoCipherMode_lookup[mode]);
+        return NULL;
    }

    ctxt = g_new0(QCryptoCipherBuiltin, 1);

-    if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
+    if (mode == QCRYPTO_CIPHER_MODE_XTS) {
        if (AES_set_encrypt_key(key, nkey * 4, &ctxt->state.aes.key.enc) != 0) {
            error_setg(errp, "Failed to set encryption key");
            goto error;
@@ -291,13 +293,11 @@ static int qcrypto_cipher_init_aes(QCryptoCipher *cipher,
    ctxt->encrypt = qcrypto_cipher_encrypt_aes;
    ctxt->decrypt = qcrypto_cipher_decrypt_aes;

-    cipher->opaque = ctxt;
-
-    return 0;
+    return ctxt;

 error:
    g_free(ctxt);
-    return -1;
+    return NULL;
 }


@@ -370,16 +370,17 @@ static int qcrypto_cipher_setiv_des_rfb(QCryptoCipher *cipher,
 }


-static int qcrypto_cipher_init_des_rfb(QCryptoCipher *cipher,
-                                       const uint8_t *key, size_t nkey,
-                                       Error **errp)
+static QCryptoCipherBuiltin *
+qcrypto_cipher_init_des_rfb(QCryptoCipherMode mode,
+                            const uint8_t *key, size_t nkey,
+                            Error **errp)
 {
    QCryptoCipherBuiltin *ctxt;

-    if (cipher->mode != QCRYPTO_CIPHER_MODE_ECB) {
+    if (mode != QCRYPTO_CIPHER_MODE_ECB) {
        error_setg(errp, "Unsupported cipher mode %s",
-                   QCryptoCipherMode_lookup[cipher->mode]);
-        return -1;
+                   QCryptoCipherMode_lookup[mode]);
+        return NULL;
    }

    ctxt = g_new0(QCryptoCipherBuiltin, 1);
@@ -394,9 +395,7 @@ static int qcrypto_cipher_init_des_rfb(QCryptoCipher *cipher,
    ctxt->encrypt = qcrypto_cipher_encrypt_des_rfb;
    ctxt->decrypt = qcrypto_cipher_decrypt_des_rfb;

-    cipher->opaque = ctxt;
-
-    return 0;
+    return ctxt;
 }


@@ -426,12 +425,13 @@ bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg,
 }


-QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
-                                  QCryptoCipherMode mode,
-                                  const uint8_t *key, size_t nkey,
-                                  Error **errp)
+static QCryptoCipherBuiltin *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg,
+                                                    QCryptoCipherMode mode,
+                                                    const uint8_t *key,
+                                                    size_t nkey,
+                                                    Error **errp)
 {
-    QCryptoCipher *cipher;
+    QCryptoCipherBuiltin *ctxt;

    switch (mode) {
    case QCRYPTO_CIPHER_MODE_ECB:
@@ -444,60 +444,45 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
        return NULL;
    }

-    cipher = g_new0(QCryptoCipher, 1);
-    cipher->alg = alg;
-    cipher->mode = mode;
-
    if (!qcrypto_cipher_validate_key_length(alg, mode, nkey, errp)) {
-        goto error;
+        return NULL;
    }

-    switch (cipher->alg) {
+    switch (alg) {
    case QCRYPTO_CIPHER_ALG_DES_RFB:
-        if (qcrypto_cipher_init_des_rfb(cipher, key, nkey, errp) < 0) {
-            goto error;
-        }
+        ctxt = qcrypto_cipher_init_des_rfb(mode, key, nkey, errp);
        break;
    case QCRYPTO_CIPHER_ALG_AES_128:
    case QCRYPTO_CIPHER_ALG_AES_192:
    case QCRYPTO_CIPHER_ALG_AES_256:
-        if (qcrypto_cipher_init_aes(cipher, key, nkey, errp) < 0) {
-            goto error;
-        }
+        ctxt = qcrypto_cipher_init_aes(mode, key, nkey, errp);
        break;
    default:
        error_setg(errp,
                   "Unsupported cipher algorithm %s",
-                   QCryptoCipherAlgorithm_lookup[cipher->alg]);
-        goto error;
+                   QCryptoCipherAlgorithm_lookup[alg]);
+        return NULL;
    }

-    return cipher;
-
- error:
-    g_free(cipher);
-    return NULL;
+    return ctxt;
 }

-void qcrypto_cipher_free(QCryptoCipher *cipher)
+static void
+qcrypto_builtin_cipher_ctx_free(QCryptoCipher *cipher)
 {
    QCryptoCipherBuiltin *ctxt;

-    if (!cipher) {
-        return;
-    }
-
    ctxt = cipher->opaque;
    ctxt->free(cipher);
-    g_free(cipher);
 }


-int qcrypto_cipher_encrypt(QCryptoCipher *cipher,
-                           const void *in,
-                           void *out,
-                           size_t len,
-                           Error **errp)
+static int
+qcrypto_builtin_cipher_encrypt(QCryptoCipher *cipher,
+                               const void *in,
+                               void *out,
+                               size_t len,
+                               Error **errp)
 {
    QCryptoCipherBuiltin *ctxt = cipher->opaque;

@@ -511,11 +496,12 @@ int qcrypto_cipher_encrypt(QCryptoCipher *cipher,
 }


-int qcrypto_cipher_decrypt(QCryptoCipher *cipher,
-                           const void *in,
-                           void *out,
-                           size_t len,
-                           Error **errp)
+static int
+qcrypto_builtin_cipher_decrypt(QCryptoCipher *cipher,
+                               const void *in,
+                               void *out,
+                               size_t len,
+                               Error **errp)
 {
    QCryptoCipherBuiltin *ctxt = cipher->opaque;

@@ -529,11 +515,20 @@ int qcrypto_cipher_decrypt(QCryptoCipher *cipher,
 }


-int qcrypto_cipher_setiv(QCryptoCipher *cipher,
-                         const uint8_t *iv, size_t niv,
-                         Error **errp)
+static int
+qcrypto_builtin_cipher_setiv(QCryptoCipher *cipher,
+                             const uint8_t *iv, size_t niv,
+                             Error **errp)
 {
    QCryptoCipherBuiltin *ctxt = cipher->opaque;

    return ctxt->setiv(cipher, iv, niv, errp);
 }
+
+
+/*
+ * Operations table for the built-in cipher implementation; each hook
+ * forwards to the per-algorithm callbacks stored in the builtin context.
+ */
+static struct QCryptoCipherDriver qcrypto_cipher_lib_driver = {
+    .cipher_setiv = qcrypto_builtin_cipher_setiv,
+    .cipher_encrypt = qcrypto_builtin_cipher_encrypt,
+    .cipher_decrypt = qcrypto_builtin_cipher_decrypt,
+    .cipher_free = qcrypto_builtin_cipher_ctx_free,
+};
--- a/crypto/cipher-gcrypt.c
+++ b/crypto/cipher-gcrypt.c
@@ -20,6 +20,7 @@

 #include "qemu/osdep.h"
 #include "crypto/xts.h"
+#include "cipherpriv.h"

 #include <gcrypt.h>

@@ -64,12 +65,29 @@ struct QCryptoCipherGcrypt {
    uint8_t *iv;
 };

-QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
-                                  QCryptoCipherMode mode,
-                                  const uint8_t *key, size_t nkey,
-                                  Error **errp)
+/*
+ * Release a gcrypt cipher context: close the cipher handle, close the
+ * tweak handle as well when @mode is XTS, then free the IV buffer and
+ * the context itself.  A NULL @ctx is ignored.
+ */
+static void
+qcrypto_gcrypt_cipher_free_ctx(QCryptoCipherGcrypt *ctx,
+                               QCryptoCipherMode mode)
+{
+    const bool has_tweak = (mode == QCRYPTO_CIPHER_MODE_XTS);
+
+    if (ctx == NULL) {
+        return;
+    }
+
+    gcry_cipher_close(ctx->handle);
+    if (has_tweak) {
+        gcry_cipher_close(ctx->tweakhandle);
+    }
+
+    g_free(ctx->iv);
+    g_free(ctx);
+}
+
+
+static QCryptoCipherGcrypt *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg,
+                                                   QCryptoCipherMode mode,
+                                                   const uint8_t *key,
+                                                   size_t nkey,
+                                                   Error **errp)
 {
-    QCryptoCipher *cipher;
    QCryptoCipherGcrypt *ctx;
    gcry_error_t err;
    int gcryalg, gcrymode;
@@ -146,10 +164,6 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
        return NULL;
    }

-    cipher = g_new0(QCryptoCipher, 1);
-    cipher->alg = alg;
-    cipher->mode = mode;
-
    ctx = g_new0(QCryptoCipherGcrypt, 1);

    err = gcry_cipher_open(&ctx->handle, gcryalg, gcrymode, 0);
@@ -158,7 +172,7 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
                   gcry_strerror(err));
        goto error;
    }
-    if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
+    if (mode == QCRYPTO_CIPHER_MODE_XTS) {
        err = gcry_cipher_open(&ctx->tweakhandle, gcryalg, gcrymode, 0);
        if (err != 0) {
            error_setg(errp, "Cannot initialize cipher: %s",
@@ -167,7 +181,7 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
        }
    }

-    if (cipher->alg == QCRYPTO_CIPHER_ALG_DES_RFB) {
+    if (alg == QCRYPTO_CIPHER_ALG_DES_RFB) {
        /* We're using standard DES cipher from gcrypt, so we need
         * to munge the key so that the results are the same as the
         * bizarre RFB variant of DES :-)
@@ -177,7 +191,7 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
        g_free(rfbkey);
        ctx->blocksize = 8;
    } else {
-        if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
+        if (mode == QCRYPTO_CIPHER_MODE_XTS) {
            nkey /= 2;
            err = gcry_cipher_setkey(ctx->handle, key, nkey);
            if (err != 0) {
@@ -194,7 +208,7 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
                       gcry_strerror(err));
            goto error;
        }
-        switch (cipher->alg) {
+        switch (alg) {
        case QCRYPTO_CIPHER_ALG_AES_128:
        case QCRYPTO_CIPHER_ALG_AES_192:
        case QCRYPTO_CIPHER_ALG_AES_256:
@@ -214,7 +228,7 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
        }
    }

-    if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
+    if (mode == QCRYPTO_CIPHER_MODE_XTS) {
        if (ctx->blocksize != XTS_BLOCK_SIZE) {
            error_setg(errp,
                       "Cipher block size %zu must equal XTS block size %d",
@@ -224,34 +238,18 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
        ctx->iv = g_new0(uint8_t, ctx->blocksize);
    }

-    cipher->opaque = ctx;
-    return cipher;
+    return ctx;

 error:
-    gcry_cipher_close(ctx->handle);
-    if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
-        gcry_cipher_close(ctx->tweakhandle);
-    }
-    g_free(ctx);
-    g_free(cipher);
+    qcrypto_gcrypt_cipher_free_ctx(ctx, mode);
    return NULL;
 }


-void qcrypto_cipher_free(QCryptoCipher *cipher)
+static void
+qcrypto_gcrypt_cipher_ctx_free(QCryptoCipher *cipher)
 {
-    QCryptoCipherGcrypt *ctx;
-    if (!cipher) {
-        return;
-    }
-    ctx = cipher->opaque;
-    gcry_cipher_close(ctx->handle);
-    if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
-        gcry_cipher_close(ctx->tweakhandle);
-    }
-    g_free(ctx->iv);
-    g_free(ctx);
-    g_free(cipher);
+    qcrypto_gcrypt_cipher_free_ctx(cipher->opaque, cipher->mode);
 }


@@ -275,11 +273,12 @@ static void qcrypto_gcrypt_xts_decrypt(const void *ctx,
    g_assert(err == 0);
 }

-int qcrypto_cipher_encrypt(QCryptoCipher *cipher,
-                           const void *in,
-                           void *out,
-                           size_t len,
-                           Error **errp)
+static int
+qcrypto_gcrypt_cipher_encrypt(QCryptoCipher *cipher,
+                              const void *in,
+                              void *out,
+                              size_t len,
+                              Error **errp)
 {
    QCryptoCipherGcrypt *ctx = cipher->opaque;
    gcry_error_t err;
@@ -310,11 +309,12 @@ int qcrypto_cipher_encrypt(QCryptoCipher *cipher,
 }


-int qcrypto_cipher_decrypt(QCryptoCipher *cipher,
-                           const void *in,
-                           void *out,
-                           size_t len,
-                           Error **errp)
+static int
+qcrypto_gcrypt_cipher_decrypt(QCryptoCipher *cipher,
+                              const void *in,
+                              void *out,
+                              size_t len,
+                              Error **errp)
 {
    QCryptoCipherGcrypt *ctx = cipher->opaque;
    gcry_error_t err;
@@ -344,9 +344,10 @@ int qcrypto_cipher_decrypt(QCryptoCipher *cipher,
    return 0;
 }

-int qcrypto_cipher_setiv(QCryptoCipher *cipher,
-                         const uint8_t *iv, size_t niv,
-                         Error **errp)
+static int
+qcrypto_gcrypt_cipher_setiv(QCryptoCipher *cipher,
+                            const uint8_t *iv, size_t niv,
+                            Error **errp)
 {
    QCryptoCipherGcrypt *ctx = cipher->opaque;
    gcry_error_t err;
@@ -380,3 +381,11 @@ int qcrypto_cipher_setiv(QCryptoCipher *cipher,

    return 0;
 }
+
+
+/* Operations table for the gcrypt-backed cipher implementation. */
+static struct QCryptoCipherDriver qcrypto_cipher_lib_driver = {
+    .cipher_setiv = qcrypto_gcrypt_cipher_setiv,
+    .cipher_encrypt = qcrypto_gcrypt_cipher_encrypt,
+    .cipher_decrypt = qcrypto_gcrypt_cipher_decrypt,
+    .cipher_free = qcrypto_gcrypt_cipher_ctx_free,
+};
--- a/crypto/cipher-nettle.c
+++ b/crypto/cipher-nettle.c
@@ -20,6 +20,7 @@

 #include "qemu/osdep.h"
 #include "crypto/xts.h"
+#include "cipherpriv.h"

 #include <nettle/nettle-types.h>
 #include <nettle/aes.h>
@@ -249,12 +250,26 @@ bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg,
 }


-QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
-                                  QCryptoCipherMode mode,
-                                  const uint8_t *key, size_t nkey,
-                                  Error **errp)
+/*
+ * Release a nettle cipher context together with the IV buffer and the
+ * two cipher state blocks it owns.  A NULL @ctx is ignored.
+ */
+static void
+qcrypto_nettle_cipher_free_ctx(QCryptoCipherNettle *ctx)
+{
+    if (ctx != NULL) {
+        g_free(ctx->iv);
+        g_free(ctx->ctx);
+        g_free(ctx->ctx_tweak);
+        g_free(ctx);
+    }
+}
+
+
+static QCryptoCipherNettle *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg,
+                                                   QCryptoCipherMode mode,
+                                                   const uint8_t *key,
+                                                   size_t nkey,
+                                                   Error **errp)
 {
-    QCryptoCipher *cipher;
    QCryptoCipherNettle *ctx;
    uint8_t *rfbkey;

@@ -274,12 +289,7 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
        return NULL;
    }

-    cipher = g_new0(QCryptoCipher, 1);
-    cipher->alg = alg;
-    cipher->mode = mode;
-
    ctx = g_new0(QCryptoCipherNettle, 1);
-    cipher->opaque = ctx;

    switch (alg) {
    case QCRYPTO_CIPHER_ALG_DES_RFB:
@@ -423,36 +433,30 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,

    ctx->iv = g_new0(uint8_t, ctx->blocksize);

-    return cipher;
+    return ctx;

 error:
-    qcrypto_cipher_free(cipher);
+    qcrypto_nettle_cipher_free_ctx(ctx);
    return NULL;
 }


-void qcrypto_cipher_free(QCryptoCipher *cipher)
+static void
+qcrypto_nettle_cipher_ctx_free(QCryptoCipher *cipher)
 {
    QCryptoCipherNettle *ctx;

-    if (!cipher) {
-        return;
-    }
-
    ctx = cipher->opaque;
-    g_free(ctx->iv);
-    g_free(ctx->ctx);
-    g_free(ctx->ctx_tweak);
-    g_free(ctx);
-    g_free(cipher);
+    qcrypto_nettle_cipher_free_ctx(ctx);
 }


-int qcrypto_cipher_encrypt(QCryptoCipher *cipher,
-                           const void *in,
-                           void *out,
-                           size_t len,
-                           Error **errp)
+static int
+qcrypto_nettle_cipher_encrypt(QCryptoCipher *cipher,
+                              const void *in,
+                              void *out,
+                              size_t len,
+                              Error **errp)
 {
    QCryptoCipherNettle *ctx = cipher->opaque;

@@ -494,11 +498,12 @@ int qcrypto_cipher_encrypt(QCryptoCipher *cipher,
 }


-int qcrypto_cipher_decrypt(QCryptoCipher *cipher,
-                           const void *in,
-                           void *out,
-                           size_t len,
-                           Error **errp)
+static int
+qcrypto_nettle_cipher_decrypt(QCryptoCipher *cipher,
+                              const void *in,
+                              void *out,
+                              size_t len,
+                              Error **errp)
 {
    QCryptoCipherNettle *ctx = cipher->opaque;

@@ -538,9 +543,10 @@ int qcrypto_cipher_decrypt(QCryptoCipher *cipher,
    return 0;
 }

-int qcrypto_cipher_setiv(QCryptoCipher *cipher,
-                         const uint8_t *iv, size_t niv,
-                         Error **errp)
+static int
+qcrypto_nettle_cipher_setiv(QCryptoCipher *cipher,
+                            const uint8_t *iv, size_t niv,
+                            Error **errp)
 {
    QCryptoCipherNettle *ctx = cipher->opaque;
    if (niv != ctx->blocksize) {
@@ -551,3 +557,11 @@ int qcrypto_cipher_setiv(QCryptoCipher *cipher,
    memcpy(ctx->iv, iv, niv);
    return 0;
 }
+
+
+/* Operations table for the nettle-backed cipher implementation. */
+static struct QCryptoCipherDriver qcrypto_cipher_lib_driver = {
+    .cipher_setiv = qcrypto_nettle_cipher_setiv,
+    .cipher_encrypt = qcrypto_nettle_cipher_encrypt,
+    .cipher_decrypt = qcrypto_nettle_cipher_decrypt,
+    .cipher_free = qcrypto_nettle_cipher_ctx_free,
+};
--- a/crypto/cipher.c
+++ b/crypto/cipher.c
@@ -21,6 +21,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "crypto/cipher.h"
+#include "cipherpriv.h"


 static size_t alg_key_len[QCRYPTO_CIPHER_ALG__MAX] = {
@@ -155,3 +156,82 @@ qcrypto_cipher_munge_des_rfb_key(const uint8_t *key,
 #else
 #include "crypto/cipher-builtin.c"
 #endif
+
+/*
+ * Create a cipher object for @alg / @mode keyed with @key (@nkey bytes).
+ *
+ * When CONFIG_AF_ALG is defined the AF_ALG backend is tried first; if it
+ * cannot provide a context, the library backend compiled into this file
+ * is used instead.  Only the library backend's error is reported through
+ * @errp; the AF_ALG error is collected locally and discarded.
+ *
+ * Returns the new cipher, or NULL if no backend could create a context.
+ */
+QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
+                                  QCryptoCipherMode mode,
+                                  const uint8_t *key, size_t nkey,
+                                  Error **errp)
+{
+    QCryptoCipherDriver *backend = NULL;
+    QCryptoCipher *self;
+    Error *afalg_err = NULL;
+    void *state = NULL;
+
+#ifdef CONFIG_AF_ALG
+    state = qcrypto_afalg_cipher_ctx_new(alg, mode, key, nkey, &afalg_err);
+    if (state != NULL) {
+        backend = &qcrypto_cipher_afalg_driver;
+    }
+#endif
+
+    if (state == NULL) {
+        state = qcrypto_cipher_ctx_new(alg, mode, key, nkey, errp);
+        /* The AF_ALG error is never surfaced, whatever the outcome. */
+        error_free(afalg_err);
+        if (state == NULL) {
+            return NULL;
+        }
+        backend = &qcrypto_cipher_lib_driver;
+    }
+
+    self = g_new0(QCryptoCipher, 1);
+    self->alg = alg;
+    self->mode = mode;
+    self->opaque = state;
+    self->driver = (void *)backend;
+
+    return self;
+}
+
+
+/*
+ * Encrypt @len bytes from @in into @out by dispatching to the
+ * cipher_encrypt hook of the driver recorded in @cipher.
+ */
+int qcrypto_cipher_encrypt(QCryptoCipher *cipher,
+                           const void *in,
+                           void *out,
+                           size_t len,
+                           Error **errp)
+{
+    const QCryptoCipherDriver *backend = cipher->driver;
+
+    return backend->cipher_encrypt(cipher, in, out, len, errp);
+}
+
+
+/*
+ * Decrypt @len bytes from @in into @out by dispatching to the
+ * cipher_decrypt hook of the driver recorded in @cipher.
+ */
+int qcrypto_cipher_decrypt(QCryptoCipher *cipher,
+                           const void *in,
+                           void *out,
+                           size_t len,
+                           Error **errp)
+{
+    const QCryptoCipherDriver *backend = cipher->driver;
+
+    return backend->cipher_decrypt(cipher, in, out, len, errp);
+}
+
+
+/*
+ * Set the initialization vector (@niv bytes at @iv) by dispatching to
+ * the cipher_setiv hook of the driver recorded in @cipher.
+ */
+int qcrypto_cipher_setiv(QCryptoCipher *cipher,
+                         const uint8_t *iv, size_t niv,
+                         Error **errp)
+{
+    const QCryptoCipherDriver *backend = cipher->driver;
+
+    return backend->cipher_setiv(cipher, iv, niv, errp);
+}
+
+
+/*
+ * Destroy @cipher: the driver's cipher_free hook runs first, then the
+ * QCryptoCipher object itself is released.  A NULL @cipher is ignored.
+ */
+void qcrypto_cipher_free(QCryptoCipher *cipher)
+{
+    const QCryptoCipherDriver *backend;
+
+    if (!cipher) {
+        return;
+    }
+
+    backend = cipher->driver;
+    backend->cipher_free(cipher);
+    g_free(cipher);
+}
--- a/crypto/cipherpriv.h
+++ b/crypto/cipherpriv.h
@@ -0,0 +1,56 @@
+/*
+ * QEMU Crypto cipher driver support
+ *
+ * Copyright (c) 2017 HUAWEI TECHNOLOGIES CO., LTD.
+ *
+ * Authors:
+ *    Longpeng(Mike) <longpeng2@huawei.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ *
+ */
+
+#ifndef QCRYPTO_CIPHERPRIV_H
+#define QCRYPTO_CIPHERPRIV_H
+
+#include "qapi-types.h"
+
+typedef struct QCryptoCipherDriver QCryptoCipherDriver;
+
+/*
+ * Table of backend operations for a QCryptoCipher.  The public
+ * qcrypto_cipher_*() entry points in cipher.c look up the table stored
+ * in cipher->driver and forward their arguments to the matching hook.
+ */
+struct QCryptoCipherDriver {
+    /* Backend for qcrypto_cipher_encrypt(); same arguments and result. */
+    int (*cipher_encrypt)(QCryptoCipher *cipher,
+                          const void *in,
+                          void *out,
+                          size_t len,
+                          Error **errp);
+
+    /* Backend for qcrypto_cipher_decrypt(); same arguments and result. */
+    int (*cipher_decrypt)(QCryptoCipher *cipher,
+                          const void *in,
+                          void *out,
+                          size_t len,
+                          Error **errp);
+
+    /* Backend for qcrypto_cipher_setiv(); same arguments and result. */
+    int (*cipher_setiv)(QCryptoCipher *cipher,
+                        const uint8_t *iv, size_t niv,
+                        Error **errp);
+
+    /*
+     * Called by qcrypto_cipher_free() before it frees the QCryptoCipher
+     * object itself.
+     */
+    void (*cipher_free)(QCryptoCipher *cipher);
+};
+
+#ifdef CONFIG_AF_ALG
+
+#include "afalgpriv.h"
+
+extern QCryptoAFAlg *
+qcrypto_afalg_cipher_ctx_new(QCryptoCipherAlgorithm alg,
+                             QCryptoCipherMode mode,
+                             const uint8_t *key,
+                             size_t nkey, Error **errp);
+
+extern struct QCryptoCipherDriver qcrypto_cipher_afalg_driver;
+
+#endif
+
+#endif
--- a/crypto/hash-afalg.c
+++ b/crypto/hash-afalg.c
@@ -0,0 +1,214 @@
+/*
+ * QEMU Crypto af_alg-backend hash/hmac support
+ *
+ * Copyright (c) 2017 HUAWEI TECHNOLOGIES CO., LTD.
+ *
+ * Authors:
+ *    Longpeng(Mike) <longpeng2@huawei.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/iov.h"
+#include "qemu/sockets.h"
+#include "qemu-common.h"
+#include "qapi/error.h"
+#include "crypto/hash.h"
+#include "crypto/hmac.h"
+#include "hashpriv.h"
+#include "hmacpriv.h"
+
+/*
+ * Build the algorithm name string for @alg: the bare digest name (for
+ * example "sha256") or, when @is_hmac is true, "hmac(<digest>)".
+ *
+ * Returns a string allocated with g_strdup_printf(), or NULL with @errp
+ * set when @alg is not one of the digests listed below.
+ */
+static char *
+qcrypto_afalg_hash_format_name(QCryptoHashAlgorithm alg,
+                               bool is_hmac,
+                               Error **errp)
+{
+    const char *digest = NULL;
+
+    switch (alg) {
+    case QCRYPTO_HASH_ALG_MD5:
+        digest = "md5";
+        break;
+    case QCRYPTO_HASH_ALG_SHA1:
+        digest = "sha1";
+        break;
+    case QCRYPTO_HASH_ALG_SHA224:
+        digest = "sha224";
+        break;
+    case QCRYPTO_HASH_ALG_SHA256:
+        digest = "sha256";
+        break;
+    case QCRYPTO_HASH_ALG_SHA384:
+        digest = "sha384";
+        break;
+    case QCRYPTO_HASH_ALG_SHA512:
+        digest = "sha512";
+        break;
+    case QCRYPTO_HASH_ALG_RIPEMD160:
+        digest = "rmd160";
+        break;
+    default:
+        break;
+    }
+
+    if (digest == NULL) {
+        error_setg(errp, "Unsupported hash algorithm %d", alg);
+        return NULL;
+    }
+
+    if (!is_hmac) {
+        return g_strdup_printf("%s", digest);
+    }
+    return g_strdup_printf("hmac(%s)", digest);
+}
+
+/*
+ * Allocate an AF_ALG hash context for @alg.  When @is_hmac is true the
+ * "hmac(...)" variant is requested and @key (@nkey bytes) is installed
+ * on the transform socket; otherwise @key and @nkey are not used.
+ *
+ * Returns the new context, or NULL with @errp set on failure.
+ */
+static QCryptoAFAlg *
+qcrypto_afalg_hash_hmac_ctx_new(QCryptoHashAlgorithm alg,
+                                const uint8_t *key, size_t nkey,
+                                bool is_hmac, Error **errp)
+{
+    QCryptoAFAlg *ctx;
+    char *alg_name = qcrypto_afalg_hash_format_name(alg, is_hmac, errp);
+
+    if (alg_name == NULL) {
+        return NULL;
+    }
+
+    /* The name string is released as soon as the allocation returns. */
+    ctx = qcrypto_afalg_comm_alloc(AFALG_TYPE_HASH, alg_name, errp);
+    g_free(alg_name);
+    if (ctx == NULL) {
+        return NULL;
+    }
+
+    /* HMAC needs setkey */
+    if (is_hmac &&
+        qemu_setsockopt(ctx->tfmfd, SOL_ALG, ALG_SET_KEY, key, nkey) != 0) {
+        error_setg_errno(errp, errno, "Set hmac key failed");
+        qcrypto_afalg_comm_free(ctx);
+        return NULL;
+    }
+
+    return ctx;
+}
+
+/*
+ * Allocate an AF_ALG context for the plain (unkeyed) digest @alg.
+ * Returns NULL with @errp set on failure.
+ */
+static QCryptoAFAlg *
+qcrypto_afalg_hash_ctx_new(QCryptoHashAlgorithm alg,
+                           Error **errp)
+{
+    const bool is_hmac = false;
+
+    /* A plain digest carries no key material. */
+    return qcrypto_afalg_hash_hmac_ctx_new(alg, NULL, 0, is_hmac, errp);
+}
+
+/*
+ * Allocate an AF_ALG context for HMAC over digest @alg, keyed with
+ * @key (@nkey bytes).  Returns NULL with @errp set on failure.
+ */
+QCryptoAFAlg *
+qcrypto_afalg_hmac_ctx_new(QCryptoHashAlgorithm alg,
+                           const uint8_t *key, size_t nkey,
+                           Error **errp)
+{
+    const bool is_hmac = true;
+
+    return qcrypto_afalg_hash_hmac_ctx_new(alg, key, nkey, is_hmac, errp);
+}
+
+/*
+ * Compute a digest (when @hmac is NULL) or an HMAC (when @hmac is an
+ * existing keyed context) over the @niov buffers in @iov.
+ *
+ * If *@resultlen is 0 the output buffer is allocated here and its size
+ * stored in *@resultlen; otherwise *@resultlen must already equal the
+ * digest length of @alg.  For a plain digest a temporary context is
+ * created and released inside this function; a caller-supplied @hmac
+ * context is left alive.
+ *
+ * Returns 0 on success, a negative value with @errp set on failure.
+ */
+static int
+qcrypto_afalg_hash_hmac_bytesv(QCryptoAFAlg *hmac,
+                               QCryptoHashAlgorithm alg,
+                               const struct iovec *iov,
+                               size_t niov, uint8_t **result,
+                               size_t *resultlen,
+                               Error **errp)
+{
+    const int digest_len = qcrypto_hash_digest_len(alg);
+    const bool own_ctx = (hmac == NULL);
+    QCryptoAFAlg *ctx = hmac;
+    struct iovec digest;
+    int rc;
+
+    if (*resultlen == 0) {
+        *resultlen = digest_len;
+        *result = g_new0(uint8_t, *resultlen);
+    } else if (*resultlen != digest_len) {
+        error_setg(errp,
+                   "Result buffer size %zu is not match hash %d",
+                   *resultlen, digest_len);
+        return -1;
+    }
+
+    if (own_ctx) {
+        ctx = qcrypto_afalg_hash_ctx_new(alg, errp);
+        if (!ctx) {
+            return -1;
+        }
+    }
+
+    /* send data to kernel's crypto core */
+    rc = iov_send_recv(ctx->opfd, iov, niov, 0, iov_size(iov, niov), true);
+    if (rc < 0) {
+        error_setg_errno(errp, errno, "Send data to afalg-core failed");
+    } else {
+        /* hash && get result */
+        digest.iov_base = *result;
+        digest.iov_len = *resultlen;
+        rc = iov_send_recv(ctx->opfd, &digest, 1,
+                           0, iov_size(&digest, 1), false);
+        if (rc < 0) {
+            error_setg_errno(errp, errno,
+                             "Recv result from afalg-core failed");
+        } else {
+            rc = 0;
+        }
+    }
+
+    /* Only a context created above is ours to release. */
+    if (own_ctx) {
+        qcrypto_afalg_comm_free(ctx);
+    }
+    return rc;
+}
+
+/*
+ * Hash driver hook: digest the @niov buffers in @iov with @alg.  No
+ * keyed context is passed, so the shared helper runs in plain-hash mode.
+ */
+static int
+qcrypto_afalg_hash_bytesv(QCryptoHashAlgorithm alg,
+                          const struct iovec *iov,
+                          size_t niov, uint8_t **result,
+                          size_t *resultlen,
+                          Error **errp)
+{
+    QCryptoAFAlg *no_hmac = NULL;
+
+    return qcrypto_afalg_hash_hmac_bytesv(no_hmac, alg, iov, niov,
+                                          result, resultlen, errp);
+}
+
+/*
+ * HMAC driver hook: compute the HMAC of the @niov buffers in @iov using
+ * the AF_ALG context and algorithm recorded in @hmac.
+ */
+static int
+qcrypto_afalg_hmac_bytesv(QCryptoHmac *hmac,
+                          const struct iovec *iov,
+                          size_t niov, uint8_t **result,
+                          size_t *resultlen,
+                          Error **errp)
+{
+    QCryptoAFAlg *ctx = hmac->opaque;
+
+    return qcrypto_afalg_hash_hmac_bytesv(ctx, hmac->alg, iov, niov,
+                                          result, resultlen, errp);
+}
+
+/* HMAC driver hook: release the AF_ALG context attached to @hmac. */
+static void qcrypto_afalg_hmac_ctx_free(QCryptoHmac *hmac)
+{
+    qcrypto_afalg_comm_free(hmac->opaque);
+}
+
+/* AF_ALG hash driver; qcrypto_hash_bytesv() tries it first if enabled. */
+QCryptoHashDriver qcrypto_hash_afalg_driver = {
+    .hash_bytesv = qcrypto_afalg_hash_bytesv,
+};
+
+/* HMAC operations implemented on top of the AF_ALG context. */
+QCryptoHmacDriver qcrypto_hmac_afalg_driver = {
+    .hmac_free = qcrypto_afalg_hmac_ctx_free,
+    .hmac_bytesv = qcrypto_afalg_hmac_bytesv,
+};
--- a/crypto/hash-gcrypt.c
+++ b/crypto/hash-gcrypt.c
@@ -22,6 +22,7 @@
 #include <gcrypt.h>
 #include "qapi/error.h"
 #include "crypto/hash.h"
+#include "hashpriv.h"


 static int qcrypto_hash_alg_map[QCRYPTO_HASH_ALG__MAX] = {
@@ -44,12 +45,13 @@ gboolean qcrypto_hash_supports(QCryptoHashAlgorithm alg)
 }


-int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,
-                        const struct iovec *iov,
-                        size_t niov,
-                        uint8_t **result,
-                        size_t *resultlen,
-                        Error **errp)
+static int
+qcrypto_gcrypt_hash_bytesv(QCryptoHashAlgorithm alg,
+                           const struct iovec *iov,
+                           size_t niov,
+                           uint8_t **result,
+                           size_t *resultlen,
+                           Error **errp)
 {
    int i, ret;
    gcry_md_hd_t md;
@@ -107,3 +109,8 @@ int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,
    gcry_md_close(md);
    return -1;
 }
+
+
+/* Library hash driver (gcrypt build) used by qcrypto_hash_bytesv(). */
+QCryptoHashDriver qcrypto_hash_lib_driver = {
+    .hash_bytesv = qcrypto_gcrypt_hash_bytesv,
+};
--- a/crypto/hash-glib.c
+++ b/crypto/hash-glib.c
@@ -21,6 +21,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "crypto/hash.h"
+#include "hashpriv.h"


 static int qcrypto_hash_alg_map[QCRYPTO_HASH_ALG__MAX] = {
@@ -47,12 +48,13 @@ gboolean qcrypto_hash_supports(QCryptoHashAlgorithm alg)
 }


-int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,
-                        const struct iovec *iov,
-                        size_t niov,
-                        uint8_t **result,
-                        size_t *resultlen,
-                        Error **errp)
+static int
+qcrypto_glib_hash_bytesv(QCryptoHashAlgorithm alg,
+                         const struct iovec *iov,
+                         size_t niov,
+                         uint8_t **result,
+                         size_t *resultlen,
+                         Error **errp)
 {
    int i, ret;
    GChecksum *cs;
@@ -95,3 +97,8 @@ int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,
    g_checksum_free(cs);
    return -1;
 }
+
+
+/* Library hash driver (glib build) used by qcrypto_hash_bytesv(). */
+QCryptoHashDriver qcrypto_hash_lib_driver = {
+    .hash_bytesv = qcrypto_glib_hash_bytesv,
+};
--- a/crypto/hash-nettle.c
+++ b/crypto/hash-nettle.c
@@ -21,6 +21,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "crypto/hash.h"
+#include "hashpriv.h"
 #include <nettle/md5.h>
 #include <nettle/sha.h>
 #include <nettle/ripemd160.h>
@@ -103,12 +104,13 @@ gboolean qcrypto_hash_supports(QCryptoHashAlgorithm alg)
 }


-int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,
-                        const struct iovec *iov,
-                        size_t niov,
-                        uint8_t **result,
-                        size_t *resultlen,
-                        Error **errp)
+static int
+qcrypto_nettle_hash_bytesv(QCryptoHashAlgorithm alg,
+                           const struct iovec *iov,
+                           size_t niov,
+                           uint8_t **result,
+                           size_t *resultlen,
+                           Error **errp)
 {
    int i;
    union qcrypto_hash_ctx ctx;
@@ -152,3 +154,8 @@ int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,

    return 0;
 }
+
+
+/* Library hash driver (nettle build) used by qcrypto_hash_bytesv(). */
+QCryptoHashDriver qcrypto_hash_lib_driver = {
+    .hash_bytesv = qcrypto_nettle_hash_bytesv,
+};
--- a/crypto/hash.c
+++ b/crypto/hash.c
@@ -21,6 +21,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "crypto/hash.h"
+#include "hashpriv.h"

 static size_t qcrypto_hash_alg_size[QCRYPTO_HASH_ALG__MAX] = {
    [QCRYPTO_HASH_ALG_MD5] = 16,
@@ -38,6 +39,35 @@ size_t qcrypto_hash_digest_len(QCryptoHashAlgorithm alg)
    return qcrypto_hash_alg_size[alg];
 }

+int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,
+                        const struct iovec *iov,
+                        size_t niov,
+                        uint8_t **result,
+                        size_t *resultlen,
+                        Error **errp)
+{
+#ifdef CONFIG_AF_ALG
+    int ret;
+
+    /*
+     * Try AF_ALG first. Pass a NULL errp so that its errors are
+     * ignored and we can fall back to the library driver without
+     * dereferencing a NULL errp or leaving a stale error in *errp.
+     * TODO: Maybe we should treat some afalg errors as fatal
+     */
+    ret = qcrypto_hash_afalg_driver.hash_bytesv(alg, iov, niov,
+                                                result, resultlen,
+                                                NULL);
+    if (ret == 0) {
+        return ret;
+    }
+#endif
+
+    return qcrypto_hash_lib_driver.hash_bytesv(alg, iov, niov,
+                                               result, resultlen,
+                                               errp);
+}
+

 int qcrypto_hash_bytes(QCryptoHashAlgorithm alg,
                       const char *buf,
--- a/crypto/hashpriv.h
+++ b/crypto/hashpriv.h
@@ -0,0 +1,39 @@
+/*
+ * QEMU Crypto hash driver support
+ *
+ * Copyright (c) 2017 HUAWEI TECHNOLOGIES CO., LTD.
+ *
+ * Authors:
+ *    Longpeng(Mike) <longpeng2@huawei.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ *
+ */
+
+#ifndef QCRYPTO_HASHPRIV_H
+#define QCRYPTO_HASHPRIV_H
+
+typedef struct QCryptoHashDriver QCryptoHashDriver;
+
+struct QCryptoHashDriver {
+    int (*hash_bytesv)(QCryptoHashAlgorithm alg,
+                       const struct iovec *iov,
+                       size_t niov,
+                       uint8_t **result,
+                       size_t *resultlen,
+                       Error **errp);
+};
+
+extern QCryptoHashDriver qcrypto_hash_lib_driver;
+
+#ifdef CONFIG_AF_ALG
+
+#include "afalgpriv.h"
+
+extern QCryptoHashDriver qcrypto_hash_afalg_driver;
+
+#endif
+
+#endif
--- a/crypto/hmac-gcrypt.c
+++ b/crypto/hmac-gcrypt.c
@@ -15,6 +15,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "crypto/hmac.h"
+#include "hmacpriv.h"
 #include <gcrypt.h>

 static int qcrypto_hmac_alg_map[QCRYPTO_HASH_ALG__MAX] = {
@@ -42,11 +43,10 @@ bool qcrypto_hmac_supports(QCryptoHashAlgorithm alg)
    return false;
 }

-QCryptoHmac *qcrypto_hmac_new(QCryptoHashAlgorithm alg,
-                              const uint8_t *key, size_t nkey,
-                              Error **errp)
+void *qcrypto_hmac_ctx_new(QCryptoHashAlgorithm alg,
+                           const uint8_t *key, size_t nkey,
+                           Error **errp)
 {
-    QCryptoHmac *hmac;
    QCryptoHmacGcrypt *ctx;
    gcry_error_t err;

@@ -56,9 +56,6 @@ QCryptoHmac *qcrypto_hmac_new(QCryptoHashAlgorithm alg,
        return NULL;
    }

-    hmac = g_new0(QCryptoHmac, 1);
-    hmac->alg = alg;
-
    ctx = g_new0(QCryptoHmacGcrypt, 1);

    err = gcry_mac_open(&ctx->handle, qcrypto_hmac_alg_map[alg],
@@ -73,39 +70,35 @@ QCryptoHmac *qcrypto_hmac_new(QCryptoHashAlgorithm alg,
    if (err != 0) {
        error_setg(errp, "Cannot set key: %s",
                   gcry_strerror(err));
+        gcry_mac_close(ctx->handle);
        goto error;
    }

-    hmac->opaque = ctx;
-    return hmac;
+    return ctx;

 error:
    g_free(ctx);
-    g_free(hmac);
    return NULL;
 }

-void qcrypto_hmac_free(QCryptoHmac *hmac)
+static void
+qcrypto_gcrypt_hmac_ctx_free(QCryptoHmac *hmac)
 {
    QCryptoHmacGcrypt *ctx;

-    if (!hmac) {
-        return;
-    }
-
    ctx = hmac->opaque;
    gcry_mac_close(ctx->handle);

    g_free(ctx);
-    g_free(hmac);
 }

-int qcrypto_hmac_bytesv(QCryptoHmac *hmac,
-                        const struct iovec *iov,
-                        size_t niov,
-                        uint8_t **result,
-                        size_t *resultlen,
-                        Error **errp)
+static int
+qcrypto_gcrypt_hmac_bytesv(QCryptoHmac *hmac,
+                           const struct iovec *iov,
+                           size_t niov,
+                           uint8_t **result,
+                           size_t *resultlen,
+                           Error **errp)
 {
    QCryptoHmacGcrypt *ctx;
    gcry_error_t err;
@@ -150,3 +143,8 @@ int qcrypto_hmac_bytesv(QCryptoHmac *hmac,

    return 0;
 }
+
+QCryptoHmacDriver qcrypto_hmac_lib_driver = {
+    .hmac_bytesv = qcrypto_gcrypt_hmac_bytesv,
+    .hmac_free = qcrypto_gcrypt_hmac_ctx_free,
+};
--- a/crypto/hmac-glib.c
+++ b/crypto/hmac-glib.c
@@ -15,6 +15,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "crypto/hmac.h"
+#include "hmacpriv.h"

 /* Support for HMAC Algos has been added in GLib 2.30 */
 #if GLIB_CHECK_VERSION(2, 30, 0)
@@ -49,11 +50,10 @@ bool qcrypto_hmac_supports(QCryptoHashAlgorithm alg)
    return false;
 }

-QCryptoHmac *qcrypto_hmac_new(QCryptoHashAlgorithm alg,
-                              const uint8_t *key, size_t nkey,
-                              Error **errp)
+void *qcrypto_hmac_ctx_new(QCryptoHashAlgorithm alg,
+                           const uint8_t *key, size_t nkey,
+                           Error **errp)
 {
-    QCryptoHmac *hmac;
    QCryptoHmacGlib *ctx;

    if (!qcrypto_hmac_supports(alg)) {
@@ -62,9 +62,6 @@ QCryptoHmac *qcrypto_hmac_new(QCryptoHashAlgorithm alg,
        return NULL;
    }

-    hmac = g_new0(QCryptoHmac, 1);
-    hmac->alg = alg;
-
    ctx = g_new0(QCryptoHmacGlib, 1);

    ctx->ghmac = g_hmac_new(qcrypto_hmac_alg_map[alg],
@@ -74,36 +71,31 @@ QCryptoHmac *qcrypto_hmac_new(QCryptoHashAlgorithm alg,
        goto error;
    }

-    hmac->opaque = ctx;
-    return hmac;
+    return ctx;

 error:
    g_free(ctx);
-    g_free(hmac);
    return NULL;
 }

-void qcrypto_hmac_free(QCryptoHmac *hmac)
+static void
+qcrypto_glib_hmac_ctx_free(QCryptoHmac *hmac)
 {
    QCryptoHmacGlib *ctx;

-    if (!hmac) {
-        return;
-    }
-
    ctx = hmac->opaque;
    g_hmac_unref(ctx->ghmac);

    g_free(ctx);
-    g_free(hmac);
 }

-int qcrypto_hmac_bytesv(QCryptoHmac *hmac,
-                        const struct iovec *iov,
-                        size_t niov,
-                        uint8_t **result,
-                        size_t *resultlen,
-                        Error **errp)
+static int
+qcrypto_glib_hmac_bytesv(QCryptoHmac *hmac,
+                         const struct iovec *iov,
+                         size_t niov,
+                         uint8_t **result,
+                         size_t *resultlen,
+                         Error **errp)
 {
    QCryptoHmacGlib *ctx;
    int i, ret;
@@ -141,26 +133,33 @@ bool qcrypto_hmac_supports(QCryptoHashAlgorithm alg)
    return false;
 }

-QCryptoHmac *qcrypto_hmac_new(QCryptoHashAlgorithm alg,
-                              const uint8_t *key, size_t nkey,
-                              Error **errp)
+void *qcrypto_hmac_ctx_new(QCryptoHashAlgorithm alg,
+                           const uint8_t *key, size_t nkey,
+                           Error **errp)
 {
    return NULL;
 }

-void qcrypto_hmac_free(QCryptoHmac *hmac)
+static void
+qcrypto_glib_hmac_ctx_free(QCryptoHmac *hmac)
 {
    return;
 }

-int qcrypto_hmac_bytesv(QCryptoHmac *hmac,
-                        const struct iovec *iov,
-                        size_t niov,
-                        uint8_t **result,
-                        size_t *resultlen,
-                        Error **errp)
+static int
+qcrypto_glib_hmac_bytesv(QCryptoHmac *hmac,
+                         const struct iovec *iov,
+                         size_t niov,
+                         uint8_t **result,
+                         size_t *resultlen,
+                         Error **errp)
 {
    return -1;
 }

 #endif
+
+QCryptoHmacDriver qcrypto_hmac_lib_driver = {
+    .hmac_bytesv = qcrypto_glib_hmac_bytesv,
+    .hmac_free = qcrypto_glib_hmac_ctx_free,
+};
--- a/crypto/hmac-nettle.c
+++ b/crypto/hmac-nettle.c
@@ -15,6 +15,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "crypto/hmac.h"
+#include "hmacpriv.h"
 #include <nettle/hmac.h>

 typedef void (*qcrypto_nettle_hmac_setkey)(void *ctx,
@@ -97,11 +98,10 @@ bool qcrypto_hmac_supports(QCryptoHashAlgorithm alg)
    return false;
 }

-QCryptoHmac *qcrypto_hmac_new(QCryptoHashAlgorithm alg,
-                              const uint8_t *key, size_t nkey,
-                              Error **errp)
+void *qcrypto_hmac_ctx_new(QCryptoHashAlgorithm alg,
+                           const uint8_t *key, size_t nkey,
+                           Error **errp)
 {
-    QCryptoHmac *hmac;
    QCryptoHmacNettle *ctx;

    if (!qcrypto_hmac_supports(alg)) {
@@ -110,38 +110,29 @@ QCryptoHmac *qcrypto_hmac_new(QCryptoHashAlgorithm alg,
        return NULL;
    }

-    hmac = g_new0(QCryptoHmac, 1);
-    hmac->alg = alg;
-
    ctx = g_new0(QCryptoHmacNettle, 1);

    qcrypto_hmac_alg_map[alg].setkey(&ctx->u, nkey, key);

-    hmac->opaque = ctx;
-
-    return hmac;
+    return ctx;
 }

-void qcrypto_hmac_free(QCryptoHmac *hmac)
+static void
+qcrypto_nettle_hmac_ctx_free(QCryptoHmac *hmac)
 {
    QCryptoHmacNettle *ctx;

-    if (!hmac) {
-        return;
-    }
-
    ctx = hmac->opaque;
-
    g_free(ctx);
-    g_free(hmac);
 }

-int qcrypto_hmac_bytesv(QCryptoHmac *hmac,
-                        const struct iovec *iov,
-                        size_t niov,
-                        uint8_t **result,
-                        size_t *resultlen,
-                        Error **errp)
+static int
+qcrypto_nettle_hmac_bytesv(QCryptoHmac *hmac,
+                           const struct iovec *iov,
+                           size_t niov,
+                           uint8_t **result,
+                           size_t *resultlen,
+                           Error **errp)
 {
    QCryptoHmacNettle *ctx;
    int i;
@@ -173,3 +164,8 @@ int qcrypto_hmac_bytesv(QCryptoHmac *hmac,

    return 0;
 }
+
+QCryptoHmacDriver qcrypto_hmac_lib_driver = {
+    .hmac_bytesv = qcrypto_nettle_hmac_bytesv,
+    .hmac_free = qcrypto_nettle_hmac_ctx_free,
+};
--- a/crypto/hmac.c
+++ b/crypto/hmac.c
@@ -12,9 +12,22 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "crypto/hmac.h"
+#include "hmacpriv.h"

 static const char hex[] = "0123456789abcdef";

+int qcrypto_hmac_bytesv(QCryptoHmac *hmac,
+                        const struct iovec *iov,
+                        size_t niov,
+                        uint8_t **result,
+                        size_t *resultlen,
+                        Error **errp)
+{
+    /* Forward to the backend driver recorded in the hmac object */
+    QCryptoHmacDriver *backend = hmac->driver;
+    return backend->hmac_bytesv(hmac, iov, niov, result, resultlen, errp);
+}
+
 int qcrypto_hmac_bytes(QCryptoHmac *hmac,
                       const char *buf,
                       size_t len,
@@ -70,3 +83,48 @@ int qcrypto_hmac_digest(QCryptoHmac *hmac,

    return qcrypto_hmac_digestv(hmac, &iov, 1, digest, errp);
 }
+
+QCryptoHmac *qcrypto_hmac_new(QCryptoHashAlgorithm alg,
+                              const uint8_t *key, size_t nkey,
+                              Error **errp)
+{
+    QCryptoHmac *hmac;
+    void *ctx = NULL;
+    QCryptoHmacDriver *drv = NULL;
+
+#ifdef CONFIG_AF_ALG
+    /* AF_ALG errors are not fatal: pass NULL so nothing is left to free */
+    ctx = qcrypto_afalg_hmac_ctx_new(alg, key, nkey, NULL);
+    if (ctx) {
+        drv = &qcrypto_hmac_afalg_driver;
+    }
+#endif
+
+    if (!ctx) {
+        /* Fall back to the library backend; its error goes to the caller */
+        ctx = qcrypto_hmac_ctx_new(alg, key, nkey, errp);
+        if (!ctx) {
+            return NULL;
+        }
+
+        drv = &qcrypto_hmac_lib_driver;
+    }
+
+    hmac = g_new0(QCryptoHmac, 1);
+    hmac->alg = alg;
+    hmac->opaque = ctx;
+    hmac->driver = (void *)drv;
+
+    return hmac;
+}
+
+void qcrypto_hmac_free(QCryptoHmac *hmac)
+{
+    /* A NULL hmac is accepted and ignored */
+    if (!hmac) {
+        return;
+    }
+
+    ((QCryptoHmacDriver *)hmac->driver)->hmac_free(hmac);
+    g_free(hmac);
+}
--- a/crypto/hmacpriv.h
+++ b/crypto/hmacpriv.h
@@ -0,0 +1,48 @@
+/*
+ * QEMU Crypto hmac driver support
+ *
+ * Copyright (c) 2017 HUAWEI TECHNOLOGIES CO., LTD.
+ *
+ * Authors:
+ *    Longpeng(Mike) <longpeng2@huawei.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ *
+ */
+
+#ifndef QCRYPTO_HMACPRIV_H
+#define QCRYPTO_HMACPRIV_H
+
+typedef struct QCryptoHmacDriver QCryptoHmacDriver;
+
+struct QCryptoHmacDriver {
+    int (*hmac_bytesv)(QCryptoHmac *hmac,
+                       const struct iovec *iov,
+                       size_t niov,
+                       uint8_t **result,
+                       size_t *resultlen,
+                       Error **errp);
+
+    void (*hmac_free)(QCryptoHmac *hmac);
+};
+
+extern void *qcrypto_hmac_ctx_new(QCryptoHashAlgorithm alg,
+                                  const uint8_t *key, size_t nkey,
+                                  Error **errp);
+extern QCryptoHmacDriver qcrypto_hmac_lib_driver;
+
+#ifdef CONFIG_AF_ALG
+
+#include "afalgpriv.h"
+
+extern QCryptoAFAlg *
+qcrypto_afalg_hmac_ctx_new(QCryptoHashAlgorithm alg,
+                           const uint8_t *key, size_t nkey,
+                           Error **errp);
+extern QCryptoHmacDriver qcrypto_hmac_afalg_driver;
+
+#endif
+
+#endif
--- a/default-configs/arm-softmmu.mak
+++ b/default-configs/arm-softmmu.mak
@@ -83,6 +83,7 @@ CONFIG_ONENAND=y
 CONFIG_TUSB6010=y
 CONFIG_IMX=y
 CONFIG_MAINSTONE=y
+CONFIG_MPS2=y
 CONFIG_NSERIES=y
 CONFIG_RASPI=y
 CONFIG_REALVIEW=y
@@ -95,6 +96,11 @@ CONFIG_STM32F2XX_ADC=y
 CONFIG_STM32F2XX_SPI=y
 CONFIG_STM32F205_SOC=y

+CONFIG_CMSDK_APB_TIMER=y
+CONFIG_CMSDK_APB_UART=y
+
+CONFIG_MPS2_SCC=y
+
 CONFIG_VERSATILE_PCI=y
 CONFIG_VERSATILE_I2C=y

--- a/docs/colo-proxy.txt
+++ b/docs/colo-proxy.txt
@@ -182,6 +182,32 @@ Secondary(ip:3.3.3.8):
 -chardev socket,id=red1,host=3.3.3.3,port=9004
 -object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0
 -object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1
+-object filter-rewriter,id=f3,netdev=hn0,queue=all
+
+If you want to use virtio-net-pci or another driver with a vnet header:
+
+Primary(ip:3.3.3.3):
+-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
+-device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66
+-chardev socket,id=mirror0,host=3.3.3.3,port=9003,server,nowait
+-chardev socket,id=compare1,host=3.3.3.3,port=9004,server,nowait
+-chardev socket,id=compare0,host=3.3.3.3,port=9001,server,nowait
+-chardev socket,id=compare0-0,host=3.3.3.3,port=9001
+-chardev socket,id=compare_out,host=3.3.3.3,port=9005,server,nowait
+-chardev socket,id=compare_out0,host=3.3.3.3,port=9005
+-object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0,vnet_hdr_support
+-object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out,vnet_hdr_support
+-object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0,vnet_hdr_support
+-object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0,vnet_hdr_support
+
+Secondary(ip:3.3.3.8):
+-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
+-device e1000,netdev=hn0,mac=52:a4:00:12:78:66
+-chardev socket,id=red0,host=3.3.3.3,port=9003
+-chardev socket,id=red1,host=3.3.3.3,port=9004
+-object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0,vnet_hdr_support
+-object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1,vnet_hdr_support
+-object filter-rewriter,id=f3,netdev=hn0,queue=all,vnet_hdr_support

 Note:
  a.COLO-proxy must work with COLO-frame and Block-replication.
--- a/docs/devel/bitmaps.md
+++ b/docs/devel/bitmaps.md
@@ -1,505 +0,0 @@
-<!--
-Copyright 2015 John Snow <jsnow@redhat.com> and Red Hat, Inc.
-All rights reserved.
-
-This file is licensed via The FreeBSD Documentation License, the full text of
-which is included at the end of this document.
-->
-
-# Dirty Bitmaps and Incremental Backup
-
-* Dirty Bitmaps are objects that track which data needs to be backed up for the
-  next incremental backup.
-
-* Dirty bitmaps can be created at any time and attached to any node
-  (not just complete drives.)
-
-## Dirty Bitmap Names
-
-* A dirty bitmap's name is unique to the node, but bitmaps attached to different
-  nodes can share the same name.
-
-* Dirty bitmaps created for internal use by QEMU may be anonymous and have no
-  name, but any user-created bitmaps may not be. There can be any number of
-  anonymous bitmaps per node.
-
-* The name of a user-created bitmap must not be empty ("").
-
-## Bitmap Modes
-
-* A Bitmap can be "frozen," which means that it is currently in-use by a backup
-  operation and cannot be deleted, renamed, written to, reset,
-  etc.
-
-* The normal operating mode for a bitmap is "active."
-
-## Basic QMP Usage
-
-### Supported Commands ###
-
-* block-dirty-bitmap-add
-* block-dirty-bitmap-remove
-* block-dirty-bitmap-clear
-
-### Creation
-
-* To create a new bitmap, enabled, on the drive with id=drive0:
-
-```json
-{ "execute": "block-dirty-bitmap-add",
-  "arguments": {
-    "node": "drive0",
-    "name": "bitmap0"
-  }
-}
-```
-
-* This bitmap will have a default granularity that matches the cluster size of
-  its associated drive, if available, clamped to between [4KiB, 64KiB].
-  The current default for qcow2 is 64KiB.
-
-* To create a new bitmap that tracks changes in 32KiB segments:
-
-```json
-{ "execute": "block-dirty-bitmap-add",
-  "arguments": {
-    "node": "drive0",
-    "name": "bitmap0",
-    "granularity": 32768
-  }
-}
-```
-
-### Deletion
-
-* Bitmaps that are frozen cannot be deleted.
-
-* Deleting the bitmap does not impact any other bitmaps attached to the same
-  node, nor does it affect any backups already created from this node.
-
-* Because bitmaps are only unique to the node to which they are attached,
-  you must specify the node/drive name here, too.
-
-```json
-{ "execute": "block-dirty-bitmap-remove",
-  "arguments": {
-    "node": "drive0",
-    "name": "bitmap0"
-  }
-}
-```
-
-### Resetting
-
-* Resetting a bitmap will clear all information it holds.
-
-* An incremental backup created from an empty bitmap will copy no data,
-  as if nothing has changed.
-
-```json
-{ "execute": "block-dirty-bitmap-clear",
-  "arguments": {
-    "node": "drive0",
-    "name": "bitmap0"
-  }
-}
-```
-
-## Transactions
-
-### Justification
-
-Bitmaps can be safely modified when the VM is paused or halted by using
-the basic QMP commands. For instance, you might perform the following actions:
-
-1. Boot the VM in a paused state.
-2. Create a full drive backup of drive0.
-3. Create a new bitmap attached to drive0.
-4. Resume execution of the VM.
-5. Incremental backups are ready to be created.
-
-At this point, the bitmap and drive backup would be correctly in sync,
-and incremental backups made from this point forward would be correctly aligned
-to the full drive backup.
-
-This is not particularly useful if we decide we want to start incremental
-backups after the VM has been running for a while, for which we will need to
-perform actions such as the following:
-
-1. Boot the VM and begin execution.
-2. Using a single transaction, perform the following operations:
-    * Create bitmap0.
-    * Create a full drive backup of drive0.
-3. Incremental backups are now ready to be created.
-
-### Supported Bitmap Transactions
-
-* block-dirty-bitmap-add
-* block-dirty-bitmap-clear
-
-The usages are identical to their respective QMP commands, but see below
-for examples.
-
-### Example: New Incremental Backup
-
-As outlined in the justification, perhaps we want to create a new incremental
-backup chain attached to a drive.
-
-```json
-{ "execute": "transaction",
-  "arguments": {
-    "actions": [
-      {"type": "block-dirty-bitmap-add",
-       "data": {"node": "drive0", "name": "bitmap0"} },
-      {"type": "drive-backup",
-       "data": {"device": "drive0", "target": "/path/to/full_backup.img",
-                "sync": "full", "format": "qcow2"} }
-    ]
-  }
-}
-```
-
-### Example: New Incremental Backup Anchor Point
-
-Maybe we just want to create a new full backup with an existing bitmap and
-want to reset the bitmap to track the new chain.
-
-```json
-{ "execute": "transaction",
-  "arguments": {
-    "actions": [
-      {"type": "block-dirty-bitmap-clear",
-       "data": {"node": "drive0", "name": "bitmap0"} },
-      {"type": "drive-backup",
-       "data": {"device": "drive0", "target": "/path/to/new_full_backup.img",
-                "sync": "full", "format": "qcow2"} }
-    ]
-  }
-}
-```
-
-## Incremental Backups
-
-The star of the show.
-
-**Nota Bene!** Only incremental backups of entire drives are supported for now.
-So despite the fact that you can attach a bitmap to any arbitrary node, they are
-only currently useful when attached to the root node. This is because
-drive-backup only supports drives/devices instead of arbitrary nodes.
-
-### Example: First Incremental Backup
-
-1. Create a full backup and sync it to the dirty bitmap, as in the transactional
-examples above; or with the VM offline, manually create a full copy and then
-create a new bitmap before the VM begins execution.
-
-    * Let's assume the full backup is named 'full_backup.img'.
-    * Let's assume the bitmap you created is 'bitmap0' attached to 'drive0'.
-
-2. Create a destination image for the incremental backup that utilizes the
-full backup as a backing image.
-
-    * Let's assume it is named 'incremental.0.img'.
-
-    ```sh
-    # qemu-img create -f qcow2 incremental.0.img -b full_backup.img -F qcow2
-    ```
-
-3. Issue the incremental backup command:
-
-    ```json
-    { "execute": "drive-backup",
-      "arguments": {
-        "device": "drive0",
-        "bitmap": "bitmap0",
-        "target": "incremental.0.img",
-        "format": "qcow2",
-        "sync": "incremental",
-        "mode": "existing"
-      }
-    }
-    ```
-
-### Example: Second Incremental Backup
-
-1. Create a new destination image for the incremental backup that points to the
-   previous one, e.g.: 'incremental.1.img'
-
-    ```sh
-    # qemu-img create -f qcow2 incremental.1.img -b incremental.0.img -F qcow2
-    ```
-
-2. Issue a new incremental backup command. The only difference here is that we
-   have changed the target image below.
-
-    ```json
-    { "execute": "drive-backup",
-      "arguments": {
-        "device": "drive0",
-        "bitmap": "bitmap0",
-        "target": "incremental.1.img",
-        "format": "qcow2",
-        "sync": "incremental",
-        "mode": "existing"
-      }
-    }
-    ```
-
-## Errors
-
-* In the event of an error that occurs after a backup job is successfully
-  launched, either by a direct QMP command or a QMP transaction, the user
-  will receive a BLOCK_JOB_COMPLETE event with a failure message, accompanied
-  by a BLOCK_JOB_ERROR event.
-
-* In the case of an event being cancelled, the user will receive a
-  BLOCK_JOB_CANCELLED event instead of a pair of COMPLETE and ERROR events.
-
-* In either case, the incremental backup data contained within the bitmap is
-  safely rolled back, and the data within the bitmap is not lost. The image
-  file created for the failed attempt can be safely deleted.
-
-* Once the underlying problem is fixed (e.g. more storage space is freed up),
-  you can simply retry the incremental backup command with the same bitmap.
-
-### Example
-
-1. Create a target image:
-
-    ```sh
-    # qemu-img create -f qcow2 incremental.0.img -b full_backup.img -F qcow2
-    ```
-
-2. Attempt to create an incremental backup via QMP:
-
-    ```json
-    { "execute": "drive-backup",
-      "arguments": {
-        "device": "drive0",
-        "bitmap": "bitmap0",
-        "target": "incremental.0.img",
-        "format": "qcow2",
-        "sync": "incremental",
-        "mode": "existing"
-      }
-    }
-    ```
-
-3. Receive an event notifying us of failure:
-
-    ```json
-    { "timestamp": { "seconds": 1424709442, "microseconds": 844524 },
-      "data": { "speed": 0, "offset": 0, "len": 67108864,
-                "error": "No space left on device",
-                "device": "drive1", "type": "backup" },
-      "event": "BLOCK_JOB_COMPLETED" }
-    ```
-
-4. Delete the failed incremental, and re-create the image.
-
-    ```sh
-    # rm incremental.0.img
-    # qemu-img create -f qcow2 incremental.0.img -b full_backup.img -F qcow2
-    ```
-
-5. Retry the command after fixing the underlying problem,
-   such as freeing up space on the backup volume:
-
-    ```json
-    { "execute": "drive-backup",
-      "arguments": {
-        "device": "drive0",
-        "bitmap": "bitmap0",
-        "target": "incremental.0.img",
-        "format": "qcow2",
-        "sync": "incremental",
-        "mode": "existing"
-      }
-    }
-    ```
-
-6. Receive confirmation that the job completed successfully:
-
-    ```json
-    { "timestamp": { "seconds": 1424709668, "microseconds": 526525 },
-      "data": { "device": "drive1", "type": "backup",
-                "speed": 0, "len": 67108864, "offset": 67108864},
-      "event": "BLOCK_JOB_COMPLETED" }
-    ```
-
-### Partial Transactional Failures
-
-* Sometimes, a transaction will succeed in launching and return success,
-  but then later the backup jobs themselves may fail. It is possible that
-  a management application may have to deal with a partial backup failure
-  after a successful transaction.
-
-* If multiple backup jobs are specified in a single transaction, when one of
-  them fails, it will not interact with the other backup jobs in any way.
-
-* The job(s) that succeeded will clear the dirty bitmap associated with the
-  operation, but the job(s) that failed will not. It is not "safe" to delete
-  any incremental backups that were created successfully in this scenario,
-  even though others failed.
-
-#### Example
-
-* QMP example highlighting two backup jobs:
-
-    ```json
-    { "execute": "transaction",
-      "arguments": {
-        "actions": [
-          { "type": "drive-backup",
-            "data": { "device": "drive0", "bitmap": "bitmap0",
-                      "format": "qcow2", "mode": "existing",
-                      "sync": "incremental", "target": "d0-incr-1.qcow2" } },
-          { "type": "drive-backup",
-            "data": { "device": "drive1", "bitmap": "bitmap1",
-                      "format": "qcow2", "mode": "existing",
-                      "sync": "incremental", "target": "d1-incr-1.qcow2" } },
-        ]
-      }
-    }
-    ```
-
-* QMP example response, highlighting one success and one failure:
-    * Acknowledgement that the Transaction was accepted and jobs were launched:
-        ```json
-        { "return": {} }
-        ```
-
-    * Later, QEMU sends notice that the first job was completed:
-        ```json
-        { "timestamp": { "seconds": 1447192343, "microseconds": 615698 },
-          "data": { "device": "drive0", "type": "backup",
-                     "speed": 0, "len": 67108864, "offset": 67108864 },
-          "event": "BLOCK_JOB_COMPLETED"
-        }
-        ```
-
-    * Later yet, QEMU sends notice that the second job has failed:
-        ```json
-        { "timestamp": { "seconds": 1447192399, "microseconds": 683015 },
-          "data": { "device": "drive1", "action": "report",
-                    "operation": "read" },
-          "event": "BLOCK_JOB_ERROR" }
-        ```
-
-        ```json
-        { "timestamp": { "seconds": 1447192399, "microseconds": 685853 },
-          "data": { "speed": 0, "offset": 0, "len": 67108864,
-                    "error": "Input/output error",
-                    "device": "drive1", "type": "backup" },
-          "event": "BLOCK_JOB_COMPLETED" }
-
-* In the above example, "d0-incr-1.qcow2" is valid and must be kept,
-  but "d1-incr-1.qcow2" is invalid and should be deleted. If a VM-wide
-  incremental backup of all drives at a point-in-time is to be made,
-  new backups for both drives will need to be made, taking into account
-  that a new incremental backup for drive0 needs to be based on top of
-  "d0-incr-1.qcow2."
-
-### Grouped Completion Mode
-
-* While jobs launched by transactions normally complete or fail on their own,
-  it is possible to instruct them to complete or fail together as a group.
-
-* QMP transactions take an optional properties structure that can affect
-  the semantics of the transaction.
-
-* The "completion-mode" transaction property can be either "individual"
-  which is the default, legacy behavior described above, or "grouped,"
-  a new behavior detailed below.
-
-* Delayed Completion: In grouped completion mode, no jobs will report
-  success until all jobs are ready to report success.
-
-* Grouped failure: If any job fails in grouped completion mode, all remaining
-  jobs will be cancelled. Any incremental backups will restore their dirty
-  bitmap objects as if no backup command was ever issued.
-
-    * Regardless of if QEMU reports a particular incremental backup job as
-      CANCELLED or as an ERROR, the in-memory bitmap will be restored.
-
-#### Example
-
-* Here's the same example scenario from above with the new property:
-
-    ```json
-    { "execute": "transaction",
-      "arguments": {
-        "actions": [
-          { "type": "drive-backup",
-            "data": { "device": "drive0", "bitmap": "bitmap0",
-                      "format": "qcow2", "mode": "existing",
-                      "sync": "incremental", "target": "d0-incr-1.qcow2" } },
-          { "type": "drive-backup",
-            "data": { "device": "drive1", "bitmap": "bitmap1",
-                      "format": "qcow2", "mode": "existing",
-                      "sync": "incremental", "target": "d1-incr-1.qcow2" } },
-        ],
-        "properties": {
-          "completion-mode": "grouped"
-        }
-      }
-    }
-    ```
-
-* QMP example response, highlighting a failure for drive2:
-    * Acknowledgement that the Transaction was accepted and jobs were launched:
-        ```json
-        { "return": {} }
-        ```
-
-    * Later, QEMU sends notice that the second job has errored out,
-      but that the first job was also cancelled:
-        ```json
-        { "timestamp": { "seconds": 1447193702, "microseconds": 632377 },
-          "data": { "device": "drive1", "action": "report",
-                    "operation": "read" },
-          "event": "BLOCK_JOB_ERROR" }
-        ```
-
-        ```json
-        { "timestamp": { "seconds": 1447193702, "microseconds": 640074 },
-          "data": { "speed": 0, "offset": 0, "len": 67108864,
-                    "error": "Input/output error",
-                    "device": "drive1", "type": "backup" },
-          "event": "BLOCK_JOB_COMPLETED" }
-        ```
-
-        ```json
-        { "timestamp": { "seconds": 1447193702, "microseconds": 640163 },
-          "data": { "device": "drive0", "type": "backup", "speed": 0,
-                    "len": 67108864, "offset": 16777216 },
-          "event": "BLOCK_JOB_CANCELLED" }
-        ```
-
-<!--
-The FreeBSD Documentation License
-
-Redistribution and use in source (Markdown) and 'compiled' forms (SGML, HTML,
-PDF, PostScript, RTF and so forth) with or without modification, are permitted
-provided that the following conditions are met:
-
-Redistributions of source code (Markdown) must retain the above copyright
-notice, this list of conditions and the following disclaimer of this file
-unmodified.
-
-Redistributions in compiled form (transformed to other DTDs, converted to PDF,
-PostScript, RTF and other formats) must reproduce the above copyright notice,
-this list of conditions and the following disclaimer in the documentation and/or
-other materials provided with the distribution.
-
-THIS DOCUMENTATION IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR  PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS  BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
-THIS DOCUMENTATION, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->
--- a/docs/devel/memory.txt
+++ b/docs/devel/memory.txt
@@ -91,6 +91,37 @@ one of whose subregions is a low priority "background" region covering
 the whole address range; this is often clearer and is preferred.
 Subregions cannot be added to an alias region.

+Migration
+---------
+
+Where the memory region is backed by host memory (RAM, ROM and
+ROM device memory region types), this host memory needs to be
+copied to the destination on migration. These APIs which allocate
+the host memory for you will also register the memory so it is
+migrated:
+ - memory_region_init_ram()
+ - memory_region_init_rom()
+ - memory_region_init_rom_device()
+
+For most devices and boards this is the correct thing. If you
+have a special case where you need to manage the migration of
+the backing memory yourself, you can call the functions:
+ - memory_region_init_ram_nomigrate()
+ - memory_region_init_rom_nomigrate()
+ - memory_region_init_rom_device_nomigrate()
+which only initialize the MemoryRegion and leave handling
+migration to the caller.
+
+The functions:
+ - memory_region_init_resizeable_ram()
+ - memory_region_init_ram_from_file()
+ - memory_region_init_ram_from_fd()
+ - memory_region_init_ram_ptr()
+ - memory_region_init_ram_device_ptr()
+are for special cases only, and so they do not automatically
+register the backing memory for migration; the caller must
+manage migration if necessary.
+
 Region names
 ------------

--- a/docs/devel/tracing.txt
+++ b/docs/devel/tracing.txt
@@ -14,8 +14,7 @@ for debugging, profiling, and observing execution.

 2. Create a file with the events you want to trace:

-   echo bdrv_aio_readv   > /tmp/events
-   echo bdrv_aio_writev >> /tmp/events
+   echo memory_region_ops_read >/tmp/events

 3. Run the virtual machine to produce a trace file:

--- a/docs/interop/bitmaps.rst
+++ b/docs/interop/bitmaps.rst
@@ -0,0 +1,555 @@
+..
+   Copyright 2015 John Snow <jsnow@redhat.com> and Red Hat, Inc.
+   All rights reserved.
+
+   This file is licensed via The FreeBSD Documentation License, the full
+   text of which is included at the end of this document.
+
+====================================
+Dirty Bitmaps and Incremental Backup
+====================================
+
+-  Dirty Bitmaps are objects that track which data needs to be backed up
+   for the next incremental backup.
+
+-  Dirty bitmaps can be created at any time and attached to any node
+   (not just complete drives).
+
+.. contents::
+
+Dirty Bitmap Names
+------------------
+
+-  A dirty bitmap's name is unique to the node, but bitmaps attached to
+   different nodes can share the same name.
+
+-  Dirty bitmaps created for internal use by QEMU may be anonymous and
+   have no name, but any user-created bitmaps must have a name. There
+   can be any number of anonymous bitmaps per node.
+
+-  The name of a user-created bitmap must not be empty ("").
+
+Bitmap Modes
+------------
+
+-  A bitmap can be "frozen," which means that it is currently in-use by
+   a backup operation and cannot be deleted, renamed, written to, reset,
+   etc.
+
+-  The normal operating mode for a bitmap is "active."
+
+Basic QMP Usage
+---------------
+
+Supported Commands
+~~~~~~~~~~~~~~~~~~
+
+- ``block-dirty-bitmap-add``
+- ``block-dirty-bitmap-remove``
+- ``block-dirty-bitmap-clear``
+
+Creation
+~~~~~~~~
+
+-  To create a new bitmap, enabled, on the drive with id=drive0:
+
+.. code:: json
+
+    { "execute": "block-dirty-bitmap-add",
+      "arguments": {
+        "node": "drive0",
+        "name": "bitmap0"
+      }
+    }
+
+-  This bitmap will have a default granularity that matches the cluster
+   size of its associated drive, if available, clamped to between [4KiB,
+   64KiB]. The current default for qcow2 is 64KiB.
+
+-  To create a new bitmap that tracks changes in 32KiB segments:
+
+.. code:: json
+
+    { "execute": "block-dirty-bitmap-add",
+      "arguments": {
+        "node": "drive0",
+        "name": "bitmap0",
+        "granularity": 32768
+      }
+    }
+
+Deletion
+~~~~~~~~
+
+-  Bitmaps that are frozen cannot be deleted.
+
+-  Deleting the bitmap does not impact any other bitmaps attached to the
+   same node, nor does it affect any backups already created from this
+   node.
+
+-  Because bitmaps are only unique to the node to which they are
+   attached, you must specify the node/drive name here, too.
+
+.. code:: json
+
+    { "execute": "block-dirty-bitmap-remove",
+      "arguments": {
+        "node": "drive0",
+        "name": "bitmap0"
+      }
+    }
+
+Resetting
+~~~~~~~~~
+
+-  Resetting a bitmap will clear all information it holds.
+
+-  An incremental backup created from an empty bitmap will copy no data,
+   as if nothing has changed.
+
+.. code:: json
+
+    { "execute": "block-dirty-bitmap-clear",
+      "arguments": {
+        "node": "drive0",
+        "name": "bitmap0"
+      }
+    }
+
+Transactions
+------------
+
+Justification
+~~~~~~~~~~~~~
+
+Bitmaps can be safely modified when the VM is paused or halted by using
+the basic QMP commands. For instance, you might perform the following
+actions:
+
+1. Boot the VM in a paused state.
+2. Create a full drive backup of drive0.
+3. Create a new bitmap attached to drive0.
+4. Resume execution of the VM.
+5. Incremental backups are ready to be created.
+
+At this point, the bitmap and drive backup would be correctly in sync,
+and incremental backups made from this point forward would be correctly
+aligned to the full drive backup.
+
+This is not particularly useful if we decide we want to start
+incremental backups after the VM has been running for a while, for which
+we will need to perform actions such as the following:
+
+1. Boot the VM and begin execution.
+2. Using a single transaction, perform the following operations:
+
+   -  Create ``bitmap0``.
+   -  Create a full drive backup of ``drive0``.
+
+3. Incremental backups are now ready to be created.
+
+Supported Bitmap Transactions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+-  ``block-dirty-bitmap-add``
+-  ``block-dirty-bitmap-clear``
+
+The usages are identical to their respective QMP commands, but see below
+for examples.
+
+Example: New Incremental Backup
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+As outlined in the justification, perhaps we want to create a new
+incremental backup chain attached to a drive.
+
+.. code:: json
+
+    { "execute": "transaction",
+      "arguments": {
+        "actions": [
+          {"type": "block-dirty-bitmap-add",
+           "data": {"node": "drive0", "name": "bitmap0"} },
+          {"type": "drive-backup",
+           "data": {"device": "drive0", "target": "/path/to/full_backup.img",
+                    "sync": "full", "format": "qcow2"} }
+        ]
+      }
+    }
+
+Example: New Incremental Backup Anchor Point
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Maybe we just want to create a new full backup with an existing bitmap
+and want to reset the bitmap to track the new chain.
+
+.. code:: json
+
+    { "execute": "transaction",
+      "arguments": {
+        "actions": [
+          {"type": "block-dirty-bitmap-clear",
+           "data": {"node": "drive0", "name": "bitmap0"} },
+          {"type": "drive-backup",
+           "data": {"device": "drive0", "target": "/path/to/new_full_backup.img",
+                    "sync": "full", "format": "qcow2"} }
+        ]
+      }
+    }
+
+Incremental Backups
+-------------------
+
+The star of the show.
+
+**Nota Bene!** Only incremental backups of entire drives are supported
+for now. So despite the fact that you can attach a bitmap to any
+arbitrary node, they are only currently useful when attached to the root
+node. This is because drive-backup only supports drives/devices instead
+of arbitrary nodes.
+
+Example: First Incremental Backup
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+1. Create a full backup and sync it to the dirty bitmap, as in the
+   transactional examples above; or with the VM offline, manually create
+   a full copy and then create a new bitmap before the VM begins
+   execution.
+
+   -  Let's assume the full backup is named ``full_backup.img``.
+   -  Let's assume the bitmap you created is ``bitmap0`` attached to
+      ``drive0``.
+
+2. Create a destination image for the incremental backup that utilizes
+   the full backup as a backing image.
+
+   -  Let's assume the new incremental image is named
+      ``incremental.0.img``.
+
+   .. code:: bash
+
+       $ qemu-img create -f qcow2 incremental.0.img -b full_backup.img -F qcow2
+
+3. Issue the incremental backup command:
+
+   .. code:: json
+
+       { "execute": "drive-backup",
+         "arguments": {
+           "device": "drive0",
+           "bitmap": "bitmap0",
+           "target": "incremental.0.img",
+           "format": "qcow2",
+           "sync": "incremental",
+           "mode": "existing"
+         }
+       }
+
+Example: Second Incremental Backup
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+1. Create a new destination image for the incremental backup that points
+   to the previous one, e.g.: ``incremental.1.img``
+
+   .. code:: bash
+
+       $ qemu-img create -f qcow2 incremental.1.img -b incremental.0.img -F qcow2
+
+2. Issue a new incremental backup command. The only difference here is
+   that we have changed the target image below.
+
+   .. code:: json
+
+       { "execute": "drive-backup",
+         "arguments": {
+           "device": "drive0",
+           "bitmap": "bitmap0",
+           "target": "incremental.1.img",
+           "format": "qcow2",
+           "sync": "incremental",
+           "mode": "existing"
+         }
+       }
+
+Errors
+------
+
+-  In the event of an error that occurs after a backup job is
+   successfully launched, either by a direct QMP command or a QMP
+   transaction, the user will receive a ``BLOCK_JOB_COMPLETED`` event with
+   a failure message, accompanied by a ``BLOCK_JOB_ERROR`` event.
+
+-  In the case of a job being cancelled, the user will receive a
+   ``BLOCK_JOB_CANCELLED`` event instead of a pair of COMPLETE and ERROR
+   events.
+
+-  In either case, the incremental backup data contained within the
+   bitmap is safely rolled back, and the data within the bitmap is not
+   lost. The image file created for the failed attempt can be safely
+   deleted.
+
+-  Once the underlying problem is fixed (e.g. more storage space is
+   freed up), you can simply retry the incremental backup command with
+   the same bitmap.
+
+Example
+~~~~~~~
+
+1. Create a target image:
+
+   .. code:: bash
+
+       $ qemu-img create -f qcow2 incremental.0.img -b full_backup.img -F qcow2
+
+2. Attempt to create an incremental backup via QMP:
+
+   .. code:: json
+
+       { "execute": "drive-backup",
+         "arguments": {
+           "device": "drive0",
+           "bitmap": "bitmap0",
+           "target": "incremental.0.img",
+           "format": "qcow2",
+           "sync": "incremental",
+           "mode": "existing"
+         }
+       }
+
+3. Receive an event notifying us of failure:
+
+   .. code:: json
+
+       { "timestamp": { "seconds": 1424709442, "microseconds": 844524 },
+         "data": { "speed": 0, "offset": 0, "len": 67108864,
+                   "error": "No space left on device",
+                   "device": "drive1", "type": "backup" },
+         "event": "BLOCK_JOB_COMPLETED" }
+
+4. Delete the failed incremental, and re-create the image.
+
+   .. code:: bash
+
+       $ rm incremental.0.img
+       $ qemu-img create -f qcow2 incremental.0.img -b full_backup.img -F qcow2
+
+5. Retry the command after fixing the underlying problem, such as
+   freeing up space on the backup volume:
+
+   .. code:: json
+
+       { "execute": "drive-backup",
+         "arguments": {
+           "device": "drive0",
+           "bitmap": "bitmap0",
+           "target": "incremental.0.img",
+           "format": "qcow2",
+           "sync": "incremental",
+           "mode": "existing"
+         }
+       }
+
+6. Receive confirmation that the job completed successfully:
+
+   .. code:: json
+
+       { "timestamp": { "seconds": 1424709668, "microseconds": 526525 },
+         "data": { "device": "drive1", "type": "backup",
+                   "speed": 0, "len": 67108864, "offset": 67108864},
+         "event": "BLOCK_JOB_COMPLETED" }
+
+Partial Transactional Failures
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+-  Sometimes, a transaction will succeed in launching and return
+   success, but then later the backup jobs themselves may fail. It is
+   possible that a management application may have to deal with a
+   partial backup failure after a successful transaction.
+
+-  If multiple backup jobs are specified in a single transaction, when
+   one of them fails, it will not interact with the other backup jobs in
+   any way.
+
+-  The job(s) that succeeded will clear the dirty bitmap associated with
+   the operation, but the job(s) that failed will not. It is not "safe"
+   to delete any incremental backups that were created successfully in
+   this scenario, even though others failed.
+
+Example
+^^^^^^^
+
+-  QMP example highlighting two backup jobs:
+
+   .. code:: json
+
+       { "execute": "transaction",
+         "arguments": {
+           "actions": [
+             { "type": "drive-backup",
+               "data": { "device": "drive0", "bitmap": "bitmap0",
+                         "format": "qcow2", "mode": "existing",
+                         "sync": "incremental", "target": "d0-incr-1.qcow2" } },
+             { "type": "drive-backup",
+               "data": { "device": "drive1", "bitmap": "bitmap1",
+                         "format": "qcow2", "mode": "existing",
+                         "sync": "incremental", "target": "d1-incr-1.qcow2" } }
+           ]
+         }
+       }
+
+-  QMP example response, highlighting one success and one failure:
+
+   -  Acknowledgement that the Transaction was accepted and jobs were
+      launched:
+
+      .. code:: json
+
+          { "return": {} }
+
+   -  Later, QEMU sends notice that the first job was completed:
+
+      .. code:: json
+
+          { "timestamp": { "seconds": 1447192343, "microseconds": 615698 },
+            "data": { "device": "drive0", "type": "backup",
+                       "speed": 0, "len": 67108864, "offset": 67108864 },
+            "event": "BLOCK_JOB_COMPLETED"
+          }
+
+   -  Later yet, QEMU sends notice that the second job has failed:
+
+      .. code:: json
+
+          { "timestamp": { "seconds": 1447192399, "microseconds": 683015 },
+            "data": { "device": "drive1", "action": "report",
+                      "operation": "read" },
+            "event": "BLOCK_JOB_ERROR" }
+
+      .. code:: json
+
+          { "timestamp": { "seconds": 1447192399, "microseconds":
+          685853 }, "data": { "speed": 0, "offset": 0, "len": 67108864,
+          "error": "Input/output error", "device": "drive1", "type":
+          "backup" }, "event": "BLOCK_JOB_COMPLETED" }
+
+-  In the above example, ``d0-incr-1.qcow2`` is valid and must be kept,
+   but ``d1-incr-1.qcow2`` is invalid and should be deleted. If a VM-wide
+   incremental backup of all drives at a point-in-time is to be made,
+   new backups for both drives will need to be made, taking into account
+   that a new incremental backup for drive0 needs to be based on top of
+   ``d0-incr-1.qcow2``.
+
+Grouped Completion Mode
+~~~~~~~~~~~~~~~~~~~~~~~
+
+-  While jobs launched by transactions normally complete or fail on
+   their own, it is possible to instruct them to complete or fail
+   together as a group.
+
+-  QMP transactions take an optional properties structure that can
+   affect the semantics of the transaction.
+
+-  The "completion-mode" transaction property can be either "individual"
+   which is the default, legacy behavior described above, or "grouped,"
+   a new behavior detailed below.
+
+-  Delayed Completion: In grouped completion mode, no jobs will report
+   success until all jobs are ready to report success.
+
+-  Grouped failure: If any job fails in grouped completion mode, all
+   remaining jobs will be cancelled. Any incremental backups will
+   restore their dirty bitmap objects as if no backup command was ever
+   issued.
+
+   -  Regardless of whether QEMU reports a particular incremental backup job
+      as CANCELLED or as an ERROR, the in-memory bitmap will be
+      restored.
+
+Example
+^^^^^^^
+
+-  Here's the same example scenario from above with the new property:
+
+   .. code:: json
+
+       { "execute": "transaction",
+         "arguments": {
+           "actions": [
+             { "type": "drive-backup",
+               "data": { "device": "drive0", "bitmap": "bitmap0",
+                         "format": "qcow2", "mode": "existing",
+                         "sync": "incremental", "target": "d0-incr-1.qcow2" } },
+             { "type": "drive-backup",
+               "data": { "device": "drive1", "bitmap": "bitmap1",
+                         "format": "qcow2", "mode": "existing",
+                         "sync": "incremental", "target": "d1-incr-1.qcow2" } }
+           ],
+           "properties": {
+             "completion-mode": "grouped"
+           }
+         }
+       }
+
+-  QMP example response, highlighting a failure for ``drive1``:
+
+   -  Acknowledgement that the Transaction was accepted and jobs were
+      launched:
+
+      .. code:: json
+
+          { "return": {} }
+
+   -  Later, QEMU sends notice that the second job has errored out, but
+      that the first job was also cancelled:
+
+      .. code:: json
+
+          { "timestamp": { "seconds": 1447193702, "microseconds": 632377 },
+            "data": { "device": "drive1", "action": "report",
+                      "operation": "read" },
+            "event": "BLOCK_JOB_ERROR" }
+
+      .. code:: json
+
+          { "timestamp": { "seconds": 1447193702, "microseconds": 640074 },
+            "data": { "speed": 0, "offset": 0, "len": 67108864,
+                      "error": "Input/output error",
+                      "device": "drive1", "type": "backup" },
+            "event": "BLOCK_JOB_COMPLETED" }
+
+      .. code:: json
+
+          { "timestamp": { "seconds": 1447193702, "microseconds": 640163 },
+            "data": { "device": "drive0", "type": "backup", "speed": 0,
+                      "len": 67108864, "offset": 16777216 },
+            "event": "BLOCK_JOB_CANCELLED" }
+
+.. raw:: html
+
+   <!--
+   The FreeBSD Documentation License
+
+   Redistribution and use in source (Markdown) and 'compiled' forms (SGML, HTML,
+   PDF, PostScript, RTF and so forth) with or without modification, are permitted
+   provided that the following conditions are met:
+
+   Redistributions of source code (Markdown) must retain the above copyright
+   notice, this list of conditions and the following disclaimer of this file
+   unmodified.
+
+   Redistributions in compiled form (transformed to other DTDs, converted to PDF,
+   PostScript, RTF and other formats) must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation and/or
+   other materials provided with the distribution.
+
+   THIS DOCUMENTATION IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR  PURPOSE ARE
+   DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS  BE LIABLE
+   FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+   CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+   THIS DOCUMENTATION, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+   -->
--- a/docs/interop/live-block-operations.rst
+++ b/docs/interop/live-block-operations.rst
--- a/docs/interop/qcow2.txt
+++ b/docs/interop/qcow2.txt
@@ -45,6 +45,7 @@ The first cluster of a qcow2 image contains the file header:
         32 - 35:   crypt_method
                    0 for no encryption
                    1 for AES encryption
+                    2 for LUKS encryption

         36 - 39:   l1_size
                    Number of entries in the active L1 table
@@ -135,6 +136,7 @@ be stored. Each extension has a structure like the following:
                        0xE2792ACA - Backing file format name
                        0x6803f857 - Feature name table
                        0x23852875 - Bitmaps extension
+                        0x0537be77 - Full disk encryption header pointer
                        other      - Unknown header extension, can be safely
                                     ignored

@@ -201,12 +203,113 @@ The fields of the bitmaps extension are:

          8 - 15:  bitmap_directory_size
                   Size of the bitmap directory in bytes. It is the cumulative
-                   size of all (nb_bitmaps) bitmap headers.
+                   size of all (nb_bitmaps) bitmap directory entries.

         16 - 23:  bitmap_directory_offset
                   Offset into the image file at which the bitmap directory
                   starts. Must be aligned to a cluster boundary.

+== Full disk encryption header pointer ==
+
+The full disk encryption header must be present if, and only if, the
+'crypt_method' header requires metadata. Currently this is only true
+of the 'LUKS' crypt method. The header extension must be absent for
+other methods.
+
+This header provides the offset at which the crypt method can store
+its additional data, as well as the length of such data.
+
+    Byte  0 -  7:   Offset into the image file at which the encryption
+                    header starts in bytes. Must be aligned to a cluster
+                    boundary.
+    Byte  8 - 15:   Length of the written encryption header in bytes.
+                    Note actual space allocated in the qcow2 file may
+                    be larger than this value, since it will be rounded
+                    up to a multiple of the cluster size. Any
+                    unused bytes in the allocated space will be initialized
+                    to 0.
+
+For the LUKS crypt method, the encryption header works as follows.
+
+The first 592 bytes of the header clusters will contain the LUKS
+partition header. This is then followed by the key material data areas.
+The size of the key material data areas is determined by the number of
+stripes in the key slot and key size. Refer to the LUKS format
+specification ('docs/on-disk-format.pdf' in the cryptsetup source
+package) for details of the LUKS partition header format.
+
+In the LUKS partition header, the "payload-offset" field will be
+calculated as normal for the LUKS spec, i.e. the size of the LUKS
+header, plus key material regions, plus padding, relative to the
+start of the LUKS header. This offset value is not required to be
+qcow2 cluster aligned. Its value is currently never used in the
+context of qcow2, since the qcow2 file format itself defines where
+the real payload offset is, but nonetheless a valid payload offset
+should always be present.
+
+In the LUKS key slots header, the "key-material-offset" is relative
+to the start of the LUKS header clusters in the qcow2 container,
+not the start of the qcow2 file.
+
+Logically the layout looks like
+
+  +-----------------------------+
+  | QCow2 header                |
+  | QCow2 header extension X    |
+  | QCow2 header extension FDE  |
+  | QCow2 header extension ...  |
+  | QCow2 header extension Z    |
+  +-----------------------------+
+  | ....other QCow2 tables....  |
+  .                             .
+  .                             .
+  +-----------------------------+
+  | +-------------------------+ |
+  | | LUKS partition header   | |
+  | +-------------------------+ |
+  | | LUKS key material 1     | |
+  | +-------------------------+ |
+  | | LUKS key material 2     | |
+  | +-------------------------+ |
+  | | LUKS key material ...   | |
+  | +-------------------------+ |
+  | | LUKS key material 8     | |
+  | +-------------------------+ |
+  +-----------------------------+
+  | QCow2 cluster payload       |
+  .                             .
+  .                             .
+  .                             .
+  |                             |
+  +-----------------------------+
+
+== Data encryption ==
+
+When an encryption method is requested in the header, the image payload
+data must be encrypted/decrypted on every write/read. The image headers
+and metadata are never encrypted.
+
+The algorithms used for encryption vary depending on the method:
+
+ - AES:
+
+   The AES cipher, in CBC mode, with 256 bit keys.
+
+   Initialization vectors are generated using the plain64 method, with
+   the virtual disk sector as the input tweak.
+
+   This format is no longer supported in QEMU system emulators, due
+   to a number of design flaws affecting its security. It is only
+   supported in the command line tools for the sake of backward compatibility
+   and data liberation.
+
+ - LUKS:
+
+   The algorithms are specified in the LUKS header.
+
+   Initialization vectors are generated using the method specified
+   in the LUKS header, with the physical disk sector as the
+   input tweak.

 == Host cluster management ==

@@ -426,8 +529,7 @@ Each bitmap saved in the image is described in a bitmap directory entry. The
 bitmap directory is a contiguous area in the image file, whose starting offset
 and length are given by the header extension fields bitmap_directory_offset and
 bitmap_directory_size. The entries of the bitmap directory have variable
-length, depending on the lengths of the bitmap name and extra data. These
-entries are also called bitmap headers.
+length, depending on the lengths of the bitmap name and extra data.

 Structure of a bitmap directory entry:

@@ -472,8 +574,7 @@ Structure of a bitmap directory entry:
             17:    granularity_bits
                    Granularity bits. Valid values: 0 - 63.

-                    Note: Qemu currently doesn't support granularity_bits
-                    greater than 31.
+                    Note: Qemu currently supports only values 9 - 31.

                    Granularity is calculated as
                        granularity = 1 << granularity_bits
--- a/docs/live-block-ops.txt
+++ b/docs/live-block-ops.txt
@@ -1,72 +0,0 @@
-LIVE BLOCK OPERATIONS
-=====================
-
-High level description of live block operations. Note these are not
-supported for use with the raw format at the moment.
-
-Note also that this document is incomplete and it currently only
-covers the 'stream' operation. Other operations supported by QEMU such
-as 'commit', 'mirror' and 'backup' are not described here yet. Please
-refer to the qapi/block-core.json file for an overview of those.
-
-Snapshot live merge
-===================
-
-Given a snapshot chain, described in this document in the following
-format:
-
-[A] <- [B] <- [C] <- [D] <- [E]
-
-Where the rightmost object ([E] in the example) described is the current
-image which the guest OS has write access to. To the left of it is its base
-image, and so on accordingly until the leftmost image, which has no
-base.
-
-The snapshot live merge operation transforms such a chain into a
-smaller one with fewer elements, such as this transformation relative
-to the first example:
-
-[A] <- [E]
-
-Data is copied in the right direction with destination being the
-rightmost image, but any other intermediate image can be specified
-instead. In this example data is copied from [C] into [D], so [D] can
-be backed by [B]:
-
-[A] <- [B] <- [D] <- [E]
-
-The operation is implemented in QEMU through image streaming facilities.
-
-The basic idea is to execute 'block_stream virtio0' while the guest is
-running. Progress can be monitored using 'info block-jobs'. When the
-streaming operation completes it raises a QMP event. 'block_stream'
-copies data from the backing file(s) into the active image. When finished,
-it adjusts the backing file pointer.
-
-The 'base' parameter specifies an image which data need not be
-streamed from. This image will be used as the backing file for the
-destination image when the operation is finished.
-
-In the first example above, the command would be:
-
-(qemu) block_stream virtio0 file-A.img
-
-In order to specify a destination image different from the active
-(rightmost) one we can use its node name instead.
-
-In the second example above, the command would be:
-
-(qemu) block_stream node-D file-B.img
-
-Live block copy
-===============
-
-To copy an in use image to another destination in the filesystem, one
-should create a live snapshot in the desired destination, then stream
-into that image. Example:
-
-(qemu) snapshot_blkdev ide0-hd0 /new-path/disk.img qcow2
-
-(qemu) block_stream ide0-hd0
-
-
--- a/exec.c
+++ b/exec.c
@@ -27,6 +27,7 @@
 #include "exec/target_page.h"
 #include "tcg.h"
 #include "hw/qdev-core.h"
+#include "hw/qdev-properties.h"
 #if !defined(CONFIG_USER_ONLY)
 #include "hw/boards.h"
 #include "hw/xen/xen.h"
@@ -480,19 +481,21 @@ static MemoryRegionSection address_space_do_translate(AddressSpace *as,
 {
    IOMMUTLBEntry iotlb;
    MemoryRegionSection *section;
-    MemoryRegion *mr;
+    IOMMUMemoryRegion *iommu_mr;
+    IOMMUMemoryRegionClass *imrc;

    for (;;) {
        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
        section = address_space_translate_internal(d, addr, &addr, plen, is_mmio);
-        mr = section->mr;

-        if (!mr->iommu_ops) {
+        iommu_mr = memory_region_get_iommu(section->mr);
+        if (!iommu_mr) {
            break;
        }
+        imrc = memory_region_get_iommu_class_nocheck(iommu_mr);

-        iotlb = mr->iommu_ops->translate(mr, addr, is_write ?
-                                         IOMMU_WO : IOMMU_RO);
+        iotlb = imrc->translate(iommu_mr, addr, is_write ?
+                                IOMMU_WO : IOMMU_RO);
        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
                | (addr & iotlb.addr_mask));
        *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
@@ -588,7 +591,7 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,

    section = address_space_translate_internal(d, addr, xlat, plen, false);

-    assert(!section->mr->iommu_ops);
+    assert(!memory_region_is_iommu(section->mr));
    return section;
 }
 #endif
@@ -735,6 +738,20 @@ void cpu_exec_unrealizefn(CPUState *cpu)
    }
 }

+Property cpu_common_props[] = {
+#ifndef CONFIG_USER_ONLY
+    /* Create a memory property for softmmu CPU object,
+     * so users can wire up its memory. (This can't go in qom/cpu.c
+     * because that file is compiled only once for both user-mode
+     * and system builds.) The default if no link is set up is to use
+     * the system address space.
+     */
+    DEFINE_PROP_LINK("memory", CPUState, memory, TYPE_MEMORY_REGION,
+                     MemoryRegion *),
+#endif
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 void cpu_exec_initfn(CPUState *cpu)
 {
    cpu->as = NULL;
@@ -742,18 +759,6 @@ void cpu_exec_initfn(CPUState *cpu)

 #ifndef CONFIG_USER_ONLY
    cpu->thread_id = qemu_get_thread_id();
-
-    /* This is a softmmu CPU object, so create a property for it
-     * so users can wire up its memory. (This can't go in qom/cpu.c
-     * because that file is compiled only once for both user-mode
-     * and system builds.) The default if no link is set up is to use
-     * the system address space.
-     */
-    object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
-                             (Object **)&cpu->memory,
-                             qdev_prop_allow_set_link_before_realize,
-                             OBJ_PROP_LINK_UNREF_ON_RELEASE,
-                             &error_abort);
    cpu->memory = system_memory;
    object_ref(OBJECT(cpu->memory));
 #endif
@@ -775,15 +780,28 @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp)
 #endif
 }

+#if defined(CONFIG_USER_ONLY)
 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 {
-    /* Flush the whole TB as this will not have race conditions
-     * even if we don't have proper locking yet.
-     * Ideally we would just invalidate the TBs for the
-     * specified PC.
-     */
-    tb_flush(cpu);
+    mmap_lock();
+    tb_lock();
+    tb_invalidate_phys_page_range(pc, pc + 1, 0);
+    tb_unlock();
+    mmap_unlock();
 }
+#else
+static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
+{
+    MemTxAttrs attrs;
+    hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
+    int asidx = cpu_asidx_from_attrs(cpu, attrs);
+    if (phys != -1) {
+        /* Locks grabbed by tb_invalidate_phys_addr */
+        tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
+                                phys | (pc & ~TARGET_PAGE_MASK));
+    }
+}
+#endif

 #if defined(CONFIG_USER_ONLY)
 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
@@ -2929,7 +2947,7 @@ static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
            }
        } else {
            /* RAM case */
-            ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
+            ptr = qemu_ram_ptr_length(mr->ram_block, addr1, &l);
            memcpy(ptr, buf, l);
            invalidate_and_set_dirty(mr, addr1, l);
        }
@@ -3020,7 +3038,7 @@ MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
            }
        } else {
            /* RAM case */
-            ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
+            ptr = qemu_ram_ptr_length(mr->ram_block, addr1, &l);
            memcpy(buf, ptr, l);
        }

--- a/fsdev/qemu-fsdev-throttle.c
+++ b/fsdev/qemu-fsdev-throttle.c
@@ -86,7 +86,7 @@ void fsdev_throttle_init(FsThrottle *fst)
                             fsdev_throttle_read_timer_cb,
                             fsdev_throttle_write_timer_cb,
                             fst);
-        throttle_config(&fst->ts, &fst->tt, &fst->cfg);
+        throttle_config(&fst->ts, QEMU_CLOCK_REALTIME, &fst->cfg);
        qemu_co_queue_init(&fst->throttled_reqs[0]);
        qemu_co_queue_init(&fst->throttled_reqs[1]);
    }
--- a/gdb-xml/s390-gs.xml
+++ b/gdb-xml/s390-gs.xml
@@ -0,0 +1,14 @@
+<?xml version="1.0"?>
+<!-- Copyright 2017 IBM Corp.
+
+     This work is licensed under the terms of the GNU GPL, version 2 or
+     (at your option) any later version. See the COPYING file in the
+     top-level directory. -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.s390.gs">
+  <reg name="gs_reserved" bitsize="64" type="uint64" group="system"/>
+  <reg name="gsd" bitsize="64" type="uint64" group="system"/>
+  <reg name="gssm" bitsize="64" type="uint64" group="system"/>
+  <reg name="gsepla" bitsize="64" type="data_ptr" group="system"/>
+</feature>
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -56,6 +56,21 @@ static inline int target_memory_rw_debug(CPUState *cpu, target_ulong addr,
    return cpu_memory_rw_debug(cpu, addr, buf, len, is_write);
 }

+/* Return the GDB index for a given vCPU state.
+ *
+ * For user mode this is simply the thread id. In system mode GDB
+ * numbers CPUs from 1 as 0 is reserved as an "any cpu" index.
+ */
+static inline int cpu_gdb_index(CPUState *cpu)
+{
+#if defined(CONFIG_USER_ONLY)
+    TaskState *ts = (TaskState *) cpu->opaque;
+    return ts->ts_tid;
+#else
+    return cpu->cpu_index + 1;
+#endif
+}
+
 enum {
    GDB_SIGNAL_0 = 0,
    GDB_SIGNAL_INT = 2,
@@ -272,7 +287,20 @@ static int gdb_signal_to_target (int sig)
        return -1;
 }

-//#define DEBUG_GDB
+/* #define DEBUG_GDB */
+
+#ifdef DEBUG_GDB
+# define DEBUG_GDB_GATE 1
+#else
+# define DEBUG_GDB_GATE 0
+#endif
+
+#define gdb_debug(fmt, ...) do { \
+    if (DEBUG_GDB_GATE) { \
+        fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
+    } \
+} while (0)
+

 typedef struct GDBRegisterState {
    int base_reg;
@@ -548,9 +576,7 @@ static int put_packet_binary(GDBState *s, const char *buf, int len)
 /* return -1 if error, 0 if OK */
 static int put_packet(GDBState *s, const char *buf)
 {
-#ifdef DEBUG_GDB
-    printf("reply='%s'\n", buf);
-#endif
+    gdb_debug("reply='%s'\n", buf);

    return put_packet_binary(s, buf, strlen(buf));
 }
@@ -827,7 +853,7 @@ static CPUState *find_cpu(uint32_t thread_id)
    CPUState *cpu;

    CPU_FOREACH(cpu) {
-        if (cpu_index(cpu) == thread_id) {
+        if (cpu_gdb_index(cpu) == thread_id) {
            return cpu;
        }
    }
@@ -912,23 +938,16 @@ static int gdb_handle_vcont(GDBState *s, const char *p)
            if (res) {
                goto out;
            }
-            idx = tmp;
-            /* 0 means any thread, so we pick the first valid CPU */
-            if (!idx) {
-                idx = cpu_index(first_cpu);
-            }

-            /*
-             * If we are in user mode, the thread specified is actually a
-             * thread id, and not an index. We need to find the actual
-             * CPU first, and only then we can use its index.
-             */
-            cpu = find_cpu(idx);
+            /* 0 means any thread, so we pick the first valid CPU */
+            cpu = tmp ? find_cpu(tmp) : first_cpu;
+
            /* invalid CPU/thread specified */
-            if (!idx || !cpu) {
+            if (!cpu) {
                res = -EINVAL;
                goto out;
            }
+
            /* only use if no previous match occourred */
            if (newstates[cpu->cpu_index] == 1) {
                newstates[cpu->cpu_index] = cur_action;
@@ -956,16 +975,16 @@ static int gdb_handle_packet(GDBState *s, const char *line_buf)
    uint8_t *registers;
    target_ulong addr, len;

-#ifdef DEBUG_GDB
-    printf("command='%s'\n", line_buf);
-#endif
+
+    gdb_debug("command='%s'\n", line_buf);
+
    p = line_buf;
    ch = *p++;
    switch(ch) {
    case '?':
        /* TODO: Make this return the correct value for user-mode.  */
        snprintf(buf, sizeof(buf), "T%02xthread:%02x;", GDB_SIGNAL_TRAP,
-                 cpu_index(s->c_cpu));
+                 cpu_gdb_index(s->c_cpu));
        put_packet(s, buf);
        /* Remove all the breakpoints when this query is issued,
         * because gdb is doing and initial connect and the state
@@ -1233,7 +1252,7 @@ static int gdb_handle_packet(GDBState *s, const char *line_buf)
        } else if (strcmp(p,"sThreadInfo") == 0) {
        report_cpuinfo:
            if (s->query_cpu) {
-                snprintf(buf, sizeof(buf), "m%x", cpu_index(s->query_cpu));
+                snprintf(buf, sizeof(buf), "m%x", cpu_gdb_index(s->query_cpu));
                put_packet(s, buf);
                s->query_cpu = CPU_NEXT(s->query_cpu);
            } else
@@ -1390,7 +1409,7 @@ static void gdb_vm_state_change(void *opaque, int running, RunState state)
            }
            snprintf(buf, sizeof(buf),
                     "T%02xthread:%02x;%swatch:" TARGET_FMT_lx ";",
-                     GDB_SIGNAL_TRAP, cpu_index(cpu), type,
+                     GDB_SIGNAL_TRAP, cpu_gdb_index(cpu), type,
                     (target_ulong)cpu->watchpoint_hit->vaddr);
            cpu->watchpoint_hit = NULL;
            goto send_packet;
@@ -1424,7 +1443,7 @@ static void gdb_vm_state_change(void *opaque, int running, RunState state)
        break;
    }
    gdb_set_stop_cpu(cpu);
-    snprintf(buf, sizeof(buf), "T%02xthread:%02x;", ret, cpu_index(cpu));
+    snprintf(buf, sizeof(buf), "T%02xthread:%02x;", ret, cpu_gdb_index(cpu));

 send_packet:
    put_packet(s, buf);
@@ -1519,17 +1538,14 @@ static void gdb_read_byte(GDBState *s, int ch)
        /* Waiting for a response to the last packet.  If we see the start
           of a new command then abandon the previous response.  */
        if (ch == '-') {
-#ifdef DEBUG_GDB
-            printf("Got NACK, retransmitting\n");
-#endif
+            gdb_debug("Got NACK, retransmitting\n");
            put_buffer(s, (uint8_t *)s->last_packet, s->last_packet_len);
+        } else if (ch == '+') {
+            gdb_debug("Got ACK\n");
+        } else {
+            gdb_debug("Got '%c' when expecting ACK/NACK\n", ch);
        }
-#ifdef DEBUG_GDB
-        else if (ch == '+')
-            printf("Got ACK\n");
-        else
-            printf("Got '%c' when expecting ACK/NACK\n", ch);
-#endif
+
        if (ch == '+' || ch == '$')
            s->last_packet_len = 0;
        if (ch != '$')
@@ -1550,9 +1566,7 @@ static void gdb_read_byte(GDBState *s, int ch)
                s->line_sum = 0;
                s->state = RS_GETLINE;
            } else {
-#ifdef DEBUG_GDB
-                printf("gdbstub received garbage between packets: 0x%x\n", ch);
-#endif
+                gdb_debug("received garbage between packets: 0x%x\n", ch);
            }
            break;
        case RS_GETLINE:
@@ -1568,9 +1582,7 @@ static void gdb_read_byte(GDBState *s, int ch)
                /* end of command, start of checksum*/
                s->state = RS_CHKSUM1;
            } else if (s->line_buf_index >= sizeof(s->line_buf) - 1) {
-#ifdef DEBUG_GDB
-                printf("gdbstub command buffer overrun, dropping command\n");
-#endif
+                gdb_debug("command buffer overrun, dropping command\n");
                s->state = RS_IDLE;
            } else {
                /* unescaped command character */
@@ -1584,9 +1596,7 @@ static void gdb_read_byte(GDBState *s, int ch)
                s->state = RS_CHKSUM1;
            } else if (s->line_buf_index >= sizeof(s->line_buf) - 1) {
                /* command buffer overrun */
-#ifdef DEBUG_GDB
-                printf("gdbstub command buffer overrun, dropping command\n");
-#endif
+                gdb_debug("command buffer overrun, dropping command\n");
                s->state = RS_IDLE;
            } else {
                /* parse escaped character and leave escape state */
@@ -1598,25 +1608,18 @@ static void gdb_read_byte(GDBState *s, int ch)
        case RS_GETLINE_RLE:
            if (ch < ' ') {
                /* invalid RLE count encoding */
-#ifdef DEBUG_GDB
-                printf("gdbstub got invalid RLE count: 0x%x\n", ch);
-#endif
+                gdb_debug("got invalid RLE count: 0x%x\n", ch);
                s->state = RS_GETLINE;
            } else {
                /* decode repeat length */
                int repeat = (unsigned char)ch - ' ' + 3;
                if (s->line_buf_index + repeat >= sizeof(s->line_buf) - 1) {
                    /* that many repeats would overrun the command buffer */
-#ifdef DEBUG_GDB
-                    printf("gdbstub command buffer overrun,"
-                           " dropping command\n");
-#endif
+                    gdb_debug("command buffer overrun, dropping command\n");
                    s->state = RS_IDLE;
                } else if (s->line_buf_index < 1) {
                    /* got a repeat but we have nothing to repeat */
-#ifdef DEBUG_GDB
-                    printf("gdbstub got invalid RLE sequence\n");
-#endif
+                    gdb_debug("got invalid RLE sequence\n");
                    s->state = RS_GETLINE;
                } else {
                    /* repeat the last character */
@@ -1631,9 +1634,7 @@ static void gdb_read_byte(GDBState *s, int ch)
        case RS_CHKSUM1:
            /* get high hex digit of checksum */
            if (!isxdigit(ch)) {
-#ifdef DEBUG_GDB
-                printf("gdbstub got invalid command checksum digit\n");
-#endif
+                gdb_debug("got invalid command checksum digit\n");
                s->state = RS_GETLINE;
                break;
            }
@@ -1644,21 +1645,17 @@ static void gdb_read_byte(GDBState *s, int ch)
        case RS_CHKSUM2:
            /* get low hex digit of checksum */
            if (!isxdigit(ch)) {
-#ifdef DEBUG_GDB
-                printf("gdbstub got invalid command checksum digit\n");
-#endif
+                gdb_debug("got invalid command checksum digit\n");
                s->state = RS_GETLINE;
                break;
            }
            s->line_csum |= fromhex(ch);

            if (s->line_csum != (s->line_sum & 0xff)) {
+                gdb_debug("got command packet with incorrect checksum\n");
                /* send NAK reply */
                reply = '-';
                put_buffer(s, &reply, 1);
-#ifdef DEBUG_GDB
-                printf("gdbstub got command packet with incorrect checksum\n");
-#endif
                s->state = RS_IDLE;
            } else {
                /* send ACK reply */
@@ -2003,7 +2000,7 @@ int gdbserver_start(const char *device)
    if (chr) {
        qemu_chr_fe_init(&s->chr, chr, &error_abort);
        qemu_chr_fe_set_handlers(&s->chr, gdb_chr_can_receive, gdb_chr_receive,
-                                 gdb_chr_event, NULL, NULL, true);
+                                 gdb_chr_event, NULL, NULL, NULL, true);
    }
    s->state = chr ? RS_IDLE : RS_INACTIVE;
    s->mon_chr = mon_chr;
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -775,6 +775,22 @@ STEXI
@item info skeys @var{address}
@findex skeys
 Display the value of a storage key (s390 only)
+ETEXI
+
+#if defined(TARGET_S390X)
+    {
+        .name       = "cmma",
+        .args_type  = "addr:l,count:l?",
+        .params     = "address [count]",
+        .help       = "Display the values of the CMMA storage attributes for a range of pages",
+        .cmd        = hmp_info_cmma,
+    },
+#endif
+
+STEXI
+@item info cmma @var{address}
+@findex cmma
+Display the values of the CMMA storage attributes for a range of pages (s390 only)
 ETEXI

    {
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1151,6 +1151,22 @@ STEXI
@item dump-skeys @var{filename}
@findex dump-skeys
 Save guest storage keys to a file.
+ETEXI
+
+#if defined(TARGET_S390X)
+    {
+        .name       = "migration_mode",
+        .args_type  = "mode:i",
+        .params     = "mode",
+        .help       = "Enables or disables migration mode\n",
+        .cmd        = hmp_migrationmode,
+    },
+#endif
+
+STEXI
+@item migration_mode @var{mode}
+@findex migration_mode
+Enables or disables migration mode.
 ETEXI

    {
@@ -1646,6 +1662,8 @@ STEXI
@item block_passwd @var{device} @var{password}
@findex block_passwd
 Set the encrypted device @var{device} password to @var{password}
+
+This command is obsolete: since 2.10 it always returns an error.
 ETEXI

    {
@@ -1724,7 +1742,23 @@ ETEXI
 STEXI
@item chardev-add args
@findex chardev-add
-chardev_add accepts the same parameters as the -chardev command line switch.
+chardev-add accepts the same parameters as the -chardev command line switch.
+
+ETEXI
+
+    {
+        .name       = "chardev-change",
+        .args_type  = "id:s,args:s",
+        .params     = "id args",
+        .help       = "change chardev",
+        .cmd        = hmp_chardev_change,
+    },
+
+STEXI
+@item chardev-change @var{id} args
+@findex chardev-change
+chardev-change accepts existing chardev @var{id} and then the same arguments
+as the -chardev command line switch (except for "id").

 ETEXI

--- a/hmp.c
+++ b/hmp.c
@@ -401,16 +401,16 @@ static void print_block_info(Monitor *mon, BlockInfo *info,

    assert(!info || !info->has_inserted || info->inserted == inserted);

-    if (info) {
+    if (info && *info->device) {
        monitor_printf(mon, "%s", info->device);
        if (inserted && inserted->has_node_name) {
            monitor_printf(mon, " (%s)", inserted->node_name);
        }
    } else {
-        assert(inserted);
+        assert(info || inserted);
        monitor_printf(mon, "%s",
-                       inserted->has_node_name
-                       ? inserted->node_name
+                       inserted && inserted->has_node_name ? inserted->node_name
+                       : info && info->has_qdev ? info->qdev
                       : "<anonymous>");
    }

@@ -425,6 +425,9 @@ static void print_block_info(Monitor *mon, BlockInfo *info,
    }

    if (info) {
+        if (info->has_qdev) {
+            monitor_printf(mon, "    Attached to:      %s\n", info->qdev);
+        }
        if (info->has_io_status && info->io_status != BLOCK_DEVICE_IO_STATUS_OK) {
            monitor_printf(mon, "    I/O status:       %s\n",
                           BlockDeviceIoStatus_lookup[info->io_status]);
@@ -600,50 +603,92 @@ void hmp_info_blockstats(Monitor *mon, const QDict *qdict)
    qapi_free_BlockStatsList(stats_list);
 }

+/* Helper for hmp_info_vnc_clients, _servers */
+static void hmp_info_VncBasicInfo(Monitor *mon, VncBasicInfo *info,
+                                  const char *name)
+{
+    monitor_printf(mon, "  %s: %s:%s (%s%s)\n",
+                   name,
+                   info->host,
+                   info->service,
+                   NetworkAddressFamily_lookup[info->family],
+                   info->websocket ? " (Websocket)" : "");
+}
+
+/* Helper displaying auth and crypt info */
+static void hmp_info_vnc_authcrypt(Monitor *mon, const char *indent,
+                                   VncPrimaryAuth auth,
+                                   VncVencryptSubAuth *vencrypt)
+{
+    monitor_printf(mon, "%sAuth: %s (Sub: %s)\n", indent,
+                   VncPrimaryAuth_lookup[auth],
+                   vencrypt ? VncVencryptSubAuth_lookup[*vencrypt] : "none");
+}
+
+static void hmp_info_vnc_clients(Monitor *mon, VncClientInfoList *client)
+{
+    while (client) {
+        VncClientInfo *cinfo = client->value;
+
+        hmp_info_VncBasicInfo(mon, qapi_VncClientInfo_base(cinfo), "Client");
+        monitor_printf(mon, "    x509_dname: %s\n",
+                       cinfo->has_x509_dname ?
+                       cinfo->x509_dname : "none");
+        monitor_printf(mon, "    sasl_username: %s\n",
+                       cinfo->has_sasl_username ?
+                       cinfo->sasl_username : "none");
+
+        client = client->next;
+    }
+}
+
+static void hmp_info_vnc_servers(Monitor *mon, VncServerInfo2List *server)
+{
+    while (server) {
+        VncServerInfo2 *sinfo = server->value;
+        hmp_info_VncBasicInfo(mon, qapi_VncServerInfo2_base(sinfo), "Server");
+        hmp_info_vnc_authcrypt(mon, "    ", sinfo->auth,
+                               sinfo->has_vencrypt ? &sinfo->vencrypt : NULL);
+        server = server->next;
+    }
+}
+
 void hmp_info_vnc(Monitor *mon, const QDict *qdict)
 {
-    VncInfo *info;
+    VncInfo2List *info2l, *info2l_head;
    Error *err = NULL;
-    VncClientInfoList *client;

-    info = qmp_query_vnc(&err);
+    info2l_head = info2l = qmp_query_vnc_servers(&err);
    if (err) {
        error_report_err(err);
        return;
    }
-
-    if (!info->enabled) {
-        monitor_printf(mon, "Server: disabled\n");
-        goto out;
+    if (!info2l) {
+        monitor_printf(mon, "None\n");
+        return;
    }

-    monitor_printf(mon, "Server:\n");
-    if (info->has_host && info->has_service) {
-        monitor_printf(mon, "     address: %s:%s\n", info->host, info->service);
-    }
-    if (info->has_auth) {
-        monitor_printf(mon, "        auth: %s\n", info->auth);
-    }
-
-    if (!info->has_clients || info->clients == NULL) {
-        monitor_printf(mon, "Client: none\n");
-    } else {
-        for (client = info->clients; client; client = client->next) {
-            monitor_printf(mon, "Client:\n");
-            monitor_printf(mon, "     address: %s:%s\n",
-                           client->value->host,
-                           client->value->service);
-            monitor_printf(mon, "  x509_dname: %s\n",
-                           client->value->x509_dname ?
-                           client->value->x509_dname : "none");
-            monitor_printf(mon, "    username: %s\n",
-                           client->value->has_sasl_username ?
-                           client->value->sasl_username : "none");
+    while (info2l) {
+        VncInfo2 *info = info2l->value;
+        monitor_printf(mon, "%s:\n", info->id);
+        hmp_info_vnc_servers(mon, info->server);
+        hmp_info_vnc_clients(mon, info->clients);
+        if (!info->server) {
+            /* The server entry displays its auth, we only
+             * need to display in the case of 'reverse' connections
+             * where there's no server.
+             */
+            hmp_info_vnc_authcrypt(mon, "  ", info->auth,
+                               info->has_vencrypt ? &info->vencrypt : NULL);
        }
+        if (info->has_display) {
+            monitor_printf(mon, "  Display: %s\n", info->display);
+        }
+        info2l = info2l->next;
    }

-out:
-    qapi_free_VncInfo(info);
+    /* info2l is NULL after the loop; free the list from its saved head. */
+    qapi_free_VncInfo2List(info2l_head);
 }

 #ifdef CONFIG_SPICE
@@ -1088,37 +1133,12 @@ void hmp_ringbuf_read(Monitor *mon, const QDict *qdict)
    g_free(data);
 }

-static void hmp_cont_cb(void *opaque, int err)
-{
-    if (!err) {
-        qmp_cont(NULL);
-    }
-}
-
-static bool key_is_missing(const BlockInfo *bdev)
-{
-    return (bdev->inserted && bdev->inserted->encryption_key_missing);
-}
-
 void hmp_cont(Monitor *mon, const QDict *qdict)
 {
-    BlockInfoList *bdev_list, *bdev;
    Error *err = NULL;

-    bdev_list = qmp_query_block(NULL);
-    for (bdev = bdev_list; bdev; bdev = bdev->next) {
-        if (key_is_missing(bdev->value)) {
-            monitor_read_block_device_key(mon, bdev->value->device,
-                                          hmp_cont_cb, NULL);
-            goto out;
-        }
-    }
-
    qmp_cont(&err);
    hmp_handle_error(mon, &err);
-
-out:
-    qapi_free_BlockInfoList(bdev_list);
 }

 void hmp_system_wakeup(Monitor *mon, const QDict *qdict)
@@ -1741,12 +1761,6 @@ void hmp_change(Monitor *mon, const QDict *qdict)
        qmp_blockdev_change_medium(true, device, false, NULL, target,
                                   !!arg, arg, !!read_only, read_only_mode,
                                   &err);
-        if (err &&
-            error_get_class(err) == ERROR_CLASS_DEVICE_ENCRYPTED) {
-            error_free(err);
-            monitor_read_block_device_key(mon, device, NULL, NULL);
-            return;
-        }
    }

    hmp_handle_error(mon, &err);
@@ -2228,6 +2242,40 @@ void hmp_chardev_add(Monitor *mon, const QDict *qdict)
    hmp_handle_error(mon, &err);
 }

+void hmp_chardev_change(Monitor *mon, const QDict *qdict)
+{
+    const char *args = qdict_get_str(qdict, "args");
+    const char *id;
+    Error *err = NULL;
+    ChardevBackend *backend = NULL;
+    ChardevReturn *ret = NULL;
+    QemuOpts *opts = qemu_opts_parse_noisily(qemu_find_opts("chardev"), args,
+                                             true);
+    if (!opts) {
+        error_setg(&err, "Parsing chardev args failed");
+        goto end;
+    }
+
+    id = qdict_get_str(qdict, "id");
+    if (qemu_opts_id(opts)) {
+        error_setg(&err, "Unexpected 'id' parameter");
+        goto end;
+    }
+
+    backend = qemu_chr_parse_opts(opts, &err);
+    if (!backend) {
+        goto end;
+    }
+
+    ret = qmp_chardev_change(id, backend, &err);
+
+end:
+    qapi_free_ChardevReturn(ret);
+    qapi_free_ChardevBackend(backend);
+    qemu_opts_del(opts);
+    hmp_handle_error(mon, &err);
+}
+
 void hmp_chardev_remove(Monitor *mon, const QDict *qdict)
 {
    Error *local_err = NULL;
--- a/Show More
+++ b/Show More