Update version for v2.6.0-rc4 release

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Revert "acpi: mark PMTIMER as unlocked"
2016-05-02 17:27:01 +01:00 · 2016-05-02 17:19:13 +01:00 · 2016-05-02 13:04:26 +01:00 · 2016-05-01 22:52:47 +01:00 · 2016-05-01 15:42:13 +03:00 · 2016-04-29 12:12:33 +01:00
1218 changed files with 38615 additions and 10731 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -42,83 +42,49 @@ notifications:
 env:
  global:
    - TEST_CMD="make check"
-    - EXTRA_CONFIG=""
  matrix:
-    # Group major targets together with their linux-user counterparts
-    - TARGETS=alpha-softmmu,alpha-linux-user,cris-softmmu,cris-linux-user,m68k-softmmu,m68k-linux-user,microblaze-softmmu,microblazeel-softmmu,microblaze-linux-user,microblazeel-linux-user
-    - TARGETS=arm-softmmu,arm-linux-user,armeb-linux-user,aarch64-softmmu,aarch64-linux-user
-    - TARGETS=i386-softmmu,i386-linux-user,x86_64-softmmu,x86_64-linux-user
-    - TARGETS=mips-softmmu,mips64-softmmu,mips64el-softmmu,mipsel-softmmu,mips-linux-user,mips64-linux-user,mips64el-linux-user,mipsel-linux-user,mipsn32-linux-user,mipsn32el-linux-user
-    - TARGETS=or32-softmmu,or32-linux-user,ppc-softmmu,ppc64-softmmu,ppcemb-softmmu,ppc-linux-user,ppc64-linux-user,ppc64abi32-linux-user,ppc64le-linux-user
-    - TARGETS=s390x-softmmu,s390x-linux-user,sh4-softmmu,sh4eb-softmmu,sh4-linux-user,sh4eb-linux-user,sparc-softmmu,sparc64-softmmu,sparc-linux-user,sparc32plus-linux-user,sparc64-linux-user,unicore32-softmmu,unicore32-linux-user
-    # Group remaining softmmu only targets into one build
-    - TARGETS=lm32-softmmu,moxie-softmmu,tricore-softmmu,xtensa-softmmu,xtensaeb-softmmu
+    - CONFIG=""
+    - CONFIG="--enable-debug --enable-debug-tcg --enable-trace-backends=log"
+    - CONFIG="--disable-linux-aio --disable-cap-ng --disable-attr --disable-brlapi --disable-uuid --disable-libusb"
+    - CONFIG="--enable-modules"
+    - CONFIG="--with-coroutine=ucontext"
+    - CONFIG="--with-coroutine=sigaltstack"
 git:
  # we want to do this ourselves
  submodules: false
 before_install:
+  - if [ "$TRAVIS_OS_NAME" == "osx" ]; then brew update ; fi
+  - if [ "$TRAVIS_OS_NAME" == "osx" ]; then brew install libffi gettext glib pixman ; fi
  - wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
  - git submodule update --init --recursive
 before_script:
-  - ./configure --target-list=${TARGETS} --enable-debug-tcg ${EXTRA_CONFIG}
+  - ./configure ${CONFIG}
 script:
-  - make -j2 && ${TEST_CMD}
+  - make -j3 && ${TEST_CMD}
 matrix:
-  # We manually include a number of additional build for non-standard bits
  include:
-    # Debug related options
-    - env: TARGETS=x86_64-softmmu
-           EXTRA_CONFIG="--enable-debug"
+    # Sparse is GCC only
+    - env: CONFIG="--enable-sparse"
      compiler: gcc
-    # We currently disable "make check"
-    - env: TARGETS=alpha-softmmu
-           EXTRA_CONFIG="--enable-debug --enable-tcg-interpreter"
+    # gprof/gcov are GCC features
+    - env: CONFIG="--enable-gprof --enable-gcov --disable-pie"
+      compiler: gcc
+    # We manually include builds which we disable "make check" for
+    - env: CONFIG="--enable-debug --enable-tcg-interpreter"
           TEST_CMD=""
      compiler: gcc
-    # Disable a few of the optional features
-    - env: TARGETS=x86_64-softmmu
-           EXTRA_CONFIG="--disable-linux-aio --disable-cap-ng --disable-attr --disable-brlapi --disable-uuid --disable-libusb"
-      compiler: gcc
-    # Currently configure doesn't force --disable-pie
-    - env: TARGETS=x86_64-softmmu
-           EXTRA_CONFIG="--enable-gprof --enable-gcov --disable-pie"
-      compiler: gcc
-    # Sparse
-    - env: TARGETS=x86_64-softmmu
-           EXTRA_CONFIG="--enable-sparse"
-      compiler: gcc
-    # Modules
-    - env: TARGETS=arm-softmmu,x86_64-softmmu
-           EXTRA_CONFIG="--enable-modules"
-      compiler: gcc
-    # All the trace backends (apart from dtrace)
-    - env: TARGETS=i386-softmmu
-           EXTRA_CONFIG="--enable-trace-backends=log"
-      compiler: gcc
-    # We currently disable "make check" (until 41fc57e44ed regression fixed)
-    - env: TARGETS=x86_64-softmmu
-           EXTRA_CONFIG="--enable-trace-backends=simple"
+    - env: CONFIG="--enable-trace-backends=simple"
           TEST_CMD=""
      compiler: gcc
-    # We currently disable "make check"
-    - env: TARGETS=x86_64-softmmu
-           EXTRA_CONFIG="--enable-trace-backends=ftrace"
+    - env: CONFIG="--enable-trace-backends=ftrace"
           TEST_CMD=""
      compiler: gcc
-    # We currently disable "make check"
-    - env: TARGETS=x86_64-softmmu
-           EXTRA_CONFIG="--enable-trace-backends=ust"
+    - env: CONFIG="--enable-trace-backends=ust"
           TEST_CMD=""
      compiler: gcc
-    # All the co-routine backends (apart from windows)
-    # We currently disable "make check"
-    - env: TARGETS=x86_64-softmmu
-           EXTRA_CONFIG="--with-coroutine=gthread"
+    - env: CONFIG="--with-coroutine=gthread"
           TEST_CMD=""
      compiler: gcc
-    - env: TARGETS=x86_64-softmmu
-           EXTRA_CONFIG="--with-coroutine=ucontext"
-      compiler: gcc
-    - env: TARGETS=x86_64-softmmu
-           EXTRA_CONFIG="--with-coroutine=sigaltstack"
-      compiler: gcc
+    - env: CONFIG=""
+      os: osx
+      compiler: clang
--- a/39
+++ b/39
@@ -234,6 +234,7 @@ L: kvm@vger.kernel.org
 S: Supported
 F: kvm-*
 F: */kvm.*
+F: include/sysemu/kvm*.h

 ARM
 M: Peter Maydell <peter.maydell@linaro.org>
@@ -277,7 +278,8 @@ Guest CPU Cores (Xen):
 ----------------------

 X86
-M: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
+M: Stefano Stabellini <sstabellini@kernel.org>
+M: Anthony Perard <anthony.perard@citrix.com>
 L: xen-devel@lists.xensource.com
 S: Supported
 F: xen-*
@@ -356,10 +358,7 @@ F: include/hw/timer/a9gtimer.h
 F: include/hw/timer/arm_mptimer.h

 Exynos
-M: Evgeny Voevodin <e.voevodin@samsung.com>
-M: Maksim Kozlov <m.kozlov@samsung.com>
 M: Igor Mitsyanko <i.mitsyanko@gmail.com>
-M: Dmitry Solodkiy <d.solodkiy@samsung.com>
 L: qemu-arm@nongnu.org
 S: Maintained
 F: hw/*/exynos*
@@ -656,12 +655,6 @@ F: hw/*/grlib*

 S390 Machines
 -------------
-S390 Virtio
-M: Alexander Graf <agraf@suse.de>
-S: Maintained
-F: hw/s390x/s390-*.c
-X: hw/s390x/*pci*.[hc]
-
 S390 Virtio-ccw
 M: Cornelia Huck <cornelia.huck@de.ibm.com>
 M: Christian Borntraeger <borntraeger@de.ibm.com>
@@ -669,7 +662,6 @@ M: Alexander Graf <agraf@suse.de>
 S: Supported
 F: hw/char/sclp*.[hc]
 F: hw/s390x/
-X: hw/s390x/s390-virtio-bus.[ch]
 F: include/hw/s390x/
 F: pc-bios/s390-ccw/
 F: hw/watchdog/wdt_diag288.c
@@ -723,6 +715,12 @@ F: hw/timer/hpet*
 F: hw/timer/i8254*
 F: hw/timer/mc146818rtc*

+Machine core
+M: Eduardo Habkost <ehabkost@redhat.com>
+M: Marcel Apfelbaum <marcel@redhat.com>
+S: Supported
+F: hw/core/machine.c
+F: include/hw/boards.h

 Xtensa Machines
 ---------------
@@ -857,6 +855,10 @@ M: Gerd Hoffmann <kraxel@redhat.com>
 S: Maintained
 F: hw/usb/*
 F: tests/usb-*-test.c
+F: docs/usb2.txt
+F: docs/usb-storage.txt
+F: include/hw/usb.h
+F: include/hw/usb/

 USB (serial adapter)
 M: Gerd Hoffmann <kraxel@redhat.com>
@@ -868,6 +870,7 @@ VFIO
 M: Alex Williamson <alex.williamson@redhat.com>
 S: Supported
 F: hw/vfio/*
+F: include/hw/vfio/

 vhost
 M: Michael S. Tsirkin <mst@redhat.com>
@@ -879,6 +882,7 @@ M: Michael S. Tsirkin <mst@redhat.com>
 S: Supported
 F: hw/*/virtio*
 F: net/vhost-user.c
+F: include/hw/virtio/

 virtio-9p
 M: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
@@ -924,6 +928,7 @@ M: Amit Shah <amit.shah@redhat.com>
 S: Supported
 F: hw/virtio/virtio-rng.c
 F: include/hw/virtio/virtio-rng.h
+F: include/sysemu/rng*.h
 F: backends/rng*.c

 nvme
@@ -980,6 +985,7 @@ F: tests/intel-hda-test.c

 Block layer core
 M: Kevin Wolf <kwolf@redhat.com>
+M: Max Reitz <mreitz@redhat.com>
 L: qemu-block@nongnu.org
 S: Supported
 F: block*
@@ -993,6 +999,7 @@ T: git git://repo.or.cz/qemu/kevin.git block

 Block I/O path
 M: Stefan Hajnoczi <stefanha@redhat.com>
+M: Fam Zheng <famz@redhat.com>
 L: qemu-block@nongnu.org
 S: Supported
 F: async.c
@@ -1009,7 +1016,7 @@ F: blockjob.c
 F: include/block/blockjob.h
 F: block/backup.c
 F: block/commit.c
-F: block/stream.h
+F: block/stream.c
 F: block/mirror.c
 T: git git://github.com/codyprime/qemu-kvm-jtc.git block

@@ -1043,7 +1050,6 @@ M: Andreas Färber <afaerber@suse.de>
 S: Supported
 F: qom/cpu.c
 F: include/qom/cpu.h
-F: target-i386/cpu.c

 ICC Bus
 M: Igor Mammedov <imammedo@redhat.com>
@@ -1124,6 +1130,7 @@ Network device backends
 M: Jason Wang <jasowang@redhat.com>
 S: Maintained
 F: net/
+F: include/net/
 T: git git://github.com/jasowang/qemu.git net

 Netmap network backend
@@ -1148,8 +1155,6 @@ M: Eduardo Habkost <ehabkost@redhat.com>
 S: Maintained
 F: numa.c
 F: include/sysemu/numa.h
-K: numa|NUMA
-K: srat|SRAT
 T: git git://github.com/ehabkost/qemu.git numa

 QAPI
@@ -1219,10 +1224,12 @@ F: scripts/qmp/
 T: git git://repo.or.cz/qemu/armbru.git qapi-next

 SLIRP
+M: Samuel Thibault <samuel.thibault@ens-lyon.org>
 M: Jan Kiszka <jan.kiszka@siemens.com>
 S: Maintained
 F: slirp/
 F: net/slirp.c
+F: include/net/slirp.h
 T: git git://git.kiszka.org/qemu.git queues/slirp

 Tracing
@@ -1560,6 +1567,7 @@ F: block/win32-aio.c

 qcow2
 M: Kevin Wolf <kwolf@redhat.com>
+M: Max Reitz <mreitz@redhat.com>
 L: qemu-block@nongnu.org
 S: Supported
 F: block/qcow2*
@@ -1572,6 +1580,7 @@ F: block/qcow.c

 blkdebug
 M: Kevin Wolf <kwolf@redhat.com>
+M: Max Reitz <mreitz@redhat.com>
 L: qemu-block@nongnu.org
 S: Supported
 F: block/blkdebug.c
--- a/4
+++ b/4
@@ -238,7 +238,7 @@ qemu-img$(EXESUF): qemu-img.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-o
 qemu-nbd$(EXESUF): qemu-nbd.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a
 qemu-io$(EXESUF): qemu-io.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a

-qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o
+qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o libqemuutil.a libqemustub.a

 fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o fsdev/9p-marshal.o fsdev/9p-iov-marshal.o libqemuutil.a libqemustub.a
 fsdev/virtfs-proxy-helper$(EXESUF): LIBS += -lcap
@@ -329,7 +329,7 @@ ifneq ($(EXESUF),)
 qemu-ga: qemu-ga$(EXESUF) $(QGA_VSS_PROVIDER) $(QEMU_GA_MSI)
 endif

-ivshmem-client$(EXESUF): $(ivshmem-client-obj-y)
+ivshmem-client$(EXESUF): $(ivshmem-client-obj-y) libqemuutil.a libqemustub.a
 	$(call LINK, $^)
 ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) libqemuutil.a libqemustub.a
 	$(call LINK, $^)
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -1,6 +1,6 @@
 #######################################################################
 # Common libraries for tools and emulators
-stub-obj-y = stubs/
+stub-obj-y = stubs/ crypto/
 util-obj-y = util/ qobject/ qapi/
 util-obj-y += qmp-introspect.o qapi-types.o qapi-visit.o qapi-event.o

--- a/2
+++ b/2
@@ -1 +1 @@
-2.5.50
+2.5.94
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -18,7 +18,7 @@
 #include "block/block.h"
 #include "qemu/queue.h"
 #include "qemu/sockets.h"
-#ifdef CONFIG_EPOLL
+#ifdef CONFIG_EPOLL_CREATE1
 #include <sys/epoll.h>
 #endif

@@ -33,7 +33,7 @@ struct AioHandler
    QLIST_ENTRY(AioHandler) node;
 };

-#ifdef CONFIG_EPOLL
+#ifdef CONFIG_EPOLL_CREATE1

 /* The fd number threashold to switch to epoll */
 #define EPOLL_ENABLE_THRESHOLD 64
@@ -282,10 +282,12 @@ bool aio_pending(AioContext *ctx)
        int revents;

        revents = node->pfd.revents & node->pfd.events;
-        if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) {
+        if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read &&
+            aio_node_check(ctx, node->is_external)) {
            return true;
        }
-        if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) {
+        if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write &&
+            aio_node_check(ctx, node->is_external)) {
            return true;
        }
    }
@@ -323,6 +325,7 @@ bool aio_dispatch(AioContext *ctx)

        if (!node->deleted &&
            (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
+            aio_node_check(ctx, node->is_external) &&
            node->io_read) {
            node->io_read(node->opaque);

@@ -333,6 +336,7 @@ bool aio_dispatch(AioContext *ctx)
        }
        if (!node->deleted &&
            (revents & (G_IO_OUT | G_IO_ERR)) &&
+            aio_node_check(ctx, node->is_external) &&
            node->io_write) {
            node->io_write(node->opaque);
            progress = true;
@@ -483,7 +487,7 @@ bool aio_poll(AioContext *ctx, bool blocking)

 void aio_context_setup(AioContext *ctx, Error **errp)
 {
-#ifdef CONFIG_EPOLL
+#ifdef CONFIG_EPOLL_CREATE1
    assert(!ctx->epollfd);
    ctx->epollfd = epoll_create1(EPOLL_CLOEXEC);
    if (ctx->epollfd == -1) {
--- a/arch_init.c
+++ b/arch_init.c
@@ -31,6 +31,7 @@
 #include "qemu/error-report.h"
 #include "qmp-commands.h"
 #include "hw/acpi/acpi.h"
+#include "qemu/help_option.h"

 #ifdef TARGET_SPARC
 int graphic_width = 1024;
--- a/async.c
+++ b/async.c
@@ -23,6 +23,7 @@
 */

 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "qemu-common.h"
 #include "block/aio.h"
 #include "block/thread-pool.h"
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -27,6 +27,7 @@
 #include "monitor/monitor.h"
 #include "qemu/timer.h"
 #include "sysemu/sysemu.h"
+#include "qemu/cutils.h"

 #define AUDIO_CAP "audio"
 #include "audio_int.h"
@@ -1869,8 +1870,7 @@ static void audio_init (void)
        }
        conf.period.ticks = 1;
    } else {
-        conf.period.ticks =
-            muldiv64 (1, get_ticks_per_sec (), conf.period.hertz);
+        conf.period.ticks = NANOSECONDS_PER_SECOND / conf.period.hertz;
    }

    e = qemu_add_vm_change_state_handler (audio_vm_change_state_handler, s);
--- a/audio/noaudio.c
+++ b/audio/noaudio.c
@@ -49,8 +49,8 @@ static int no_run_out (HWVoiceOut *hw, int live)

    now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    ticks = now - no->old_ticks;
-    bytes = muldiv64 (ticks, hw->info.bytes_per_second, get_ticks_per_sec ());
-    bytes = audio_MIN (bytes, INT_MAX);
+    bytes = muldiv64(ticks, hw->info.bytes_per_second, NANOSECONDS_PER_SECOND);
+    bytes = audio_MIN(bytes, INT_MAX);
    samples = bytes >> hw->info.shift;

    no->old_ticks = now;
@@ -61,7 +61,7 @@ static int no_run_out (HWVoiceOut *hw, int live)

 static int no_write (SWVoiceOut *sw, void *buf, int len)
 {
-    return audio_pcm_sw_write (sw, buf, len);
+    return audio_pcm_sw_write(sw, buf, len);
 }

 static int no_init_out(HWVoiceOut *hw, struct audsettings *as, void *drv_opaque)
@@ -106,7 +106,7 @@ static int no_run_in (HWVoiceIn *hw)
        int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
        int64_t ticks = now - no->old_ticks;
        int64_t bytes =
-            muldiv64 (ticks, hw->info.bytes_per_second, get_ticks_per_sec ());
+            muldiv64(ticks, hw->info.bytes_per_second, NANOSECONDS_PER_SECOND);

        no->old_ticks = now;
        bytes = audio_MIN (bytes, INT_MAX);
--- a/audio/spiceaudio.c
+++ b/audio/spiceaudio.c
@@ -104,11 +104,11 @@ static int rate_get_samples (struct audio_pcm_info *info, SpiceRateCtl *rate)

    now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    ticks = now - rate->start_ticks;
-    bytes = muldiv64 (ticks, info->bytes_per_second, get_ticks_per_sec ());
+    bytes = muldiv64(ticks, info->bytes_per_second, NANOSECONDS_PER_SECOND);
    samples = (bytes - rate->bytes_sent) >> info->shift;
    if (samples < 0 || samples > 65536) {
        error_report("Resetting rate control (%" PRId64 " samples)", samples);
-        rate_start (rate);
+        rate_start(rate);
        samples = 0;
    }
    rate->bytes_sent += samples << info->shift;
--- a/audio/wavaudio.c
+++ b/audio/wavaudio.c
@@ -51,7 +51,7 @@ static int wav_run_out (HWVoiceOut *hw, int live)
    int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    int64_t ticks = now - wav->old_ticks;
    int64_t bytes =
-        muldiv64 (ticks, hw->info.bytes_per_second, get_ticks_per_sec ());
+        muldiv64(ticks, hw->info.bytes_per_second, NANOSECONDS_PER_SECOND);

    if (bytes > INT_MAX) {
        samples = INT_MAX >> hw->info.shift;
--- a/backends/baum.c
+++ b/backends/baum.c
@@ -22,6 +22,7 @@
 * THE SOFTWARE.
 */
 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "qemu-common.h"
 #include "sysemu/char.h"
 #include "qemu/timer.h"
@@ -336,7 +337,7 @@ static int baum_eat_packet(BaumDriverState *baum, const uint8_t *buf, int len)

        /* Allow 100ms to complete the DisplayData packet */
        timer_mod(baum->cellCount_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
-                       get_ticks_per_sec() / 10);
+                       NANOSECONDS_PER_SECOND / 10);
        for (i = 0; i < baum->x * baum->y ; i++) {
            EAT(c);
            cells[i] = c;
@@ -567,7 +568,7 @@ static CharDriverState *chr_baum_init(const char *id,
                                      ChardevReturn *ret,
                                      Error **errp)
 {
-    ChardevCommon *common = qapi_ChardevDummy_base(backend->u.braille);
+    ChardevCommon *common = backend->u.braille.data;
    BaumDriverState *baum;
    CharDriverState *chr;
    brlapi_handle_t *handle;
--- a/backends/hostmem-file.c
+++ b/backends/hostmem-file.c
@@ -10,6 +10,7 @@
 * See the COPYING file in the top-level directory.
 */
 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "qemu-common.h"
 #include "sysemu/hostmem.h"
 #include "sysemu/sysemu.h"
@@ -51,11 +52,14 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
    error_setg(errp, "-mem-path not supported on this host");
 #else
    if (!memory_region_size(&backend->mr)) {
+        gchar *path;
        backend->force_prealloc = mem_prealloc;
+        path = object_get_canonical_path(OBJECT(backend));
        memory_region_init_ram_from_file(&backend->mr, OBJECT(backend),
-                                 object_get_canonical_path(OBJECT(backend)),
+                                 path,
                                 backend->size, fb->share,
                                 fb->mem_path, errp);
+        g_free(path);
    }
 #endif
 }
@@ -117,11 +121,19 @@ file_backend_instance_init(Object *o)
                            set_mem_path, NULL);
 }

+static void file_backend_instance_finalize(Object *o)
+{
+    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
+
+    g_free(fb->mem_path);
+}
+
 static const TypeInfo file_backend_info = {
    .name = TYPE_MEMORY_BACKEND_FILE,
    .parent = TYPE_MEMORY_BACKEND,
    .class_init = file_backend_class_init,
    .instance_init = file_backend_instance_init,
+    .instance_finalize = file_backend_instance_finalize,
    .instance_size = sizeof(HostMemoryBackendFile),
 };

--- a/backends/hostmem-ram.c
+++ b/backends/hostmem-ram.c
@@ -11,6 +11,7 @@
 */
 #include "qemu/osdep.h"
 #include "sysemu/hostmem.h"
+#include "qapi/error.h"
 #include "qom/object_interfaces.h"

 #define TYPE_MEMORY_BACKEND_RAM "memory-backend-ram"
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -12,6 +12,7 @@
 #include "qemu/osdep.h"
 #include "sysemu/hostmem.h"
 #include "hw/boards.h"
+#include "qapi/error.h"
 #include "qapi/visitor.h"
 #include "qapi-types.h"
 #include "qapi-visit.h"
--- a/backends/msmouse.c
+++ b/backends/msmouse.c
@@ -68,7 +68,7 @@ static CharDriverState *qemu_chr_open_msmouse(const char *id,
                                              ChardevReturn *ret,
                                              Error **errp)
 {
-    ChardevCommon *common = qapi_ChardevDummy_base(backend->u.msmouse);
+    ChardevCommon *common = backend->u.msmouse.data;
    CharDriverState *chr;

    chr = qemu_chr_alloc(common, errp);
--- a/backends/rng-egd.c
+++ b/backends/rng-egd.c
@@ -13,6 +13,7 @@
 #include "qemu/osdep.h"
 #include "sysemu/rng.h"
 #include "sysemu/char.h"
+#include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
 #include "hw/qdev.h" /* just for DEFINE_PROP_CHR */

@@ -25,33 +26,12 @@ typedef struct RngEgd

    CharDriverState *chr;
    char *chr_name;
-
-    GSList *requests;
 } RngEgd;

-typedef struct RngRequest
-{
-    EntropyReceiveFunc *receive_entropy;
-    uint8_t *data;
-    void *opaque;
-    size_t offset;
-    size_t size;
-} RngRequest;
-
-static void rng_egd_request_entropy(RngBackend *b, size_t size,
-                                    EntropyReceiveFunc *receive_entropy,
-                                    void *opaque)
+static void rng_egd_request_entropy(RngBackend *b, RngRequest *req)
 {
    RngEgd *s = RNG_EGD(b);
-    RngRequest *req;
-
-    req = g_malloc(sizeof(*req));
-
-    req->offset = 0;
-    req->size = size;
-    req->receive_entropy = receive_entropy;
-    req->opaque = opaque;
-    req->data = g_malloc(req->size);
+    size_t size = req->size;

    while (size > 0) {
        uint8_t header[2];
@@ -65,24 +45,15 @@ static void rng_egd_request_entropy(RngBackend *b, size_t size,

        size -= len;
    }
-
-    s->requests = g_slist_append(s->requests, req);
-}
-
-static void rng_egd_free_request(RngRequest *req)
-{
-    g_free(req->data);
-    g_free(req);
 }

 static int rng_egd_chr_can_read(void *opaque)
 {
    RngEgd *s = RNG_EGD(opaque);
-    GSList *i;
+    RngRequest *req;
    int size = 0;

-    for (i = s->requests; i; i = i->next) {
-        RngRequest *req = i->data;
+    QSIMPLEQ_FOREACH(req, &s->parent.requests, next) {
        size += req->size - req->offset;
    }

@@ -94,8 +65,8 @@ static void rng_egd_chr_read(void *opaque, const uint8_t *buf, int size)
    RngEgd *s = RNG_EGD(opaque);
    size_t buf_offset = 0;

-    while (size > 0 && s->requests) {
-        RngRequest *req = s->requests->data;
+    while (size > 0 && !QSIMPLEQ_EMPTY(&s->parent.requests)) {
+        RngRequest *req = QSIMPLEQ_FIRST(&s->parent.requests);
        int len = MIN(size, req->size - req->offset);

        memcpy(req->data + req->offset, buf + buf_offset, len);
@@ -104,38 +75,13 @@ static void rng_egd_chr_read(void *opaque, const uint8_t *buf, int size)
        size -= len;

        if (req->offset == req->size) {
-            s->requests = g_slist_remove_link(s->requests, s->requests);
-
            req->receive_entropy(req->opaque, req->data, req->size);

-            rng_egd_free_request(req);
+            rng_backend_finalize_request(&s->parent, req);
        }
    }
 }

-static void rng_egd_free_requests(RngEgd *s)
-{
-    GSList *i;
-
-    for (i = s->requests; i; i = i->next) {
-        rng_egd_free_request(i->data);
-    }
-
-    g_slist_free(s->requests);
-    s->requests = NULL;
-}
-
-static void rng_egd_cancel_requests(RngBackend *b)
-{
-    RngEgd *s = RNG_EGD(b);
-
-    /* We simply delete the list of pending requests.  If there is data in the 
-     * queue waiting to be read, this is okay, because there will always be
-     * more data than we requested originally
-     */
-    rng_egd_free_requests(s);
-}
-
 static void rng_egd_opened(RngBackend *b, Error **errp)
 {
    RngEgd *s = RNG_EGD(b);
@@ -204,8 +150,6 @@ static void rng_egd_finalize(Object *obj)
    }

    g_free(s->chr_name);
-
-    rng_egd_free_requests(s);
 }

 static void rng_egd_class_init(ObjectClass *klass, void *data)
@@ -213,7 +157,6 @@ static void rng_egd_class_init(ObjectClass *klass, void *data)
    RngBackendClass *rbc = RNG_BACKEND_CLASS(klass);

    rbc->request_entropy = rng_egd_request_entropy;
-    rbc->cancel_requests = rng_egd_cancel_requests;
    rbc->opened = rng_egd_opened;
 }

--- a/backends/rng-random.c
+++ b/backends/rng-random.c
@@ -13,6 +13,7 @@
 #include "qemu/osdep.h"
 #include "sysemu/rng-random.h"
 #include "sysemu/rng.h"
+#include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/main-loop.h"

@@ -22,10 +23,6 @@ struct RndRandom

    int fd;
    char *filename;
-
-    EntropyReceiveFunc *receive_func;
-    void *opaque;
-    size_t size;
 };

 /**
@@ -38,36 +35,35 @@ struct RndRandom
 static void entropy_available(void *opaque)
 {
    RndRandom *s = RNG_RANDOM(opaque);
-    uint8_t buffer[s->size];
-    ssize_t len;

-    len = read(s->fd, buffer, s->size);
-    if (len < 0 && errno == EAGAIN) {
-        return;
+    while (!QSIMPLEQ_EMPTY(&s->parent.requests)) {
+        RngRequest *req = QSIMPLEQ_FIRST(&s->parent.requests);
+        ssize_t len;
+
+        len = read(s->fd, req->data, req->size);
+        if (len < 0 && errno == EAGAIN) {
+            return;
+        }
+        g_assert(len != -1);
+
+        req->receive_entropy(req->opaque, req->data, len);
+
+        rng_backend_finalize_request(&s->parent, req);
    }
-    g_assert(len != -1);
-
-    s->receive_func(s->opaque, buffer, len);
-    s->receive_func = NULL;

+    /* We've drained all requests, the fd handler can be reset. */
    qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
 }

-static void rng_random_request_entropy(RngBackend *b, size_t size,
-                                        EntropyReceiveFunc *receive_entropy,
-                                        void *opaque)
+static void rng_random_request_entropy(RngBackend *b, RngRequest *req)
 {
    RndRandom *s = RNG_RANDOM(b);

-    if (s->receive_func) {
-        s->receive_func(s->opaque, NULL, 0);
+    if (QSIMPLEQ_EMPTY(&s->parent.requests)) {
+        /* If there are no pending requests yet, we need to
+         * install our fd handler. */
+        qemu_set_fd_handler(s->fd, entropy_available, NULL, s);
    }
-
-    s->receive_func = receive_entropy;
-    s->opaque = opaque;
-    s->size = size;
-
-    qemu_set_fd_handler(s->fd, entropy_available, NULL, s);
 }

 static void rng_random_opened(RngBackend *b, Error **errp)
--- a/backends/rng.c
+++ b/backends/rng.c
@@ -12,6 +12,7 @@

 #include "qemu/osdep.h"
 #include "sysemu/rng.h"
+#include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
 #include "qom/object_interfaces.h"

@@ -20,18 +21,20 @@ void rng_backend_request_entropy(RngBackend *s, size_t size,
                                 void *opaque)
 {
    RngBackendClass *k = RNG_BACKEND_GET_CLASS(s);
+    RngRequest *req;

    if (k->request_entropy) {
-        k->request_entropy(s, size, receive_entropy, opaque);
-    }
-}
+        req = g_malloc(sizeof(*req));

-void rng_backend_cancel_requests(RngBackend *s)
-{
-    RngBackendClass *k = RNG_BACKEND_GET_CLASS(s);
+        req->offset = 0;
+        req->size = size;
+        req->receive_entropy = receive_entropy;
+        req->opaque = opaque;
+        req->data = g_malloc(req->size);

-    if (k->cancel_requests) {
-        k->cancel_requests(s);
+        k->request_entropy(s, req);
+
+        QSIMPLEQ_INSERT_TAIL(&s->requests, req, next);
    }
 }

@@ -73,14 +76,48 @@ static void rng_backend_prop_set_opened(Object *obj, bool value, Error **errp)
    s->opened = true;
 }

+static void rng_backend_free_request(RngRequest *req)
+{
+    g_free(req->data);
+    g_free(req);
+}
+
+static void rng_backend_free_requests(RngBackend *s)
+{
+    RngRequest *req, *next;
+
+    QSIMPLEQ_FOREACH_SAFE(req, &s->requests, next, next) {
+        rng_backend_free_request(req);
+    }
+
+    QSIMPLEQ_INIT(&s->requests);
+}
+
+void rng_backend_finalize_request(RngBackend *s, RngRequest *req)
+{
+    QSIMPLEQ_REMOVE(&s->requests, req, RngRequest, next);
+    rng_backend_free_request(req);
+}
+
 static void rng_backend_init(Object *obj)
 {
+    RngBackend *s = RNG_BACKEND(obj);
+
+    QSIMPLEQ_INIT(&s->requests);
+
    object_property_add_bool(obj, "opened",
                             rng_backend_prop_get_opened,
                             rng_backend_prop_set_opened,
                             NULL);
 }

+static void rng_backend_finalize(Object *obj)
+{
+    RngBackend *s = RNG_BACKEND(obj);
+
+    rng_backend_free_requests(s);
+}
+
 static void rng_backend_class_init(ObjectClass *oc, void *data)
 {
    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
@@ -93,6 +130,7 @@ static const TypeInfo rng_backend_info = {
    .parent = TYPE_OBJECT,
    .instance_size = sizeof(RngBackend),
    .instance_init = rng_backend_init,
+    .instance_finalize = rng_backend_finalize,
    .class_size = sizeof(RngBackendClass),
    .class_init = rng_backend_class_init,
    .abstract = true,
--- a/backends/tpm.c
+++ b/backends/tpm.c
@@ -14,6 +14,7 @@

 #include "qemu/osdep.h"
 #include "sysemu/tpm_backend.h"
+#include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
 #include "sysemu/tpm.h"
 #include "qemu/thread.h"
--- a/block.c
+++ b/block.c
@@ -22,7 +22,6 @@
 * THE SOFTWARE.
 */
 #include "qemu/osdep.h"
-#include "qemu-common.h"
 #include "trace.h"
 #include "block/block_int.h"
 #include "block/blockjob.h"
@@ -40,6 +39,8 @@
 #include "qemu/timer.h"
 #include "qapi-event.h"
 #include "block/throttle-groups.h"
+#include "qemu/cutils.h"
+#include "qemu/id.h"

 #ifdef CONFIG_BSD
 #include <sys/ioctl.h>
@@ -53,27 +54,8 @@
 #include <windows.h>
 #endif

-/**
- * A BdrvDirtyBitmap can be in three possible states:
- * (1) successor is NULL and disabled is false: full r/w mode
- * (2) successor is NULL and disabled is true: read only mode ("disabled")
- * (3) successor is set: frozen mode.
- *     A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
- *     or enabled. A frozen bitmap can only abdicate() or reclaim().
- */
-struct BdrvDirtyBitmap {
-    HBitmap *bitmap;            /* Dirty sector bitmap implementation */
-    BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
-    char *name;                 /* Optional non-empty unique ID */
-    int64_t size;               /* Size of the bitmap (Number of sectors) */
-    bool disabled;              /* Bitmap is read-only */
-    QLIST_ENTRY(BdrvDirtyBitmap) list;
-};
-
 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

-struct BdrvStates bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states);
-
 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

@@ -88,9 +70,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
                             BlockDriverState *parent,
                             const BdrvChildRole *child_role, Error **errp);

-static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
-static void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs);
-
 /* If non-zero, use only whitelisted block drivers */
 static int use_bdrv_whitelist;

@@ -246,10 +225,7 @@ void bdrv_register(BlockDriver *bdrv)

 BlockDriverState *bdrv_new_root(void)
 {
-    BlockDriverState *bs = bdrv_new();
-
-    QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
-    return bs;
+    return bdrv_new();
 }

 BlockDriverState *bdrv_new(void)
@@ -313,6 +289,11 @@ static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
    return 0;
 }

+bool bdrv_uses_whitelist(void)
+{
+    return use_bdrv_whitelist;
+}
+
 typedef struct CreateCo {
    BlockDriver *drv;
    char *filename;
@@ -664,21 +645,23 @@ int bdrv_parse_discard_flags(const char *mode, int *flags)
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
-int bdrv_parse_cache_flags(const char *mode, int *flags)
+int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
 {
    *flags &= ~BDRV_O_CACHE_MASK;

    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
-        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
+        *writethrough = false;
+        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "directsync")) {
+        *writethrough = true;
        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "writeback")) {
-        *flags |= BDRV_O_CACHE_WB;
+        *writethrough = false;
    } else if (!strcmp(mode, "unsafe")) {
-        *flags |= BDRV_O_CACHE_WB;
+        *writethrough = false;
        *flags |= BDRV_O_NO_FLUSH;
    } else if (!strcmp(mode, "writethrough")) {
-        /* this is the default */
+        *writethrough = true;
    } else {
        return -1;
    }
@@ -687,13 +670,18 @@ int bdrv_parse_cache_flags(const char *mode, int *flags)
 }

 /*
- * Returns the flags that a temporary snapshot should get, based on the
- * originally requested flags (the originally requested image will have flags
- * like a backing file)
+ * Returns the options and flags that a temporary snapshot should get, based on
+ * the originally requested flags (the originally requested image will have
+ * flags like a backing file)
 */
-static int bdrv_temp_snapshot_flags(int flags)
+static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
+                                       int parent_flags, QDict *parent_options)
 {
-    return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
+    *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
+
+    /* For temporary files, unconditional cache=unsafe is fine */
+    qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
+    qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
 }

 /*
@@ -716,11 +704,11 @@ static void bdrv_inherited_options(int *child_flags, QDict *child_options,
    /* Our block drivers take care to send flushes and respect unmap policy,
     * so we can default to enable both on lower layers regardless of the
     * corresponding parent options. */
-    qdict_set_default_str(child_options, BDRV_OPT_CACHE_WB, "on");
    flags |= BDRV_O_UNMAP;

    /* Clear flags that only apply to the top layer */
-    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
+    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ |
+               BDRV_O_NO_IO);

    *child_flags = flags;
 }
@@ -740,7 +728,7 @@ static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options,
    child_file.inherit_options(child_flags, child_options,
                               parent_flags, parent_options);

-    *child_flags &= ~BDRV_O_PROTOCOL;
+    *child_flags &= ~(BDRV_O_PROTOCOL | BDRV_O_NO_IO);
 }

 const BdrvChildRole child_format = {
@@ -756,8 +744,8 @@ static void bdrv_backing_options(int *child_flags, QDict *child_options,
 {
    int flags = parent_flags;

-    /* The cache mode is inherited unmodified for backing files */
-    qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_WB);
+    /* The cache mode is inherited unmodified for backing files; except WCE,
+     * which is only applied on the top level (BlockBackend) */
    qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
    qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);

@@ -776,7 +764,7 @@ static const BdrvChildRole child_backing = {

 static int bdrv_open_flags(BlockDriverState *bs, int flags)
 {
-    int open_flags = flags | BDRV_O_CACHE_WB;
+    int open_flags = flags;

    /*
     * Clear flags that are internal to the block layer before opening the
@@ -798,11 +786,6 @@ static void update_flags_from_options(int *flags, QemuOpts *opts)
 {
    *flags &= ~BDRV_O_CACHE_MASK;

-    assert(qemu_opt_find(opts, BDRV_OPT_CACHE_WB));
-    if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_WB, false)) {
-        *flags |= BDRV_O_CACHE_WB;
-    }
-
    assert(qemu_opt_find(opts, BDRV_OPT_CACHE_NO_FLUSH));
    if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
        *flags |= BDRV_O_NO_FLUSH;
@@ -816,10 +799,6 @@ static void update_flags_from_options(int *flags, QemuOpts *opts)

 static void update_options_from_flags(QDict *options, int flags)
 {
-    if (!qdict_haskey(options, BDRV_OPT_CACHE_WB)) {
-        qdict_put(options, BDRV_OPT_CACHE_WB,
-                  qbool_from_bool(flags & BDRV_O_CACHE_WB));
-    }
    if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
        qdict_put(options, BDRV_OPT_CACHE_DIRECT,
                  qbool_from_bool(flags & BDRV_O_NOCACHE));
@@ -881,11 +860,6 @@ static QemuOptsList bdrv_runtime_opts = {
            .type = QEMU_OPT_STRING,
            .help = "Block driver to use for the node",
        },
-        {
-            .name = BDRV_OPT_CACHE_WB,
-            .type = QEMU_OPT_BOOL,
-            .help = "Enable writeback mode",
-        },
        {
            .name = BDRV_OPT_CACHE_DIRECT,
            .type = QEMU_OPT_BOOL,
@@ -992,7 +966,6 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,

    /* Apply cache mode options */
    update_flags_from_options(&bs->open_flags, opts);
-    bdrv_set_enable_write_cache(bs, bs->open_flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    open_flags = bdrv_open_flags(bs, bs->open_flags);
@@ -1022,13 +995,6 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
        goto free_and_fail;
    }

-    if (bs->encrypted) {
-        error_report("Encrypted images are deprecated");
-        error_printf("Support for them will be removed in a future release.\n"
-                     "You can use 'qemu-img convert' to convert your image"
-                     " to an unencrypted one.\n");
-    }
-
    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
@@ -1194,10 +1160,9 @@ static int bdrv_fill_options(QDict **options, const char *filename,
    return 0;
 }

-static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
-                                    BlockDriverState *child_bs,
-                                    const char *child_name,
-                                    const BdrvChildRole *child_role)
+BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
+                                  const char *child_name,
+                                  const BdrvChildRole *child_role)
 {
    BdrvChild *child = g_new(BdrvChild, 1);
    *child = (BdrvChild) {
@@ -1206,24 +1171,43 @@ static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
        .role   = child_role,
    };

-    QLIST_INSERT_HEAD(&parent_bs->children, child, next);
    QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent);

    return child;
 }

+static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
+                                    BlockDriverState *child_bs,
+                                    const char *child_name,
+                                    const BdrvChildRole *child_role)
+{
+    BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role);
+    QLIST_INSERT_HEAD(&parent_bs->children, child, next);
+    return child;
+}
+
 static void bdrv_detach_child(BdrvChild *child)
 {
-    QLIST_REMOVE(child, next);
+    if (child->next.le_prev) {
+        QLIST_REMOVE(child, next);
+        child->next.le_prev = NULL;
+    }
    QLIST_REMOVE(child, next_parent);
    g_free(child->name);
    g_free(child);
 }

-void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
+void bdrv_root_unref_child(BdrvChild *child)
 {
    BlockDriverState *child_bs;

+    child_bs = child->bs;
+    bdrv_detach_child(child);
+    bdrv_unref(child_bs);
+}
+
+void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
+{
    if (child == NULL) {
        return;
    }
@@ -1232,9 +1216,7 @@ void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
        child->bs->inherits_from = NULL;
    }

-    child_bs = child->bs;
-    bdrv_detach_child(child);
-    bdrv_unref(child_bs);
+    bdrv_root_unref_child(child);
 }

 /*
@@ -1424,13 +1406,13 @@ done:
    return c;
 }

-int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
+static int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags,
+                                     QDict *snapshot_options, Error **errp)
 {
    /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
    char *tmp_filename = g_malloc0(PATH_MAX + 1);
    int64_t total_size;
    QemuOpts *opts = NULL;
-    QDict *snapshot_options;
    BlockDriverState *bs_snapshot;
    Error *local_err = NULL;
    int ret;
@@ -1464,8 +1446,7 @@ int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
        goto out;
    }

-    /* Prepare a new options QDict for the temporary file */
-    snapshot_options = qdict_new();
+    /* Prepare options QDict for the temporary file */
    qdict_put(snapshot_options, "file.driver",
              qstring_from_str("file"));
    qdict_put(snapshot_options, "file.filename",
@@ -1477,6 +1458,7 @@ int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)

    ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
                    flags, &local_err);
+    snapshot_options = NULL;
    if (ret < 0) {
        error_propagate(errp, local_err);
        goto out;
@@ -1485,6 +1467,7 @@ int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
    bdrv_append(bs_snapshot, bs);

 out:
+    QDECREF(snapshot_options);
    g_free(tmp_filename);
    return ret;
 }
@@ -1516,6 +1499,7 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
    const char *drvname;
    const char *backing;
    Error *local_err = NULL;
+    QDict *snapshot_options = NULL;
    int snapshot_flags = 0;

    assert(pbs);
@@ -1542,6 +1526,13 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
        if (!bs) {
            return -ENODEV;
        }
+
+        if (bs->throttle_state) {
+            error_setg(errp, "Cannot reference an existing block device for "
+                       "which I/O throttling is enabled");
+            return -EINVAL;
+        }
+
        bdrv_ref(bs);
        *pbs = bs;
        return 0;
@@ -1607,7 +1598,9 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
            flags |= BDRV_O_ALLOW_RDWR;
        }
        if (flags & BDRV_O_SNAPSHOT) {
-            snapshot_flags = bdrv_temp_snapshot_flags(flags);
+            snapshot_options = qdict_new();
+            bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options,
+                                       flags, options);
            bdrv_backing_options(&flags, options, flags, options);
        }

@@ -1681,9 +1674,9 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
            error_setg(errp, "Block protocol '%s' doesn't support the option "
                       "'%s'", drv->format_name, entry->key);
        } else {
-            error_setg(errp, "Block format '%s' used by device '%s' doesn't "
-                       "support the option '%s'", drv->format_name,
-                       bdrv_get_device_name(bs), entry->key);
+            error_setg(errp,
+                       "Block format '%s' does not support the option '%s'",
+                       drv->format_name, entry->key);
        }

        ret = -EINVAL;
@@ -1709,7 +1702,9 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
    /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
     * temporary snapshot afterwards. */
    if (snapshot_flags) {
-        ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
+        ret = bdrv_append_temp_snapshot(bs, snapshot_flags, snapshot_options,
+                                        &local_err);
+        snapshot_options = NULL;
        if (local_err) {
            goto close_and_fail;
        }
@@ -1721,6 +1716,7 @@ fail:
    if (file != NULL) {
        bdrv_unref_child(bs, file);
    }
+    QDECREF(snapshot_options);
    QDECREF(bs->explicit_options);
    QDECREF(bs->options);
    QDECREF(options);
@@ -1743,6 +1739,7 @@ close_and_fail:
    } else {
        bdrv_unref(bs);
    }
+    QDECREF(snapshot_options);
    QDECREF(options);
    if (local_err) {
        error_propagate(errp, local_err);
@@ -1998,17 +1995,6 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,

    update_flags_from_options(&reopen_state->flags, opts);

-    /* If a guest device is attached, it owns WCE */
-    if (reopen_state->bs->blk && blk_get_attached_dev(reopen_state->bs->blk)) {
-        bool old_wce = bdrv_enable_write_cache(reopen_state->bs);
-        bool new_wce = (reopen_state->flags & BDRV_O_CACHE_WB);
-        if (old_wce != new_wce) {
-            error_setg(errp, "Cannot change cache.writeback: Device attached");
-            ret = -EINVAL;
-            goto error;
-        }
-    }
-
    /* node-name and driver must be unchanged. Put them back into the QDict, so
     * that they are checked at the end of this function. */
    value = qemu_opt_get(opts, "node-name");
@@ -2108,8 +2094,6 @@ void bdrv_reopen_commit(BDRVReopenState *reopen_state)

    reopen_state->bs->explicit_options   = reopen_state->explicit_options;
    reopen_state->bs->open_flags         = reopen_state->flags;
-    reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
-                                              BDRV_O_CACHE_WB);
    reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);

    bdrv_refresh_limits(reopen_state->bs, NULL);
@@ -2236,45 +2220,11 @@ void bdrv_close_all(void)
    }
 }

-/* Note that bs->device_list.tqe_prev is initially null,
- * and gets set to non-null by QTAILQ_INSERT_TAIL().  Establish
- * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
- * resetting it to null on remove.  */
-void bdrv_device_remove(BlockDriverState *bs)
-{
-    QTAILQ_REMOVE(&bdrv_states, bs, device_list);
-    bs->device_list.tqe_prev = NULL;
-}
-
-/* make a BlockDriverState anonymous by removing from bdrv_state and
- * graph_bdrv_state list.
-   Also, NULL terminate the device_name to prevent double remove */
-void bdrv_make_anon(BlockDriverState *bs)
-{
-    /* Take care to remove bs from bdrv_states only when it's actually
-     * in it. */
-    if (bs->device_list.tqe_prev) {
-        bdrv_device_remove(bs);
-    }
-    if (bs->node_name[0] != '\0') {
-        QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
-    }
-    bs->node_name[0] = '\0';
-}
-
 /* Fields that need to stay with the top-level BDS */
 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
                                     BlockDriverState *bs_src)
 {
    /* move some fields that need to stay attached to the device */
-
-    /* dev info */
-    bs_dest->copy_on_read       = bs_src->copy_on_read;
-
-    bs_dest->enable_write_cache = bs_src->enable_write_cache;
-
-    /* dirty bitmap */
-    bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
 }

 static void change_parent_backing_link(BlockDriverState *from,
@@ -2282,6 +2232,14 @@ static void change_parent_backing_link(BlockDriverState *from,
 {
    BdrvChild *c, *next;

+    if (from->blk) {
+        /* FIXME We bypass blk_set_bs(), so we need to make these updates
+         * manually. The root problem is not in this change function, but the
+         * existence of BlockDriverState.blk. */
+        to->blk = from->blk;
+        from->blk = NULL;
+    }
+
    QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
        assert(c->role != &child_backing);
        c->bs = to;
@@ -2290,13 +2248,6 @@ static void change_parent_backing_link(BlockDriverState *from,
        bdrv_ref(to);
        bdrv_unref(from);
    }
-    if (from->blk) {
-        blk_set_bs(from->blk, to);
-        if (!to->device_list.tqe_prev) {
-            QTAILQ_INSERT_BEFORE(from, to, device_list);
-        }
-        bdrv_device_remove(from);
-    }
 }

 static void swap_feature_fields(BlockDriverState *bs_top,
@@ -2390,8 +2341,9 @@ static void bdrv_delete(BlockDriverState *bs)
    bdrv_close(bs);

    /* remove from list, if necessary */
-    bdrv_make_anon(bs);
-
+    if (bs->node_name[0] != '\0') {
+        QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
+    }
    QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list);

    g_free(bs);
@@ -2527,26 +2479,6 @@ ro_cleanup:
    return ret;
 }

-int bdrv_commit_all(void)
-{
-    BlockDriverState *bs;
-
-    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
-        AioContext *aio_context = bdrv_get_aio_context(bs);
-
-        aio_context_acquire(aio_context);
-        if (bs->drv && bs->backing) {
-            int ret = bdrv_commit(bs);
-            if (ret < 0) {
-                aio_context_release(aio_context);
-                return ret;
-            }
-        }
-        aio_context_release(aio_context);
-    }
-    return 0;
-}
-
 /*
 * Return values:
 * 0        - success
@@ -2770,23 +2702,6 @@ int bdrv_is_sg(BlockDriverState *bs)
    return bs->sg;
 }

-int bdrv_enable_write_cache(BlockDriverState *bs)
-{
-    return bs->enable_write_cache;
-}
-
-void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
-{
-    bs->enable_write_cache = wce;
-
-    /* so a reopen() will preserve wce */
-    if (wce) {
-        bs->open_flags |= BDRV_O_CACHE_WB;
-    } else {
-        bs->open_flags &= ~BDRV_O_CACHE_WB;
-    }
-}
-
 int bdrv_is_encrypted(BlockDriverState *bs)
 {
    if (bs->backing && bs->backing->bs->encrypted) {
@@ -2928,7 +2843,7 @@ BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)

    list = NULL;
    QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
-        BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
+        BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, errp);
        if (!info) {
            qapi_free_BlockDeviceInfoList(list);
            return NULL;
@@ -2995,12 +2910,23 @@ BlockDriverState *bdrv_next_node(BlockDriverState *bs)
    return QTAILQ_NEXT(bs, node_list);
 }

+/* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
+ * the monitor or attached to a BlockBackend */
 BlockDriverState *bdrv_next(BlockDriverState *bs)
 {
-    if (!bs) {
-        return QTAILQ_FIRST(&bdrv_states);
+    if (!bs || bs->blk) {
+        bs = blk_next_root_bs(bs);
+        if (bs) {
+            return bs;
+        }
    }
-    return QTAILQ_NEXT(bs, device_list);
+
+    /* Ignore all BDSs that are attached to a BlockBackend here; they have been
+     * handled by the above block already */
+    do {
+        bs = bdrv_next_monitor_owned(bs);
+    } while (bs && bs->blk);
+    return bs;
 }

 const char *bdrv_get_node_name(const BlockDriverState *bs)
@@ -3308,10 +3234,10 @@ void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)

 void bdrv_invalidate_cache_all(Error **errp)
 {
-    BlockDriverState *bs;
+    BlockDriverState *bs = NULL;
    Error *local_err = NULL;

-    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
+    while ((bs = bdrv_next(bs)) != NULL) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
@@ -3341,10 +3267,10 @@ static int bdrv_inactivate(BlockDriverState *bs)

 int bdrv_inactivate_all(void)
 {
-    BlockDriverState *bs;
+    BlockDriverState *bs = NULL;
    int ret;

-    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
+    while ((bs = bdrv_next(bs)) != NULL) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
@@ -3431,346 +3357,6 @@ void bdrv_lock_medium(BlockDriverState *bs, bool locked)
    }
 }

-BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
-{
-    BdrvDirtyBitmap *bm;
-
-    assert(name);
-    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
-        if (bm->name && !strcmp(name, bm->name)) {
-            return bm;
-        }
-    }
-    return NULL;
-}
-
-void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
-{
-    assert(!bdrv_dirty_bitmap_frozen(bitmap));
-    g_free(bitmap->name);
-    bitmap->name = NULL;
-}
-
-BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
-                                          uint32_t granularity,
-                                          const char *name,
-                                          Error **errp)
-{
-    int64_t bitmap_size;
-    BdrvDirtyBitmap *bitmap;
-    uint32_t sector_granularity;
-
-    assert((granularity & (granularity - 1)) == 0);
-
-    if (name && bdrv_find_dirty_bitmap(bs, name)) {
-        error_setg(errp, "Bitmap already exists: %s", name);
-        return NULL;
-    }
-    sector_granularity = granularity >> BDRV_SECTOR_BITS;
-    assert(sector_granularity);
-    bitmap_size = bdrv_nb_sectors(bs);
-    if (bitmap_size < 0) {
-        error_setg_errno(errp, -bitmap_size, "could not get length of device");
-        errno = -bitmap_size;
-        return NULL;
-    }
-    bitmap = g_new0(BdrvDirtyBitmap, 1);
-    bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
-    bitmap->size = bitmap_size;
-    bitmap->name = g_strdup(name);
-    bitmap->disabled = false;
-    QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
-    return bitmap;
-}
-
-bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
-{
-    return bitmap->successor;
-}
-
-bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
-{
-    return !(bitmap->disabled || bitmap->successor);
-}
-
-DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
-{
-    if (bdrv_dirty_bitmap_frozen(bitmap)) {
-        return DIRTY_BITMAP_STATUS_FROZEN;
-    } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
-        return DIRTY_BITMAP_STATUS_DISABLED;
-    } else {
-        return DIRTY_BITMAP_STATUS_ACTIVE;
-    }
-}
-
-/**
- * Create a successor bitmap destined to replace this bitmap after an operation.
- * Requires that the bitmap is not frozen and has no successor.
- */
-int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
-                                       BdrvDirtyBitmap *bitmap, Error **errp)
-{
-    uint64_t granularity;
-    BdrvDirtyBitmap *child;
-
-    if (bdrv_dirty_bitmap_frozen(bitmap)) {
-        error_setg(errp, "Cannot create a successor for a bitmap that is "
-                   "currently frozen");
-        return -1;
-    }
-    assert(!bitmap->successor);
-
-    /* Create an anonymous successor */
-    granularity = bdrv_dirty_bitmap_granularity(bitmap);
-    child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
-    if (!child) {
-        return -1;
-    }
-
-    /* Successor will be on or off based on our current state. */
-    child->disabled = bitmap->disabled;
-
-    /* Install the successor and freeze the parent */
-    bitmap->successor = child;
-    return 0;
-}
-
-/**
- * For a bitmap with a successor, yield our name to the successor,
- * delete the old bitmap, and return a handle to the new bitmap.
- */
-BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
-                                            BdrvDirtyBitmap *bitmap,
-                                            Error **errp)
-{
-    char *name;
-    BdrvDirtyBitmap *successor = bitmap->successor;
-
-    if (successor == NULL) {
-        error_setg(errp, "Cannot relinquish control if "
-                   "there's no successor present");
-        return NULL;
-    }
-
-    name = bitmap->name;
-    bitmap->name = NULL;
-    successor->name = name;
-    bitmap->successor = NULL;
-    bdrv_release_dirty_bitmap(bs, bitmap);
-
-    return successor;
-}
-
-/**
- * In cases of failure where we can no longer safely delete the parent,
- * we may wish to re-join the parent and child/successor.
- * The merged parent will be un-frozen, but not explicitly re-enabled.
- */
-BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
-                                           BdrvDirtyBitmap *parent,
-                                           Error **errp)
-{
-    BdrvDirtyBitmap *successor = parent->successor;
-
-    if (!successor) {
-        error_setg(errp, "Cannot reclaim a successor when none is present");
-        return NULL;
-    }
-
-    if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
-        error_setg(errp, "Merging of parent and successor bitmap failed");
-        return NULL;
-    }
-    bdrv_release_dirty_bitmap(bs, successor);
-    parent->successor = NULL;
-
-    return parent;
-}
-
-/**
- * Truncates _all_ bitmaps attached to a BDS.
- */
-static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
-{
-    BdrvDirtyBitmap *bitmap;
-    uint64_t size = bdrv_nb_sectors(bs);
-
-    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
-        assert(!bdrv_dirty_bitmap_frozen(bitmap));
-        hbitmap_truncate(bitmap->bitmap, size);
-        bitmap->size = size;
-    }
-}
-
-static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
-                                                  BdrvDirtyBitmap *bitmap,
-                                                  bool only_named)
-{
-    BdrvDirtyBitmap *bm, *next;
-    QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
-        if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) {
-            assert(!bdrv_dirty_bitmap_frozen(bm));
-            QLIST_REMOVE(bm, list);
-            hbitmap_free(bm->bitmap);
-            g_free(bm->name);
-            g_free(bm);
-
-            if (bitmap) {
-                return;
-            }
-        }
-    }
-}
-
-void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
-{
-    bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false);
-}
-
-/**
- * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()).
- * There must not be any frozen bitmaps attached.
- */
-static void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
-{
-    bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
-}
-
-void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
-{
-    assert(!bdrv_dirty_bitmap_frozen(bitmap));
-    bitmap->disabled = true;
-}
-
-void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
-{
-    assert(!bdrv_dirty_bitmap_frozen(bitmap));
-    bitmap->disabled = false;
-}
-
-BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
-{
-    BdrvDirtyBitmap *bm;
-    BlockDirtyInfoList *list = NULL;
-    BlockDirtyInfoList **plist = &list;
-
-    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
-        BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
-        BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
-        info->count = bdrv_get_dirty_count(bm);
-        info->granularity = bdrv_dirty_bitmap_granularity(bm);
-        info->has_name = !!bm->name;
-        info->name = g_strdup(bm->name);
-        info->status = bdrv_dirty_bitmap_status(bm);
-        entry->value = info;
-        *plist = entry;
-        plist = &entry->next;
-    }
-
-    return list;
-}
-
-int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
-{
-    if (bitmap) {
-        return hbitmap_get(bitmap->bitmap, sector);
-    } else {
-        return 0;
-    }
-}
-
-/**
- * Chooses a default granularity based on the existing cluster size,
- * but clamped between [4K, 64K]. Defaults to 64K in the case that there
- * is no cluster size information available.
- */
-uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
-{
-    BlockDriverInfo bdi;
-    uint32_t granularity;
-
-    if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
-        granularity = MAX(4096, bdi.cluster_size);
-        granularity = MIN(65536, granularity);
-    } else {
-        granularity = 65536;
-    }
-
-    return granularity;
-}
-
-uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
-{
-    return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
-}
-
-void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
-{
-    hbitmap_iter_init(hbi, bitmap->bitmap, 0);
-}
-
-void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
-                           int64_t cur_sector, int nr_sectors)
-{
-    assert(bdrv_dirty_bitmap_enabled(bitmap));
-    hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
-}
-
-void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
-                             int64_t cur_sector, int nr_sectors)
-{
-    assert(bdrv_dirty_bitmap_enabled(bitmap));
-    hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
-}
-
-void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
-{
-    assert(bdrv_dirty_bitmap_enabled(bitmap));
-    if (!out) {
-        hbitmap_reset_all(bitmap->bitmap);
-    } else {
-        HBitmap *backup = bitmap->bitmap;
-        bitmap->bitmap = hbitmap_alloc(bitmap->size,
-                                       hbitmap_granularity(backup));
-        *out = backup;
-    }
-}
-
-void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
-{
-    HBitmap *tmp = bitmap->bitmap;
-    assert(bdrv_dirty_bitmap_enabled(bitmap));
-    bitmap->bitmap = in;
-    hbitmap_free(tmp);
-}
-
-void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
-                    int nr_sectors)
-{
-    BdrvDirtyBitmap *bitmap;
-    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
-        if (!bdrv_dirty_bitmap_enabled(bitmap)) {
-            continue;
-        }
-        hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
-    }
-}
-
-/**
- * Advance an HBitmapIter to an arbitrary offset.
- */
-void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
-{
-    assert(hbi->hb);
-    hbitmap_iter_init(hbi, hbi->hb, offset);
-}
-
-int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
-{
-    return hbitmap_count(bitmap->bitmap);
-}
-
 /* Get a reference to bs */
 void bdrv_ref(BlockDriverState *bs)
 {
@@ -3966,8 +3552,8 @@ void bdrv_img_create(const char *filename, const char *fmt,
            }

            /* backing files always opened read-only */
-            back_flags =
-                flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
+            back_flags = flags;
+            back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

            if (backing_fmt) {
                backing_options = qdict_new();
@@ -4190,10 +3776,10 @@ bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
 */
 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
 {
-    BlockDriverState *bs;
+    BlockDriverState *bs = NULL;

    /* walk down the bs forest recursively */
-    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
+    while ((bs = bdrv_next(bs)) != NULL) {
        bool perm;

        /* try to recurse in this top level bs */
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -4,7 +4,7 @@ block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
 block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
 block-obj-y += quorum.o
-block-obj-y += parallels.o blkdebug.o blkverify.o
+block-obj-y += parallels.o blkdebug.o blkverify.o blkreplay.o
 block-obj-y += block-backend.o snapshot.o qapi.o
 block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
 block-obj-$(CONFIG_POSIX) += raw-posix.o
@@ -20,9 +20,11 @@ block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
 block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
 block-obj-$(CONFIG_LIBSSH2) += ssh.o
-block-obj-y += accounting.o
+block-obj-y += accounting.o dirty-bitmap.o
 block-obj-y += write-threshold.o

+block-obj-y += crypto.o
+
 common-obj-y += stream.o
 common-obj-y += commit.o
 common-obj-y += backup.o
--- a/block/archipelago.c
+++ b/block/archipelago.c
@@ -51,7 +51,7 @@
 */

 #include "qemu/osdep.h"
-#include "qemu-common.h"
+#include "qemu/cutils.h"
 #include "block/block_int.h"
 #include "qemu/error-report.h"
 #include "qemu/thread.h"
--- a/block/backup.c
+++ b/block/backup.c
@@ -17,14 +17,14 @@
 #include "block/block.h"
 #include "block/block_int.h"
 #include "block/blockjob.h"
+#include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
+#include "qemu/cutils.h"
 #include "sysemu/block-backend.h"
+#include "qemu/bitmap.h"

-#define BACKUP_CLUSTER_BITS 16
-#define BACKUP_CLUSTER_SIZE (1 << BACKUP_CLUSTER_BITS)
-#define BACKUP_SECTORS_PER_CLUSTER (BACKUP_CLUSTER_SIZE / BDRV_SECTOR_SIZE)
-
+#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
 #define SLICE_TIME 100000000ULL /* ns */

 typedef struct CowRequest {
@@ -45,10 +45,17 @@ typedef struct BackupBlockJob {
    BlockdevOnError on_target_error;
    CoRwlock flush_rwlock;
    uint64_t sectors_read;
-    HBitmap *bitmap;
+    unsigned long *done_bitmap;
+    int64_t cluster_size;
    QLIST_HEAD(, CowRequest) inflight_reqs;
 } BackupBlockJob;

+/* Size of a cluster in sectors, instead of bytes. */
+static inline int64_t cluster_size_sectors(BackupBlockJob *job)
+{
+  return job->cluster_size / BDRV_SECTOR_SIZE;
+}
+
 /* See if in-flight requests overlap and wait for them to complete */
 static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
                                                       int64_t start,
@@ -97,13 +104,14 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
    QEMUIOVector bounce_qiov;
    void *bounce_buffer = NULL;
    int ret = 0;
+    int64_t sectors_per_cluster = cluster_size_sectors(job);
    int64_t start, end;
    int n;

    qemu_co_rwlock_rdlock(&job->flush_rwlock);

-    start = sector_num / BACKUP_SECTORS_PER_CLUSTER;
-    end = DIV_ROUND_UP(sector_num + nb_sectors, BACKUP_SECTORS_PER_CLUSTER);
+    start = sector_num / sectors_per_cluster;
+    end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster);

    trace_backup_do_cow_enter(job, start, sector_num, nb_sectors);

@@ -111,19 +119,19 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
    cow_request_begin(&cow_request, job, start, end);

    for (; start < end; start++) {
-        if (hbitmap_get(job->bitmap, start)) {
+        if (test_bit(start, job->done_bitmap)) {
            trace_backup_do_cow_skip(job, start);
            continue; /* already copied */
        }

        trace_backup_do_cow_process(job, start);

-        n = MIN(BACKUP_SECTORS_PER_CLUSTER,
+        n = MIN(sectors_per_cluster,
                job->common.len / BDRV_SECTOR_SIZE -
-                start * BACKUP_SECTORS_PER_CLUSTER);
+                start * sectors_per_cluster);

        if (!bounce_buffer) {
-            bounce_buffer = qemu_blockalign(bs, BACKUP_CLUSTER_SIZE);
+            bounce_buffer = qemu_blockalign(bs, job->cluster_size);
        }
        iov.iov_base = bounce_buffer;
        iov.iov_len = n * BDRV_SECTOR_SIZE;
@@ -131,10 +139,10 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,

        if (is_write_notifier) {
            ret = bdrv_co_readv_no_serialising(bs,
-                                           start * BACKUP_SECTORS_PER_CLUSTER,
+                                           start * sectors_per_cluster,
                                           n, &bounce_qiov);
        } else {
-            ret = bdrv_co_readv(bs, start * BACKUP_SECTORS_PER_CLUSTER, n,
+            ret = bdrv_co_readv(bs, start * sectors_per_cluster, n,
                                &bounce_qiov);
        }
        if (ret < 0) {
@@ -147,11 +155,11 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,

        if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
            ret = bdrv_co_write_zeroes(job->target,
-                                       start * BACKUP_SECTORS_PER_CLUSTER,
+                                       start * sectors_per_cluster,
                                       n, BDRV_REQ_MAY_UNMAP);
        } else {
            ret = bdrv_co_writev(job->target,
-                                 start * BACKUP_SECTORS_PER_CLUSTER, n,
+                                 start * sectors_per_cluster, n,
                                 &bounce_qiov);
        }
        if (ret < 0) {
@@ -162,7 +170,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
            goto out;
        }

-        hbitmap_set(job->bitmap, start, 1);
+        set_bit(start, job->done_bitmap);

        /* Publish progress, guest I/O counts as progress too.  Note that the
         * offset field is an opaque progress value, it is not a disk offset.
@@ -322,21 +330,22 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
    int64_t cluster;
    int64_t end;
    int64_t last_cluster = -1;
+    int64_t sectors_per_cluster = cluster_size_sectors(job);
    BlockDriverState *bs = job->common.bs;
    HBitmapIter hbi;

    granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap);
-    clusters_per_iter = MAX((granularity / BACKUP_CLUSTER_SIZE), 1);
+    clusters_per_iter = MAX((granularity / job->cluster_size), 1);
    bdrv_dirty_iter_init(job->sync_bitmap, &hbi);

    /* Find the next dirty sector(s) */
    while ((sector = hbitmap_iter_next(&hbi)) != -1) {
-        cluster = sector / BACKUP_SECTORS_PER_CLUSTER;
+        cluster = sector / sectors_per_cluster;

        /* Fake progress updates for any clusters we skipped */
        if (cluster != last_cluster + 1) {
            job->common.offset += ((cluster - last_cluster - 1) *
-                                   BACKUP_CLUSTER_SIZE);
+                                   job->cluster_size);
        }

        for (end = cluster + clusters_per_iter; cluster < end; cluster++) {
@@ -344,8 +353,8 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
                if (yield_and_check(job)) {
                    return ret;
                }
-                ret = backup_do_cow(bs, cluster * BACKUP_SECTORS_PER_CLUSTER,
-                                    BACKUP_SECTORS_PER_CLUSTER, &error_is_read,
+                ret = backup_do_cow(bs, cluster * sectors_per_cluster,
+                                    sectors_per_cluster, &error_is_read,
                                    false);
                if ((ret < 0) &&
                    backup_error_action(job, error_is_read, -ret) ==
@@ -357,17 +366,17 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)

        /* If the bitmap granularity is smaller than the backup granularity,
         * we need to advance the iterator pointer to the next cluster. */
-        if (granularity < BACKUP_CLUSTER_SIZE) {
-            bdrv_set_dirty_iter(&hbi, cluster * BACKUP_SECTORS_PER_CLUSTER);
+        if (granularity < job->cluster_size) {
+            bdrv_set_dirty_iter(&hbi, cluster * sectors_per_cluster);
        }

        last_cluster = cluster - 1;
    }

    /* Play some final catchup with the progress meter */
-    end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
+    end = DIV_ROUND_UP(job->common.len, job->cluster_size);
    if (last_cluster + 1 < end) {
-        job->common.offset += ((end - last_cluster - 1) * BACKUP_CLUSTER_SIZE);
+        job->common.offset += ((end - last_cluster - 1) * job->cluster_size);
    }

    return ret;
@@ -384,17 +393,17 @@ static void coroutine_fn backup_run(void *opaque)
        .notify = backup_before_write_notify,
    };
    int64_t start, end;
+    int64_t sectors_per_cluster = cluster_size_sectors(job);
    int ret = 0;

    QLIST_INIT(&job->inflight_reqs);
    qemu_co_rwlock_init(&job->flush_rwlock);

    start = 0;
-    end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
+    end = DIV_ROUND_UP(job->common.len, job->cluster_size);

-    job->bitmap = hbitmap_alloc(end, 0);
+    job->done_bitmap = bitmap_new(end);

-    bdrv_set_enable_write_cache(target, true);
    if (target->blk) {
        blk_set_on_error(target->blk, on_target_error, on_target_error);
        blk_iostatus_enable(target->blk);
@@ -427,7 +436,7 @@ static void coroutine_fn backup_run(void *opaque)
                /* Check to see if these blocks are already in the
                 * backing file. */

-                for (i = 0; i < BACKUP_SECTORS_PER_CLUSTER;) {
+                for (i = 0; i < sectors_per_cluster;) {
                    /* bdrv_is_allocated() only returns true/false based
                     * on the first set of sectors it comes across that
                     * are are all in the same state.
@@ -436,8 +445,8 @@ static void coroutine_fn backup_run(void *opaque)
                     * needed but at some point that is always the case. */
                    alloced =
                        bdrv_is_allocated(bs,
-                                start * BACKUP_SECTORS_PER_CLUSTER + i,
-                                BACKUP_SECTORS_PER_CLUSTER - i, &n);
+                                start * sectors_per_cluster + i,
+                                sectors_per_cluster - i, &n);
                    i += n;

                    if (alloced == 1 || n == 0) {
@@ -452,8 +461,8 @@ static void coroutine_fn backup_run(void *opaque)
                }
            }
            /* FULL sync mode we copy the whole drive. */
-            ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER,
-                    BACKUP_SECTORS_PER_CLUSTER, &error_is_read, false);
+            ret = backup_do_cow(bs, start * sectors_per_cluster,
+                                sectors_per_cluster, &error_is_read, false);
            if (ret < 0) {
                /* Depending on error action, fail now or retry cluster */
                BlockErrorAction action =
@@ -473,7 +482,7 @@ static void coroutine_fn backup_run(void *opaque)
    /* wait until pending backup_do_cow() calls have completed */
    qemu_co_rwlock_wrlock(&job->flush_rwlock);
    qemu_co_rwlock_unlock(&job->flush_rwlock);
-    hbitmap_free(job->bitmap);
+    g_free(job->done_bitmap);

    if (target->blk) {
        blk_iostatus_disable(target->blk);
@@ -494,6 +503,8 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
                  BlockJobTxn *txn, Error **errp)
 {
    int64_t len;
+    BlockDriverInfo bdi;
+    int ret;

    assert(bs);
    assert(target);
@@ -563,14 +574,32 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
        goto error;
    }

-    bdrv_op_block_all(target, job->common.blocker);
-
    job->on_source_error = on_source_error;
    job->on_target_error = on_target_error;
    job->target = target;
    job->sync_mode = sync_mode;
    job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ?
                       sync_bitmap : NULL;
+
+    /* If there is no backing file on the target, we cannot rely on COW if our
+     * backup cluster size is smaller than the target cluster size. Even for
+     * targets with a backing file, try to avoid COW if possible. */
+    ret = bdrv_get_info(job->target, &bdi);
+    if (ret < 0 && !target->backing) {
+        error_setg_errno(errp, -ret,
+            "Couldn't determine the cluster size of the target image, "
+            "which has no backing file");
+        error_append_hint(errp,
+            "Aborting, since this may create an unusable destination image\n");
+        goto error;
+    } else if (ret < 0 && target->backing) {
+        /* Not fatal; just trudge on ahead. */
+        job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
+    } else {
+        job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
+    }
+
+    bdrv_op_block_all(target, job->common.blocker);
    job->common.len = len;
    job->common.co = qemu_coroutine_create(backup_run);
    block_job_txn_add_job(txn, &job->common);
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -23,7 +23,8 @@
 */

 #include "qemu/osdep.h"
-#include "qemu-common.h"
+#include "qapi/error.h"
+#include "qemu/cutils.h"
 #include "qemu/config-file.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
--- a/block/blkreplay.c
+++ b/block/blkreplay.c
@@ -0,0 +1,160 @@
+/*
+ * Block protocol for record/replay
+ *
+ * Copyright (c) 2010-2016 Institute for System Programming
+ *                         of the Russian Academy of Sciences.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "sysemu/replay.h"
+#include "qapi/error.h"
+
+typedef struct Request {
+    Coroutine *co;
+    QEMUBH *bh;
+} Request;
+
+/* Next request id.
+   This counter is global, because requests from different
+   block devices should not get overlapping ids. */
+static uint64_t request_id;
+
+static int blkreplay_open(BlockDriverState *bs, QDict *options, int flags,
+                          Error **errp)
+{
+    Error *local_err = NULL;
+    int ret;
+
+    /* Open the image file */
+    bs->file = bdrv_open_child(NULL, options, "image",
+                               bs, &child_file, false, &local_err);
+    if (local_err) {
+        ret = -EINVAL;
+        error_propagate(errp, local_err);
+        goto fail;
+    }
+
+    ret = 0;
+fail:
+    if (ret < 0) {
+        bdrv_unref_child(bs, bs->file);
+    }
+    return ret;
+}
+
+static void blkreplay_close(BlockDriverState *bs)
+{
+}
+
+static int64_t blkreplay_getlength(BlockDriverState *bs)
+{
+    return bdrv_getlength(bs->file->bs);
+}
+
+/* This bh is used for synchronization of return from coroutines.
+   It continues yielded coroutine which then finishes its execution.
+   BH is called adjusted to some replay checkpoint, therefore
+   record and replay will always finish coroutines deterministically.
+*/
+static void blkreplay_bh_cb(void *opaque)
+{
+    Request *req = opaque;
+    qemu_coroutine_enter(req->co, NULL);
+    qemu_bh_delete(req->bh);
+    g_free(req);
+}
+
+static void block_request_create(uint64_t reqid, BlockDriverState *bs,
+                                 Coroutine *co)
+{
+    Request *req = g_new(Request, 1);
+    *req = (Request) {
+        .co = co,
+        .bh = aio_bh_new(bdrv_get_aio_context(bs), blkreplay_bh_cb, req),
+    };
+    replay_block_event(req->bh, reqid);
+}
+
+static int coroutine_fn blkreplay_co_readv(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+    uint64_t reqid = request_id++;
+    int ret = bdrv_co_readv(bs->file->bs, sector_num, nb_sectors, qiov);
+    block_request_create(reqid, bs, qemu_coroutine_self());
+    qemu_coroutine_yield();
+
+    return ret;
+}
+
+static int coroutine_fn blkreplay_co_writev(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+    uint64_t reqid = request_id++;
+    int ret = bdrv_co_writev(bs->file->bs, sector_num, nb_sectors, qiov);
+    block_request_create(reqid, bs, qemu_coroutine_self());
+    qemu_coroutine_yield();
+
+    return ret;
+}
+
+static int coroutine_fn blkreplay_co_write_zeroes(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
+{
+    uint64_t reqid = request_id++;
+    int ret = bdrv_co_write_zeroes(bs->file->bs, sector_num, nb_sectors, flags);
+    block_request_create(reqid, bs, qemu_coroutine_self());
+    qemu_coroutine_yield();
+
+    return ret;
+}
+
+static int coroutine_fn blkreplay_co_discard(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors)
+{
+    uint64_t reqid = request_id++;
+    int ret = bdrv_co_discard(bs->file->bs, sector_num, nb_sectors);
+    block_request_create(reqid, bs, qemu_coroutine_self());
+    qemu_coroutine_yield();
+
+    return ret;
+}
+
+static int coroutine_fn blkreplay_co_flush(BlockDriverState *bs)
+{
+    uint64_t reqid = request_id++;
+    int ret = bdrv_co_flush(bs->file->bs);
+    block_request_create(reqid, bs, qemu_coroutine_self());
+    qemu_coroutine_yield();
+
+    return ret;
+}
+
+static BlockDriver bdrv_blkreplay = {
+    .format_name            = "blkreplay",
+    .protocol_name          = "blkreplay",
+    .instance_size          = 0,
+
+    .bdrv_file_open         = blkreplay_open,
+    .bdrv_close             = blkreplay_close,
+    .bdrv_getlength         = blkreplay_getlength,
+
+    .bdrv_co_readv          = blkreplay_co_readv,
+    .bdrv_co_writev         = blkreplay_co_writev,
+
+    .bdrv_co_write_zeroes   = blkreplay_co_write_zeroes,
+    .bdrv_co_discard        = blkreplay_co_discard,
+    .bdrv_co_flush          = blkreplay_co_flush,
+};
+
+static void bdrv_blkreplay_init(void)
+{
+    bdrv_register(&bdrv_blkreplay);
+}
+
+block_init(bdrv_blkreplay_init);
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -8,10 +8,12 @@
 */

 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "qemu/sockets.h" /* for EINPROGRESS on Windows */
 #include "block/block_int.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qstring.h"
+#include "qemu/cutils.h"

 typedef struct {
    BdrvChild *test_file;
--- a/block/block-backend.c
+++ b/block/block-backend.c
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -23,6 +23,7 @@
 * THE SOFTWARE.
 */
 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -22,6 +22,7 @@
 * THE SOFTWARE.
 */
 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
--- a/block/commit.c
+++ b/block/commit.c
@@ -16,6 +16,7 @@
 #include "trace.h"
 #include "block/block_int.h"
 #include "block/blockjob.h"
+#include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
 #include "sysemu/block-backend.h"
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -0,0 +1,586 @@
+/*
+ * QEMU block full disk encryption
+ *
+ * Copyright (c) 2015-2016 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+
+#include "block/block_int.h"
+#include "sysemu/block-backend.h"
+#include "crypto/block.h"
+#include "qapi/opts-visitor.h"
+#include "qapi-visit.h"
+#include "qapi/error.h"
+
+#define BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET "key-secret"
+#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG "cipher-alg"
+#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE "cipher-mode"
+#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG "ivgen-alg"
+#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG "ivgen-hash-alg"
+#define BLOCK_CRYPTO_OPT_LUKS_HASH_ALG "hash-alg"
+
+typedef struct BlockCrypto BlockCrypto;
+
+struct BlockCrypto {
+    QCryptoBlock *block;
+};
+
+
+static int block_crypto_probe_generic(QCryptoBlockFormat format,
+                                      const uint8_t *buf,
+                                      int buf_size,
+                                      const char *filename)
+{
+    if (qcrypto_block_has_format(format, buf, buf_size)) {
+        return 100;
+    } else {
+        return 0;
+    }
+}
+
+
+static ssize_t block_crypto_read_func(QCryptoBlock *block,
+                                      size_t offset,
+                                      uint8_t *buf,
+                                      size_t buflen,
+                                      Error **errp,
+                                      void *opaque)
+{
+    BlockDriverState *bs = opaque;
+    ssize_t ret;
+
+    ret = bdrv_pread(bs->file->bs, offset, buf, buflen);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Could not read encryption header");
+        return ret;
+    }
+    return ret;
+}
+
+
+struct BlockCryptoCreateData {
+    const char *filename;
+    QemuOpts *opts;
+    BlockBackend *blk;
+    uint64_t size;
+};
+
+
+static ssize_t block_crypto_write_func(QCryptoBlock *block,
+                                       size_t offset,
+                                       const uint8_t *buf,
+                                       size_t buflen,
+                                       Error **errp,
+                                       void *opaque)
+{
+    struct BlockCryptoCreateData *data = opaque;
+    ssize_t ret;
+
+    ret = blk_pwrite(data->blk, offset, buf, buflen);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Could not write encryption header");
+        return ret;
+    }
+    return ret;
+}
+
+
+static ssize_t block_crypto_init_func(QCryptoBlock *block,
+                                      size_t headerlen,
+                                      Error **errp,
+                                      void *opaque)
+{
+    struct BlockCryptoCreateData *data = opaque;
+    int ret;
+
+    /* User provided size should reflect amount of space made
+     * available to the guest, so we must take account of that
+     * which will be used by the crypto header
+     */
+    data->size += headerlen;
+
+    qemu_opt_set_number(data->opts, BLOCK_OPT_SIZE, data->size, &error_abort);
+    ret = bdrv_create_file(data->filename, data->opts, errp);
+    if (ret < 0) {
+        return -1;
+    }
+
+    data->blk = blk_new_open(data->filename, NULL, NULL,
+                             BDRV_O_RDWR | BDRV_O_PROTOCOL, errp);
+    if (!data->blk) {
+        return -1;
+    }
+
+    return 0;
+}
+
+
+static QemuOptsList block_crypto_runtime_opts_luks = {
+    .name = "crypto",
+    .head = QTAILQ_HEAD_INITIALIZER(block_crypto_runtime_opts_luks.head),
+    .desc = {
+        {
+            .name = BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET,
+            .type = QEMU_OPT_STRING,
+            .help = "ID of the secret that provides the encryption key",
+        },
+        { /* end of list */ }
+    },
+};
+
+
+static QemuOptsList block_crypto_create_opts_luks = {
+    .name = "crypto",
+    .head = QTAILQ_HEAD_INITIALIZER(block_crypto_create_opts_luks.head),
+    .desc = {
+        {
+            .name = BLOCK_OPT_SIZE,
+            .type = QEMU_OPT_SIZE,
+            .help = "Virtual disk size"
+        },
+        {
+            .name = BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET,
+            .type = QEMU_OPT_STRING,
+            .help = "ID of the secret that provides the encryption key",
+        },
+        {
+            .name = BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG,
+            .type = QEMU_OPT_STRING,
+            .help = "Name of encryption cipher algorithm",
+        },
+        {
+            .name = BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE,
+            .type = QEMU_OPT_STRING,
+            .help = "Name of encryption cipher mode",
+        },
+        {
+            .name = BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG,
+            .type = QEMU_OPT_STRING,
+            .help = "Name of IV generator algorithm",
+        },
+        {
+            .name = BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG,
+            .type = QEMU_OPT_STRING,
+            .help = "Name of IV generator hash algorithm",
+        },
+        {
+            .name = BLOCK_CRYPTO_OPT_LUKS_HASH_ALG,
+            .type = QEMU_OPT_STRING,
+            .help = "Name of encryption hash algorithm",
+        },
+        { /* end of list */ }
+    },
+};
+
+
+static QCryptoBlockOpenOptions *
+block_crypto_open_opts_init(QCryptoBlockFormat format,
+                            QemuOpts *opts,
+                            Error **errp)
+{
+    OptsVisitor *ov;
+    QCryptoBlockOpenOptions *ret = NULL;
+    Error *local_err = NULL;
+    Error *end_err = NULL;
+
+    ret = g_new0(QCryptoBlockOpenOptions, 1);
+    ret->format = format;
+
+    ov = opts_visitor_new(opts);
+
+    visit_start_struct(opts_get_visitor(ov),
+                       NULL, NULL, 0, &local_err);
+    if (local_err) {
+        goto out;
+    }
+
+    switch (format) {
+    case Q_CRYPTO_BLOCK_FORMAT_LUKS:
+        visit_type_QCryptoBlockOptionsLUKS_members(
+            opts_get_visitor(ov), &ret->u.luks, &local_err);
+        break;
+
+    default:
+        error_setg(&local_err, "Unsupported block format %d", format);
+        break;
+    }
+
+    visit_end_struct(opts_get_visitor(ov), &end_err);
+    error_propagate(&local_err, end_err);
+
+ out:
+    if (local_err) {
+        error_propagate(errp, local_err);
+        qapi_free_QCryptoBlockOpenOptions(ret);
+        ret = NULL;
+    }
+    opts_visitor_cleanup(ov);
+    return ret;
+}
+
+
+static QCryptoBlockCreateOptions *
+block_crypto_create_opts_init(QCryptoBlockFormat format,
+                              QemuOpts *opts,
+                              Error **errp)
+{
+    OptsVisitor *ov;
+    QCryptoBlockCreateOptions *ret = NULL;
+    Error *local_err = NULL;
+    Error *end_err = NULL;
+
+    ret = g_new0(QCryptoBlockCreateOptions, 1);
+    ret->format = format;
+
+    ov = opts_visitor_new(opts);
+
+    visit_start_struct(opts_get_visitor(ov),
+                       NULL, NULL, 0, &local_err);
+    if (local_err) {
+        goto out;
+    }
+
+    switch (format) {
+    case Q_CRYPTO_BLOCK_FORMAT_LUKS:
+        visit_type_QCryptoBlockCreateOptionsLUKS_members(
+            opts_get_visitor(ov), &ret->u.luks, &local_err);
+        break;
+
+    default:
+        error_setg(&local_err, "Unsupported block format %d", format);
+        break;
+    }
+
+    visit_end_struct(opts_get_visitor(ov), &end_err);
+    error_propagate(&local_err, end_err);
+
+ out:
+    if (local_err) {
+        error_propagate(errp, local_err);
+        qapi_free_QCryptoBlockCreateOptions(ret);
+        ret = NULL;
+    }
+    opts_visitor_cleanup(ov);
+    return ret;
+}
+
+
+static int block_crypto_open_generic(QCryptoBlockFormat format,
+                                     QemuOptsList *opts_spec,
+                                     BlockDriverState *bs,
+                                     QDict *options,
+                                     int flags,
+                                     Error **errp)
+{
+    BlockCrypto *crypto = bs->opaque;
+    QemuOpts *opts = NULL;
+    Error *local_err = NULL;
+    int ret = -EINVAL;
+    QCryptoBlockOpenOptions *open_opts = NULL;
+    unsigned int cflags = 0;
+
+    opts = qemu_opts_create(opts_spec, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        goto cleanup;
+    }
+
+    open_opts = block_crypto_open_opts_init(format, opts, errp);
+    if (!open_opts) {
+        goto cleanup;
+    }
+
+    if (flags & BDRV_O_NO_IO) {
+        cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
+    }
+    crypto->block = qcrypto_block_open(open_opts,
+                                       block_crypto_read_func,
+                                       bs,
+                                       cflags,
+                                       errp);
+
+    if (!crypto->block) {
+        ret = -EIO;
+        goto cleanup;
+    }
+
+    bs->encrypted = 1;
+    bs->valid_key = 1;
+
+    ret = 0;
+ cleanup:
+    qapi_free_QCryptoBlockOpenOptions(open_opts);
+    return ret;
+}
+
+
+static int block_crypto_create_generic(QCryptoBlockFormat format,
+                                       const char *filename,
+                                       QemuOpts *opts,
+                                       Error **errp)
+{
+    int ret = -EINVAL;
+    QCryptoBlockCreateOptions *create_opts = NULL;
+    QCryptoBlock *crypto = NULL;
+    struct BlockCryptoCreateData data = {
+        .size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+                         BDRV_SECTOR_SIZE),
+        .opts = opts,
+        .filename = filename,
+    };
+
+    create_opts = block_crypto_create_opts_init(format, opts, errp);
+    if (!create_opts) {
+        return -1;
+    }
+
+    crypto = qcrypto_block_create(create_opts,
+                                  block_crypto_init_func,
+                                  block_crypto_write_func,
+                                  &data,
+                                  errp);
+
+    if (!crypto) {
+        ret = -EIO;
+        goto cleanup;
+    }
+
+    ret = 0;
+ cleanup:
+    qcrypto_block_free(crypto);
+    blk_unref(data.blk);
+    qapi_free_QCryptoBlockCreateOptions(create_opts);
+    return ret;
+}
+
+static int block_crypto_truncate(BlockDriverState *bs, int64_t offset)
+{
+    BlockCrypto *crypto = bs->opaque;
+    size_t payload_offset =
+        qcrypto_block_get_payload_offset(crypto->block);
+
+    offset += payload_offset;
+
+    return bdrv_truncate(bs->file->bs, offset);
+}
+
+static void block_crypto_close(BlockDriverState *bs)
+{
+    BlockCrypto *crypto = bs->opaque;
+    qcrypto_block_free(crypto->block);
+}
+
+
+#define BLOCK_CRYPTO_MAX_SECTORS 32
+
+static coroutine_fn int
+block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num,
+                      int remaining_sectors, QEMUIOVector *qiov)
+{
+    BlockCrypto *crypto = bs->opaque;
+    int cur_nr_sectors; /* number of sectors in current iteration */
+    uint64_t bytes_done = 0;
+    uint8_t *cipher_data = NULL;
+    QEMUIOVector hd_qiov;
+    int ret = 0;
+    size_t payload_offset =
+        qcrypto_block_get_payload_offset(crypto->block) / 512;
+
+    qemu_iovec_init(&hd_qiov, qiov->niov);
+
+    /* Bounce buffer so we have a linear mem region for
+     * entire sector. XXX optimize so we avoid bounce
+     * buffer in case that qiov->niov == 1
+     */
+    cipher_data =
+        qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_SECTORS * 512,
+                                              qiov->size));
+    if (cipher_data == NULL) {
+        ret = -ENOMEM;
+        goto cleanup;
+    }
+
+    while (remaining_sectors) {
+        cur_nr_sectors = remaining_sectors;
+
+        if (cur_nr_sectors > BLOCK_CRYPTO_MAX_SECTORS) {
+            cur_nr_sectors = BLOCK_CRYPTO_MAX_SECTORS;
+        }
+
+        qemu_iovec_reset(&hd_qiov);
+        qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512);
+
+        ret = bdrv_co_readv(bs->file->bs,
+                            payload_offset + sector_num,
+                            cur_nr_sectors, &hd_qiov);
+        if (ret < 0) {
+            goto cleanup;
+        }
+
+        if (qcrypto_block_decrypt(crypto->block,
+                                  sector_num,
+                                  cipher_data, cur_nr_sectors * 512,
+                                  NULL) < 0) {
+            ret = -EIO;
+            goto cleanup;
+        }
+
+        qemu_iovec_from_buf(qiov, bytes_done,
+                            cipher_data, cur_nr_sectors * 512);
+
+        remaining_sectors -= cur_nr_sectors;
+        sector_num += cur_nr_sectors;
+        bytes_done += cur_nr_sectors * 512;
+    }
+
+ cleanup:
+    qemu_iovec_destroy(&hd_qiov);
+    qemu_vfree(cipher_data);
+
+    return ret;
+}
+
+
+static coroutine_fn int
+block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num,
+                       int remaining_sectors, QEMUIOVector *qiov)
+{
+    BlockCrypto *crypto = bs->opaque;
+    int cur_nr_sectors; /* number of sectors in current iteration */
+    uint64_t bytes_done = 0;
+    uint8_t *cipher_data = NULL;
+    QEMUIOVector hd_qiov;
+    int ret = 0;
+    size_t payload_offset =
+        qcrypto_block_get_payload_offset(crypto->block) / 512;
+
+    qemu_iovec_init(&hd_qiov, qiov->niov);
+
+    /* Bounce buffer so we have a linear mem region for
+     * entire sector. XXX optimize so we avoid bounce
+     * buffer in case that qiov->niov == 1
+     */
+    cipher_data =
+        qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_SECTORS * 512,
+                                              qiov->size));
+    if (cipher_data == NULL) {
+        ret = -ENOMEM;
+        goto cleanup;
+    }
+
+    while (remaining_sectors) {
+        cur_nr_sectors = remaining_sectors;
+
+        if (cur_nr_sectors > BLOCK_CRYPTO_MAX_SECTORS) {
+            cur_nr_sectors = BLOCK_CRYPTO_MAX_SECTORS;
+        }
+
+        qemu_iovec_to_buf(qiov, bytes_done,
+                          cipher_data, cur_nr_sectors * 512);
+
+        if (qcrypto_block_encrypt(crypto->block,
+                                  sector_num,
+                                  cipher_data, cur_nr_sectors * 512,
+                                  NULL) < 0) {
+            ret = -EIO;
+            goto cleanup;
+        }
+
+        qemu_iovec_reset(&hd_qiov);
+        qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512);
+
+        ret = bdrv_co_writev(bs->file->bs,
+                             payload_offset + sector_num,
+                             cur_nr_sectors, &hd_qiov);
+        if (ret < 0) {
+            goto cleanup;
+        }
+
+        remaining_sectors -= cur_nr_sectors;
+        sector_num += cur_nr_sectors;
+        bytes_done += cur_nr_sectors * 512;
+    }
+
+ cleanup:
+    qemu_iovec_destroy(&hd_qiov);
+    qemu_vfree(cipher_data);
+
+    return ret;
+}
+
+
+static int64_t block_crypto_getlength(BlockDriverState *bs)
+{
+    BlockCrypto *crypto = bs->opaque;
+    int64_t len = bdrv_getlength(bs->file->bs);
+
+    ssize_t offset = qcrypto_block_get_payload_offset(crypto->block);
+
+    len -= offset;
+
+    return len;
+}
+
+
+static int block_crypto_probe_luks(const uint8_t *buf,
+                                   int buf_size,
+                                   const char *filename) {
+    return block_crypto_probe_generic(Q_CRYPTO_BLOCK_FORMAT_LUKS,
+                                      buf, buf_size, filename);
+}
+
+static int block_crypto_open_luks(BlockDriverState *bs,
+                                  QDict *options,
+                                  int flags,
+                                  Error **errp)
+{
+    return block_crypto_open_generic(Q_CRYPTO_BLOCK_FORMAT_LUKS,
+                                     &block_crypto_runtime_opts_luks,
+                                     bs, options, flags, errp);
+}
+
+static int block_crypto_create_luks(const char *filename,
+                                    QemuOpts *opts,
+                                    Error **errp)
+{
+    return block_crypto_create_generic(Q_CRYPTO_BLOCK_FORMAT_LUKS,
+                                       filename, opts, errp);
+}
+
+BlockDriver bdrv_crypto_luks = {
+    .format_name        = "luks",
+    .instance_size      = sizeof(BlockCrypto),
+    .bdrv_probe         = block_crypto_probe_luks,
+    .bdrv_open          = block_crypto_open_luks,
+    .bdrv_close         = block_crypto_close,
+    .bdrv_create        = block_crypto_create_luks,
+    .bdrv_truncate      = block_crypto_truncate,
+    .create_opts        = &block_crypto_create_opts_luks,
+
+    .bdrv_co_readv      = block_crypto_co_readv,
+    .bdrv_co_writev     = block_crypto_co_writev,
+    .bdrv_getlength     = block_crypto_getlength,
+};
+
+static void block_crypto_init(void)
+{
+    bdrv_register(&bdrv_crypto_luks);
+}
+
+block_init(block_crypto_init);
--- a/block/curl.c
+++ b/block/curl.c
@@ -22,12 +22,15 @@
 * THE SOFTWARE.
 */
 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "qemu-common.h"
 #include "qemu/error-report.h"
 #include "block/block_int.h"
 #include "qapi/qmp/qbool.h"
 #include "qapi/qmp/qstring.h"
+#include "crypto/secret.h"
 #include <curl/curl.h>
+#include "qemu/cutils.h"

 // #define DEBUG_CURL
 // #define DEBUG_VERBOSE
@@ -78,6 +81,10 @@ static CURLMcode __curl_multi_socket_action(CURLM *multi_handle,
 #define CURL_BLOCK_OPT_SSLVERIFY "sslverify"
 #define CURL_BLOCK_OPT_TIMEOUT "timeout"
 #define CURL_BLOCK_OPT_COOKIE    "cookie"
+#define CURL_BLOCK_OPT_USERNAME "username"
+#define CURL_BLOCK_OPT_PASSWORD_SECRET "password-secret"
+#define CURL_BLOCK_OPT_PROXY_USERNAME "proxy-username"
+#define CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET "proxy-password-secret"

 struct BDRVCURLState;

@@ -120,6 +127,10 @@ typedef struct BDRVCURLState {
    char *cookie;
    bool accept_range;
    AioContext *aio_context;
+    char *username;
+    char *password;
+    char *proxyusername;
+    char *proxypassword;
 } BDRVCURLState;

 static void curl_clean_state(CURLState *s);
@@ -419,6 +430,21 @@ static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s)
        curl_easy_setopt(state->curl, CURLOPT_ERRORBUFFER, state->errmsg);
        curl_easy_setopt(state->curl, CURLOPT_FAILONERROR, 1);

+        if (s->username) {
+            curl_easy_setopt(state->curl, CURLOPT_USERNAME, s->username);
+        }
+        if (s->password) {
+            curl_easy_setopt(state->curl, CURLOPT_PASSWORD, s->password);
+        }
+        if (s->proxyusername) {
+            curl_easy_setopt(state->curl,
+                             CURLOPT_PROXYUSERNAME, s->proxyusername);
+        }
+        if (s->proxypassword) {
+            curl_easy_setopt(state->curl,
+                             CURLOPT_PROXYPASSWORD, s->proxypassword);
+        }
+
        /* Restrict supported protocols to avoid security issues in the more
         * obscure protocols.  For example, do not allow POP3/SMTP/IMAP see
         * CVE-2013-0249.
@@ -525,10 +551,31 @@ static QemuOptsList runtime_opts = {
            .type = QEMU_OPT_STRING,
            .help = "Pass the cookie or list of cookies with each request"
        },
+        {
+            .name = CURL_BLOCK_OPT_USERNAME,
+            .type = QEMU_OPT_STRING,
+            .help = "Username for HTTP auth"
+        },
+        {
+            .name = CURL_BLOCK_OPT_PASSWORD_SECRET,
+            .type = QEMU_OPT_STRING,
+            .help = "ID of secret used as password for HTTP auth",
+        },
+        {
+            .name = CURL_BLOCK_OPT_PROXY_USERNAME,
+            .type = QEMU_OPT_STRING,
+            .help = "Username for HTTP proxy auth"
+        },
+        {
+            .name = CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET,
+            .type = QEMU_OPT_STRING,
+            .help = "ID of secret used as password for HTTP proxy auth",
+        },
        { /* end of list */ }
    },
 };

+
 static int curl_open(BlockDriverState *bs, QDict *options, int flags,
                     Error **errp)
 {
@@ -539,6 +586,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
    const char *file;
    const char *cookie;
    double d;
+    const char *secretid;

    static int inited = 0;

@@ -580,6 +628,26 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
        goto out_noclean;
    }

+    s->username = g_strdup(qemu_opt_get(opts, CURL_BLOCK_OPT_USERNAME));
+    secretid = qemu_opt_get(opts, CURL_BLOCK_OPT_PASSWORD_SECRET);
+
+    if (secretid) {
+        s->password = qcrypto_secret_lookup_as_utf8(secretid, errp);
+        if (!s->password) {
+            goto out_noclean;
+        }
+    }
+
+    s->proxyusername = g_strdup(
+        qemu_opt_get(opts, CURL_BLOCK_OPT_PROXY_USERNAME));
+    secretid = qemu_opt_get(opts, CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET);
+    if (secretid) {
+        s->proxypassword = qcrypto_secret_lookup_as_utf8(secretid, errp);
+        if (!s->proxypassword) {
+            goto out_noclean;
+        }
+    }
+
    if (!inited) {
        curl_global_init(CURL_GLOBAL_ALL);
        inited = 1;
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -0,0 +1,387 @@
+/*
+ * Block Dirty Bitmap
+ *
+ * Copyright (c) 2016 Red Hat. Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "trace.h"
+#include "block/block_int.h"
+#include "block/blockjob.h"
+
+/**
+ * A BdrvDirtyBitmap can be in three possible states:
+ * (1) successor is NULL and disabled is false: full r/w mode
+ * (2) successor is NULL and disabled is true: read only mode ("disabled")
+ * (3) successor is set: frozen mode.
+ *     A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
+ *     or enabled. A frozen bitmap can only abdicate() or reclaim().
+ */
+struct BdrvDirtyBitmap {
+    HBitmap *bitmap;            /* Dirty sector bitmap implementation */
+    BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
+    char *name;                 /* Optional non-empty unique ID */
+    int64_t size;               /* Size of the bitmap (Number of sectors) */
+    bool disabled;              /* Bitmap is read-only */
+    QLIST_ENTRY(BdrvDirtyBitmap) list;
+};
+
+BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
+{
+    BdrvDirtyBitmap *bm;
+
+    assert(name);
+    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
+        if (bm->name && !strcmp(name, bm->name)) {
+            return bm;
+        }
+    }
+    return NULL;
+}
+
+void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
+{
+    assert(!bdrv_dirty_bitmap_frozen(bitmap));
+    g_free(bitmap->name);
+    bitmap->name = NULL;
+}
+
+BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
+                                          uint32_t granularity,
+                                          const char *name,
+                                          Error **errp)
+{
+    int64_t bitmap_size;
+    BdrvDirtyBitmap *bitmap;
+    uint32_t sector_granularity;
+
+    assert((granularity & (granularity - 1)) == 0);
+
+    if (name && bdrv_find_dirty_bitmap(bs, name)) {
+        error_setg(errp, "Bitmap already exists: %s", name);
+        return NULL;
+    }
+    sector_granularity = granularity >> BDRV_SECTOR_BITS;
+    assert(sector_granularity);
+    bitmap_size = bdrv_nb_sectors(bs);
+    if (bitmap_size < 0) {
+        error_setg_errno(errp, -bitmap_size, "could not get length of device");
+        errno = -bitmap_size;
+        return NULL;
+    }
+    bitmap = g_new0(BdrvDirtyBitmap, 1);
+    bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
+    bitmap->size = bitmap_size;
+    bitmap->name = g_strdup(name);
+    bitmap->disabled = false;
+    QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
+    return bitmap;
+}
+
+bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
+{
+    return bitmap->successor;
+}
+
+bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
+{
+    return !(bitmap->disabled || bitmap->successor);
+}
+
+DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
+{
+    if (bdrv_dirty_bitmap_frozen(bitmap)) {
+        return DIRTY_BITMAP_STATUS_FROZEN;
+    } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
+        return DIRTY_BITMAP_STATUS_DISABLED;
+    } else {
+        return DIRTY_BITMAP_STATUS_ACTIVE;
+    }
+}
+
+/**
+ * Create a successor bitmap destined to replace this bitmap after an operation.
+ * Requires that the bitmap is not frozen and has no successor.
+ */
+int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
+                                       BdrvDirtyBitmap *bitmap, Error **errp)
+{
+    uint64_t granularity;
+    BdrvDirtyBitmap *child;
+
+    if (bdrv_dirty_bitmap_frozen(bitmap)) {
+        error_setg(errp, "Cannot create a successor for a bitmap that is "
+                   "currently frozen");
+        return -1;
+    }
+    assert(!bitmap->successor);
+
+    /* Create an anonymous successor */
+    granularity = bdrv_dirty_bitmap_granularity(bitmap);
+    child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
+    if (!child) {
+        return -1;
+    }
+
+    /* Successor will be on or off based on our current state. */
+    child->disabled = bitmap->disabled;
+
+    /* Install the successor and freeze the parent */
+    bitmap->successor = child;
+    return 0;
+}
+
+/**
+ * For a bitmap with a successor, yield our name to the successor,
+ * delete the old bitmap, and return a handle to the new bitmap.
+ */
+BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
+                                            BdrvDirtyBitmap *bitmap,
+                                            Error **errp)
+{
+    char *name;
+    BdrvDirtyBitmap *successor = bitmap->successor;
+
+    if (successor == NULL) {
+        error_setg(errp, "Cannot relinquish control if "
+                   "there's no successor present");
+        return NULL;
+    }
+
+    name = bitmap->name;
+    bitmap->name = NULL;
+    successor->name = name;
+    bitmap->successor = NULL;
+    bdrv_release_dirty_bitmap(bs, bitmap);
+
+    return successor;
+}
+
+/**
+ * In cases of failure where we can no longer safely delete the parent,
+ * we may wish to re-join the parent and child/successor.
+ * The merged parent will be un-frozen, but not explicitly re-enabled.
+ */
+BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
+                                           BdrvDirtyBitmap *parent,
+                                           Error **errp)
+{
+    BdrvDirtyBitmap *successor = parent->successor;
+
+    if (!successor) {
+        error_setg(errp, "Cannot reclaim a successor when none is present");
+        return NULL;
+    }
+
+    if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
+        error_setg(errp, "Merging of parent and successor bitmap failed");
+        return NULL;
+    }
+    bdrv_release_dirty_bitmap(bs, successor);
+    parent->successor = NULL;
+
+    return parent;
+}
+
+/**
+ * Truncates _all_ bitmaps attached to a BDS.
+ */
+void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
+{
+    BdrvDirtyBitmap *bitmap;
+    uint64_t size = bdrv_nb_sectors(bs);
+
+    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
+        assert(!bdrv_dirty_bitmap_frozen(bitmap));
+        hbitmap_truncate(bitmap->bitmap, size);
+        bitmap->size = size;
+    }
+}
+
+static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
+                                                  BdrvDirtyBitmap *bitmap,
+                                                  bool only_named)
+{
+    BdrvDirtyBitmap *bm, *next;
+    QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
+        if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) {
+            assert(!bdrv_dirty_bitmap_frozen(bm));
+            QLIST_REMOVE(bm, list);
+            hbitmap_free(bm->bitmap);
+            g_free(bm->name);
+            g_free(bm);
+
+            if (bitmap) {
+                return;
+            }
+        }
+    }
+}
+
+void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
+{
+    bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false);
+}
+
+/**
+ * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()).
+ * There must not be any frozen bitmaps attached.
+ */
+void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
+{
+    bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
+}
+
+void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
+{
+    assert(!bdrv_dirty_bitmap_frozen(bitmap));
+    bitmap->disabled = true;
+}
+
+void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
+{
+    assert(!bdrv_dirty_bitmap_frozen(bitmap));
+    bitmap->disabled = false;
+}
+
+BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
+{
+    BdrvDirtyBitmap *bm;
+    BlockDirtyInfoList *list = NULL;
+    BlockDirtyInfoList **plist = &list;
+
+    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
+        BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
+        BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
+        info->count = bdrv_get_dirty_count(bm);
+        info->granularity = bdrv_dirty_bitmap_granularity(bm);
+        info->has_name = !!bm->name;
+        info->name = g_strdup(bm->name);
+        info->status = bdrv_dirty_bitmap_status(bm);
+        entry->value = info;
+        *plist = entry;
+        plist = &entry->next;
+    }
+
+    return list;
+}
+
+int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+                   int64_t sector)
+{
+    if (bitmap) {
+        return hbitmap_get(bitmap->bitmap, sector);
+    } else {
+        return 0;
+    }
+}
+
+/**
+ * Chooses a default granularity based on the existing cluster size,
+ * but clamped between [4K, 64K]. Defaults to 64K in the case that there
+ * is no cluster size information available.
+ */
+uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
+{
+    BlockDriverInfo bdi;
+    uint32_t granularity;
+
+    if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
+        granularity = MAX(4096, bdi.cluster_size);
+        granularity = MIN(65536, granularity);
+    } else {
+        granularity = 65536;
+    }
+
+    return granularity;
+}
+
+uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
+{
+    return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
+}
+
+void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
+{
+    hbitmap_iter_init(hbi, bitmap->bitmap, 0);
+}
+
+void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
+                           int64_t cur_sector, int nr_sectors)
+{
+    assert(bdrv_dirty_bitmap_enabled(bitmap));
+    hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
+}
+
+void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
+                             int64_t cur_sector, int nr_sectors)
+{
+    assert(bdrv_dirty_bitmap_enabled(bitmap));
+    hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
+}
+
+void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
+{
+    assert(bdrv_dirty_bitmap_enabled(bitmap));
+    if (!out) {
+        hbitmap_reset_all(bitmap->bitmap);
+    } else {
+        HBitmap *backup = bitmap->bitmap;
+        bitmap->bitmap = hbitmap_alloc(bitmap->size,
+                                       hbitmap_granularity(backup));
+        *out = backup;
+    }
+}
+
+void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
+{
+    HBitmap *tmp = bitmap->bitmap;
+    assert(bdrv_dirty_bitmap_enabled(bitmap));
+    bitmap->bitmap = in;
+    hbitmap_free(tmp);
+}
+
+void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
+                    int nr_sectors)
+{
+    BdrvDirtyBitmap *bitmap;
+    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
+        if (!bdrv_dirty_bitmap_enabled(bitmap)) {
+            continue;
+        }
+        hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
+    }
+}
+
+/**
+ * Advance an HBitmapIter to an arbitrary offset.
+ */
+void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
+{
+    assert(hbi->hb);
+    hbitmap_iter_init(hbi, hbi->hb, offset);
+}
+
+int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
+{
+    return hbitmap_count(bitmap->bitmap);
+}
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -22,6 +22,7 @@
 * THE SOFTWARE.
 */
 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
 #include "qemu/bswap.h"
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -10,6 +10,7 @@
 #include "qemu/osdep.h"
 #include <glusterfs/api/glfs.h>
 #include "block/block_int.h"
+#include "qapi/error.h"
 #include "qemu/uri.h"

 typedef struct GlusterAIOCB {
@@ -246,7 +247,7 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
    if (!ret || ret == acb->size) {
        acb->ret = 0; /* Success */
    } else if (ret < 0) {
-        acb->ret = ret; /* Read/Write failed */
+        acb->ret = -errno; /* Read/Write failed */
    } else {
        acb->ret = -EIO; /* Partial read/write - fail it */
    }
@@ -313,6 +314,23 @@ static int qemu_gluster_open(BlockDriverState *bs,  QDict *options,
        goto out;
    }

+#ifdef CONFIG_GLUSTERFS_XLATOR_OPT
+    /* Without this, if fsync fails for a recoverable reason (for instance,
+     * ENOSPC), gluster will dump its cache, preventing retries.  This means
+     * almost certain data loss.  Not all gluster versions support the
+     * 'resync-failed-syncs-after-fsync' key value, but there is no way to
+     * discover during runtime if it is supported (this api returns success for
+     * unknown key/value pairs) */
+    ret = glfs_set_xlator_option(s->glfs, "*-write-behind",
+                                          "resync-failed-syncs-after-fsync",
+                                          "on");
+    if (ret < 0) {
+        error_setg_errno(errp, errno, "Unable to set xlator key/value pair");
+        ret = -errno;
+        goto out;
+    }
+#endif
+
    qemu_gluster_parse_flags(bdrv_flags, &open_flags);

    s->fd = glfs_open(s->glfs, gconf->image, open_flags);
@@ -365,6 +383,16 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
        goto exit;
    }

+#ifdef CONFIG_GLUSTERFS_XLATOR_OPT
+    ret = glfs_set_xlator_option(reop_s->glfs, "*-write-behind",
+                                 "resync-failed-syncs-after-fsync", "on");
+    if (ret < 0) {
+        error_setg_errno(errp, errno, "Unable to set xlator key/value pair");
+        ret = -errno;
+        goto exit;
+    }
+#endif
+
    reop_s->fd = glfs_open(reop_s->glfs, gconf->image, open_flags);
    if (reop_s->fd == NULL) {
        /* reops->glfs will be cleaned up in _abort */
@@ -588,6 +616,17 @@ static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs,
    return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1);
 }

+static void qemu_gluster_close(BlockDriverState *bs)
+{
+    BDRVGlusterState *s = bs->opaque;
+
+    if (s->fd) {
+        glfs_close(s->fd);
+        s->fd = NULL;
+    }
+    glfs_fini(s->glfs);
+}
+
 static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
 {
    int ret;
@@ -601,11 +640,35 @@ static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)

    ret = glfs_fsync_async(s->fd, gluster_finish_aiocb, &acb);
    if (ret < 0) {
-        return -errno;
+        ret = -errno;
+        goto error;
    }

    qemu_coroutine_yield();
+    if (acb.ret < 0) {
+        ret = acb.ret;
+        goto error;
+    }
+
    return acb.ret;
+
+error:
+    /* Some versions of Gluster (3.5.6 -> 3.5.8?) will not retain its cache
+     * after a fsync failure, so we have no way of allowing the guest to safely
+     * continue.  Gluster versions prior to 3.5.6 don't retain the cache
+     * either, but will invalidate the fd on error, so this is again our only
+     * option.
+     *
+     * The 'resync-failed-syncs-after-fsync' xlator option for the
+     * write-behind cache will cause later gluster versions to retain its
+     * cache after error, so long as the fd remains open.  However, we
+     * currently have no way of knowing if this option is supported.
+     *
+     * TODO: Once gluster provides a way for us to determine if the option
+     * is supported, bypass the closure and setting drv to NULL.  */
+    qemu_gluster_close(bs);
+    bs->drv = NULL;
+    return ret;
 }

 #ifdef CONFIG_GLUSTERFS_DISCARD
@@ -660,17 +723,6 @@ static int64_t qemu_gluster_allocated_file_size(BlockDriverState *bs)
    }
 }

-static void qemu_gluster_close(BlockDriverState *bs)
-{
-    BDRVGlusterState *s = bs->opaque;
-
-    if (s->fd) {
-        glfs_close(s->fd);
-        s->fd = NULL;
-    }
-    glfs_fini(s->glfs);
-}
-
 static int qemu_gluster_has_zero_init(BlockDriverState *bs)
 {
    /* GlusterFS volume could be backed by a block device */
--- a/block/io.c
+++ b/block/io.c
@@ -28,6 +28,8 @@
 #include "block/blockjob.h"
 #include "block/block_int.h"
 #include "block/throttle-groups.h"
+#include "qemu/cutils.h"
+#include "qapi/error.h"
 #include "qemu/error-report.h"

 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
@@ -44,12 +46,6 @@ static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
-static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
-    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
-    BdrvRequestFlags flags);
-static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
-    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
-    BdrvRequestFlags flags);
 static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                         int64_t sector_num,
                                         QEMUIOVector *qiov,
@@ -257,6 +253,47 @@ static void bdrv_drain_recurse(BlockDriverState *bs)
    }
 }

+typedef struct {
+    Coroutine *co;
+    BlockDriverState *bs;
+    QEMUBH *bh;
+    bool done;
+} BdrvCoDrainData;
+
+static void bdrv_co_drain_bh_cb(void *opaque)
+{
+    BdrvCoDrainData *data = opaque;
+    Coroutine *co = data->co;
+
+    qemu_bh_delete(data->bh);
+    bdrv_drain(data->bs);
+    data->done = true;
+    qemu_coroutine_enter(co, NULL);
+}
+
+void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
+{
+    BdrvCoDrainData data;
+
+    /* Calling bdrv_drain() from a BH ensures the current coroutine yields and
+     * other coroutines run if they were queued from
+     * qemu_co_queue_run_restart(). */
+
+    assert(qemu_in_coroutine());
+    data = (BdrvCoDrainData) {
+        .co = qemu_coroutine_self(),
+        .bs = bs,
+        .done = false,
+        .bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_drain_bh_cb, &data),
+    };
+    qemu_bh_schedule(data.bh);
+
+    qemu_coroutine_yield();
+    /* If we are resumed from some other event (such as an aio completion or a
+     * timer callback), it is a bug in the caller that should be fixed. */
+    assert(data.done);
+}
+
 /*
 * Wait for pending requests to complete on a single BlockDriverState subtree,
 * and suspend block driver's internal I/O until next request arrives.
@@ -273,6 +310,10 @@ void bdrv_drain(BlockDriverState *bs)
    bool busy = true;

    bdrv_drain_recurse(bs);
+    if (qemu_in_coroutine()) {
+        bdrv_co_drain(bs);
+        return;
+    }
    while (busy) {
        /* Keep iterating */
         bdrv_flush_io_queue(bs);
@@ -621,20 +662,6 @@ int bdrv_read(BlockDriverState *bs, int64_t sector_num,
    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
 }

-/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
-int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
-                          uint8_t *buf, int nb_sectors)
-{
-    bool enabled;
-    int ret;
-
-    enabled = bs->io_limits_enabled;
-    bs->io_limits_enabled = false;
-    ret = bdrv_read(bs, sector_num, buf, nb_sectors);
-    bs->io_limits_enabled = enabled;
-    return ret;
-}
-
 /* Return < 0 if error. Important errors are:
  -EIO         generic I/O error (may happen for all errors)
  -ENOMEDIUM   No media inserted.
@@ -765,9 +792,9 @@ int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
        return ret;
    }

-    /* No flush needed for cache modes that already do it */
-    if (bs->enable_write_cache) {
-        bdrv_flush(bs);
+    ret = bdrv_flush(bs);
+    if (ret < 0) {
+        return ret;
    }

    return 0;
@@ -862,6 +889,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert(!qiov || bytes == qiov->size);
+    assert((bs->open_flags & BDRV_O_NO_IO) == 0);

    /* Handle Copy on Read and associated serialisation */
    if (flags & BDRV_REQ_COPY_ON_READ) {
@@ -939,7 +967,7 @@ out:
 /*
 * Handle a read request in coroutine context
 */
-static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
+int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
 {
@@ -1148,6 +1176,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert(!qiov || bytes == qiov->size);
+    assert((bs->open_flags & BDRV_O_NO_IO) == 0);

    waited = wait_serialising_requests(req);
    assert(!waited || !req->serialising);
@@ -1170,13 +1199,20 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
    } else if (flags & BDRV_REQ_ZERO_WRITE) {
        bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
        ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
+    } else if (drv->bdrv_co_writev_flags) {
+        bdrv_debug_event(bs, BLKDBG_PWRITEV);
+        ret = drv->bdrv_co_writev_flags(bs, sector_num, nb_sectors, qiov,
+                                        flags);
    } else {
+        assert(drv->supported_write_flags == 0);
        bdrv_debug_event(bs, BLKDBG_PWRITEV);
        ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
    }
    bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);

-    if (ret == 0 && !bs->enable_write_cache) {
+    if (ret == 0 && (flags & BDRV_REQ_FUA) &&
+        !(drv->supported_write_flags & BDRV_REQ_FUA))
+    {
        ret = bdrv_co_flush(bs);
    }

@@ -1284,7 +1320,7 @@ fail:
 /*
 * Handle a write request in coroutine context
 */
-static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
+int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
 {
@@ -1445,26 +1481,6 @@ int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
                             BDRV_REQ_ZERO_WRITE | flags);
 }

-int bdrv_flush_all(void)
-{
-    BlockDriverState *bs = NULL;
-    int result = 0;
-
-    while ((bs = bdrv_next(bs))) {
-        AioContext *aio_context = bdrv_get_aio_context(bs);
-        int ret;
-
-        aio_context_acquire(aio_context);
-        ret = bdrv_flush(bs);
-        if (ret < 0 && !result) {
-            result = ret;
-        }
-        aio_context_release(aio_context);
-    }
-
-    return result;
-}
-
 typedef struct BdrvCoGetBlockStatusData {
    BlockDriverState *bs;
    BlockDriverState *base;
@@ -2369,6 +2385,13 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
    }

    tracked_request_begin(&req, bs, 0, 0, BDRV_TRACKED_FLUSH);
+
+    /* Write back all layers by calling one driver function */
+    if (bs->drv->bdrv_co_flush) {
+        ret = bs->drv->bdrv_co_flush(bs);
+        goto out;
+    }
+
    /* Write back cached data to the OS even with cache=unsafe */
    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
    if (bs->drv->bdrv_co_flush_to_os) {
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -39,6 +39,7 @@
 #include "sysemu/sysemu.h"
 #include "qmp-commands.h"
 #include "qapi/qmp/qstring.h"
+#include "crypto/secret.h"

 #include <iscsi/iscsi.h>
 #include <iscsi/scsi-lowlevel.h>
@@ -69,7 +70,6 @@ typedef struct IscsiLun {
    bool lbprz;
    bool dpofua;
    bool has_write_same;
-    bool force_next_flush;
    bool request_timed_out;
 } IscsiLun;

@@ -83,7 +83,6 @@ typedef struct IscsiTask {
    QEMUBH *bh;
    IscsiLun *iscsilun;
    QEMUTimer retry_timer;
-    bool force_next_flush;
    int err_code;
 } IscsiTask;

@@ -281,8 +280,6 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
        }
        iTask->err_code = iscsi_translate_sense(&task->sense);
        error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
-    } else {
-        iTask->iscsilun->force_next_flush |= iTask->force_next_flush;
    }

 out:
@@ -451,15 +448,15 @@ static void iscsi_allocationmap_clear(IscsiLun *iscsilun, int64_t sector_num,
    }
 }

-static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
-                                        int64_t sector_num, int nb_sectors,
-                                        QEMUIOVector *iov)
+static int coroutine_fn
+iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+                      QEMUIOVector *iov, int flags)
 {
    IscsiLun *iscsilun = bs->opaque;
    struct IscsiTask iTask;
    uint64_t lba;
    uint32_t num_sectors;
-    int fua;
+    bool fua;

    if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
        return -EINVAL;
@@ -475,8 +472,7 @@ static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
    num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
    iscsi_co_init_iscsitask(iscsilun, &iTask);
 retry:
-    fua = iscsilun->dpofua && !bs->enable_write_cache;
-    iTask.force_next_flush = !fua;
+    fua = iscsilun->dpofua && (flags & BDRV_REQ_FUA);
    if (iscsilun->use_16_for_rw) {
        iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
                                        NULL, num_sectors * iscsilun->block_size,
@@ -517,6 +513,13 @@ retry:
    return 0;
 }

+static int coroutine_fn
+iscsi_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+                QEMUIOVector *iov)
+{
+    return iscsi_co_writev_flags(bs, sector_num, nb_sectors, iov, 0);
+}
+

 static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun,
                                             int64_t sector_num, int nb_sectors)
@@ -714,11 +717,6 @@ static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
    IscsiLun *iscsilun = bs->opaque;
    struct IscsiTask iTask;

-    if (!iscsilun->force_next_flush) {
-        return 0;
-    }
-    iscsilun->force_next_flush = false;
-
    iscsi_co_init_iscsitask(iscsilun, &iTask);
 retry:
    if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
@@ -1018,7 +1016,6 @@ coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
    }

    iscsi_co_init_iscsitask(iscsilun, &iTask);
-    iTask.force_next_flush = true;
 retry:
    if (use_16_for_ws) {
        iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
@@ -1080,6 +1077,8 @@ static void parse_chap(struct iscsi_context *iscsi, const char *target,
    QemuOpts *opts;
    const char *user = NULL;
    const char *password = NULL;
+    const char *secretid;
+    char *secret = NULL;

    list = qemu_find_opts("iscsi");
    if (!list) {
@@ -1099,8 +1098,20 @@ static void parse_chap(struct iscsi_context *iscsi, const char *target,
        return;
    }

+    secretid = qemu_opt_get(opts, "password-secret");
    password = qemu_opt_get(opts, "password");
-    if (!password) {
+    if (secretid && password) {
+        error_setg(errp, "'password' and 'password-secret' properties are "
+                   "mutually exclusive");
+        return;
+    }
+    if (secretid) {
+        secret = qcrypto_secret_lookup_as_utf8(secretid, errp);
+        if (!secret) {
+            return;
+        }
+        password = secret;
+    } else if (!password) {
        error_setg(errp, "CHAP username specified but no password was given");
        return;
    }
@@ -1108,6 +1119,8 @@ static void parse_chap(struct iscsi_context *iscsi, const char *target,
    if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
        error_setg(errp, "Failed to set initiator username and password");
    }
+
+    g_free(secret);
 }

 static void parse_header_digest(struct iscsi_context *iscsi, const char *target,
@@ -1835,6 +1848,8 @@ static BlockDriver bdrv_iscsi = {
    .bdrv_co_write_zeroes = iscsi_co_write_zeroes,
    .bdrv_co_readv         = iscsi_co_readv,
    .bdrv_co_writev        = iscsi_co_writev,
+    .bdrv_co_writev_flags  = iscsi_co_writev_flags,
+    .supported_write_flags = BDRV_REQ_FUA,
    .bdrv_co_flush_to_disk = iscsi_co_flush,

 #ifdef __linux__
@@ -1857,6 +1872,11 @@ static QemuOptsList qemu_iscsi_opts = {
            .name = "password",
            .type = QEMU_OPT_STRING,
            .help = "password for CHAP authentication to target",
+        },{
+            .name = "password-secret",
+            .type = QEMU_OPT_STRING,
+            .help = "ID of the secret providing password for CHAP "
+                    "authentication to target",
        },{
            .name = "header-digest",
            .type = QEMU_OPT_STRING,
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -16,6 +16,7 @@
 #include "block/blockjob.h"
 #include "block/block_int.h"
 #include "sysemu/block-backend.h"
+#include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
 #include "qemu/bitmap.h"
@@ -47,7 +48,6 @@ typedef struct MirrorBlockJob {
    BlockdevOnError on_source_error, on_target_error;
    bool synced;
    bool should_complete;
-    int64_t sector_num;
    int64_t granularity;
    size_t buf_size;
    int64_t bdev_length;
@@ -64,6 +64,8 @@ typedef struct MirrorBlockJob {
    int ret;
    bool unmap;
    bool waiting_for_io;
+    int target_cluster_sectors;
+    int max_iov;
 } MirrorBlockJob;

 typedef struct MirrorOp {
@@ -106,7 +108,7 @@ static void mirror_iteration_done(MirrorOp *op, int ret)

    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
    chunk_num = op->sector_num / sectors_per_chunk;
-    nb_chunks = op->nb_sectors / sectors_per_chunk;
+    nb_chunks = DIV_ROUND_UP(op->nb_sectors, sectors_per_chunk);
    bitmap_clear(s->in_flight_bitmap, chunk_num, nb_chunks);
    if (ret >= 0) {
        if (s->cow_bitmap) {
@@ -159,115 +161,94 @@ static void mirror_read_complete(void *opaque, int ret)
                    mirror_write_complete, op);
 }

-static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
+static inline void mirror_clip_sectors(MirrorBlockJob *s,
+                                       int64_t sector_num,
+                                       int *nb_sectors)
+{
+    *nb_sectors = MIN(*nb_sectors,
+                      s->bdev_length / BDRV_SECTOR_SIZE - sector_num);
+}
+
+/* Round sector_num and/or nb_sectors to target cluster if COW is needed, and
+ * return the offset of the adjusted tail sector against original. */
+static int mirror_cow_align(MirrorBlockJob *s,
+                            int64_t *sector_num,
+                            int *nb_sectors)
+{
+    bool need_cow;
+    int ret = 0;
+    int chunk_sectors = s->granularity >> BDRV_SECTOR_BITS;
+    int64_t align_sector_num = *sector_num;
+    int align_nb_sectors = *nb_sectors;
+    int max_sectors = chunk_sectors * s->max_iov;
+
+    need_cow = !test_bit(*sector_num / chunk_sectors, s->cow_bitmap);
+    need_cow |= !test_bit((*sector_num + *nb_sectors - 1) / chunk_sectors,
+                          s->cow_bitmap);
+    if (need_cow) {
+        bdrv_round_to_clusters(s->target, *sector_num, *nb_sectors,
+                               &align_sector_num, &align_nb_sectors);
+    }
+
+    if (align_nb_sectors > max_sectors) {
+        align_nb_sectors = max_sectors;
+        if (need_cow) {
+            align_nb_sectors = QEMU_ALIGN_DOWN(align_nb_sectors,
+                                               s->target_cluster_sectors);
+        }
+    }
+    /* Clipping may result in align_nb_sectors unaligned to chunk boundary, but
+     * that doesn't matter because it's already the end of source image. */
+    mirror_clip_sectors(s, align_sector_num, &align_nb_sectors);
+
+    ret = align_sector_num + align_nb_sectors - (*sector_num + *nb_sectors);
+    *sector_num = align_sector_num;
+    *nb_sectors = align_nb_sectors;
+    assert(ret >= 0);
+    return ret;
+}
+
+static inline void mirror_wait_for_io(MirrorBlockJob *s)
+{
+    assert(!s->waiting_for_io);
+    s->waiting_for_io = true;
+    qemu_coroutine_yield();
+    s->waiting_for_io = false;
+}
+
+/* Submit async read while handling COW.
+ * Returns: nb_sectors if no alignment is necessary, or
+ *          (new_end - sector_num) if tail is rounded up or down due to
+ *          alignment or buffer limit.
+ */
+static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num,
+                          int nb_sectors)
 {
    BlockDriverState *source = s->common.bs;
-    int nb_sectors, sectors_per_chunk, nb_chunks, max_iov;
-    int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector;
-    uint64_t delay_ns = 0;
+    int sectors_per_chunk, nb_chunks;
+    int ret = nb_sectors;
    MirrorOp *op;
-    int pnum;
-    int64_t ret;
-    BlockDriverState *file;

-    max_iov = MIN(source->bl.max_iov, s->target->bl.max_iov);
-
-    s->sector_num = hbitmap_iter_next(&s->hbi);
-    if (s->sector_num < 0) {
-        bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
-        s->sector_num = hbitmap_iter_next(&s->hbi);
-        trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
-        assert(s->sector_num >= 0);
-    }
-
-    hbitmap_next_sector = s->sector_num;
-    sector_num = s->sector_num;
    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
-    end = s->bdev_length / BDRV_SECTOR_SIZE;

-    /* Extend the QEMUIOVector to include all adjacent blocks that will
-     * be copied in this operation.
-     *
-     * We have to do this if we have no backing file yet in the destination,
-     * and the cluster size is very large.  Then we need to do COW ourselves.
-     * The first time a cluster is copied, copy it entirely.  Note that,
-     * because both the granularity and the cluster size are powers of two,
-     * the number of sectors to copy cannot exceed one cluster.
-     *
-     * We also want to extend the QEMUIOVector to include more adjacent
-     * dirty blocks if possible, to limit the number of I/O operations and
-     * run efficiently even with a small granularity.
-     */
-    nb_chunks = 0;
-    nb_sectors = 0;
-    next_sector = sector_num;
-    next_chunk = sector_num / sectors_per_chunk;
+    /* We can only handle as much as buf_size at a time. */
+    nb_sectors = MIN(s->buf_size >> BDRV_SECTOR_BITS, nb_sectors);
+    assert(nb_sectors);

-    /* Wait for I/O to this cluster (from a previous iteration) to be done.  */
-    while (test_bit(next_chunk, s->in_flight_bitmap)) {
-        trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
-        s->waiting_for_io = true;
-        qemu_coroutine_yield();
-        s->waiting_for_io = false;
+    if (s->cow_bitmap) {
+        ret += mirror_cow_align(s, &sector_num, &nb_sectors);
    }
+    assert(nb_sectors << BDRV_SECTOR_BITS <= s->buf_size);
+    /* The sector range must meet granularity because:
+     * 1) Caller passes in aligned values;
+     * 2) mirror_cow_align is used only when target cluster is larger. */
+    assert(!(sector_num % sectors_per_chunk));
+    nb_chunks = DIV_ROUND_UP(nb_sectors, sectors_per_chunk);

-    do {
-        int added_sectors, added_chunks;
-
-        if (!bdrv_get_dirty(source, s->dirty_bitmap, next_sector) ||
-            test_bit(next_chunk, s->in_flight_bitmap)) {
-            assert(nb_sectors > 0);
-            break;
-        }
-
-        added_sectors = sectors_per_chunk;
-        if (s->cow_bitmap && !test_bit(next_chunk, s->cow_bitmap)) {
-            bdrv_round_to_clusters(s->target,
-                                   next_sector, added_sectors,
-                                   &next_sector, &added_sectors);
-
-            /* On the first iteration, the rounding may make us copy
-             * sectors before the first dirty one.
-             */
-            if (next_sector < sector_num) {
-                assert(nb_sectors == 0);
-                sector_num = next_sector;
-                next_chunk = next_sector / sectors_per_chunk;
-            }
-        }
-
-        added_sectors = MIN(added_sectors, end - (sector_num + nb_sectors));
-        added_chunks = (added_sectors + sectors_per_chunk - 1) / sectors_per_chunk;
-
-        /* When doing COW, it may happen that there is not enough space for
-         * a full cluster.  Wait if that is the case.
-         */
-        while (nb_chunks == 0 && s->buf_free_count < added_chunks) {
-            trace_mirror_yield_buf_busy(s, nb_chunks, s->in_flight);
-            s->waiting_for_io = true;
-            qemu_coroutine_yield();
-            s->waiting_for_io = false;
-        }
-        if (s->buf_free_count < nb_chunks + added_chunks) {
-            trace_mirror_break_buf_busy(s, nb_chunks, s->in_flight);
-            break;
-        }
-        if (max_iov < nb_chunks + added_chunks) {
-            trace_mirror_break_iov_max(s, nb_chunks, added_chunks);
-            break;
-        }
-
-        /* We have enough free space to copy these sectors.  */
-        bitmap_set(s->in_flight_bitmap, next_chunk, added_chunks);
-
-        nb_sectors += added_sectors;
-        nb_chunks += added_chunks;
-        next_sector += added_sectors;
-        next_chunk += added_chunks;
-        if (!s->synced && s->common.speed) {
-            delay_ns = ratelimit_calculate_delay(&s->limit, added_sectors);
-        }
-    } while (delay_ns == 0 && next_sector < end);
+    while (s->buf_free_count < nb_chunks) {
+        trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
+        mirror_wait_for_io(s);
+    }

    /* Allocate a MirrorOp that is used as an AIO callback.  */
    op = g_new(MirrorOp, 1);
@@ -279,47 +260,158 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
     * from s->buf_free.
     */
    qemu_iovec_init(&op->qiov, nb_chunks);
-    next_sector = sector_num;
    while (nb_chunks-- > 0) {
        MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
-        size_t remaining = (nb_sectors * BDRV_SECTOR_SIZE) - op->qiov.size;
+        size_t remaining = nb_sectors * BDRV_SECTOR_SIZE - op->qiov.size;

        QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
        s->buf_free_count--;
        qemu_iovec_add(&op->qiov, buf, MIN(s->granularity, remaining));
-
-        /* Advance the HBitmapIter in parallel, so that we do not examine
-         * the same sector twice.
-         */
-        if (next_sector > hbitmap_next_sector
-            && bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
-            hbitmap_next_sector = hbitmap_iter_next(&s->hbi);
-        }
-
-        next_sector += sectors_per_chunk;
    }

-    bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num, nb_sectors);
-
    /* Copy the dirty cluster.  */
    s->in_flight++;
    s->sectors_in_flight += nb_sectors;
    trace_mirror_one_iteration(s, sector_num, nb_sectors);

-    ret = bdrv_get_block_status_above(source, NULL, sector_num,
-                                      nb_sectors, &pnum, &file);
-    if (ret < 0 || pnum < nb_sectors ||
-            (ret & BDRV_BLOCK_DATA && !(ret & BDRV_BLOCK_ZERO))) {
-        bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
-                       mirror_read_complete, op);
-    } else if (ret & BDRV_BLOCK_ZERO) {
+    bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
+                   mirror_read_complete, op);
+    return ret;
+}
+
+static void mirror_do_zero_or_discard(MirrorBlockJob *s,
+                                      int64_t sector_num,
+                                      int nb_sectors,
+                                      bool is_discard)
+{
+    MirrorOp *op;
+
+    /* Allocate a MirrorOp that is used as an AIO callback. The qiov is zeroed
+     * so the freeing in mirror_iteration_done is nop. */
+    op = g_new0(MirrorOp, 1);
+    op->s = s;
+    op->sector_num = sector_num;
+    op->nb_sectors = nb_sectors;
+
+    s->in_flight++;
+    s->sectors_in_flight += nb_sectors;
+    if (is_discard) {
+        bdrv_aio_discard(s->target, sector_num, op->nb_sectors,
+                         mirror_write_complete, op);
+    } else {
        bdrv_aio_write_zeroes(s->target, sector_num, op->nb_sectors,
                              s->unmap ? BDRV_REQ_MAY_UNMAP : 0,
                              mirror_write_complete, op);
-    } else {
-        assert(!(ret & BDRV_BLOCK_DATA));
-        bdrv_aio_discard(s->target, sector_num, op->nb_sectors,
-                         mirror_write_complete, op);
+    }
+}
+
+static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
+{
+    BlockDriverState *source = s->common.bs;
+    int64_t sector_num, first_chunk;
+    uint64_t delay_ns = 0;
+    /* At least the first dirty chunk is mirrored in one iteration. */
+    int nb_chunks = 1;
+    int64_t end = s->bdev_length / BDRV_SECTOR_SIZE;
+    int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
+
+    sector_num = hbitmap_iter_next(&s->hbi);
+    if (sector_num < 0) {
+        bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
+        sector_num = hbitmap_iter_next(&s->hbi);
+        trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
+        assert(sector_num >= 0);
+    }
+
+    first_chunk = sector_num / sectors_per_chunk;
+    while (test_bit(first_chunk, s->in_flight_bitmap)) {
+        trace_mirror_yield_in_flight(s, first_chunk, s->in_flight);
+        mirror_wait_for_io(s);
+    }
+
+    /* Find the number of consective dirty chunks following the first dirty
+     * one, and wait for in flight requests in them. */
+    while (nb_chunks * sectors_per_chunk < (s->buf_size >> BDRV_SECTOR_BITS)) {
+        int64_t hbitmap_next;
+        int64_t next_sector = sector_num + nb_chunks * sectors_per_chunk;
+        int64_t next_chunk = next_sector / sectors_per_chunk;
+        if (next_sector >= end ||
+            !bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
+            break;
+        }
+        if (test_bit(next_chunk, s->in_flight_bitmap)) {
+            break;
+        }
+
+        hbitmap_next = hbitmap_iter_next(&s->hbi);
+        if (hbitmap_next > next_sector || hbitmap_next < 0) {
+            /* The bitmap iterator's cache is stale, refresh it */
+            bdrv_set_dirty_iter(&s->hbi, next_sector);
+            hbitmap_next = hbitmap_iter_next(&s->hbi);
+        }
+        assert(hbitmap_next == next_sector);
+        nb_chunks++;
+    }
+
+    /* Clear dirty bits before querying the block status, because
+     * calling bdrv_get_block_status_above could yield - if some blocks are
+     * marked dirty in this window, we need to know.
+     */
+    bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num,
+                            nb_chunks * sectors_per_chunk);
+    bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks);
+    while (nb_chunks > 0 && sector_num < end) {
+        int ret;
+        int io_sectors;
+        BlockDriverState *file;
+        enum MirrorMethod {
+            MIRROR_METHOD_COPY,
+            MIRROR_METHOD_ZERO,
+            MIRROR_METHOD_DISCARD
+        } mirror_method = MIRROR_METHOD_COPY;
+
+        assert(!(sector_num % sectors_per_chunk));
+        ret = bdrv_get_block_status_above(source, NULL, sector_num,
+                                          nb_chunks * sectors_per_chunk,
+                                          &io_sectors, &file);
+        if (ret < 0) {
+            io_sectors = nb_chunks * sectors_per_chunk;
+        }
+
+        io_sectors -= io_sectors % sectors_per_chunk;
+        if (io_sectors < sectors_per_chunk) {
+            io_sectors = sectors_per_chunk;
+        } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) {
+            int64_t target_sector_num;
+            int target_nb_sectors;
+            bdrv_round_to_clusters(s->target, sector_num, io_sectors,
+                                   &target_sector_num, &target_nb_sectors);
+            if (target_sector_num == sector_num &&
+                target_nb_sectors == io_sectors) {
+                mirror_method = ret & BDRV_BLOCK_ZERO ?
+                                    MIRROR_METHOD_ZERO :
+                                    MIRROR_METHOD_DISCARD;
+            }
+        }
+
+        mirror_clip_sectors(s, sector_num, &io_sectors);
+        switch (mirror_method) {
+        case MIRROR_METHOD_COPY:
+            io_sectors = mirror_do_read(s, sector_num, io_sectors);
+            break;
+        case MIRROR_METHOD_ZERO:
+            mirror_do_zero_or_discard(s, sector_num, io_sectors, false);
+            break;
+        case MIRROR_METHOD_DISCARD:
+            mirror_do_zero_or_discard(s, sector_num, io_sectors, true);
+            break;
+        default:
+            abort();
+        }
+        assert(io_sectors);
+        sector_num += io_sectors;
+        nb_chunks -= DIV_ROUND_UP(io_sectors, sectors_per_chunk);
+        delay_ns += ratelimit_calculate_delay(&s->limit, io_sectors);
    }
    return delay_ns;
 }
@@ -344,9 +436,7 @@ static void mirror_free_init(MirrorBlockJob *s)
 static void mirror_drain(MirrorBlockJob *s)
 {
    while (s->in_flight > 0) {
-        s->waiting_for_io = true;
-        qemu_coroutine_yield();
-        s->waiting_for_io = false;
+        mirror_wait_for_io(s);
    }
 }

@@ -405,6 +495,9 @@ out:
    block_job_completed(&s->common, data->ret);
    g_free(data);
    bdrv_drained_end(src);
+    if (qemu_get_aio_context() == bdrv_get_aio_context(src)) {
+        aio_enable_external(iohandler_get_aio_context());
+    }
    bdrv_unref(src);
 }

@@ -420,6 +513,7 @@ static void coroutine_fn mirror_run(void *opaque)
                                 checking for a NULL string */
    int ret = 0;
    int n;
+    int target_cluster_size = BDRV_SECTOR_SIZE;

    if (block_job_is_cancelled(&s->common)) {
        goto immediate_exit;
@@ -449,16 +543,16 @@ static void coroutine_fn mirror_run(void *opaque)
     */
    bdrv_get_backing_filename(s->target, backing_filename,
                              sizeof(backing_filename));
-    if (backing_filename[0] && !s->target->backing) {
-        ret = bdrv_get_info(s->target, &bdi);
-        if (ret < 0) {
-            goto immediate_exit;
-        }
-        if (s->granularity < bdi.cluster_size) {
-            s->buf_size = MAX(s->buf_size, bdi.cluster_size);
-            s->cow_bitmap = bitmap_new(length);
-        }
+    if (!bdrv_get_info(s->target, &bdi) && bdi.cluster_size) {
+        target_cluster_size = bdi.cluster_size;
    }
+    if (backing_filename[0] && !s->target->backing
+        && s->granularity < target_cluster_size) {
+        s->buf_size = MAX(s->buf_size, target_cluster_size);
+        s->cow_bitmap = bitmap_new(length);
+    }
+    s->target_cluster_sectors = target_cluster_size >> BDRV_SECTOR_BITS;
+    s->max_iov = MIN(s->common.bs->bl.max_iov, s->target->bl.max_iov);

    end = s->bdev_length / BDRV_SECTOR_SIZE;
    s->buf = qemu_try_blockalign(bs, s->buf_size);
@@ -533,9 +627,7 @@ static void coroutine_fn mirror_run(void *opaque)
            if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
                (cnt == 0 && s->in_flight > 0)) {
                trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt);
-                s->waiting_for_io = true;
-                qemu_coroutine_yield();
-                s->waiting_for_io = false;
+                mirror_wait_for_io(s);
                continue;
            } else if (cnt != 0) {
                delay_ns = mirror_iteration(s);
@@ -578,7 +670,7 @@ static void coroutine_fn mirror_run(void *opaque)
             * mirror_populate runs.
             */
            trace_mirror_before_drain(s, cnt);
-            bdrv_drain(bs);
+            bdrv_co_drain(bs);
            cnt = bdrv_get_dirty_count(s->dirty_bitmap);
        }

@@ -627,6 +719,12 @@ immediate_exit:
    /* Before we switch to target in mirror_exit, make sure data doesn't
     * change. */
    bdrv_drained_begin(s->common.bs);
+    if (qemu_get_aio_context() == bdrv_get_aio_context(bs)) {
+        /* FIXME: virtio host notifiers run on iohandler_ctx, therefore the
+         * above bdrv_drained_end isn't enough to quiesce it. This is ugly, we
+         * need a block layer API change to achieve this. */
+        aio_disable_external(iohandler_get_aio_context());
+    }
    block_job_defer_to_main_loop(&s->common, mirror_exit, data);
 }

@@ -784,7 +882,6 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,

    bdrv_op_block_all(s->target, s->common.blocker);

-    bdrv_set_enable_write_cache(s->target, true);
    if (s->target->blk) {
        blk_set_on_error(s->target->blk, on_target_error, on_target_error);
        blk_iostatus_enable(s->target->blk);
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -243,15 +243,15 @@ static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num,

 static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num,
                           int nb_sectors, QEMUIOVector *qiov,
-                           int offset)
+                           int offset, int *flags)
 {
    NbdClientSession *client = nbd_get_client_session(bs);
    struct nbd_request request = { .type = NBD_CMD_WRITE };
    struct nbd_reply reply;
    ssize_t ret;

-    if (!bdrv_enable_write_cache(bs) &&
-        (client->nbdflags & NBD_FLAG_SEND_FUA)) {
+    if ((*flags & BDRV_REQ_FUA) && (client->nbdflags & NBD_FLAG_SEND_FUA)) {
+        *flags &= ~BDRV_REQ_FUA;
        request.type |= NBD_CMD_FLAG_FUA;
    }

@@ -291,12 +291,13 @@ int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num,
 }

 int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num,
-                         int nb_sectors, QEMUIOVector *qiov)
+                         int nb_sectors, QEMUIOVector *qiov, int *flags)
 {
    int offset = 0;
    int ret;
    while (nb_sectors > NBD_MAX_SECTORS) {
-        ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
+        ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset,
+                              flags);
        if (ret < 0) {
            return ret;
        }
@@ -304,7 +305,7 @@ int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num,
        sector_num += NBD_MAX_SECTORS;
        nb_sectors -= NBD_MAX_SECTORS;
    }
-    return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset);
+    return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset, flags);
 }

 int nbd_client_co_flush(BlockDriverState *bs)
@@ -318,10 +319,6 @@ int nbd_client_co_flush(BlockDriverState *bs)
        return 0;
    }

-    if (client->nbdflags & NBD_FLAG_SEND_FUA) {
-        request.type |= NBD_CMD_FLAG_FUA;
-    }
-
    request.from = 0;
    request.len = 0;

--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -48,7 +48,7 @@ int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num,
                          int nb_sectors);
 int nbd_client_co_flush(BlockDriverState *bs);
 int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num,
-                         int nb_sectors, QEMUIOVector *qiov);
+                         int nb_sectors, QEMUIOVector *qiov, int *flags);
 int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num,
                        int nb_sectors, QEMUIOVector *qiov);

--- a/block/nbd.c
+++ b/block/nbd.c
@@ -28,6 +28,7 @@

 #include "qemu/osdep.h"
 #include "block/nbd-client.h"
+#include "qapi/error.h"
 #include "qemu/uri.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
@@ -35,7 +36,7 @@
 #include "qapi/qmp/qjson.h"
 #include "qapi/qmp/qint.h"
 #include "qapi/qmp/qstring.h"
-
+#include "qemu/cutils.h"

 #define EN_OPTSTR ":exportname="

@@ -204,18 +205,20 @@ static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, char **export,
    saddr = g_new0(SocketAddress, 1);

    if (qdict_haskey(options, "path")) {
+        UnixSocketAddress *q_unix;
        saddr->type = SOCKET_ADDRESS_KIND_UNIX;
-        saddr->u.q_unix = g_new0(UnixSocketAddress, 1);
-        saddr->u.q_unix->path = g_strdup(qdict_get_str(options, "path"));
+        q_unix = saddr->u.q_unix.data = g_new0(UnixSocketAddress, 1);
+        q_unix->path = g_strdup(qdict_get_str(options, "path"));
        qdict_del(options, "path");
    } else {
+        InetSocketAddress *inet;
        saddr->type = SOCKET_ADDRESS_KIND_INET;
-        saddr->u.inet = g_new0(InetSocketAddress, 1);
-        saddr->u.inet->host = g_strdup(qdict_get_str(options, "host"));
+        inet = saddr->u.inet.data = g_new0(InetSocketAddress, 1);
+        inet->host = g_strdup(qdict_get_str(options, "host"));
        if (!qdict_get_try_str(options, "port")) {
-            saddr->u.inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT);
+            inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT);
        } else {
-            saddr->u.inet->port = g_strdup(qdict_get_str(options, "port"));
+            inet->port = g_strdup(qdict_get_str(options, "port"));
        }
        qdict_del(options, "host");
        qdict_del(options, "port");
@@ -319,7 +322,7 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
            error_setg(errp, "TLS only supported over IP sockets");
            goto error;
        }
-        hostname = saddr->u.inet->host;
+        hostname = saddr->u.inet.data->host;
    }

    /* establish TCP connection, return error if it fails
@@ -352,10 +355,29 @@ static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
    return nbd_client_co_readv(bs, sector_num, nb_sectors, qiov);
 }

+static int nbd_co_writev_flags(BlockDriverState *bs, int64_t sector_num,
+                               int nb_sectors, QEMUIOVector *qiov, int flags)
+{
+    int ret;
+
+    ret = nbd_client_co_writev(bs, sector_num, nb_sectors, qiov, &flags);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* The flag wasn't sent to the server, so we need to emulate it with an
+     * explicit flush */
+    if (flags & BDRV_REQ_FUA) {
+        ret = nbd_client_co_flush(bs);
+    }
+
+    return ret;
+}
+
 static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num,
                         int nb_sectors, QEMUIOVector *qiov)
 {
-    return nbd_client_co_writev(bs, sector_num, nb_sectors, qiov);
+    return nbd_co_writev_flags(bs, sector_num, nb_sectors, qiov, 0);
 }

 static int nbd_co_flush(BlockDriverState *bs)
@@ -455,6 +477,8 @@ static BlockDriver bdrv_nbd = {
    .bdrv_file_open             = nbd_open,
    .bdrv_co_readv              = nbd_co_readv,
    .bdrv_co_writev             = nbd_co_writev,
+    .bdrv_co_writev_flags       = nbd_co_writev_flags,
+    .supported_write_flags      = BDRV_REQ_FUA,
    .bdrv_close                 = nbd_close,
    .bdrv_co_flush_to_os        = nbd_co_flush,
    .bdrv_co_discard            = nbd_co_discard,
@@ -473,6 +497,8 @@ static BlockDriver bdrv_nbd_tcp = {
    .bdrv_file_open             = nbd_open,
    .bdrv_co_readv              = nbd_co_readv,
    .bdrv_co_writev             = nbd_co_writev,
+    .bdrv_co_writev_flags       = nbd_co_writev_flags,
+    .supported_write_flags      = BDRV_REQ_FUA,
    .bdrv_close                 = nbd_close,
    .bdrv_co_flush_to_os        = nbd_co_flush,
    .bdrv_co_discard            = nbd_co_discard,
@@ -491,6 +517,8 @@ static BlockDriver bdrv_nbd_unix = {
    .bdrv_file_open             = nbd_open,
    .bdrv_co_readv              = nbd_co_readv,
    .bdrv_co_writev             = nbd_co_writev,
+    .bdrv_co_writev_flags       = nbd_co_writev_flags,
+    .supported_write_flags      = BDRV_REQ_FUA,
    .bdrv_close                 = nbd_close,
    .bdrv_co_flush_to_os        = nbd_co_flush,
    .bdrv_co_discard            = nbd_co_discard,
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -28,14 +28,17 @@
 #include "qemu-common.h"
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
+#include "qapi/error.h"
 #include "block/block_int.h"
 #include "trace.h"
 #include "qemu/iov.h"
 #include "qemu/uri.h"
+#include "qemu/cutils.h"
 #include "sysemu/sysemu.h"
 #include <nfsc/libnfs.h>

 #define QEMU_NFS_MAX_READAHEAD_SIZE 1048576
+#define QEMU_NFS_MAX_DEBUG_LEVEL 2

 typedef struct NFSClient {
    struct nfs_context *context;
@@ -333,6 +336,17 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename,
                val = QEMU_NFS_MAX_READAHEAD_SIZE;
            }
            nfs_set_readahead(client->context, val);
+#endif
+#ifdef LIBNFS_FEATURE_DEBUG
+        } else if (!strcmp(qp->p[i].name, "debug")) {
+            /* limit the maximum debug level to avoid potential flooding
+             * of our log files. */
+            if (val > QEMU_NFS_MAX_DEBUG_LEVEL) {
+                error_report("NFS Warning: Limiting NFS debug level"
+                             " to %d", QEMU_NFS_MAX_DEBUG_LEVEL);
+                val = QEMU_NFS_MAX_DEBUG_LEVEL;
+            }
+            nfs_set_debug(client->context, val);
 #endif
        } else {
            error_setg(errp, "Unknown NFS parameter name: %s",
--- a/block/null.c
+++ b/block/null.c
@@ -11,13 +11,16 @@
 */

 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "block/block_int.h"

 #define NULL_OPT_LATENCY "latency-ns"
+#define NULL_OPT_ZEROES  "read-zeroes"

 typedef struct {
    int64_t length;
    int64_t latency_ns;
+    bool read_zeroes;
 } BDRVNullState;

 static QemuOptsList runtime_opts = {
@@ -40,6 +43,11 @@ static QemuOptsList runtime_opts = {
            .help = "nanoseconds (approximated) to wait "
                    "before completing request",
        },
+        {
+            .name = NULL_OPT_ZEROES,
+            .type = QEMU_OPT_BOOL,
+            .help = "return zeroes when read",
+        },
        { /* end of list */ }
    },
 };
@@ -61,6 +69,7 @@ static int null_file_open(BlockDriverState *bs, QDict *options, int flags,
        error_setg(errp, "latency-ns is invalid");
        ret = -EINVAL;
    }
+    s->read_zeroes = qemu_opt_get_bool(opts, NULL_OPT_ZEROES, false);
    qemu_opts_del(opts);
    return ret;
 }
@@ -90,6 +99,12 @@ static coroutine_fn int null_co_readv(BlockDriverState *bs,
                                      int64_t sector_num, int nb_sectors,
                                      QEMUIOVector *qiov)
 {
+    BDRVNullState *s = bs->opaque;
+
+    if (s->read_zeroes) {
+        qemu_iovec_memset(qiov, 0, 0, nb_sectors * BDRV_SECTOR_SIZE);
+    }
+
    return null_co_common(bs);
 }

@@ -159,6 +174,12 @@ static BlockAIOCB *null_aio_readv(BlockDriverState *bs,
                                  BlockCompletionFunc *cb,
                                  void *opaque)
 {
+    BDRVNullState *s = bs->opaque;
+
+    if (s->read_zeroes) {
+        qemu_iovec_memset(qiov, 0, 0, nb_sectors * BDRV_SECTOR_SIZE);
+    }
+
    return null_aio_common(bs, cb, opaque);
 }

@@ -184,6 +205,24 @@ static int null_reopen_prepare(BDRVReopenState *reopen_state,
    return 0;
 }

+static int64_t coroutine_fn null_co_get_block_status(BlockDriverState *bs,
+                                                     int64_t sector_num,
+                                                     int nb_sectors, int *pnum,
+                                                     BlockDriverState **file)
+{
+    BDRVNullState *s = bs->opaque;
+    off_t start = sector_num * BDRV_SECTOR_SIZE;
+
+    *pnum = nb_sectors;
+    *file = bs;
+
+    if (s->read_zeroes) {
+        return BDRV_BLOCK_OFFSET_VALID | start | BDRV_BLOCK_ZERO;
+    } else {
+        return BDRV_BLOCK_OFFSET_VALID | start;
+    }
+}
+
 static BlockDriver bdrv_null_co = {
    .format_name            = "null-co",
    .protocol_name          = "null-co",
@@ -197,6 +236,8 @@ static BlockDriver bdrv_null_co = {
    .bdrv_co_writev         = null_co_writev,
    .bdrv_co_flush_to_disk  = null_co_flush,
    .bdrv_reopen_prepare    = null_reopen_prepare,
+
+    .bdrv_co_get_block_status   = null_co_get_block_status,
 };

 static BlockDriver bdrv_null_aio = {
@@ -212,6 +253,8 @@ static BlockDriver bdrv_null_aio = {
    .bdrv_aio_writev        = null_aio_writev,
    .bdrv_aio_flush         = null_aio_flush,
    .bdrv_reopen_prepare    = null_reopen_prepare,
+
+    .bdrv_co_get_block_status   = null_co_get_block_status,
 };

 static void bdrv_null_init(void)
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -28,8 +28,10 @@
 * THE SOFTWARE.
 */
 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
+#include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include "qemu/bitmap.h"
 #include "qapi/util.h"
@@ -461,7 +463,7 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
    int64_t total_size, cl_size;
    uint8_t tmp[BDRV_SECTOR_SIZE];
    Error *local_err = NULL;
-    BlockDriverState *file;
+    BlockBackend *file;
    uint32_t bat_entries, bat_sectors;
    ParallelsHeader header;
    int ret;
@@ -477,14 +479,16 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
        return ret;
    }

-    file = NULL;
-    ret = bdrv_open(&file, filename, NULL, NULL,
-                    BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
-    if (ret < 0) {
+    file = blk_new_open(filename, NULL, NULL,
+                        BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+    if (file == NULL) {
        error_propagate(errp, local_err);
-        return ret;
+        return -EIO;
    }
-    ret = bdrv_truncate(file, 0);
+
+    blk_set_allow_write_beyond_eof(file, true);
+
+    ret = blk_truncate(file, 0);
    if (ret < 0) {
        goto exit;
    }
@@ -508,18 +512,18 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
    memset(tmp, 0, sizeof(tmp));
    memcpy(tmp, &header, sizeof(header));

-    ret = bdrv_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE);
+    ret = blk_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE);
    if (ret < 0) {
        goto exit;
    }
-    ret = bdrv_write_zeroes(file, 1, bat_sectors - 1, 0);
+    ret = blk_write_zeroes(file, 1, bat_sectors - 1, 0);
    if (ret < 0) {
        goto exit;
    }
    ret = 0;

 done:
-    bdrv_unref(file);
+    blk_unref(file);
    return ret;

 exit:
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -32,8 +32,10 @@
 #include "qapi/qmp-output-visitor.h"
 #include "qapi/qmp/types.h"
 #include "sysemu/block-backend.h"
+#include "qemu/cutils.h"

-BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp)
+BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
+                                        BlockDriverState *bs, Error **errp)
 {
    ImageInfo **p_image_info;
    BlockDriverState *bs0;
@@ -47,7 +49,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp)

    info->cache = g_new(BlockdevCacheInfo, 1);
    *info->cache = (BlockdevCacheInfo) {
-        .writeback      = bdrv_enable_write_cache(bs),
+        .writeback      = blk ? blk_enable_write_cache(blk) : true,
        .direct         = !!(bs->open_flags & BDRV_O_NOCACHE),
        .no_flush       = !!(bs->open_flags & BDRV_O_NO_FLUSH),
    };
@@ -342,7 +344,7 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,

    if (bs && bs->drv) {
        info->has_inserted = true;
-        info->inserted = bdrv_block_device_info(bs, errp);
+        info->inserted = bdrv_block_device_info(blk, bs, errp);
        if (info->inserted == NULL) {
            goto err;
        }
@@ -355,100 +357,115 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
    qapi_free_BlockInfo(info);
 }

-static BlockStats *bdrv_query_stats(const BlockDriverState *bs,
-                                    bool query_backing)
+static BlockStats *bdrv_query_stats(BlockBackend *blk,
+                                    const BlockDriverState *bs,
+                                    bool query_backing);
+
+static void bdrv_query_blk_stats(BlockDeviceStats *ds, BlockBackend *blk)
 {
-    BlockStats *s;
+    BlockAcctStats *stats = blk_get_stats(blk);
+    BlockAcctTimedStats *ts = NULL;

-    s = g_malloc0(sizeof(*s));
+    ds->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ];
+    ds->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE];
+    ds->rd_operations = stats->nr_ops[BLOCK_ACCT_READ];
+    ds->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE];

-    if (bdrv_get_device_name(bs)[0]) {
-        s->has_device = true;
-        s->device = g_strdup(bdrv_get_device_name(bs));
+    ds->failed_rd_operations = stats->failed_ops[BLOCK_ACCT_READ];
+    ds->failed_wr_operations = stats->failed_ops[BLOCK_ACCT_WRITE];
+    ds->failed_flush_operations = stats->failed_ops[BLOCK_ACCT_FLUSH];
+
+    ds->invalid_rd_operations = stats->invalid_ops[BLOCK_ACCT_READ];
+    ds->invalid_wr_operations = stats->invalid_ops[BLOCK_ACCT_WRITE];
+    ds->invalid_flush_operations =
+        stats->invalid_ops[BLOCK_ACCT_FLUSH];
+
+    ds->rd_merged = stats->merged[BLOCK_ACCT_READ];
+    ds->wr_merged = stats->merged[BLOCK_ACCT_WRITE];
+    ds->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH];
+    ds->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE];
+    ds->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ];
+    ds->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH];
+
+    ds->has_idle_time_ns = stats->last_access_time_ns > 0;
+    if (ds->has_idle_time_ns) {
+        ds->idle_time_ns = block_acct_idle_time_ns(stats);
    }

+    ds->account_invalid = stats->account_invalid;
+    ds->account_failed = stats->account_failed;
+
+    while ((ts = block_acct_interval_next(stats, ts))) {
+        BlockDeviceTimedStatsList *timed_stats =
+            g_malloc0(sizeof(*timed_stats));
+        BlockDeviceTimedStats *dev_stats = g_malloc0(sizeof(*dev_stats));
+        timed_stats->next = ds->timed_stats;
+        timed_stats->value = dev_stats;
+        ds->timed_stats = timed_stats;
+
+        TimedAverage *rd = &ts->latency[BLOCK_ACCT_READ];
+        TimedAverage *wr = &ts->latency[BLOCK_ACCT_WRITE];
+        TimedAverage *fl = &ts->latency[BLOCK_ACCT_FLUSH];
+
+        dev_stats->interval_length = ts->interval_length;
+
+        dev_stats->min_rd_latency_ns = timed_average_min(rd);
+        dev_stats->max_rd_latency_ns = timed_average_max(rd);
+        dev_stats->avg_rd_latency_ns = timed_average_avg(rd);
+
+        dev_stats->min_wr_latency_ns = timed_average_min(wr);
+        dev_stats->max_wr_latency_ns = timed_average_max(wr);
+        dev_stats->avg_wr_latency_ns = timed_average_avg(wr);
+
+        dev_stats->min_flush_latency_ns = timed_average_min(fl);
+        dev_stats->max_flush_latency_ns = timed_average_max(fl);
+        dev_stats->avg_flush_latency_ns = timed_average_avg(fl);
+
+        dev_stats->avg_rd_queue_depth =
+            block_acct_queue_depth(ts, BLOCK_ACCT_READ);
+        dev_stats->avg_wr_queue_depth =
+            block_acct_queue_depth(ts, BLOCK_ACCT_WRITE);
+    }
+}
+
+static void bdrv_query_bds_stats(BlockStats *s, const BlockDriverState *bs,
+                                 bool query_backing)
+{
    if (bdrv_get_node_name(bs)[0]) {
        s->has_node_name = true;
        s->node_name = g_strdup(bdrv_get_node_name(bs));
    }

-    s->stats = g_malloc0(sizeof(*s->stats));
-    if (bs->blk) {
-        BlockAcctStats *stats = blk_get_stats(bs->blk);
-        BlockAcctTimedStats *ts = NULL;
-
-        s->stats->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ];
-        s->stats->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE];
-        s->stats->rd_operations = stats->nr_ops[BLOCK_ACCT_READ];
-        s->stats->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE];
-
-        s->stats->failed_rd_operations = stats->failed_ops[BLOCK_ACCT_READ];
-        s->stats->failed_wr_operations = stats->failed_ops[BLOCK_ACCT_WRITE];
-        s->stats->failed_flush_operations = stats->failed_ops[BLOCK_ACCT_FLUSH];
-
-        s->stats->invalid_rd_operations = stats->invalid_ops[BLOCK_ACCT_READ];
-        s->stats->invalid_wr_operations = stats->invalid_ops[BLOCK_ACCT_WRITE];
-        s->stats->invalid_flush_operations =
-            stats->invalid_ops[BLOCK_ACCT_FLUSH];
-
-        s->stats->rd_merged = stats->merged[BLOCK_ACCT_READ];
-        s->stats->wr_merged = stats->merged[BLOCK_ACCT_WRITE];
-        s->stats->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH];
-        s->stats->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE];
-        s->stats->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ];
-        s->stats->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH];
-
-        s->stats->has_idle_time_ns = stats->last_access_time_ns > 0;
-        if (s->stats->has_idle_time_ns) {
-            s->stats->idle_time_ns = block_acct_idle_time_ns(stats);
-        }
-
-        s->stats->account_invalid = stats->account_invalid;
-        s->stats->account_failed = stats->account_failed;
-
-        while ((ts = block_acct_interval_next(stats, ts))) {
-            BlockDeviceTimedStatsList *timed_stats =
-                g_malloc0(sizeof(*timed_stats));
-            BlockDeviceTimedStats *dev_stats = g_malloc0(sizeof(*dev_stats));
-            timed_stats->next = s->stats->timed_stats;
-            timed_stats->value = dev_stats;
-            s->stats->timed_stats = timed_stats;
-
-            TimedAverage *rd = &ts->latency[BLOCK_ACCT_READ];
-            TimedAverage *wr = &ts->latency[BLOCK_ACCT_WRITE];
-            TimedAverage *fl = &ts->latency[BLOCK_ACCT_FLUSH];
-
-            dev_stats->interval_length = ts->interval_length;
-
-            dev_stats->min_rd_latency_ns = timed_average_min(rd);
-            dev_stats->max_rd_latency_ns = timed_average_max(rd);
-            dev_stats->avg_rd_latency_ns = timed_average_avg(rd);
-
-            dev_stats->min_wr_latency_ns = timed_average_min(wr);
-            dev_stats->max_wr_latency_ns = timed_average_max(wr);
-            dev_stats->avg_wr_latency_ns = timed_average_avg(wr);
-
-            dev_stats->min_flush_latency_ns = timed_average_min(fl);
-            dev_stats->max_flush_latency_ns = timed_average_max(fl);
-            dev_stats->avg_flush_latency_ns = timed_average_avg(fl);
-
-            dev_stats->avg_rd_queue_depth =
-                block_acct_queue_depth(ts, BLOCK_ACCT_READ);
-            dev_stats->avg_wr_queue_depth =
-                block_acct_queue_depth(ts, BLOCK_ACCT_WRITE);
-        }
-    }
-
    s->stats->wr_highest_offset = bs->wr_highest_offset;

    if (bs->file) {
        s->has_parent = true;
-        s->parent = bdrv_query_stats(bs->file->bs, query_backing);
+        s->parent = bdrv_query_stats(NULL, bs->file->bs, query_backing);
    }

    if (query_backing && bs->backing) {
        s->has_backing = true;
-        s->backing = bdrv_query_stats(bs->backing->bs, query_backing);
+        s->backing = bdrv_query_stats(NULL, bs->backing->bs, query_backing);
+    }
+
+}
+
+static BlockStats *bdrv_query_stats(BlockBackend *blk,
+                                    const BlockDriverState *bs,
+                                    bool query_backing)
+{
+    BlockStats *s;
+
+    s = g_malloc0(sizeof(*s));
+    s->stats = g_malloc0(sizeof(*s->stats));
+
+    if (blk) {
+        s->has_device = true;
+        s->device = g_strdup(blk_name(blk));
+        bdrv_query_blk_stats(s->stats, blk);
+    }
+    if (bs) {
+        bdrv_query_bds_stats(s, bs, query_backing);
    }

    return s;
@@ -477,22 +494,38 @@ BlockInfoList *qmp_query_block(Error **errp)
    return head;
 }

+static bool next_query_bds(BlockBackend **blk, BlockDriverState **bs,
+                           bool query_nodes)
+{
+    if (query_nodes) {
+        *bs = bdrv_next_node(*bs);
+        return !!*bs;
+    }
+
+    *blk = blk_next(*blk);
+    *bs = *blk ? blk_bs(*blk) : NULL;
+
+    return !!*blk;
+}
+
 BlockStatsList *qmp_query_blockstats(bool has_query_nodes,
                                     bool query_nodes,
                                     Error **errp)
 {
    BlockStatsList *head = NULL, **p_next = &head;
+    BlockBackend *blk = NULL;
    BlockDriverState *bs = NULL;

    /* Just to be safe if query_nodes is not always initialized */
    query_nodes = has_query_nodes && query_nodes;

-    while ((bs = query_nodes ? bdrv_next_node(bs) : bdrv_next(bs))) {
+    while (next_query_bds(&blk, &bs, query_nodes)) {
        BlockStatsList *info = g_malloc0(sizeof(*info));
-        AioContext *ctx = bdrv_get_aio_context(bs);
+        AioContext *ctx = blk ? blk_get_aio_context(blk)
+                              : bdrv_get_aio_context(bs);

        aio_context_acquire(ctx);
-        info->value = bdrv_query_stats(bs, !query_nodes);
+        info->value = bdrv_query_stats(blk, bs, !query_nodes);
        aio_context_release(ctx);

        *p_next = info;
@@ -619,9 +652,8 @@ static void dump_qlist(fprintf_function func_fprintf, void *f, int indentation,
    for (entry = qlist_first(list); entry; entry = qlist_next(entry), i++) {
        QType type = qobject_type(entry->value);
        bool composite = (type == QTYPE_QDICT || type == QTYPE_QLIST);
-        const char *format = composite ? "%*s[%i]:\n" : "%*s[%i]: ";
-
-        func_fprintf(f, format, indentation * 4, "", i);
+        func_fprintf(f, "%*s[%i]:%c", indentation * 4, "", i,
+                     composite ? '\n' : ' ');
        dump_qobject(func_fprintf, f, indentation + 1, entry->value);
        if (!composite) {
            func_fprintf(f, "\n");
@@ -637,8 +669,7 @@ static void dump_qdict(fprintf_function func_fprintf, void *f, int indentation,
    for (entry = qdict_first(dict); entry; entry = qdict_next(dict, entry)) {
        QType type = qobject_type(entry->value);
        bool composite = (type == QTYPE_QDICT || type == QTYPE_QLIST);
-        const char *format = composite ? "%*s%s:\n" : "%*s%s: ";
-        char key[strlen(entry->key) + 1];
+        char *key = g_malloc(strlen(entry->key) + 1);
        int i;

        /* replace dashes with spaces in key (variable) names */
@@ -646,12 +677,13 @@ static void dump_qdict(fprintf_function func_fprintf, void *f, int indentation,
            key[i] = entry->key[i] == '-' ? ' ' : entry->key[i];
        }
        key[i] = 0;
-
-        func_fprintf(f, format, indentation * 4, "", key);
+        func_fprintf(f, "%*s%s:%c", indentation * 4, "", key,
+                     composite ? '\n' : ' ');
        dump_qobject(func_fprintf, f, indentation + 1, entry->value);
        if (!composite) {
            func_fprintf(f, "\n");
        }
+        g_free(key);
    }
 }

--- a/block/qcow.c
+++ b/block/qcow.c
@@ -22,8 +22,11 @@
 * THE SOFTWARE.
 */
 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "qemu-common.h"
+#include "qemu/error-report.h"
 #include "block/block_int.h"
+#include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include <zlib.h>
 #include "qapi/qmp/qerror.h"
@@ -120,11 +123,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
        goto fail;
    }
    if (header.version != QCOW_VERSION) {
-        char version[64];
-        snprintf(version, sizeof(version), "QCOW version %" PRIu32,
-                 header.version);
-        error_setg(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
-                   bdrv_get_device_or_node_name(bs), "qcow", version);
+        error_setg(errp, "Unsupported qcow version %" PRIu32, header.version);
        ret = -ENOTSUP;
        goto fail;
    }
@@ -160,6 +159,14 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    }
    s->crypt_method_header = header.crypt_method;
    if (s->crypt_method_header) {
+        if (bdrv_uses_whitelist() &&
+            s->crypt_method_header == QCOW_CRYPT_AES) {
+            error_report("qcow built-in AES encryption is deprecated");
+            error_printf("Support for it will be removed in a future release.\n"
+                         "You can use 'qemu-img convert' to switch to an\n"
+                         "unencrypted qcow image, or a LUKS raw image.\n");
+        }
+
        bs->encrypted = 1;
    }
    s->cluster_bits = header.cluster_bits;
@@ -780,7 +787,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
    int flags = 0;
    Error *local_err = NULL;
    int ret;
-    BlockDriverState *qcow_bs;
+    BlockBackend *qcow_blk;

    /* Read out options */
    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
@@ -796,15 +803,17 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
        goto cleanup;
    }

-    qcow_bs = NULL;
-    ret = bdrv_open(&qcow_bs, filename, NULL, NULL,
-                    BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
-    if (ret < 0) {
+    qcow_blk = blk_new_open(filename, NULL, NULL,
+                            BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+    if (qcow_blk == NULL) {
        error_propagate(errp, local_err);
+        ret = -EIO;
        goto cleanup;
    }

-    ret = bdrv_truncate(qcow_bs, 0);
+    blk_set_allow_write_beyond_eof(qcow_blk, true);
+
+    ret = blk_truncate(qcow_blk, 0);
    if (ret < 0) {
        goto exit;
    }
@@ -844,13 +853,13 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
    }

    /* write all the data */
-    ret = bdrv_pwrite(qcow_bs, 0, &header, sizeof(header));
+    ret = blk_pwrite(qcow_blk, 0, &header, sizeof(header));
    if (ret != sizeof(header)) {
        goto exit;
    }

    if (backing_file) {
-        ret = bdrv_pwrite(qcow_bs, sizeof(header),
+        ret = blk_pwrite(qcow_blk, sizeof(header),
            backing_file, backing_filename_len);
        if (ret != backing_filename_len) {
            goto exit;
@@ -860,7 +869,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
    tmp = g_malloc0(BDRV_SECTOR_SIZE);
    for (i = 0; i < ((sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/
        BDRV_SECTOR_SIZE); i++) {
-        ret = bdrv_pwrite(qcow_bs, header_size +
+        ret = blk_pwrite(qcow_blk, header_size +
            BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE);
        if (ret != BDRV_SECTOR_SIZE) {
            g_free(tmp);
@@ -871,7 +880,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
    g_free(tmp);
    ret = 0;
 exit:
-    bdrv_unref(qcow_bs);
+    blk_unref(qcow_blk);
 cleanup:
    g_free(backing_file);
    return ret;
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -25,6 +25,7 @@
 #include "qemu/osdep.h"
 #include <zlib.h>

+#include "qapi/error.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
 #include "block/qcow2.h"
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -23,6 +23,7 @@
 */

 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
 #include "block/qcow2.h"
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -23,10 +23,11 @@
 */

 #include "qemu/osdep.h"
-#include "qemu-common.h"
+#include "qapi/error.h"
 #include "block/block_int.h"
 #include "block/qcow2.h"
 #include "qemu/error-report.h"
+#include "qemu/cutils.h"

 void qcow2_free_snapshots(BlockDriverState *bs)
 {
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -22,8 +22,8 @@
 * THE SOFTWARE.
 */
 #include "qemu/osdep.h"
-#include "qemu-common.h"
 #include "block/block_int.h"
+#include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include <zlib.h>
 #include "block/qcow2.h"
@@ -35,6 +35,7 @@
 #include "qapi-event.h"
 #include "trace.h"
 #include "qemu/option_int.h"
+#include "qemu/cutils.h"

 /*
  Differences with QCOW:
@@ -197,22 +198,8 @@ static void cleanup_unknown_header_ext(BlockDriverState *bs)
    }
 }

-static void GCC_FMT_ATTR(3, 4) report_unsupported(BlockDriverState *bs,
-    Error **errp, const char *fmt, ...)
-{
-    char msg[64];
-    va_list ap;
-
-    va_start(ap, fmt);
-    vsnprintf(msg, sizeof(msg), fmt, ap);
-    va_end(ap);
-
-    error_setg(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
-               bdrv_get_device_or_node_name(bs), "qcow2", msg);
-}
-
-static void report_unsupported_feature(BlockDriverState *bs,
-    Error **errp, Qcow2Feature *table, uint64_t mask)
+static void report_unsupported_feature(Error **errp, Qcow2Feature *table,
+                                       uint64_t mask)
 {
    char *features = g_strdup("");
    char *old;
@@ -237,7 +224,7 @@ static void report_unsupported_feature(BlockDriverState *bs,
        g_free(old);
    }

-    report_unsupported(bs, errp, "%s", features);
+    error_setg(errp, "Unsupported qcow2 feature(s): %s", features);
    g_free(features);
 }

@@ -854,7 +841,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
        goto fail;
    }
    if (header.version < 2 || header.version > 3) {
-        report_unsupported(bs, errp, "QCOW version %" PRIu32, header.version);
+        error_setg(errp, "Unsupported qcow2 version %" PRIu32, header.version);
        ret = -ENOTSUP;
        goto fail;
    }
@@ -934,7 +921,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
        void *feature_table = NULL;
        qcow2_read_extensions(bs, header.header_length, ext_end,
                              &feature_table, NULL);
-        report_unsupported_feature(bs, errp, feature_table,
+        report_unsupported_feature(errp, feature_table,
                                   s->incompatible_features &
                                   ~QCOW2_INCOMPAT_MASK);
        ret = -ENOTSUP;
@@ -978,6 +965,14 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
    }
    s->crypt_method_header = header.crypt_method;
    if (s->crypt_method_header) {
+        if (bdrv_uses_whitelist() &&
+            s->crypt_method_header == QCOW_CRYPT_AES) {
+            error_report("qcow2 built-in AES encryption is deprecated");
+            error_printf("Support for it will be removed in a future release.\n"
+                         "You can use 'qemu-img convert' to switch to an\n"
+                         "unencrypted qcow2 image, or a LUKS raw image.\n");
+        }
+
        bs->encrypted = 1;
    }

@@ -1991,6 +1986,10 @@ static int qcow2_change_backing_file(BlockDriverState *bs,
 {
    BDRVQcow2State *s = bs->opaque;

+    if (backing_file && strlen(backing_file) > 1023) {
+        return -EINVAL;
+    }
+
    pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
    pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");

@@ -2097,7 +2096,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
     * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file
     * size for any qcow2 image.
     */
-    BlockDriverState* bs;
+    BlockBackend *blk;
    QCowHeader *header;
    uint64_t* refcount_table;
    Error *local_err = NULL;
@@ -2172,14 +2171,15 @@ static int qcow2_create2(const char *filename, int64_t total_size,
        return ret;
    }

-    bs = NULL;
-    ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
-                    &local_err);
-    if (ret < 0) {
+    blk = blk_new_open(filename, NULL, NULL,
+                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+    if (blk == NULL) {
        error_propagate(errp, local_err);
-        return ret;
+        return -EIO;
    }

+    blk_set_allow_write_beyond_eof(blk, true);
+
    /* Write the header */
    QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header));
    header = g_malloc0(cluster_size);
@@ -2207,7 +2207,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
            cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
    }

-    ret = bdrv_pwrite(bs, 0, header, cluster_size);
+    ret = blk_pwrite(blk, 0, header, cluster_size);
    g_free(header);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not write qcow2 header");
@@ -2217,7 +2217,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
    /* Write a refcount table with one refcount block */
    refcount_table = g_malloc0(2 * cluster_size);
    refcount_table[0] = cpu_to_be64(2 * cluster_size);
-    ret = bdrv_pwrite(bs, cluster_size, refcount_table, 2 * cluster_size);
+    ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size);
    g_free(refcount_table);

    if (ret < 0) {
@@ -2225,8 +2225,8 @@ static int qcow2_create2(const char *filename, int64_t total_size,
        goto out;
    }

-    bdrv_unref(bs);
-    bs = NULL;
+    blk_unref(blk);
+    blk = NULL;

    /*
     * And now open the image and make it consistent first (i.e. increase the
@@ -2235,15 +2235,15 @@ static int qcow2_create2(const char *filename, int64_t total_size,
     */
    options = qdict_new();
    qdict_put(options, "driver", qstring_from_str("qcow2"));
-    ret = bdrv_open(&bs, filename, NULL, options,
-                    BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH,
-                    &local_err);
-    if (ret < 0) {
+    blk = blk_new_open(filename, NULL, options,
+                       BDRV_O_RDWR | BDRV_O_NO_FLUSH, &local_err);
+    if (blk == NULL) {
        error_propagate(errp, local_err);
+        ret = -EIO;
        goto out;
    }

-    ret = qcow2_alloc_clusters(bs, 3 * cluster_size);
+    ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 "
                         "header and refcount table");
@@ -2255,14 +2255,14 @@ static int qcow2_create2(const char *filename, int64_t total_size,
    }

    /* Create a full header (including things like feature table) */
-    ret = qcow2_update_header(bs);
+    ret = qcow2_update_header(blk_bs(blk));
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not update qcow2 header");
        goto out;
    }

    /* Okay, now that we have a valid image, let's give it the right size */
-    ret = bdrv_truncate(bs, total_size);
+    ret = blk_truncate(blk, total_size);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not resize image");
        goto out;
@@ -2270,7 +2270,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,

    /* Want a backing file? There you go.*/
    if (backing_file) {
-        ret = bdrv_change_backing_file(bs, backing_file, backing_format);
+        ret = bdrv_change_backing_file(blk_bs(blk), backing_file, backing_format);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not assign backing file '%s' "
                             "with format '%s'", backing_file, backing_format);
@@ -2280,9 +2280,9 @@ static int qcow2_create2(const char *filename, int64_t total_size,

    /* And if we're supposed to preallocate metadata, do that now */
    if (prealloc != PREALLOC_MODE_OFF) {
-        BDRVQcow2State *s = bs->opaque;
+        BDRVQcow2State *s = blk_bs(blk)->opaque;
        qemu_co_mutex_lock(&s->lock);
-        ret = preallocate(bs);
+        ret = preallocate(blk_bs(blk));
        qemu_co_mutex_unlock(&s->lock);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not preallocate metadata");
@@ -2290,24 +2290,24 @@ static int qcow2_create2(const char *filename, int64_t total_size,
        }
    }

-    bdrv_unref(bs);
-    bs = NULL;
+    blk_unref(blk);
+    blk = NULL;

    /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */
    options = qdict_new();
    qdict_put(options, "driver", qstring_from_str("qcow2"));
-    ret = bdrv_open(&bs, filename, NULL, options,
-                    BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_BACKING,
-                    &local_err);
-    if (local_err) {
+    blk = blk_new_open(filename, NULL, options,
+                       BDRV_O_RDWR | BDRV_O_NO_BACKING, &local_err);
+    if (blk == NULL) {
        error_propagate(errp, local_err);
+        ret = -EIO;
        goto out;
    }

    ret = 0;
 out:
-    if (bs) {
-        bdrv_unref(bs);
+    if (blk) {
+        blk_unref(blk);
    }
    return ret;
 }
@@ -2809,15 +2809,15 @@ static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs)

    *spec_info = (ImageInfoSpecific){
        .type  = IMAGE_INFO_SPECIFIC_KIND_QCOW2,
-        .u.qcow2 = g_new(ImageInfoSpecificQCow2, 1),
+        .u.qcow2.data = g_new(ImageInfoSpecificQCow2, 1),
    };
    if (s->qcow_version == 2) {
-        *spec_info->u.qcow2 = (ImageInfoSpecificQCow2){
+        *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){
            .compat             = g_strdup("0.10"),
            .refcount_bits      = s->refcount_bits,
        };
    } else if (s->qcow_version == 3) {
-        *spec_info->u.qcow2 = (ImageInfoSpecificQCow2){
+        *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){
            .compat             = g_strdup("1.1"),
            .lazy_refcounts     = s->compatible_features &
                                  QCOW2_COMPAT_LAZY_REFCOUNTS,
--- a/block/qed.c
+++ b/block/qed.c
@@ -13,11 +13,13 @@
 */

 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "qemu/timer.h"
 #include "trace.h"
 #include "qed.h"
 #include "qapi/qmp/qerror.h"
 #include "migration/migration.h"
+#include "sysemu/block-backend.h"

 static const AIOCBInfo qed_aiocb_info = {
    .aiocb_size         = sizeof(QEDAIOCB),
@@ -345,7 +347,7 @@ static void qed_start_need_check_timer(BDRVQEDState *s)
     * migration.
     */
    timer_mod(s->need_check_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
-                   get_ticks_per_sec() * QED_NEED_CHECK_TIMEOUT);
+                   NANOSECONDS_PER_SECOND * QED_NEED_CHECK_TIMEOUT);
 }

 /* It's okay to call this multiple times or when no timer is started */
@@ -376,18 +378,6 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs,
    }
 }

-static void bdrv_qed_drain(BlockDriverState *bs)
-{
-    BDRVQEDState *s = bs->opaque;
-
-    /* Cancel timer and start doing I/O that were meant to happen as if it
-     * fired, that way we get bdrv_drain() taking care of the ongoing requests
-     * correctly. */
-    qed_cancel_need_check_timer(s);
-    qed_plug_allocating_write_reqs(s);
-    bdrv_aio_flush(s->bs, qed_clear_need_check, s);
-}
-
 static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
                         Error **errp)
 {
@@ -411,11 +401,8 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
    }
    if (s->header.features & ~QED_FEATURE_MASK) {
        /* image uses unsupported feature bits */
-        char buf[64];
-        snprintf(buf, sizeof(buf), "%" PRIx64,
-            s->header.features & ~QED_FEATURE_MASK);
-        error_setg(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
-                   bdrv_get_device_or_node_name(bs), "QED", buf);
+        error_setg(errp, "Unsupported QED features: %" PRIx64,
+                   s->header.features & ~QED_FEATURE_MASK);
        return -ENOTSUP;
    }
    if (!qed_is_cluster_size_valid(s->header.cluster_size)) {
@@ -580,7 +567,7 @@ static int qed_create(const char *filename, uint32_t cluster_size,
    size_t l1_size = header.cluster_size * header.table_size;
    Error *local_err = NULL;
    int ret = 0;
-    BlockDriverState *bs;
+    BlockBackend *blk;

    ret = bdrv_create_file(filename, opts, &local_err);
    if (ret < 0) {
@@ -588,17 +575,17 @@ static int qed_create(const char *filename, uint32_t cluster_size,
        return ret;
    }

-    bs = NULL;
-    ret = bdrv_open(&bs, filename, NULL, NULL,
-                    BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
-                    &local_err);
-    if (ret < 0) {
+    blk = blk_new_open(filename, NULL, NULL,
+                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+    if (blk == NULL) {
        error_propagate(errp, local_err);
-        return ret;
+        return -EIO;
    }

+    blk_set_allow_write_beyond_eof(blk, true);
+
    /* File must start empty and grow, check truncate is supported */
-    ret = bdrv_truncate(bs, 0);
+    ret = blk_truncate(blk, 0);
    if (ret < 0) {
        goto out;
    }
@@ -614,18 +601,18 @@ static int qed_create(const char *filename, uint32_t cluster_size,
    }

    qed_header_cpu_to_le(&header, &le_header);
-    ret = bdrv_pwrite(bs, 0, &le_header, sizeof(le_header));
+    ret = blk_pwrite(blk, 0, &le_header, sizeof(le_header));
    if (ret < 0) {
        goto out;
    }
-    ret = bdrv_pwrite(bs, sizeof(le_header), backing_file,
-                      header.backing_filename_size);
+    ret = blk_pwrite(blk, sizeof(le_header), backing_file,
+                     header.backing_filename_size);
    if (ret < 0) {
        goto out;
    }

    l1_table = g_malloc0(l1_size);
-    ret = bdrv_pwrite(bs, header.l1_table_offset, l1_table, l1_size);
+    ret = blk_pwrite(blk, header.l1_table_offset, l1_table, l1_size);
    if (ret < 0) {
        goto out;
    }
@@ -633,7 +620,7 @@ static int qed_create(const char *filename, uint32_t cluster_size,
    ret = 0; /* success */
 out:
    g_free(l1_table);
-    bdrv_unref(bs);
+    blk_unref(blk);
    return ret;
 }

@@ -1692,7 +1679,6 @@ static BlockDriver bdrv_qed = {
    .bdrv_check               = bdrv_qed_check,
    .bdrv_detach_aio_context  = bdrv_qed_detach_aio_context,
    .bdrv_attach_aio_context  = bdrv_qed_attach_aio_context,
-    .bdrv_drain               = bdrv_qed_drain,
 };

 static void bdrv_qed_init(void)
--- a/block/qed.h
+++ b/block/qed.h
@@ -16,6 +16,7 @@
 #define BLOCK_QED_H

 #include "block/block_int.h"
+#include "qemu/cutils.h"

 /* The layout of a QED file is as follows:
 *
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -215,14 +215,16 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
    return acb;
 }

-static void quorum_report_bad(QuorumAIOCB *acb, char *node_name, int ret)
+static void quorum_report_bad(QuorumOpType type, uint64_t sector_num,
+                              int nb_sectors, char *node_name, int ret)
 {
    const char *msg = NULL;
    if (ret < 0) {
        msg = strerror(-ret);
    }
-    qapi_event_send_quorum_report_bad(!!msg, msg, node_name,
-                                      acb->sector_num, acb->nb_sectors, &error_abort);
+
+    qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name,
+                                      sector_num, nb_sectors, &error_abort);
 }

 static void quorum_report_failure(QuorumAIOCB *acb)
@@ -284,6 +286,15 @@ static void quorum_aio_cb(void *opaque, int ret)
    BDRVQuorumState *s = acb->common.bs->opaque;
    bool rewrite = false;

+    if (ret == 0) {
+        acb->success_count++;
+    } else {
+        QuorumOpType type;
+        type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE;
+        quorum_report_bad(type, acb->sector_num, acb->nb_sectors,
+                          sacb->aiocb->bs->node_name, ret);
+    }
+
    if (acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO) {
        /* We try to read next child in FIFO order if we fail to read */
        if (ret < 0 && (acb->child_iter + 1) < s->num_children) {
@@ -302,11 +313,6 @@ static void quorum_aio_cb(void *opaque, int ret)

    sacb->ret = ret;
    acb->count++;
-    if (ret == 0) {
-        acb->success_count++;
-    } else {
-        quorum_report_bad(acb, sacb->aiocb->bs->node_name, ret);
-    }
    assert(acb->count <= s->num_children);
    assert(acb->success_count <= s->num_children);
    if (acb->count < s->num_children) {
@@ -338,7 +344,9 @@ static void quorum_report_bad_versions(BDRVQuorumState *s,
            continue;
        }
        QLIST_FOREACH(item, &version->items, next) {
-            quorum_report_bad(acb, s->children[item->index]->bs->node_name, 0);
+            quorum_report_bad(QUORUM_OP_TYPE_READ, acb->sector_num,
+                              acb->nb_sectors,
+                              s->children[item->index]->bs->node_name, 0);
        }
    }
 }
@@ -648,8 +656,9 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb)
    }

    for (i = 0; i < s->num_children; i++) {
-        bdrv_aio_readv(s->children[i]->bs, acb->sector_num, &acb->qcrs[i].qiov,
-                       acb->nb_sectors, quorum_aio_cb, &acb->qcrs[i]);
+        acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i]->bs, acb->sector_num,
+                                            &acb->qcrs[i].qiov, acb->nb_sectors,
+                                            quorum_aio_cb, &acb->qcrs[i]);
    }

    return &acb->common;
@@ -664,9 +673,10 @@ static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb)
    qemu_iovec_init(&acb->qcrs[acb->child_iter].qiov, acb->qiov->niov);
    qemu_iovec_clone(&acb->qcrs[acb->child_iter].qiov, acb->qiov,
                     acb->qcrs[acb->child_iter].buf);
-    bdrv_aio_readv(s->children[acb->child_iter]->bs, acb->sector_num,
-                   &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors,
-                   quorum_aio_cb, &acb->qcrs[acb->child_iter]);
+    acb->qcrs[acb->child_iter].aiocb =
+        bdrv_aio_readv(s->children[acb->child_iter]->bs, acb->sector_num,
+                       &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors,
+                       quorum_aio_cb, &acb->qcrs[acb->child_iter]);

    return &acb->common;
 }
@@ -760,19 +770,30 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
    QuorumVoteValue result_value;
    int i;
    int result = 0;
+    int success_count = 0;

    QLIST_INIT(&error_votes.vote_list);
    error_votes.compare = quorum_64bits_compare;

    for (i = 0; i < s->num_children; i++) {
        result = bdrv_co_flush(s->children[i]->bs);
-        result_value.l = result;
-        quorum_count_vote(&error_votes, &result_value, i);
+        if (result) {
+            quorum_report_bad(QUORUM_OP_TYPE_FLUSH, 0,
+                              bdrv_nb_sectors(s->children[i]->bs),
+                              s->children[i]->bs->node_name, result);
+            result_value.l = result;
+            quorum_count_vote(&error_votes, &result_value, i);
+        } else {
+            success_count++;
+        }
    }

-    winner = quorum_get_vote_winner(&error_votes);
-    result = winner->value.l;
-
+    if (success_count >= s->threshold) {
+        result = 0;
+    } else {
+        winner = quorum_get_vote_winner(&error_votes);
+        result = winner->value.l;
+    }
    quorum_free_vote_list(&error_votes);

    return result;
--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -15,6 +15,8 @@
 #ifndef QEMU_RAW_AIO_H
 #define QEMU_RAW_AIO_H

+#include "qemu/iov.h"
+
 /* AIO request types */
 #define QEMU_AIO_READ         0x0001
 #define QEMU_AIO_WRITE        0x0002
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -22,7 +22,8 @@
 * THE SOFTWARE.
 */
 #include "qemu/osdep.h"
-#include "qemu-common.h"
+#include "qapi/error.h"
+#include "qemu/cutils.h"
 #include "qemu/error-report.h"
 #include "qemu/timer.h"
 #include "qemu/log.h"
@@ -44,6 +45,7 @@
 #include <IOKit/storage/IOMedia.h>
 #include <IOKit/storage/IOCDMedia.h>
 //#include <IOKit/storage/IOCDTypes.h>
+#include <IOKit/storage/IODVDMedia.h>
 #include <CoreFoundation/CoreFoundation.h>
 #endif

@@ -1965,33 +1967,47 @@ BlockDriver bdrv_file = {
 /* host device */

 #if defined(__APPLE__) && defined(__MACH__)
-static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator );
 static kern_return_t GetBSDPath(io_iterator_t mediaIterator, char *bsdPath,
                                CFIndex maxPathSize, int flags);
-kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator )
+static char *FindEjectableOpticalMedia(io_iterator_t *mediaIterator)
 {
-    kern_return_t       kernResult;
+    kern_return_t kernResult = KERN_FAILURE;
    mach_port_t     masterPort;
    CFMutableDictionaryRef  classesToMatch;
+    const char *matching_array[] = {kIODVDMediaClass, kIOCDMediaClass};
+    char *mediaType = NULL;

    kernResult = IOMasterPort( MACH_PORT_NULL, &masterPort );
    if ( KERN_SUCCESS != kernResult ) {
        printf( "IOMasterPort returned %d\n", kernResult );
    }

-    classesToMatch = IOServiceMatching( kIOCDMediaClass );
-    if ( classesToMatch == NULL ) {
-        printf( "IOServiceMatching returned a NULL dictionary.\n" );
-    } else {
-    CFDictionarySetValue( classesToMatch, CFSTR( kIOMediaEjectableKey ), kCFBooleanTrue );
-    }
-    kernResult = IOServiceGetMatchingServices( masterPort, classesToMatch, mediaIterator );
-    if ( KERN_SUCCESS != kernResult )
-    {
-        printf( "IOServiceGetMatchingServices returned %d\n", kernResult );
-    }
+    int index;
+    for (index = 0; index < ARRAY_SIZE(matching_array); index++) {
+        classesToMatch = IOServiceMatching(matching_array[index]);
+        if (classesToMatch == NULL) {
+            error_report("IOServiceMatching returned NULL for %s",
+                         matching_array[index]);
+            continue;
+        }
+        CFDictionarySetValue(classesToMatch, CFSTR(kIOMediaEjectableKey),
+                             kCFBooleanTrue);
+        kernResult = IOServiceGetMatchingServices(masterPort, classesToMatch,
+                                                  mediaIterator);
+        if (kernResult != KERN_SUCCESS) {
+            error_report("Note: IOServiceGetMatchingServices returned %d",
+                         kernResult);
+            continue;
+        }

-    return kernResult;
+        /* If a match was found, leave the loop */
+        if (*mediaIterator != 0) {
+            DPRINTF("Matching using %s\n", matching_array[index]);
+            mediaType = g_strdup(matching_array[index]);
+            break;
+        }
+    }
+    return mediaType;
 }

 kern_return_t GetBSDPath(io_iterator_t mediaIterator, char *bsdPath,
@@ -2023,7 +2039,46 @@ kern_return_t GetBSDPath(io_iterator_t mediaIterator, char *bsdPath,
    return kernResult;
 }

-#endif
+/* Sets up a real cdrom for use in QEMU */
+static bool setup_cdrom(char *bsd_path, Error **errp)
+{
+    int index, num_of_test_partitions = 2, fd;
+    char test_partition[MAXPATHLEN];
+    bool partition_found = false;
+
+    /* look for a working partition */
+    for (index = 0; index < num_of_test_partitions; index++) {
+        snprintf(test_partition, sizeof(test_partition), "%ss%d", bsd_path,
+                 index);
+        fd = qemu_open(test_partition, O_RDONLY | O_BINARY | O_LARGEFILE);
+        if (fd >= 0) {
+            partition_found = true;
+            qemu_close(fd);
+            break;
+        }
+    }
+
+    /* if a working partition on the device was not found */
+    if (partition_found == false) {
+        error_setg(errp, "Failed to find a working partition on disc");
+    } else {
+        DPRINTF("Using %s as optical disc\n", test_partition);
+        pstrcpy(bsd_path, MAXPATHLEN, test_partition);
+    }
+    return partition_found;
+}
+
+/* Prints directions on mounting and unmounting a device */
+static void print_unmounting_directions(const char *file_name)
+{
+    error_report("If device %s is mounted on the desktop, unmount"
+                 " it first before using it in QEMU", file_name);
+    error_report("Command to unmount device: diskutil unmountDisk %s",
+                 file_name);
+    error_report("Command to mount device: diskutil mountDisk %s", file_name);
+}
+
+#endif /* defined(__APPLE__) && defined(__MACH__) */

 static int hdev_probe_device(const char *filename)
 {
@@ -2114,33 +2169,57 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,

 #if defined(__APPLE__) && defined(__MACH__)
    const char *filename = qdict_get_str(options, "filename");
+    char bsd_path[MAXPATHLEN] = "";
+    bool error_occurred = false;

-    if (strstart(filename, "/dev/cdrom", NULL)) {
-        kern_return_t kernResult;
-        io_iterator_t mediaIterator;
-        char bsdPath[ MAXPATHLEN ];
-        int fd;
+    /* If using a real cdrom */
+    if (strcmp(filename, "/dev/cdrom") == 0) {
+        char *mediaType = NULL;
+        kern_return_t ret_val;
+        io_iterator_t mediaIterator = 0;

-        kernResult = FindEjectableCDMedia( &mediaIterator );
-        kernResult = GetBSDPath(mediaIterator, bsdPath, sizeof(bsdPath),
-                                flags);
-        if ( bsdPath[ 0 ] != '\0' ) {
-            strcat(bsdPath,"s0");
-            /* some CDs don't have a partition 0 */
-            fd = qemu_open(bsdPath, O_RDONLY | O_BINARY | O_LARGEFILE);
-            if (fd < 0) {
-                bsdPath[strlen(bsdPath)-1] = '1';
-            } else {
-                qemu_close(fd);
-            }
-            filename = bsdPath;
-            qdict_put(options, "filename", qstring_from_str(filename));
+        mediaType = FindEjectableOpticalMedia(&mediaIterator);
+        if (mediaType == NULL) {
+            error_setg(errp, "Please make sure your CD/DVD is in the optical"
+                       " drive");
+            error_occurred = true;
+            goto hdev_open_Mac_error;
        }

-        if ( mediaIterator )
-            IOObjectRelease( mediaIterator );
+        ret_val = GetBSDPath(mediaIterator, bsd_path, sizeof(bsd_path), flags);
+        if (ret_val != KERN_SUCCESS) {
+            error_setg(errp, "Could not get BSD path for optical drive");
+            error_occurred = true;
+            goto hdev_open_Mac_error;
+        }
+
+        /* If a real optical drive was not found */
+        if (bsd_path[0] == '\0') {
+            error_setg(errp, "Failed to obtain bsd path for optical drive");
+            error_occurred = true;
+            goto hdev_open_Mac_error;
+        }
+
+        /* If using a cdrom disc and finding a partition on the disc failed */
+        if (strncmp(mediaType, kIOCDMediaClass, 9) == 0 &&
+            setup_cdrom(bsd_path, errp) == false) {
+            print_unmounting_directions(bsd_path);
+            error_occurred = true;
+            goto hdev_open_Mac_error;
+        }
+
+        qdict_put(options, "filename", qstring_from_str(bsd_path));
+
+hdev_open_Mac_error:
+        g_free(mediaType);
+        if (mediaIterator) {
+            IOObjectRelease(mediaIterator);
+        }
+        if (error_occurred) {
+            return -ENOENT;
+        }
    }
-#endif
+#endif /* defined(__APPLE__) && defined(__MACH__) */

    s->type = FTYPE_FILE;

@@ -2149,6 +2228,15 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
        if (local_err) {
            error_propagate(errp, local_err);
        }
+#if defined(__APPLE__) && defined(__MACH__)
+        if (*bsd_path) {
+            filename = bsd_path;
+        }
+        /* if a physical device experienced an error while being opened */
+        if (strncmp(filename, "/dev/", 5) == 0) {
+            print_unmounting_directions(filename);
+        }
+#endif /* defined(__APPLE__) && defined(__MACH__) */
        return ret;
    }

--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -22,7 +22,8 @@
 * THE SOFTWARE.
 */
 #include "qemu/osdep.h"
-#include "qemu-common.h"
+#include "qapi/error.h"
+#include "qemu/cutils.h"
 #include "qemu/timer.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -28,6 +28,7 @@

 #include "qemu/osdep.h"
 #include "block/block_int.h"
+#include "qapi/error.h"
 #include "qemu/option.h"

 static QemuOptsList raw_create_opts = {
@@ -56,8 +57,9 @@ static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
    return bdrv_co_readv(bs->file->bs, sector_num, nb_sectors, qiov);
 }

-static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
-                                      int nb_sectors, QEMUIOVector *qiov)
+static int coroutine_fn
+raw_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+                    QEMUIOVector *qiov, int flags)
 {
    void *buf = NULL;
    BlockDriver *drv;
@@ -103,7 +105,8 @@ static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
    }

    BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
-    ret = bdrv_co_writev(bs->file->bs, sector_num, nb_sectors, qiov);
+    ret = bdrv_co_do_pwritev(bs->file->bs, sector_num * BDRV_SECTOR_SIZE,
+                             nb_sectors * BDRV_SECTOR_SIZE, qiov, flags);

 fail:
    if (qiov == &local_qiov) {
@@ -113,6 +116,13 @@ fail:
    return ret;
 }

+static int coroutine_fn
+raw_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+              QEMUIOVector *qiov)
+{
+    return raw_co_writev_flags(bs, sector_num, nb_sectors, qiov, 0);
+}
+
 static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
                                            int64_t sector_num,
                                            int nb_sectors, int *pnum,
@@ -247,6 +257,8 @@ BlockDriver bdrv_raw = {
    .bdrv_create          = &raw_create,
    .bdrv_co_readv        = &raw_co_readv,
    .bdrv_co_writev       = &raw_co_writev,
+    .bdrv_co_writev_flags = &raw_co_writev_flags,
+    .supported_write_flags = BDRV_REQ_FUA,
    .bdrv_co_write_zeroes = &raw_co_write_zeroes,
    .bdrv_co_discard      = &raw_co_discard,
    .bdrv_co_get_block_status = &raw_co_get_block_status,
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -13,9 +13,11 @@

 #include "qemu/osdep.h"

-#include "qemu-common.h"
+#include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "block/block_int.h"
+#include "crypto/secret.h"
+#include "qemu/cutils.h"

 #include <rbd/librbd.h>

@@ -228,6 +230,27 @@ static char *qemu_rbd_parse_clientname(const char *conf, char *clientname)
    return NULL;
 }

+
+static int qemu_rbd_set_auth(rados_t cluster, const char *secretid,
+                             Error **errp)
+{
+    if (secretid == 0) {
+        return 0;
+    }
+
+    gchar *secret = qcrypto_secret_lookup_as_base64(secretid,
+                                                    errp);
+    if (!secret) {
+        return -1;
+    }
+
+    rados_conf_set(cluster, "key", secret);
+    g_free(secret);
+
+    return 0;
+}
+
+
 static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
                             bool only_read_conf_file,
                             Error **errp)
@@ -299,10 +322,13 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
    char conf[RBD_MAX_CONF_SIZE];
    char clientname_buf[RBD_MAX_CONF_SIZE];
    char *clientname;
+    const char *secretid;
    rados_t cluster;
    rados_ioctx_t io_ctx;
    int ret;

+    secretid = qemu_opt_get(opts, "password-secret");
+
    if (qemu_rbd_parsename(filename, pool, sizeof(pool),
                           snap_buf, sizeof(snap_buf),
                           name, sizeof(name),
@@ -350,6 +376,11 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
        return -EIO;
    }

+    if (qemu_rbd_set_auth(cluster, secretid, errp) < 0) {
+        rados_shutdown(cluster);
+        return -EIO;
+    }
+
    if (rados_connect(cluster) < 0) {
        error_setg(errp, "error connecting");
        rados_shutdown(cluster);
@@ -423,6 +454,11 @@ static QemuOptsList runtime_opts = {
            .type = QEMU_OPT_STRING,
            .help = "Specification of the rbd image",
        },
+        {
+            .name = "password-secret",
+            .type = QEMU_OPT_STRING,
+            .help = "ID of secret providing the password",
+        },
        { /* end of list */ }
    },
 };
@@ -436,6 +472,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
    char conf[RBD_MAX_CONF_SIZE];
    char clientname_buf[RBD_MAX_CONF_SIZE];
    char *clientname;
+    const char *secretid;
    QemuOpts *opts;
    Error *local_err = NULL;
    const char *filename;
@@ -450,6 +487,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
    }

    filename = qemu_opt_get(opts, "filename");
+    secretid = qemu_opt_get(opts, "password-secret");

    if (qemu_rbd_parsename(filename, pool, sizeof(pool),
                           snap_buf, sizeof(snap_buf),
@@ -488,6 +526,11 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
        }
    }

+    if (qemu_rbd_set_auth(s->cluster, secretid, errp) < 0) {
+        r = -EIO;
+        goto failed_shutdown;
+    }
+
    /*
     * Fallback to more conservative semantics if setting cache
     * options fails. Ignore errors from setting rbd_cache because the
@@ -919,6 +962,11 @@ static QemuOptsList qemu_rbd_create_opts = {
            .type = QEMU_OPT_SIZE,
            .help = "RBD object size"
        },
+        {
+            .name = "password-secret",
+            .type = QEMU_OPT_STRING,
+            .help = "ID of secret providing the password",
+        },
        { /* end of list */ }
    }
 };
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -13,12 +13,14 @@
 */

 #include "qemu/osdep.h"
-#include "qemu-common.h"
+#include "qapi/error.h"
 #include "qemu/uri.h"
 #include "qemu/error-report.h"
 #include "qemu/sockets.h"
 #include "block/block_int.h"
+#include "sysemu/block-backend.h"
 #include "qemu/bitops.h"
+#include "qemu/cutils.h"

 #define SD_PROTO_VER 0x01

@@ -284,6 +286,12 @@ static inline bool is_snapshot(struct SheepdogInode *inode)
    return !!inode->snap_ctime;
 }

+static inline size_t count_data_objs(const struct SheepdogInode *inode)
+{
+    return DIV_ROUND_UP(inode->vdi_size,
+                        (1UL << inode->block_size_shift));
+}
+
 #undef DPRINTF
 #ifdef DEBUG_SDOG
 #define DPRINTF(fmt, args...)                                       \
@@ -609,14 +617,13 @@ static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data,
    ret = qemu_co_send(sockfd, hdr, sizeof(*hdr));
    if (ret != sizeof(*hdr)) {
        error_report("failed to send a req, %s", strerror(errno));
-        ret = -socket_error();
-        return ret;
+        return -errno;
    }

    ret = qemu_co_send(sockfd, data, *wlen);
    if (ret != *wlen) {
-        ret = -socket_error();
        error_report("failed to send a req, %s", strerror(errno));
+        return -errno;
    }

    return ret;
@@ -1631,7 +1638,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,

 static int sd_prealloc(const char *filename, Error **errp)
 {
-    BlockDriverState *bs = NULL;
+    BlockBackend *blk = NULL;
    BDRVSheepdogState *base = NULL;
    unsigned long buf_size;
    uint32_t idx, max_idx;
@@ -1640,19 +1647,22 @@ static int sd_prealloc(const char *filename, Error **errp)
    void *buf = NULL;
    int ret;

-    ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
-                    errp);
-    if (ret < 0) {
+    blk = blk_new_open(filename, NULL, NULL,
+                       BDRV_O_RDWR | BDRV_O_PROTOCOL, errp);
+    if (blk == NULL) {
+        ret = -EIO;
        goto out_with_err_set;
    }

-    vdi_size = bdrv_getlength(bs);
+    blk_set_allow_write_beyond_eof(blk, true);
+
+    vdi_size = blk_getlength(blk);
    if (vdi_size < 0) {
        ret = vdi_size;
        goto out;
    }

-    base = bs->opaque;
+    base = blk_bs(blk)->opaque;
    object_size = (UINT32_C(1) << base->inode.block_size_shift);
    buf_size = MIN(object_size, SD_DATA_OBJ_SIZE);
    buf = g_malloc0(buf_size);
@@ -1664,23 +1674,24 @@ static int sd_prealloc(const char *filename, Error **errp)
         * The created image can be a cloned image, so we need to read
         * a data from the source image.
         */
-        ret = bdrv_pread(bs, idx * buf_size, buf, buf_size);
+        ret = blk_pread(blk, idx * buf_size, buf, buf_size);
        if (ret < 0) {
            goto out;
        }
-        ret = bdrv_pwrite(bs, idx * buf_size, buf, buf_size);
+        ret = blk_pwrite(blk, idx * buf_size, buf, buf_size);
        if (ret < 0) {
            goto out;
        }
    }

+    ret = 0;
 out:
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Can't pre-allocate");
    }
 out_with_err_set:
-    if (bs) {
-        bdrv_unref(bs);
+    if (blk) {
+        blk_unref(blk);
    }
    g_free(buf);

@@ -1820,7 +1831,7 @@ static int sd_create(const char *filename, QemuOpts *opts,
    }

    if (backing_file) {
-        BlockDriverState *bs;
+        BlockBackend *blk;
        BDRVSheepdogState *base;
        BlockDriver *drv;

@@ -1832,22 +1843,23 @@ static int sd_create(const char *filename, QemuOpts *opts,
            goto out;
        }

-        bs = NULL;
-        ret = bdrv_open(&bs, backing_file, NULL, NULL, BDRV_O_PROTOCOL, errp);
-        if (ret < 0) {
+        blk = blk_new_open(backing_file, NULL, NULL,
+                           BDRV_O_PROTOCOL, errp);
+        if (blk == NULL) {
+            ret = -EIO;
            goto out;
        }

-        base = bs->opaque;
+        base = blk_bs(blk)->opaque;

        if (!is_snapshot(&base->inode)) {
            error_setg(errp, "cannot clone from a non snapshot vdi");
-            bdrv_unref(bs);
+            blk_unref(blk);
            ret = -EINVAL;
            goto out;
        }
        s->inode.vdi_id = base->inode.vdi_id;
-        bdrv_unref(bs);
+        blk_unref(blk);
    }

    s->aio_context = qemu_get_aio_context();
@@ -2478,13 +2490,131 @@ out:
    return ret;
 }

+#define NR_BATCHED_DISCARD 128
+
+static bool remove_objects(BDRVSheepdogState *s)
+{
+    int fd, i = 0, nr_objs = 0;
+    Error *local_err = NULL;
+    int ret = 0;
+    bool result = true;
+    SheepdogInode *inode = &s->inode;
+
+    fd = connect_to_sdog(s, &local_err);
+    if (fd < 0) {
+        error_report_err(local_err);
+        return false;
+    }
+
+    nr_objs = count_data_objs(inode);
+    while (i < nr_objs) {
+        int start_idx, nr_filled_idx;
+
+        while (i < nr_objs && !inode->data_vdi_id[i]) {
+            i++;
+        }
+        start_idx = i;
+
+        nr_filled_idx = 0;
+        while (i < nr_objs && nr_filled_idx < NR_BATCHED_DISCARD) {
+            if (inode->data_vdi_id[i]) {
+                inode->data_vdi_id[i] = 0;
+                nr_filled_idx++;
+            }
+
+            i++;
+        }
+
+        ret = write_object(fd, s->aio_context,
+                           (char *)&inode->data_vdi_id[start_idx],
+                           vid_to_vdi_oid(s->inode.vdi_id), inode->nr_copies,
+                           (i - start_idx) * sizeof(uint32_t),
+                           offsetof(struct SheepdogInode,
+                                    data_vdi_id[start_idx]),
+                           false, s->cache_flags);
+        if (ret < 0) {
+            error_report("failed to discard snapshot inode.");
+            result = false;
+            goto out;
+        }
+    }
+
+out:
+    closesocket(fd);
+    return result;
+}
+
 static int sd_snapshot_delete(BlockDriverState *bs,
                              const char *snapshot_id,
                              const char *name,
                              Error **errp)
 {
-    /* FIXME: Delete specified snapshot id.  */
-    return 0;
+    unsigned long snap_id = 0;
+    char snap_tag[SD_MAX_VDI_TAG_LEN];
+    Error *local_err = NULL;
+    int fd, ret;
+    char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN];
+    BDRVSheepdogState *s = bs->opaque;
+    unsigned int wlen = SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN, rlen = 0;
+    uint32_t vid;
+    SheepdogVdiReq hdr = {
+        .opcode = SD_OP_DEL_VDI,
+        .data_length = wlen,
+        .flags = SD_FLAG_CMD_WRITE,
+    };
+    SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
+
+    if (!remove_objects(s)) {
+        return -1;
+    }
+
+    memset(buf, 0, sizeof(buf));
+    memset(snap_tag, 0, sizeof(snap_tag));
+    pstrcpy(buf, SD_MAX_VDI_LEN, s->name);
+    ret = qemu_strtoul(snapshot_id, NULL, 10, &snap_id);
+    if (ret || snap_id > UINT32_MAX) {
+        error_setg(errp, "Invalid snapshot ID: %s",
+                         snapshot_id ? snapshot_id : "<null>");
+        return -EINVAL;
+    }
+
+    if (snap_id) {
+        hdr.snapid = (uint32_t) snap_id;
+    } else {
+        pstrcpy(snap_tag, sizeof(snap_tag), snapshot_id);
+        pstrcpy(buf + SD_MAX_VDI_LEN, SD_MAX_VDI_TAG_LEN, snap_tag);
+    }
+
+    ret = find_vdi_name(s, s->name, snap_id, snap_tag, &vid, true,
+                        &local_err);
+    if (ret) {
+        return ret;
+    }
+
+    fd = connect_to_sdog(s, &local_err);
+    if (fd < 0) {
+        error_report_err(local_err);
+        return -1;
+    }
+
+    ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr,
+                 buf, &wlen, &rlen);
+    closesocket(fd);
+    if (ret) {
+        return ret;
+    }
+
+    switch (rsp->result) {
+    case SD_RES_NO_VDI:
+        error_report("%s was already deleted", s->name);
+    case SD_RES_SUCCESS:
+        break;
+    default:
+        error_report("%s, %s", sd_strerror(rsp->result), s->name);
+        return -1;
+    }
+
+    return ret;
 }

 static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -25,6 +25,7 @@
 #include "qemu/osdep.h"
 #include "block/snapshot.h"
 #include "block/block_int.h"
+#include "qapi/error.h"
 #include "qapi/qmp/qerror.h"

 QemuOptsList internal_snapshot_opts = {
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -28,6 +28,7 @@
 #include <libssh2_sftp.h>

 #include "block/block_int.h"
+#include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "qemu/sockets.h"
 #include "qemu/uri.h"
--- a/block/stream.c
+++ b/block/stream.c
@@ -15,6 +15,7 @@
 #include "trace.h"
 #include "block/block_int.h"
 #include "block/blockjob.h"
+#include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
 #include "sysemu/block-backend.h"
@@ -89,21 +90,21 @@ static void coroutine_fn stream_run(void *opaque)
    StreamCompleteData *data;
    BlockDriverState *bs = s->common.bs;
    BlockDriverState *base = s->base;
-    int64_t sector_num, end;
+    int64_t sector_num = 0;
+    int64_t end = -1;
    int error = 0;
    int ret = 0;
    int n = 0;
    void *buf;

    if (!bs->backing) {
-        block_job_completed(&s->common, 0);
-        return;
+        goto out;
    }

    s->common.len = bdrv_getlength(bs);
    if (s->common.len < 0) {
-        block_job_completed(&s->common, s->common.len);
-        return;
+        ret = s->common.len;
+        goto out;
    }

    end = s->common.len >> BDRV_SECTOR_BITS;
@@ -190,6 +191,7 @@ wait:

    qemu_vfree(buf);

+out:
    /* Modify backing chain and close BDSes in main loop */
    data = g_malloc(sizeof(*data));
    data->ret = ret;
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -50,11 +50,13 @@
 */

 #include "qemu/osdep.h"
-#include "qemu-common.h"
+#include "qapi/error.h"
 #include "block/block_int.h"
+#include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include "migration/migration.h"
 #include "qemu/coroutine.h"
+#include "qemu/cutils.h"

 #if defined(CONFIG_UUID)
 #include <uuid/uuid.h>
@@ -733,7 +735,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
    size_t bmap_size;
    int64_t offset = 0;
    Error *local_err = NULL;
-    BlockDriverState *bs = NULL;
+    BlockBackend *blk = NULL;
    uint32_t *bmap = NULL;

    logout("\n");
@@ -766,13 +768,17 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
        error_propagate(errp, local_err);
        goto exit;
    }
-    ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
-                    &local_err);
-    if (ret < 0) {
+
+    blk = blk_new_open(filename, NULL, NULL,
+                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+    if (blk == NULL) {
        error_propagate(errp, local_err);
+        ret = -EIO;
        goto exit;
    }

+    blk_set_allow_write_beyond_eof(blk, true);
+
    /* We need enough blocks to store the given disk size,
       so always round up. */
    blocks = DIV_ROUND_UP(bytes, block_size);
@@ -802,7 +808,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
    vdi_header_print(&header);
 #endif
    vdi_header_to_le(&header);
-    ret = bdrv_pwrite_sync(bs, offset, &header, sizeof(header));
+    ret = blk_pwrite(blk, offset, &header, sizeof(header));
    if (ret < 0) {
        error_setg(errp, "Error writing header to %s", filename);
        goto exit;
@@ -823,7 +829,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
                bmap[i] = VDI_UNALLOCATED;
            }
        }
-        ret = bdrv_pwrite_sync(bs, offset, bmap, bmap_size);
+        ret = blk_pwrite(blk, offset, bmap, bmap_size);
        if (ret < 0) {
            error_setg(errp, "Error writing bmap to %s", filename);
            goto exit;
@@ -832,7 +838,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
    }

    if (image_type == VDI_TYPE_STATIC) {
-        ret = bdrv_truncate(bs, offset + blocks * block_size);
+        ret = blk_truncate(blk, offset + blocks * block_size);
        if (ret < 0) {
            error_setg(errp, "Failed to statically allocate %s", filename);
            goto exit;
@@ -840,7 +846,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
    }

 exit:
-    bdrv_unref(bs);
+    blk_unref(blk);
    g_free(bmap);
    return ret;
 }
--- a/block/vhdx-log.c
+++ b/block/vhdx-log.c
@@ -18,6 +18,7 @@
 *
 */
 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
 #include "qemu/error-report.h"
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -16,8 +16,10 @@
 */

 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
+#include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include "qemu/crc32c.h"
 #include "block/vhdx.h"
@@ -264,10 +266,10 @@ static void vhdx_region_unregister_all(BDRVVHDXState *s)

 static void vhdx_set_shift_bits(BDRVVHDXState *s)
 {
-    s->logical_sector_size_bits = 31 - clz32(s->logical_sector_size);
-    s->sectors_per_block_bits =   31 - clz32(s->sectors_per_block);
-    s->chunk_ratio_bits =         63 - clz64(s->chunk_ratio);
-    s->block_size_bits =          31 - clz32(s->block_size);
+    s->logical_sector_size_bits = ctz32(s->logical_sector_size);
+    s->sectors_per_block_bits =   ctz32(s->sectors_per_block);
+    s->chunk_ratio_bits =         ctz64(s->chunk_ratio);
+    s->block_size_bits =          ctz32(s->block_size);
 }

 /*
@@ -857,14 +859,8 @@ static void vhdx_calc_bat_entries(BDRVVHDXState *s)
 {
    uint32_t data_blocks_cnt, bitmap_blocks_cnt;

-    data_blocks_cnt = s->virtual_disk_size >> s->block_size_bits;
-    if (s->virtual_disk_size - (data_blocks_cnt << s->block_size_bits)) {
-        data_blocks_cnt++;
-    }
-    bitmap_blocks_cnt = data_blocks_cnt >> s->chunk_ratio_bits;
-    if (data_blocks_cnt - (bitmap_blocks_cnt << s->chunk_ratio_bits)) {
-        bitmap_blocks_cnt++;
-    }
+    data_blocks_cnt = DIV_ROUND_UP(s->virtual_disk_size, s->block_size);
+    bitmap_blocks_cnt = DIV_ROUND_UP(data_blocks_cnt, s->chunk_ratio);

    if (s->parent_entries) {
        s->bat_entries = bitmap_blocks_cnt * (s->chunk_ratio + 1);
@@ -1778,7 +1774,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)

    gunichar2 *creator = NULL;
    glong creator_items;
-    BlockDriverState *bs;
+    BlockBackend *blk;
    char *type = NULL;
    VHDXImageType image_type;
    Error *local_err = NULL;
@@ -1843,14 +1839,16 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
        goto exit;
    }

-    bs = NULL;
-    ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
-                    &local_err);
-    if (ret < 0) {
+    blk = blk_new_open(filename, NULL, NULL,
+                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+    if (blk == NULL) {
        error_propagate(errp, local_err);
+        ret = -EIO;
        goto exit;
    }

+    blk_set_allow_write_beyond_eof(blk, true);
+
    /* Create (A) */

    /* The creator field is optional, but may be useful for
@@ -1858,13 +1856,13 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
    creator = g_utf8_to_utf16("QEMU v" QEMU_VERSION, -1, NULL,
                              &creator_items, NULL);
    signature = cpu_to_le64(VHDX_FILE_SIGNATURE);
-    ret = bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature));
+    ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature));
    if (ret < 0) {
        goto delete_and_exit;
    }
    if (creator) {
-        ret = bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET + sizeof(signature),
-                          creator, creator_items * sizeof(gunichar2));
+        ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET + sizeof(signature),
+                         creator, creator_items * sizeof(gunichar2));
        if (ret < 0) {
            goto delete_and_exit;
        }
@@ -1872,13 +1870,13 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)


    /* Creates (B),(C) */
-    ret = vhdx_create_new_headers(bs, image_size, log_size);
+    ret = vhdx_create_new_headers(blk_bs(blk), image_size, log_size);
    if (ret < 0) {
        goto delete_and_exit;
    }

    /* Creates (D),(E),(G) explicitly. (F) created as by-product */
-    ret = vhdx_create_new_region_table(bs, image_size, block_size, 512,
+    ret = vhdx_create_new_region_table(blk_bs(blk), image_size, block_size, 512,
                                       log_size, use_zero_blocks, image_type,
                                       &metadata_offset);
    if (ret < 0) {
@@ -1886,7 +1884,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
    }

    /* Creates (H) */
-    ret = vhdx_create_new_metadata(bs, image_size, block_size, 512,
+    ret = vhdx_create_new_metadata(blk_bs(blk), image_size, block_size, 512,
                                   metadata_offset, image_type);
    if (ret < 0) {
        goto delete_and_exit;
@@ -1894,7 +1892,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)


 delete_and_exit:
-    bdrv_unref(bs);
+    blk_unref(blk);
 exit:
    g_free(type);
    g_free(creator);
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -24,12 +24,14 @@
 */

 #include "qemu/osdep.h"
-#include "qemu-common.h"
+#include "qapi/error.h"
 #include "block/block_int.h"
+#include "sysemu/block-backend.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/error-report.h"
 #include "qemu/module.h"
 #include "migration/migration.h"
+#include "qemu/cutils.h"
 #include <zlib.h>
 #include <glib.h>

@@ -242,15 +244,17 @@ static void vmdk_free_last_extent(BlockDriverState *bs)

 static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
 {
-    char desc[DESC_SIZE];
+    char *desc;
    uint32_t cid = 0xffffffff;
    const char *p_name, *cid_str;
    size_t cid_str_size;
    BDRVVmdkState *s = bs->opaque;
    int ret;

+    desc = g_malloc0(DESC_SIZE);
    ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
    if (ret < 0) {
+        g_free(desc);
        return 0;
    }

@@ -269,41 +273,45 @@ static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
        sscanf(p_name, "%" SCNx32, &cid);
    }

+    g_free(desc);
    return cid;
 }

 static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
 {
-    char desc[DESC_SIZE], tmp_desc[DESC_SIZE];
+    char *desc, *tmp_desc;
    char *p_name, *tmp_str;
    BDRVVmdkState *s = bs->opaque;
-    int ret;
+    int ret = 0;

+    desc = g_malloc0(DESC_SIZE);
+    tmp_desc = g_malloc0(DESC_SIZE);
    ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
    if (ret < 0) {
-        return ret;
+        goto out;
    }

    desc[DESC_SIZE - 1] = '\0';
    tmp_str = strstr(desc, "parentCID");
    if (tmp_str == NULL) {
-        return -EINVAL;
+        ret = -EINVAL;
+        goto out;
    }

-    pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str);
+    pstrcpy(tmp_desc, DESC_SIZE, tmp_str);
    p_name = strstr(desc, "CID");
    if (p_name != NULL) {
        p_name += sizeof("CID");
-        snprintf(p_name, sizeof(desc) - (p_name - desc), "%" PRIx32 "\n", cid);
-        pstrcat(desc, sizeof(desc), tmp_desc);
+        snprintf(p_name, DESC_SIZE - (p_name - desc), "%" PRIx32 "\n", cid);
+        pstrcat(desc, DESC_SIZE, tmp_desc);
    }

    ret = bdrv_pwrite_sync(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
-    if (ret < 0) {
-        return ret;
-    }

-    return 0;
+out:
+    g_free(desc);
+    g_free(tmp_desc);
+    return ret;
 }

 static int vmdk_is_cid_valid(BlockDriverState *bs)
@@ -337,15 +345,16 @@ static int vmdk_reopen_prepare(BDRVReopenState *state,
 static int vmdk_parent_open(BlockDriverState *bs)
 {
    char *p_name;
-    char desc[DESC_SIZE + 1];
+    char *desc;
    BDRVVmdkState *s = bs->opaque;
    int ret;

-    desc[DESC_SIZE] = '\0';
+    desc = g_malloc0(DESC_SIZE + 1);
    ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
    if (ret < 0) {
-        return ret;
+        goto out;
    }
+    ret = 0;

    p_name = strstr(desc, "parentFileNameHint");
    if (p_name != NULL) {
@@ -354,16 +363,20 @@ static int vmdk_parent_open(BlockDriverState *bs)
        p_name += sizeof("parentFileNameHint") + 1;
        end_name = strchr(p_name, '\"');
        if (end_name == NULL) {
-            return -EINVAL;
+            ret = -EINVAL;
+            goto out;
        }
        if ((end_name - p_name) > sizeof(bs->backing_file) - 1) {
-            return -EINVAL;
+            ret = -EINVAL;
+            goto out;
        }

        pstrcpy(bs->backing_file, end_name - p_name + 1, p_name);
    }

-    return 0;
+out:
+    g_free(desc);
+    return ret;
 }

 /* Create and append extent to the extent array. Return the added VmdkExtent
@@ -649,11 +662,8 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
    compressed =
        le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
    if (le32_to_cpu(header.version) > 3) {
-        char buf[64];
-        snprintf(buf, sizeof(buf), "VMDK version %" PRId32,
-                 le32_to_cpu(header.version));
-        error_setg(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
-                   bdrv_get_device_or_node_name(bs), "vmdk", buf);
+        error_setg(errp, "Unsupported VMDK version %" PRIu32,
+                   le32_to_cpu(header.version));
        return -ENOTSUP;
    } else if (le32_to_cpu(header.version) == 3 && (flags & BDRV_O_RDWR) &&
               !compressed) {
@@ -1639,7 +1649,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
                              QemuOpts *opts, Error **errp)
 {
    int ret, i;
-    BlockDriverState *bs = NULL;
+    BlockBackend *blk = NULL;
    VMDK4Header header;
    Error *local_err = NULL;
    uint32_t tmp, magic, grains, gd_sectors, gt_size, gt_count;
@@ -1652,16 +1662,18 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
        goto exit;
    }

-    assert(bs == NULL);
-    ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
-                    &local_err);
-    if (ret < 0) {
+    blk = blk_new_open(filename, NULL, NULL,
+                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+    if (blk == NULL) {
        error_propagate(errp, local_err);
+        ret = -EIO;
        goto exit;
    }

+    blk_set_allow_write_beyond_eof(blk, true);
+
    if (flat) {
-        ret = bdrv_truncate(bs, filesize);
+        ret = blk_truncate(blk, filesize);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not truncate file");
        }
@@ -1716,18 +1728,18 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
    header.check_bytes[3] = 0xa;

    /* write all the data */
-    ret = bdrv_pwrite(bs, 0, &magic, sizeof(magic));
+    ret = blk_pwrite(blk, 0, &magic, sizeof(magic));
    if (ret < 0) {
        error_setg(errp, QERR_IO_ERROR);
        goto exit;
    }
-    ret = bdrv_pwrite(bs, sizeof(magic), &header, sizeof(header));
+    ret = blk_pwrite(blk, sizeof(magic), &header, sizeof(header));
    if (ret < 0) {
        error_setg(errp, QERR_IO_ERROR);
        goto exit;
    }

-    ret = bdrv_truncate(bs, le64_to_cpu(header.grain_offset) << 9);
+    ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not truncate file");
        goto exit;
@@ -1740,8 +1752,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
         i < gt_count; i++, tmp += gt_size) {
        gd_buf[i] = cpu_to_le32(tmp);
    }
-    ret = bdrv_pwrite(bs, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE,
-                      gd_buf, gd_buf_size);
+    ret = blk_pwrite(blk, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE,
+                     gd_buf, gd_buf_size);
    if (ret < 0) {
        error_setg(errp, QERR_IO_ERROR);
        goto exit;
@@ -1752,8 +1764,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
         i < gt_count; i++, tmp += gt_size) {
        gd_buf[i] = cpu_to_le32(tmp);
    }
-    ret = bdrv_pwrite(bs, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE,
-                      gd_buf, gd_buf_size);
+    ret = blk_pwrite(blk, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE,
+                     gd_buf, gd_buf_size);
    if (ret < 0) {
        error_setg(errp, QERR_IO_ERROR);
        goto exit;
@@ -1761,8 +1773,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,

    ret = 0;
 exit:
-    if (bs) {
-        bdrv_unref(bs);
+    if (blk) {
+        blk_unref(blk);
    }
    g_free(gd_buf);
    return ret;
@@ -1811,7 +1823,7 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
 static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
 {
    int idx = 0;
-    BlockDriverState *new_bs = NULL;
+    BlockBackend *new_blk = NULL;
    Error *local_err = NULL;
    char *desc = NULL;
    int64_t total_size = 0, filesize;
@@ -1922,7 +1934,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
        goto exit;
    }
    if (backing_file) {
-        BlockDriverState *bs = NULL;
+        BlockBackend *blk;
        char *full_backing = g_new0(char, PATH_MAX);
        bdrv_get_full_backing_filename_from_filename(filename, backing_file,
                                                     full_backing, PATH_MAX,
@@ -1933,18 +1945,21 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
            ret = -ENOENT;
            goto exit;
        }
-        ret = bdrv_open(&bs, full_backing, NULL, NULL, BDRV_O_NO_BACKING, errp);
+
+        blk = blk_new_open(full_backing, NULL, NULL,
+                           BDRV_O_NO_BACKING, errp);
        g_free(full_backing);
-        if (ret != 0) {
+        if (blk == NULL) {
+            ret = -EIO;
            goto exit;
        }
-        if (strcmp(bs->drv->format_name, "vmdk")) {
-            bdrv_unref(bs);
+        if (strcmp(blk_bs(blk)->drv->format_name, "vmdk")) {
+            blk_unref(blk);
            ret = -EINVAL;
            goto exit;
        }
-        parent_cid = vmdk_read_cid(bs, 0);
-        bdrv_unref(bs);
+        parent_cid = vmdk_read_cid(blk_bs(blk), 0);
+        blk_unref(blk);
        snprintf(parent_desc_line, BUF_SIZE,
                "parentFileNameHint=\"%s\"", backing_file);
    }
@@ -2002,14 +2017,18 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
            goto exit;
        }
    }
-    assert(new_bs == NULL);
-    ret = bdrv_open(&new_bs, filename, NULL, NULL,
-                    BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
-    if (ret < 0) {
+
+    new_blk = blk_new_open(filename, NULL, NULL,
+                           BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+    if (new_blk == NULL) {
        error_propagate(errp, local_err);
+        ret = -EIO;
        goto exit;
    }
-    ret = bdrv_pwrite(new_bs, desc_offset, desc, desc_len);
+
+    blk_set_allow_write_beyond_eof(new_blk, true);
+
+    ret = blk_pwrite(new_blk, desc_offset, desc, desc_len);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not write description");
        goto exit;
@@ -2017,14 +2036,14 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
    /* bdrv_pwrite write padding zeros to align to sector, we don't need that
     * for description file */
    if (desc_offset == 0) {
-        ret = bdrv_truncate(new_bs, desc_len);
+        ret = blk_truncate(new_blk, desc_len);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not truncate file");
        }
    }
 exit:
-    if (new_bs) {
-        bdrv_unref(new_bs);
+    if (new_blk) {
+        blk_unref(new_blk);
    }
    g_free(adapter_type);
    g_free(backing_file);
@@ -2183,18 +2202,18 @@ static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs)

    *spec_info = (ImageInfoSpecific){
        .type = IMAGE_INFO_SPECIFIC_KIND_VMDK,
-        {
-            .vmdk = g_new0(ImageInfoSpecificVmdk, 1),
+        .u = {
+            .vmdk.data = g_new0(ImageInfoSpecificVmdk, 1),
        },
    };

-    *spec_info->u.vmdk = (ImageInfoSpecificVmdk) {
+    *spec_info->u.vmdk.data = (ImageInfoSpecificVmdk) {
        .create_type = g_strdup(s->create_type),
        .cid = s->cid,
        .parent_cid = s->parent_cid,
    };

-    next = &spec_info->u.vmdk->extents;
+    next = &spec_info->u.vmdk.data->extents;
    for (i = 0; i < s->num_extents; i++) {
        *next = g_new0(ImageInfoList, 1);
        (*next)->value = vmdk_get_extent_info(&s->extents[i]);
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -23,8 +23,10 @@
 * THE SOFTWARE.
 */
 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
+#include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include "migration/migration.h"
 #if defined(CONFIG_UUID)
@@ -43,28 +45,34 @@ enum vhd_type {
    VHD_DIFFERENCING    = 4,
 };

-// Seconds since Jan 1, 2000 0:00:00 (UTC)
+/* Seconds since Jan 1, 2000 0:00:00 (UTC) */
 #define VHD_TIMESTAMP_BASE 946684800

-#define VHD_MAX_SECTORS       (65535LL * 255 * 255)
-#define VHD_MAX_GEOMETRY      (65535LL *  16 * 255)
+#define VHD_CHS_MAX_C   65535LL
+#define VHD_CHS_MAX_H   16
+#define VHD_CHS_MAX_S   255

-// always big-endian
+#define VHD_MAX_SECTORS       0xff000000    /* 2040 GiB max image size */
+#define VHD_MAX_GEOMETRY      (VHD_CHS_MAX_C * VHD_CHS_MAX_H * VHD_CHS_MAX_S)
+
+#define VPC_OPT_FORCE_SIZE "force_size"
+
+/* always big-endian */
 typedef struct vhd_footer {
-    char        creator[8]; // "conectix"
+    char        creator[8]; /* "conectix" */
    uint32_t    features;
    uint32_t    version;

-    // Offset of next header structure, 0xFFFFFFFF if none
+    /* Offset of next header structure, 0xFFFFFFFF if none */
    uint64_t    data_offset;

-    // Seconds since Jan 1, 2000 0:00:00 (UTC)
+    /* Seconds since Jan 1, 2000 0:00:00 (UTC) */
    uint32_t    timestamp;

-    char        creator_app[4]; // "vpc "
+    char        creator_app[4]; /*  e.g., "vpc " */
    uint16_t    major;
    uint16_t    minor;
-    char        creator_os[4]; // "Wi2k"
+    char        creator_os[4]; /* "Wi2k" */

    uint64_t    orig_size;
    uint64_t    current_size;
@@ -75,29 +83,29 @@ typedef struct vhd_footer {

    uint32_t    type;

-    // Checksum of the Hard Disk Footer ("one's complement of the sum of all
-    // the bytes in the footer without the checksum field")
+    /* Checksum of the Hard Disk Footer ("one's complement of the sum of all
+       the bytes in the footer without the checksum field") */
    uint32_t    checksum;

-    // UUID used to identify a parent hard disk (backing file)
+    /* UUID used to identify a parent hard disk (backing file) */
    uint8_t     uuid[16];

    uint8_t     in_saved_state;
 } QEMU_PACKED VHDFooter;

 typedef struct vhd_dyndisk_header {
-    char        magic[8]; // "cxsparse"
+    char        magic[8]; /* "cxsparse" */

-    // Offset of next header structure, 0xFFFFFFFF if none
+    /* Offset of next header structure, 0xFFFFFFFF if none */
    uint64_t    data_offset;

-    // Offset of the Block Allocation Table (BAT)
+    /* Offset of the Block Allocation Table (BAT) */
    uint64_t    table_offset;

    uint32_t    version;
-    uint32_t    max_table_entries; // 32bit/entry
+    uint32_t    max_table_entries; /* 32bit/entry */

-    // 2 MB by default, must be a power of two
+    /* 2 MB by default, must be a power of two */
    uint32_t    block_size;

    uint32_t    checksum;
@@ -105,7 +113,7 @@ typedef struct vhd_dyndisk_header {
    uint32_t    parent_timestamp;
    uint32_t    reserved;

-    // Backing file name (in UTF-16)
+    /* Backing file name (in UTF-16) */
    uint8_t     parent_name[512];

    struct {
@@ -128,6 +136,8 @@ typedef struct BDRVVPCState {

    uint32_t block_size;
    uint32_t bitmap_size;
+    bool force_use_chs;
+    bool force_use_sz;

 #ifdef CACHE
    uint8_t *pageentry_u8;
@@ -140,6 +150,22 @@ typedef struct BDRVVPCState {
    Error *migration_blocker;
 } BDRVVPCState;

+#define VPC_OPT_SIZE_CALC "force_size_calc"
+static QemuOptsList vpc_runtime_opts = {
+    .name = "vpc-runtime-opts",
+    .head = QTAILQ_HEAD_INITIALIZER(vpc_runtime_opts.head),
+    .desc = {
+        {
+            .name = VPC_OPT_SIZE_CALC,
+            .type = QEMU_OPT_STRING,
+            .help = "Force disk size calculation to use either CHS geometry, "
+                    "or use the disk current_size specified in the VHD footer. "
+                    "{chs, current_size}"
+        },
+        { /* end of list */ }
+    }
+};
+
 static uint32_t vpc_checksum(uint8_t* buf, size_t size)
 {
    uint32_t res = 0;
@@ -159,6 +185,25 @@ static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
    return 0;
 }

+static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts,
+                              Error **errp)
+{
+    BDRVVPCState *s = bs->opaque;
+    const char *size_calc;
+
+    size_calc = qemu_opt_get(opts, VPC_OPT_SIZE_CALC);
+
+    if (!size_calc) {
+       /* no override, use autodetect only */
+    } else if (!strcmp(size_calc, "current_size")) {
+        s->force_use_sz = true;
+    } else if (!strcmp(size_calc, "chs")) {
+        s->force_use_chs = true;
+    } else {
+        error_setg(errp, "Invalid size calculation mode: '%s'", size_calc);
+    }
+}
+
 static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
                    Error **errp)
 {
@@ -166,6 +211,9 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
    int i;
    VHDFooter *footer;
    VHDDynDiskHeader *dyndisk_header;
+    QemuOpts *opts = NULL;
+    Error *local_err = NULL;
+    bool use_chs;
    uint8_t buf[HEADER_SIZE];
    uint32_t checksum;
    uint64_t computed_size;
@@ -173,8 +221,24 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
    int disk_type = VHD_DYNAMIC;
    int ret;

+    opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    vpc_parse_options(bs, opts, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
+        goto fail;
+    }
+
    ret = bdrv_pread(bs->file->bs, 0, s->footer_buf, HEADER_SIZE);
    if (ret < 0) {
+        error_setg(errp, "Unable to read VHD header");
        goto fail;
    }

@@ -183,9 +247,11 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
        int64_t offset = bdrv_getlength(bs->file->bs);
        if (offset < 0) {
            ret = offset;
+            error_setg(errp, "Invalid file size");
            goto fail;
        } else if (offset < HEADER_SIZE) {
            ret = -EINVAL;
+            error_setg(errp, "File too small for a VHD header");
            goto fail;
        }

@@ -212,22 +278,50 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
    /* Write 'checksum' back to footer, or else will leave it with zero. */
    footer->checksum = cpu_to_be32(checksum);

-    // The visible size of a image in Virtual PC depends on the geometry
-    // rather than on the size stored in the footer (the size in the footer
-    // is too large usually)
+    /* The visible size of a image in Virtual PC depends on the geometry
+       rather than on the size stored in the footer (the size in the footer
+       is too large usually) */
    bs->total_sectors = (int64_t)
        be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;

-    /* Images that have exactly the maximum geometry are probably bigger and
-     * would be truncated if we adhered to the geometry for them. Rely on
-     * footer->current_size for them. */
-    if (bs->total_sectors == VHD_MAX_GEOMETRY) {
+    /* Microsoft Virtual PC and Microsoft Hyper-V produce and read
+     * VHD image sizes differently.  VPC will rely on CHS geometry,
+     * while Hyper-V and disk2vhd use the size specified in the footer.
+     *
+     * We use a couple of approaches to try and determine the correct method:
+     * look at the Creator App field, and look for images that have CHS
+     * geometry that is the maximum value.
+     *
+     * If the CHS geometry is the maximum CHS geometry, then we assume that
+     * the size is the footer->current_size to avoid truncation.  Otherwise,
+     * we follow the table based on footer->creator_app:
+     *
+     *  Known creator apps:
+     *      'vpc '  :  CHS              Virtual PC (uses disk geometry)
+     *      'qemu'  :  CHS              QEMU (uses disk geometry)
+     *      'qem2'  :  current_size     QEMU (uses current_size)
+     *      'win '  :  current_size     Hyper-V
+     *      'd2v '  :  current_size     Disk2vhd
+     *      'tap\0' :  current_size     XenServer
+     *      'CTXS'  :  current_size     XenConverter
+     *
+     *  The user can override the table values via drive options, however
+     *  even with an override we will still use current_size for images
+     *  that have CHS geometry of the maximum size.
+     */
+    use_chs = (!!strncmp(footer->creator_app, "win ", 4) &&
+               !!strncmp(footer->creator_app, "qem2", 4) &&
+               !!strncmp(footer->creator_app, "d2v ", 4) &&
+               !!strncmp(footer->creator_app, "CTXS", 4) &&
+               !!memcmp(footer->creator_app, "tap", 4)) || s->force_use_chs;
+
+    if (!use_chs || bs->total_sectors == VHD_MAX_GEOMETRY || s->force_use_sz) {
        bs->total_sectors = be64_to_cpu(footer->current_size) /
-                            BDRV_SECTOR_SIZE;
+                                        BDRV_SECTOR_SIZE;
    }

-    /* Allow a maximum disk size of approximately 2 TB */
-    if (bs->total_sectors >= VHD_MAX_SECTORS) {
+    /* Allow a maximum disk size of 2040 GiB */
+    if (bs->total_sectors > VHD_MAX_SECTORS) {
        ret = -EFBIG;
        goto fail;
    }
@@ -236,12 +330,14 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
        ret = bdrv_pread(bs->file->bs, be64_to_cpu(footer->data_offset), buf,
                         HEADER_SIZE);
        if (ret < 0) {
+            error_setg(errp, "Error reading dynamic VHD header");
            goto fail;
        }

        dyndisk_header = (VHDDynDiskHeader *) buf;

        if (strncmp(dyndisk_header->magic, "cxsparse", 8)) {
+            error_setg(errp, "Invalid header magic");
            ret = -EINVAL;
            goto fail;
        }
@@ -257,16 +353,14 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
        s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries);

        if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) {
-            ret = -EINVAL;
-            goto fail;
-        }
-        if (s->max_table_entries > (VHD_MAX_SECTORS * 512) / s->block_size) {
+            error_setg(errp, "Too many blocks");
            ret = -EINVAL;
            goto fail;
        }

        computed_size = (uint64_t) s->max_table_entries * s->block_size;
        if (computed_size < bs->total_sectors * 512) {
+            error_setg(errp, "Page table too small");
            ret = -EINVAL;
            goto fail;
        }
@@ -283,6 +377,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,

        s->pagetable = qemu_try_blockalign(bs->file->bs, pagetable_size);
        if (s->pagetable == NULL) {
+            error_setg(errp, "Unable to allocate memory for page table");
            ret = -ENOMEM;
            goto fail;
        }
@@ -292,6 +387,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
        ret = bdrv_pread(bs->file->bs, s->bat_offset, s->pagetable,
                         pagetable_size);
        if (ret < 0) {
+            error_setg(errp, "Error reading pagetable");
            goto fail;
        }

@@ -370,16 +466,16 @@ static inline int64_t get_sector_offset(BlockDriverState *bs,
    pageentry_index = (offset % s->block_size) / 512;

    if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff)
-        return -1; // not allocated
+        return -1; /* not allocated */

    bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
    block_offset = bitmap_offset + s->bitmap_size + (512 * pageentry_index);

-    // We must ensure that we don't write to any sectors which are marked as
-    // unused in the bitmap. We get away with setting all bits in the block
-    // bitmap each time we write to a new block. This might cause Virtual PC to
-    // miss sparse read optimization, but it's not a problem in terms of
-    // correctness.
+    /* We must ensure that we don't write to any sectors which are marked as
+       unused in the bitmap. We get away with setting all bits in the block
+       bitmap each time we write to a new block. This might cause Virtual PC to
+       miss sparse read optimization, but it's not a problem in terms of
+       correctness. */
    if (write && (s->last_bitmap_offset != bitmap_offset)) {
        uint8_t bitmap[s->bitmap_size];

@@ -425,18 +521,18 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
    int ret;
    uint8_t bitmap[s->bitmap_size];

-    // Check if sector_num is valid
+    /* Check if sector_num is valid */
    if ((sector_num < 0) || (sector_num > bs->total_sectors))
        return -1;

-    // Write entry into in-memory BAT
+    /* Write entry into in-memory BAT */
    index = (sector_num * 512) / s->block_size;
    if (s->pagetable[index] != 0xFFFFFFFF)
        return -1;

    s->pagetable[index] = s->free_data_block_offset / 512;

-    // Initialize the block's bitmap
+    /* Initialize the block's bitmap */
    memset(bitmap, 0xff, s->bitmap_size);
    ret = bdrv_pwrite_sync(bs->file->bs, s->free_data_block_offset, bitmap,
        s->bitmap_size);
@@ -444,13 +540,13 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
        return ret;
    }

-    // Write new footer (the old one will be overwritten)
+    /* Write new footer (the old one will be overwritten) */
    s->free_data_block_offset += s->block_size + s->bitmap_size;
    ret = rewrite_footer(bs);
    if (ret < 0)
        goto fail;

-    // Write BAT entry to disk
+    /* Write BAT entry to disk */
    bat_offset = s->bat_offset + (4 * index);
    bat_value = cpu_to_be32(s->pagetable[index]);
    ret = bdrv_pwrite_sync(bs->file->bs, bat_offset, &bat_value, 4);
@@ -631,7 +727,7 @@ static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
 * Note that the geometry doesn't always exactly match total_sectors but
 * may round it down.
 *
- * Returns 0 on success, -EFBIG if the size is larger than ~2 TB. Override
+ * Returns 0 on success, -EFBIG if the size is larger than 2040 GiB. Override
 * the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB)
 * and instead allow up to 255 heads.
 */
@@ -673,7 +769,7 @@ static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
    return 0;
 }

-static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
+static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
                               int64_t total_sectors)
 {
    VHDDynDiskHeader *dyndisk_header =
@@ -683,34 +779,34 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
    int ret;
    int64_t offset = 0;

-    // Write the footer (twice: at the beginning and at the end)
+    /* Write the footer (twice: at the beginning and at the end) */
    block_size = 0x200000;
    num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);

-    ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE);
-    if (ret) {
-        goto fail;
-    }
-
-    offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
-    ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE);
+    ret = blk_pwrite(blk, offset, buf, HEADER_SIZE);
    if (ret < 0) {
        goto fail;
    }

-    // Write the initial BAT
+    offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
+    ret = blk_pwrite(blk, offset, buf, HEADER_SIZE);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    /* Write the initial BAT */
    offset = 3 * 512;

    memset(buf, 0xFF, 512);
    for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) {
-        ret = bdrv_pwrite_sync(bs, offset, buf, 512);
+        ret = blk_pwrite(blk, offset, buf, 512);
        if (ret < 0) {
            goto fail;
        }
        offset += 512;
    }

-    // Prepare the Dynamic Disk Header
+    /* Prepare the Dynamic Disk Header */
    memset(buf, 0, 1024);

    memcpy(dyndisk_header->magic, "cxsparse", 8);
@@ -727,10 +823,10 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,

    dyndisk_header->checksum = cpu_to_be32(vpc_checksum(buf, 1024));

-    // Write the header
+    /* Write the header */
    offset = 512;

-    ret = bdrv_pwrite_sync(bs, offset, buf, 1024);
+    ret = blk_pwrite(blk, offset, buf, 1024);
    if (ret < 0) {
        goto fail;
    }
@@ -739,7 +835,7 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
    return ret;
 }

-static int create_fixed_disk(BlockDriverState *bs, uint8_t *buf,
+static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
                             int64_t total_size)
 {
    int ret;
@@ -747,12 +843,12 @@ static int create_fixed_disk(BlockDriverState *bs, uint8_t *buf,
    /* Add footer to total size */
    total_size += HEADER_SIZE;

-    ret = bdrv_truncate(bs, total_size);
+    ret = blk_truncate(blk, total_size);
    if (ret < 0) {
        return ret;
    }

-    ret = bdrv_pwrite_sync(bs, total_size - HEADER_SIZE, buf, HEADER_SIZE);
+    ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE);
    if (ret < 0) {
        return ret;
    }
@@ -773,8 +869,9 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
    int64_t total_size;
    int disk_type;
    int ret = -EIO;
+    bool force_size;
    Error *local_err = NULL;
-    BlockDriverState *bs = NULL;
+    BlockBackend *blk = NULL;

    /* Read out options */
    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
@@ -786,6 +883,7 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
        } else if (!strcmp(disk_type_param, "fixed")) {
            disk_type = VHD_FIXED;
        } else {
+            error_setg(errp, "Invalid disk type, %s", disk_type_param);
            ret = -EINVAL;
            goto out;
        }
@@ -793,36 +891,50 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
        disk_type = VHD_DYNAMIC;
    }

+    force_size = qemu_opt_get_bool_del(opts, VPC_OPT_FORCE_SIZE, false);
+
    ret = bdrv_create_file(filename, opts, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
        goto out;
    }
-    ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
-                    &local_err);
-    if (ret < 0) {
+
+    blk = blk_new_open(filename, NULL, NULL,
+                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+    if (blk == NULL) {
        error_propagate(errp, local_err);
+        ret = -EIO;
        goto out;
    }

+    blk_set_allow_write_beyond_eof(blk, true);
+
    /*
     * Calculate matching total_size and geometry. Increase the number of
     * sectors requested until we get enough (or fail). This ensures that
     * qemu-img convert doesn't truncate images, but rather rounds up.
     *
-     * If the image size can't be represented by a spec conform CHS geometry,
+     * If the image size can't be represented by a spec conformant CHS geometry,
     * we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use
     * the image size from the VHD footer to calculate total_sectors.
     */
-    total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
-    for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
-        calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
+    if (force_size) {
+        /* This will force the use of total_size for sector count, below */
+        cyls         = VHD_CHS_MAX_C;
+        heads        = VHD_CHS_MAX_H;
+        secs_per_cyl = VHD_CHS_MAX_S;
+    } else {
+        total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
+        for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
+            calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
+        }
    }

    if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) {
        total_sectors = total_size / BDRV_SECTOR_SIZE;
-        /* Allow a maximum disk size of approximately 2 TB */
+        /* Allow a maximum disk size of 2040 GiB */
        if (total_sectors > VHD_MAX_SECTORS) {
+            error_setg(errp, "Disk size is too large, max size is 2040 GiB");
            ret = -EFBIG;
            goto out;
        }
@@ -835,8 +947,11 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
    memset(buf, 0, 1024);

    memcpy(footer->creator, "conectix", 8);
-    /* TODO Check if "qemu" creator_app is ok for VPC */
-    memcpy(footer->creator_app, "qemu", 4);
+    if (force_size) {
+        memcpy(footer->creator_app, "qem2", 4);
+    } else {
+        memcpy(footer->creator_app, "qemu", 4);
+    }
    memcpy(footer->creator_os, "Wi2k", 4);

    footer->features = cpu_to_be32(0x02);
@@ -866,13 +981,16 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
    footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE));

    if (disk_type == VHD_DYNAMIC) {
-        ret = create_dynamic_disk(bs, buf, total_sectors);
+        ret = create_dynamic_disk(blk, buf, total_sectors);
    } else {
-        ret = create_fixed_disk(bs, buf, total_size);
+        ret = create_fixed_disk(blk, buf, total_size);
+    }
+    if (ret < 0) {
+        error_setg(errp, "Unable to create or write VHD header");
    }

 out:
-    bdrv_unref(bs);
+    blk_unref(blk);
    g_free(disk_type_param);
    return ret;
 }
@@ -917,6 +1035,13 @@ static QemuOptsList vpc_create_opts = {
                "Type of virtual hard disk format. Supported formats are "
                "{dynamic (default) | fixed} "
        },
+        {
+            .name = VPC_OPT_FORCE_SIZE,
+            .type = QEMU_OPT_BOOL,
+            .help = "Force disk size calculation to use the actual size "
+                    "specified, rather than using the nearest CHS-based "
+                    "calculation"
+        },
        { /* end of list */ }
    }
 };
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -24,13 +24,14 @@
 */
 #include "qemu/osdep.h"
 #include <dirent.h>
-#include "qemu-common.h"
+#include "qapi/error.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "migration/migration.h"
 #include "qapi/qmp/qint.h"
 #include "qapi/qmp/qbool.h"
 #include "qapi/qmp/qstring.h"
+#include "qemu/cutils.h"

 #ifndef S_IWGRP
 #define S_IWGRP 0
@@ -1108,6 +1109,8 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
            goto fail;
        }
        memcpy(s->volume_label, label, label_length);
+    } else {
+        memcpy(s->volume_label, "QEMU VVFAT", 10);
    }

    if (floppy) {
@@ -2282,12 +2285,17 @@ DLOG(fprintf(stderr, "commit_direntries for %s, parent_mapping_index %d\n", mapp
 		factor * (old_cluster_count - new_cluster_count));

    for (c = first_cluster; !fat_eof(s, c); c = modified_fat_get(s, c)) {
+        direntry_t *first_direntry;
 	void* direntry = array_get(&(s->directory), current_dir_index);
 	int ret = vvfat_read(s->bs, cluster2sector(s, c), direntry,
 		s->sectors_per_cluster);
 	if (ret)
 	    return ret;
-	assert(!strncmp(s->directory.pointer, "QEMU", 4));
+
+        /* The first directory entry on the filesystem is the volume name */
+        first_direntry = (direntry_t*) s->directory.pointer;
+        assert(!memcmp(first_direntry->name, s->volume_label, 11));
+
 	current_dir_index += factor;
    }

@@ -2956,8 +2964,7 @@ static int enable_write_target(BDRVVVFATState *s, Error **errp)
    options = qdict_new();
    qdict_put(options, "driver", qstring_from_str("qcow"));
    ret = bdrv_open(&s->qcow, s->qcow_filename, NULL, options,
-                    BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH,
-                    errp);
+                    BDRV_O_RDWR | BDRV_O_NO_FLUSH, errp);
    if (ret < 0) {
        goto err;
    }
--- a/blockdev.c
+++ b/blockdev.c
@@ -50,6 +50,8 @@
 #include "qmp-commands.h"
 #include "trace.h"
 #include "sysemu/arch_init.h"
+#include "qemu/cutils.h"
+#include "qemu/help_option.h"

 static QTAILQ_HEAD(, BlockDriverState) monitor_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(monitor_bdrv_states);
@@ -147,6 +149,7 @@ void blockdev_auto_del(BlockBackend *blk)
    DriveInfo *dinfo = blk_legacy_dinfo(blk);

    if (dinfo && dinfo->auto_del) {
+        monitor_remove_blk(blk);
        blk_unref(blk);
    }
 }
@@ -466,6 +469,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
    int bdrv_flags = 0;
    int on_read_error, on_write_error;
    bool account_invalid, account_failed;
+    bool writethrough;
    BlockBackend *blk;
    BlockDriverState *bs;
    ThrottleConfig cfg;
@@ -504,6 +508,8 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
    account_invalid = qemu_opt_get_bool(opts, "stats-account-invalid", true);
    account_failed = qemu_opt_get_bool(opts, "stats-account-failed", true);

+    writethrough = !qemu_opt_get_bool(opts, BDRV_OPT_CACHE_WB, true);
+
    qdict_extract_subqdict(bs_opts, &interval_dict, "stats-intervals.");
    qdict_array_split(interval_dict, &interval_list);

@@ -561,7 +567,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
    if ((!file || !*file) && !qdict_size(bs_opts)) {
        BlockBackendRootState *blk_rs;

-        blk = blk_new(qemu_opts_id(opts), errp);
+        blk = blk_new(errp);
        if (!blk) {
            goto early_err;
        }
@@ -589,23 +595,15 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
        /* bdrv_open() defaults to the values in bdrv_flags (for compatibility
         * with other callers) rather than what we want as the real defaults.
         * Apply the defaults here instead. */
-        qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_WB, "on");
        qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_DIRECT, "off");
        qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, "off");
-
-        if (snapshot) {
-            /* always use cache=unsafe with snapshot */
-            qdict_put(bs_opts, BDRV_OPT_CACHE_WB, qstring_from_str("on"));
-            qdict_put(bs_opts, BDRV_OPT_CACHE_DIRECT, qstring_from_str("off"));
-            qdict_put(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, qstring_from_str("on"));
-        }
+        assert((bdrv_flags & BDRV_O_CACHE_MASK) == 0);

        if (runstate_check(RUN_STATE_INMIGRATE)) {
            bdrv_flags |= BDRV_O_INACTIVE;
        }

-        blk = blk_new_open(qemu_opts_id(opts), file, NULL, bs_opts, bdrv_flags,
-                           errp);
+        blk = blk_new_open(file, NULL, bs_opts, bdrv_flags, errp);
        if (!blk) {
            goto err_no_bs_opts;
        }
@@ -635,8 +633,15 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
        }
    }

+    blk_set_enable_write_cache(blk, !writethrough);
    blk_set_on_error(blk, on_read_error, on_write_error);

+    if (!monitor_add_blk(blk, qemu_opts_id(opts), errp)) {
+        blk_unref(blk);
+        blk = NULL;
+        goto err_no_bs_opts;
+    }
+
 err_no_bs_opts:
    qemu_opts_del(opts);
    QDECREF(interval_dict);
@@ -682,6 +687,12 @@ static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
        goto fail;
    }

+    /* bdrv_open() defaults to the values in bdrv_flags (for compatibility
+     * with other callers) rather than what we want as the real defaults.
+     * Apply the defaults here instead. */
+    qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_DIRECT, "off");
+    qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, "off");
+
    if (runstate_check(RUN_STATE_INMIGRATE)) {
        bdrv_flags |= BDRV_O_INACTIVE;
    }
@@ -717,6 +728,13 @@ void blockdev_close_all_bdrv_states(void)
    }
 }

+/* Iterates over the list of monitor-owned BlockDriverStates */
+BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs)
+{
+    return bs ? QTAILQ_NEXT(bs, monitor_list)
+              : QTAILQ_FIRST(&monitor_bdrv_states);
+}
+
 static void qemu_opt_rename(QemuOpts *opts, const char *from, const char *to,
                            Error **errp)
 {
@@ -879,8 +897,9 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
    value = qemu_opt_get(all_opts, "cache");
    if (value) {
        int flags = 0;
+        bool writethrough;

-        if (bdrv_parse_cache_flags(value, &flags) != 0) {
+        if (bdrv_parse_cache_mode(value, &flags, &writethrough) != 0) {
            error_report("invalid cache option");
            return NULL;
        }
@@ -888,7 +907,7 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
        /* Specific options take precedence */
        if (!qemu_opt_get(all_opts, BDRV_OPT_CACHE_WB)) {
            qemu_opt_set_bool(all_opts, BDRV_OPT_CACHE_WB,
-                              !!(flags & BDRV_O_CACHE_WB), &error_abort);
+                              !writethrough, &error_abort);
        }
        if (!qemu_opt_get(all_opts, BDRV_OPT_CACHE_DIRECT)) {
            qemu_opt_set_bool(all_opts, BDRV_OPT_CACHE_DIRECT,
@@ -1173,7 +1192,7 @@ void hmp_commit(Monitor *mon, const QDict *qdict)
    int ret;

    if (!strcmp(device, "all")) {
-        ret = bdrv_commit_all();
+        ret = blk_commit_all();
    } else {
        BlockDriverState *bs;
        AioContext *aio_context;
@@ -1202,15 +1221,11 @@ void hmp_commit(Monitor *mon, const QDict *qdict)
    }
 }

-static void blockdev_do_action(TransactionActionKind type, void *data,
-                               Error **errp)
+static void blockdev_do_action(TransactionAction *action, Error **errp)
 {
-    TransactionAction action;
    TransactionActionList list;

-    action.type = type;
-    action.u.data = data;
-    list.value = &action;
+    list.value = action;
    list.next = NULL;
    qmp_transaction(&list, false, NULL, errp);
 }
@@ -1236,8 +1251,11 @@ void qmp_blockdev_snapshot_sync(bool has_device, const char *device,
        .has_mode = has_mode,
        .mode = mode,
    };
-    blockdev_do_action(TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC,
-                       &snapshot, errp);
+    TransactionAction action = {
+        .type = TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC,
+        .u.blockdev_snapshot_sync.data = &snapshot,
+    };
+    blockdev_do_action(&action, errp);
 }

 void qmp_blockdev_snapshot(const char *node, const char *overlay,
@@ -1247,9 +1265,11 @@ void qmp_blockdev_snapshot(const char *node, const char *overlay,
        .node = (char *) node,
        .overlay = (char *) overlay
    };
-
-    blockdev_do_action(TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT,
-                       &snapshot_data, errp);
+    TransactionAction action = {
+        .type = TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT,
+        .u.blockdev_snapshot.data = &snapshot_data,
+    };
+    blockdev_do_action(&action, errp);
 }

 void qmp_blockdev_snapshot_internal_sync(const char *device,
@@ -1260,9 +1280,11 @@ void qmp_blockdev_snapshot_internal_sync(const char *device,
        .device = (char *) device,
        .name = (char *) name
    };
-
-    blockdev_do_action(TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC,
-                       &snapshot, errp);
+    TransactionAction action = {
+        .type = TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC,
+        .u.blockdev_snapshot_internal_sync.data = &snapshot,
+    };
+    blockdev_do_action(&action, errp);
 }

 SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device,
@@ -1499,7 +1521,7 @@ static void internal_snapshot_prepare(BlkActionState *common,

    g_assert(common->action->type ==
             TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC);
-    internal = common->action->u.blockdev_snapshot_internal_sync;
+    internal = common->action->u.blockdev_snapshot_internal_sync.data;
    state = DO_UPCAST(InternalSnapshotState, common, common);

    /* 1. parse input */
@@ -1649,7 +1671,7 @@ static void external_snapshot_prepare(BlkActionState *common,
    switch (action->type) {
    case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT:
        {
-            BlockdevSnapshot *s = action->u.blockdev_snapshot;
+            BlockdevSnapshot *s = action->u.blockdev_snapshot.data;
            device = s->node;
            node_name = s->node;
            new_image_file = NULL;
@@ -1658,7 +1680,7 @@ static void external_snapshot_prepare(BlkActionState *common,
        break;
    case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC:
        {
-            BlockdevSnapshotSync *s = action->u.blockdev_snapshot_sync;
+            BlockdevSnapshotSync *s = action->u.blockdev_snapshot_sync.data;
            device = s->has_device ? s->device : NULL;
            node_name = s->has_node_name ? s->node_name : NULL;
            new_image_file = s->snapshot_file;
@@ -1707,7 +1729,7 @@ static void external_snapshot_prepare(BlkActionState *common,
    }

    if (action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC) {
-        BlockdevSnapshotSync *s = action->u.blockdev_snapshot_sync;
+        BlockdevSnapshotSync *s = action->u.blockdev_snapshot_sync.data;
        const char *format = s->has_format ? s->format : "qcow2";
        enum NewImageMode mode;
        const char *snapshot_node_name =
@@ -1725,14 +1747,20 @@ static void external_snapshot_prepare(BlkActionState *common,
        }

        flags = state->old_bs->open_flags;
+        flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);

        /* create new image w/backing file */
        mode = s->has_mode ? s->mode : NEW_IMAGE_MODE_ABSOLUTE_PATHS;
        if (mode != NEW_IMAGE_MODE_EXISTING) {
+            int64_t size = bdrv_getlength(state->old_bs);
+            if (size < 0) {
+                error_setg_errno(errp, -size, "bdrv_getlength failed");
+                return;
+            }
            bdrv_img_create(new_image_file, format,
                            state->old_bs->filename,
                            state->old_bs->drv->format_name,
-                            NULL, -1, flags, &local_err, false);
+                            NULL, size, flags, &local_err, false);
            if (local_err) {
                error_propagate(errp, local_err);
                return;
@@ -1790,8 +1818,10 @@ static void external_snapshot_commit(BlkActionState *common)
    /* We don't need (or want) to use the transactional
     * bdrv_reopen_multiple() across all the entries at once, because we
     * don't want to abort all of them if one of them fails the reopen */
-    bdrv_reopen(state->old_bs, state->old_bs->open_flags & ~BDRV_O_RDWR,
-                NULL);
+    if (!state->old_bs->copy_on_read) {
+        bdrv_reopen(state->old_bs, state->old_bs->open_flags & ~BDRV_O_RDWR,
+                    NULL);
+    }
 }

 static void external_snapshot_abort(BlkActionState *common)
@@ -1840,7 +1870,7 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp)
    Error *local_err = NULL;

    assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
-    backup = common->action->u.drive_backup;
+    backup = common->action->u.drive_backup.data;

    blk = blk_by_name(backup->device);
    if (!blk) {
@@ -1922,7 +1952,7 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp)
    Error *local_err = NULL;

    assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP);
-    backup = common->action->u.blockdev_backup;
+    backup = common->action->u.blockdev_backup.data;

    blk = blk_by_name(backup->device);
    if (!blk) {
@@ -2008,7 +2038,7 @@ static void block_dirty_bitmap_add_prepare(BlkActionState *common,
        return;
    }

-    action = common->action->u.block_dirty_bitmap_add;
+    action = common->action->u.block_dirty_bitmap_add.data;
    /* AIO context taken and released within qmp_block_dirty_bitmap_add */
    qmp_block_dirty_bitmap_add(action->node, action->name,
                               action->has_granularity, action->granularity,
@@ -2027,7 +2057,7 @@ static void block_dirty_bitmap_add_abort(BlkActionState *common)
    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
                                             common, common);

-    action = common->action->u.block_dirty_bitmap_add;
+    action = common->action->u.block_dirty_bitmap_add.data;
    /* Should not be able to fail: IF the bitmap was added via .prepare(),
     * then the node reference and bitmap name must have been valid.
     */
@@ -2047,7 +2077,7 @@ static void block_dirty_bitmap_clear_prepare(BlkActionState *common,
        return;
    }

-    action = common->action->u.block_dirty_bitmap_clear;
+    action = common->action->u.block_dirty_bitmap_clear.data;
    state->bitmap = block_dirty_bitmap_lookup(action->node,
                                              action->name,
                                              &state->bs,
@@ -2405,11 +2435,6 @@ void qmp_x_blockdev_remove_medium(const char *device, Error **errp)
        goto out;
    }

-    /* This follows the convention established by bdrv_make_anon() */
-    if (bs->device_list.tqe_prev) {
-        bdrv_device_remove(bs);
-    }
-
    blk_remove_bs(blk);

    if (!blk_dev_has_tray(blk)) {
@@ -2457,8 +2482,6 @@ static void qmp_blockdev_insert_anon_medium(const char *device,

    blk_insert_bs(blk, bs);

-    QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
-
    if (!blk_dev_has_tray(blk)) {
        /* For tray-less devices, blockdev-close-tray is a no-op (or may not be
         * called at all); therefore, the medium needs to be pushed into the
@@ -2638,6 +2661,13 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
        goto out;
    }

+    /* The BlockBackend must be the only parent */
+    assert(QLIST_FIRST(&bs->parents));
+    if (QLIST_NEXT(QLIST_FIRST(&bs->parents), next_parent)) {
+        error_setg(errp, "Cannot throttle device with multiple parents");
+        goto out;
+    }
+
    throttle_config_init(&cfg);
    cfg.buckets[THROTTLE_BPS_TOTAL].avg = bps;
    cfg.buckets[THROTTLE_BPS_READ].avg  = bps_rd;
@@ -2816,6 +2846,15 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict)
    AioContext *aio_context;
    Error *local_err = NULL;

+    bs = bdrv_find_node(id);
+    if (bs) {
+        qmp_x_blockdev_del(false, NULL, true, id, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+        }
+        return;
+    }
+
    blk = blk_by_name(id);
    if (!blk) {
        error_report("Device '%s' not found", id);
@@ -2842,13 +2881,13 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict)
        blk_remove_bs(blk);
    }

-    /* if we have a device attached to this BlockDriverState
-     * then we need to make the drive anonymous until the device
-     * can be removed.  If this is a drive with no device backing
-     * then we can just get rid of the block driver state right here.
+    /* Make the BlockBackend and the attached BlockDriverState anonymous */
+    monitor_remove_blk(blk);
+
+    /* If this BlockBackend has a device attached to it, its refcount will be
+     * decremented when the device is removed; otherwise we have to do so here.
     */
    if (blk_get_attached_dev(blk)) {
-        blk_hide_on_behalf_of_hmp_drive_del(blk);
        /* Further I/O must not pause the guest */
        blk_set_on_error(blk, BLOCKDEV_ON_ERROR_REPORT,
                         BLOCKDEV_ON_ERROR_REPORT);
@@ -3867,6 +3906,37 @@ out:
    aio_context_release(aio_context);
 }

+void hmp_drive_add_node(Monitor *mon, const char *optstr)
+{
+    QemuOpts *opts;
+    QDict *qdict;
+    Error *local_err = NULL;
+
+    opts = qemu_opts_parse_noisily(&qemu_drive_opts, optstr, false);
+    if (!opts) {
+        return;
+    }
+
+    qdict = qemu_opts_to_qdict(opts, NULL);
+
+    if (!qdict_get_try_str(qdict, "node-name")) {
+        QDECREF(qdict);
+        error_report("'node-name' needs to be specified");
+        goto out;
+    }
+
+    BlockDriverState *bs = bds_tree_init(qdict, &local_err);
+    if (!bs) {
+        error_report_err(local_err);
+        goto out;
+    }
+
+    QTAILQ_INSERT_TAIL(&monitor_bdrv_states, bs, monitor_list);
+
+out:
+    qemu_opts_del(opts);
+}
+
 void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
 {
    QmpOutputVisitor *ov = qmp_output_visitor_new();
@@ -3928,6 +3998,7 @@ void qmp_blockdev_add(BlockdevOptions *options, Error **errp)

    if (bs && bdrv_key_required(bs)) {
        if (blk) {
+            monitor_remove_blk(blk);
            blk_unref(blk);
        } else {
            QTAILQ_REMOVE(&monitor_bdrv_states, bs, monitor_list);
@@ -3957,11 +4028,17 @@ void qmp_x_blockdev_del(bool has_id, const char *id,
    }

    if (has_id) {
+        /* blk_by_name() never returns a BB that is not owned by the monitor */
        blk = blk_by_name(id);
        if (!blk) {
            error_setg(errp, "Cannot find block backend %s", id);
            return;
        }
+        if (blk_legacy_dinfo(blk)) {
+            error_setg(errp, "Deleting block backend added with drive-add"
+                       " is not supported");
+            return;
+        }
        if (blk_get_refcnt(blk) > 1) {
            error_setg(errp, "Block backend %s is in use", id);
            return;
@@ -4004,6 +4081,7 @@ void qmp_x_blockdev_del(bool has_id, const char *id,
    }

    if (blk) {
+        monitor_remove_blk(blk);
        blk_unref(blk);
    } else {
        QTAILQ_REMOVE(&monitor_bdrv_states, bs, monitor_list);
@@ -4053,6 +4131,10 @@ QemuOptsList qemu_common_drive_opts = {
            .name = "aio",
            .type = QEMU_OPT_STRING,
            .help = "host AIO implementation (threads, native)",
+        },{
+            .name = BDRV_OPT_CACHE_WB,
+            .type = QEMU_OPT_BOOL,
+            .help = "Enable writeback mode",
        },{
            .name = "format",
            .type = QEMU_OPT_STRING,
@@ -4174,7 +4256,7 @@ QemuOptsList qemu_common_drive_opts = {

 static QemuOptsList qemu_root_bds_opts = {
    .name = "root-bds",
-    .head = QTAILQ_HEAD_INITIALIZER(qemu_common_drive_opts.head),
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_root_bds_opts.head),
    .desc = {
        {
            .name = "discard",
--- a/bootdevice.c
+++ b/bootdevice.c
@@ -23,6 +23,7 @@
 */

 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "sysemu/sysemu.h"
 #include "qapi/visitor.h"
 #include "qemu/error-report.h"
--- a/bsd-user/elfload.c
+++ b/bsd-user/elfload.c
@@ -5,6 +5,7 @@

 #include "qemu.h"
 #include "disas/disas.h"
+#include "qemu/path.h"

 #ifdef _ARCH_PPC64
 #undef ARCH_DLINFO
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -21,7 +21,8 @@
 #include <sys/mman.h>

 #include "qemu.h"
-#include "qemu-common.h"
+#include "qemu/path.h"
+#include "qemu/help_option.h"
 /* For tb_lock */
 #include "cpu.h"
 #include "tcg.h"
--- a/bsd-user/syscall.c
+++ b/bsd-user/syscall.c
@@ -17,6 +17,8 @@
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
 #include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "qemu/path.h"
 #include <sys/mman.h>
 #include <sys/syscall.h>
 #include <sys/param.h>
--- a/bsd-user/uaccess.c
+++ b/bsd-user/uaccess.c
@@ -1,5 +1,6 @@
 /* User memory access */
 #include "qemu/osdep.h"
+#include "qemu/cutils.h"

 #include "qemu.h"

@@ -50,7 +51,7 @@ abi_long target_strlen(abi_ulong guest_addr1)
        ptr = lock_user(VERIFY_READ, guest_addr, max_len, 1);
        if (!ptr)
            return -TARGET_EFAULT;
-        len = qemu_strnlen((char *)ptr, max_len);
+        len = qemu_strnlen((const char *)ptr, max_len);
        unlock_user(ptr, guest_addr, 0);
        guest_addr += len;
        /* we don't allow wrapping or integer overflow */
--- a/107
+++ b/107
@@ -280,6 +280,7 @@ libusb=""
 usb_redir=""
 opengl=""
 opengl_dmabuf="no"
+avx2_opt="no"
 zlib="yes"
 lzo=""
 snappy=""
@@ -297,6 +298,7 @@ coroutine=""
 coroutine_pool=""
 seccomp=""
 glusterfs=""
+glusterfs_xlator_opt="no"
 glusterfs_discard="no"
 glusterfs_zerofill="no"
 archipelago="no"
@@ -305,8 +307,11 @@ gtkabi=""
 gtk_gl="no"
 gnutls=""
 gnutls_hash=""
+gnutls_rnd=""
 nettle=""
+nettle_kdf="no"
 gcrypt=""
+gcrypt_kdf="no"
 vte=""
 virglrenderer=""
 tpm="yes"
@@ -1773,6 +1778,21 @@ EOF
 fi

 ##########################################
+# avx2 optimization requirement check
+
+cat > $TMPC << EOF
+static void bar(void) {}
+static void *bar_ifunc(void) {return (void*) bar;}
+static void foo(void) __attribute__((ifunc("bar_ifunc")));
+int main(void) { foo(); return 0; }
+EOF
+if compile_prog "-mavx2" "" ; then
+    if readelf --syms $TMPE |grep "IFUNC.*foo" >/dev/null 2>&1; then
+        avx2_opt="yes"
+    fi
+fi
+
+#########################################
 # zlib check

 if test "$zlib" != "no" ; then
@@ -1853,6 +1873,9 @@ if test "$seccomp" != "no" ; then
    i386|x86_64)
        libseccomp_minver="2.1.0"
        ;;
+    mips)
+        libseccomp_minver="2.2.0"
+        ;;
    arm|aarch64)
        libseccomp_minver="2.2.3"
        ;;
@@ -2185,6 +2208,13 @@ if test "$gnutls" != "no"; then
 	    gnutls_hash="no"
 	fi

+	# gnutls_rnd requires >= 2.11.0
+	if $pkg_config --exists "gnutls >= 2.11.0"; then
+	    gnutls_rnd="yes"
+	else
+	    gnutls_rnd="no"
+	fi
+
 	if $pkg_config --exists 'gnutls >= 3.0'; then
 	    gnutls_gcrypt=no
 	    gnutls_nettle=yes
@@ -2212,9 +2242,11 @@ if test "$gnutls" != "no"; then
    else
        gnutls="no"
        gnutls_hash="no"
+        gnutls_rnd="no"
    fi
 else
    gnutls_hash="no"
+    gnutls_rnd="no"
 fi


@@ -2276,6 +2308,19 @@ if test "$gcrypt" != "no"; then
        if test -z "$nettle"; then
           nettle="no"
        fi
+
+        cat > $TMPC << EOF
+#include <gcrypt.h>
+int main(void) {
+  gcry_kdf_derive(NULL, 0, GCRY_KDF_PBKDF2,
+                  GCRY_MD_SHA256,
+                  NULL, 0, 0, 0, NULL);
+ return 0;
+}
+EOF
+        if compile_prog "$gcrypt_cflags" "$gcrypt_libs" ; then
+            gcrypt_kdf=yes
+        fi
    else
        if test "$gcrypt" = "yes"; then
            feature_not_found "gcrypt" "Install gcrypt devel"
@@ -2295,6 +2340,17 @@ if test "$nettle" != "no"; then
        libs_tools="$nettle_libs $libs_tools"
        QEMU_CFLAGS="$QEMU_CFLAGS $nettle_cflags"
        nettle="yes"
+
+        cat > $TMPC << EOF
+#include <nettle/pbkdf2.h>
+int main(void) {
+     pbkdf2_hmac_sha256(8, NULL, 1000, 8, NULL, 8, NULL);
+     return 0;
+}
+EOF
+        if compile_prog "$nettle_cflags" "$nettle_libs" ; then
+            nettle_kdf=yes
+        fi
    else
        if test "$nettle" = "yes"; then
            feature_not_found "nettle" "Install nettle devel"
@@ -2796,7 +2852,7 @@ fi
 # curses probe
 if test "$curses" != "no" ; then
  if test "$mingw32" = "yes" ; then
-    curses_list="-lpdcurses"
+    curses_list="$($pkg_config --libs ncurses 2>/dev/null):-lpdcurses"
  else
    curses_list="$($pkg_config --libs ncurses 2>/dev/null):-lncurses:-lcurses"
  fi
@@ -3345,6 +3401,9 @@ if test "$glusterfs" != "no" ; then
    glusterfs="yes"
    glusterfs_cflags=`$pkg_config --cflags glusterfs-api`
    glusterfs_libs=`$pkg_config --libs glusterfs-api`
+    if $pkg_config --atleast-version=4 glusterfs-api; then
+      glusterfs_xlator_opt="yes"
+    fi
    if $pkg_config --atleast-version=5 glusterfs-api; then
      glusterfs_discard="yes"
    fi
@@ -4434,6 +4493,21 @@ if test "$fortify_source" != "no"; then
  fi
 fi

+##########################################
+# check if struct fsxattr is available via linux/fs.h
+
+have_fsxattr=no
+cat > $TMPC << EOF
+#include <linux/fs.h>
+struct fsxattr foo;
+int main(void) {
+  return 0;
+}
+EOF
+if compile_prog "" "" ; then
+    have_fsxattr=yes
+fi
+
 ##########################################
 # End of CC checks
 # After here, no more $cc or $ld runs
@@ -4698,12 +4772,15 @@ echo "GTK support       $gtk"
 echo "GTK GL support    $gtk_gl"
 echo "GNUTLS support    $gnutls"
 echo "GNUTLS hash       $gnutls_hash"
+echo "GNUTLS rnd        $gnutls_rnd"
 echo "libgcrypt         $gcrypt"
+echo "libgcrypt kdf     $gcrypt_kdf"
 if test "$nettle" = "yes"; then
    echo "nettle            $nettle ($nettle_version)"
 else
    echo "nettle            $nettle"
 fi
+echo "nettle kdf        $nettle_kdf"
 echo "libtasn1          $tasn1"
 echo "VTE support       $vte"
 echo "curses support    $curses"
@@ -4790,6 +4867,7 @@ echo "bzip2 support     $bzip2"
 echo "NUMA host support $numa"
 echo "tcmalloc support  $tcmalloc"
 echo "jemalloc support  $jemalloc"
+echo "avx2 optimization $avx2_opt"

 if test "$sdl_too_old" = "yes"; then
 echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -5075,12 +5153,21 @@ fi
 if test "$gnutls_hash" = "yes" ; then
  echo "CONFIG_GNUTLS_HASH=y" >> $config_host_mak
 fi
+if test "$gnutls_rnd" = "yes" ; then
+  echo "CONFIG_GNUTLS_RND=y" >> $config_host_mak
+fi
 if test "$gcrypt" = "yes" ; then
  echo "CONFIG_GCRYPT=y" >> $config_host_mak
+  if test "$gcrypt_kdf" = "yes" ; then
+    echo "CONFIG_GCRYPT_KDF=y" >> $config_host_mak
+  fi
 fi
 if test "$nettle" = "yes" ; then
  echo "CONFIG_NETTLE=y" >> $config_host_mak
  echo "CONFIG_NETTLE_VERSION_MAJOR=${nettle_version%%.*}" >> $config_host_mak
+  if test "$nettle_kdf" = "yes" ; then
+    echo "CONFIG_NETTLE_KDF=y" >> $config_host_mak
+  fi
 fi
 if test "$tasn1" = "yes" ; then
  echo "CONFIG_TASN1=y" >> $config_host_mak
@@ -5088,6 +5175,14 @@ fi
 if test "$have_ifaddrs_h" = "yes" ; then
    echo "HAVE_IFADDRS_H=y" >> $config_host_mak
 fi
+
+# Work around a system header bug with some kernel/XFS header
+# versions where they both try to define 'struct fsxattr':
+# xfs headers will not try to redefine structs from linux headers
+# if this macro is set.
+if test "$have_fsxattr" = "yes" ; then
+    echo "HAVE_FSXATTR=y" >> $config_host_mak
+fi
 if test "$vte" = "yes" ; then
  echo "CONFIG_VTE=y" >> $config_host_mak
  echo "VTE_CFLAGS=$vte_cflags" >> $config_host_mak
@@ -5178,6 +5273,10 @@ if test "$opengl" = "yes" ; then
  fi
 fi

+if test "$avx2_opt" = "yes" ; then
+  echo "CONFIG_AVX2_OPT=y" >> $config_host_mak
+fi
+
 if test "$lzo" = "yes" ; then
  echo "CONFIG_LZO=y" >> $config_host_mak
 fi
@@ -5270,6 +5369,10 @@ if test "$glusterfs" = "yes" ; then
  echo "GLUSTERFS_LIBS=$glusterfs_libs" >> $config_host_mak
 fi

+if test "$glusterfs_xlator_opt" = "yes" ; then
+  echo "CONFIG_GLUSTERFS_XLATOR_OPT=y" >> $config_host_mak
+fi
+
 if test "$glusterfs_discard" = "yes" ; then
  echo "CONFIG_GLUSTERFS_DISCARD=y" >> $config_host_mak
 fi
@@ -5889,7 +5992,7 @@ cat <<EOD >config.status
 EOD
 printf "exec" >>config.status
 printf " '%s'" "$0" "$@" >>config.status
-echo >>config.status
+echo ' "$@"' >>config.status
 chmod +x config.status

 rm -r "$TMPDIR1"
--- a/contrib/ivshmem-server/ivshmem-server.c
+++ b/contrib/ivshmem-server/ivshmem-server.c
@@ -12,9 +12,6 @@
 #include <sys/mman.h>
 #include <sys/socket.h>
 #include <sys/un.h>
-#ifdef CONFIG_LINUX
-#include <sys/vfs.h>
-#endif

 #include "ivshmem-server.h"

@@ -257,7 +254,8 @@ ivshmem_server_ftruncate(int fd, unsigned shmsize)
 /* Init a new ivshmem server */
 int
 ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path,
-                    const char *shm_path, size_t shm_size, unsigned n_vectors,
+                    const char *shm_path, bool use_shm_open,
+                    size_t shm_size, unsigned n_vectors,
                    bool verbose)
 {
    int ret;
@@ -278,6 +276,7 @@ ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path,
        return -1;
    }

+    server->use_shm_open = use_shm_open;
    server->shm_size = shm_size;
    server->n_vectors = n_vectors;

@@ -286,31 +285,6 @@ ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path,
    return 0;
 }

-#ifdef CONFIG_LINUX
-
-#define HUGETLBFS_MAGIC       0x958458f6
-
-static long gethugepagesize(const char *path)
-{
-    struct statfs fs;
-    int ret;
-
-    do {
-        ret = statfs(path, &fs);
-    } while (ret != 0 && errno == EINTR);
-
-    if (ret != 0) {
-        return -1;
-    }
-
-    if (fs.f_type != HUGETLBFS_MAGIC) {
-        return -1;
-    }
-
-    return fs.f_bsize;
-}
-#endif
-
 /* open shm, create and bind to the unix socket */
 int
 ivshmem_server_start(IvshmemServer *server)
@@ -319,27 +293,17 @@ ivshmem_server_start(IvshmemServer *server)
    int shm_fd, sock_fd, ret;

    /* open shm file */
-#ifdef CONFIG_LINUX
-    long hpagesize;
-
-    hpagesize = gethugepagesize(server->shm_path);
-    if (hpagesize < 0 && errno != ENOENT) {
-        IVSHMEM_SERVER_DEBUG(server, "cannot stat shm file %s: %s\n",
-                             server->shm_path, strerror(errno));
-    }
-
-    if (hpagesize > 0) {
+    if (server->use_shm_open) {
+        IVSHMEM_SERVER_DEBUG(server, "Using POSIX shared memory: %s\n",
+                             server->shm_path);
+        shm_fd = shm_open(server->shm_path, O_CREAT | O_RDWR, S_IRWXU);
+    } else {
        gchar *filename = g_strdup_printf("%s/ivshmem.XXXXXX", server->shm_path);
-        IVSHMEM_SERVER_DEBUG(server, "Using hugepages: %s\n", server->shm_path);
+        IVSHMEM_SERVER_DEBUG(server, "Using file-backed shared memory: %s\n",
+                             server->shm_path);
        shm_fd = mkstemp(filename);
        unlink(filename);
        g_free(filename);
-    } else
-#endif
-    {
-        IVSHMEM_SERVER_DEBUG(server, "Using POSIX shared memory: %s\n",
-                             server->shm_path);
-        shm_fd = shm_open(server->shm_path, O_CREAT|O_RDWR, S_IRWXU);
    }

    if (shm_fd < 0) {
--- a/contrib/ivshmem-server/ivshmem-server.h
+++ b/contrib/ivshmem-server/ivshmem-server.h
@@ -66,6 +66,7 @@ typedef struct IvshmemServer {
    char unix_sock_path[PATH_MAX];   /**< path to unix socket */
    int sock_fd;                     /**< unix sock file descriptor */
    char shm_path[PATH_MAX];         /**< path to shm */
+    bool use_shm_open;
    size_t shm_size;                 /**< size of shm */
    int shm_fd;                      /**< shm file descriptor */
    unsigned n_vectors;              /**< number of vectors */
@@ -89,7 +90,8 @@ typedef struct IvshmemServer {
 */
 int
 ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path,
-                    const char *shm_path, size_t shm_size, unsigned n_vectors,
+                    const char *shm_path, bool use_shm_open,
+                    size_t shm_size, unsigned n_vectors,
                    bool verbose);

 /**
--- a/contrib/ivshmem-server/main.c
+++ b/contrib/ivshmem-server/main.c
@@ -7,7 +7,8 @@
 */

 #include "qemu/osdep.h"
-#include "qemu-common.h"
+#include "qapi/error.h"
+#include "qemu/cutils.h"

 #include "ivshmem-server.h"

@@ -29,35 +30,38 @@ typedef struct IvshmemServerArgs {
    const char *pid_file;
    const char *unix_socket_path;
    const char *shm_path;
+    bool use_shm_open;
    uint64_t shm_size;
    unsigned n_vectors;
 } IvshmemServerArgs;

-/* show ivshmem_server_usage and exit with given error code */
 static void
-ivshmem_server_usage(const char *name, int code)
+ivshmem_server_usage(const char *progname)
 {
-    fprintf(stderr, "%s [opts]\n", name);
-    fprintf(stderr, "  -h: show this help\n");
-    fprintf(stderr, "  -v: verbose mode\n");
-    fprintf(stderr, "  -F: foreground mode (default is to daemonize)\n");
-    fprintf(stderr, "  -p <pid_file>: path to the PID file (used in daemon\n"
-                    "     mode only).\n"
-                    "     Default=%s\n", IVSHMEM_SERVER_DEFAULT_SHM_PATH);
-    fprintf(stderr, "  -S <unix_socket_path>: path to the unix socket\n"
-                    "     to listen to.\n"
-                    "     Default=%s\n", IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH);
-    fprintf(stderr, "  -m <shm_path>: path to the shared memory.\n"
-                    "     The path corresponds to a POSIX shm name or a\n"
-                    "     hugetlbfs mount point.\n"
-                    "     default=%s\n", IVSHMEM_SERVER_DEFAULT_SHM_PATH);
-    fprintf(stderr, "  -l <size>: size of shared memory in bytes. The suffix\n"
-                    "     K, M and G can be used (ex: 1K means 1024).\n"
-                    "     default=%u\n", IVSHMEM_SERVER_DEFAULT_SHM_SIZE);
-    fprintf(stderr, "  -n <n_vects>: number of vectors.\n"
-                    "     default=%u\n", IVSHMEM_SERVER_DEFAULT_N_VECTORS);
+    printf("Usage: %s [OPTION]...\n"
+           "  -h: show this help\n"
+           "  -v: verbose mode\n"
+           "  -F: foreground mode (default is to daemonize)\n"
+           "  -p <pid-file>: path to the PID file (used in daemon mode only)\n"
+           "     default " IVSHMEM_SERVER_DEFAULT_PID_FILE "\n"
+           "  -S <unix-socket-path>: path to the unix socket to listen to\n"
+           "     default " IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH "\n"
+           "  -M <shm-name>: POSIX shared memory object to use\n"
+           "     default " IVSHMEM_SERVER_DEFAULT_SHM_PATH "\n"
+           "  -m <dir-name>: where to create shared memory\n"
+           "  -l <size>: size of shared memory in bytes\n"
+           "     suffixes K, M and G can be used, e.g. 1K means 1024\n"
+           "     default %u\n"
+           "  -n <nvectors>: number of vectors\n"
+           "     default %u\n",
+           progname, IVSHMEM_SERVER_DEFAULT_SHM_SIZE,
+           IVSHMEM_SERVER_DEFAULT_N_VECTORS);
+}

-    exit(code);
+static void
+ivshmem_server_help(const char *progname)
+{
+    fprintf(stderr, "Try '%s -h' for more information.\n", progname);
 }

 /* parse the program arguments, exit on error */
@@ -68,20 +72,12 @@ ivshmem_server_parse_args(IvshmemServerArgs *args, int argc, char *argv[])
    unsigned long long v;
    Error *err = NULL;

-    while ((c = getopt(argc, argv,
-                       "h"  /* help */
-                       "v"  /* verbose */
-                       "F"  /* foreground */
-                       "p:" /* pid_file */
-                       "S:" /* unix_socket_path */
-                       "m:" /* shm_path */
-                       "l:" /* shm_size */
-                       "n:" /* n_vectors */
-                      )) != -1) {
+    while ((c = getopt(argc, argv, "hvFp:S:m:M:l:n:")) != -1) {

        switch (c) {
        case 'h': /* help */
-            ivshmem_server_usage(argv[0], 0);
+            ivshmem_server_usage(argv[0]);
+            exit(0);
            break;

        case 'v': /* verbose */
@@ -92,36 +88,41 @@ ivshmem_server_parse_args(IvshmemServerArgs *args, int argc, char *argv[])
            args->foreground = 1;
            break;

-        case 'p': /* pid_file */
+        case 'p': /* pid file */
            args->pid_file = optarg;
            break;

-        case 'S': /* unix_socket_path */
+        case 'S': /* unix socket path */
            args->unix_socket_path = optarg;
            break;

-        case 'm': /* shm_path */
+        case 'M': /* shm name */
+        case 'm': /* dir name */
            args->shm_path = optarg;
+            args->use_shm_open = c == 'M';
            break;

-        case 'l': /* shm_size */
+        case 'l': /* shm size */
            parse_option_size("shm_size", optarg, &args->shm_size, &err);
            if (err) {
                error_report_err(err);
-                ivshmem_server_usage(argv[0], 1);
+                ivshmem_server_help(argv[0]);
+                exit(1);
            }
            break;

-        case 'n': /* n_vectors */
+        case 'n': /* number of vectors */
            if (parse_uint_full(optarg, &v, 0) < 0) {
                fprintf(stderr, "cannot parse n_vectors\n");
-                ivshmem_server_usage(argv[0], 1);
+                ivshmem_server_help(argv[0]);
+                exit(1);
            }
            args->n_vectors = v;
            break;

        default:
-            ivshmem_server_usage(argv[0], 1);
+            ivshmem_server_usage(argv[0]);
+            exit(1);
            break;
        }
    }
@@ -129,12 +130,14 @@ ivshmem_server_parse_args(IvshmemServerArgs *args, int argc, char *argv[])
    if (args->n_vectors > IVSHMEM_SERVER_MAX_VECTORS) {
        fprintf(stderr, "too many requested vectors (max is %d)\n",
                IVSHMEM_SERVER_MAX_VECTORS);
-        ivshmem_server_usage(argv[0], 1);
+        ivshmem_server_help(argv[0]);
+        exit(1);
    }

    if (args->verbose == 1 && args->foreground == 0) {
        fprintf(stderr, "cannot use verbose in daemon mode\n");
-        ivshmem_server_usage(argv[0], 1);
+        ivshmem_server_help(argv[0]);
+        exit(1);
    }
 }

@@ -192,11 +195,18 @@ main(int argc, char *argv[])
        .pid_file = IVSHMEM_SERVER_DEFAULT_PID_FILE,
        .unix_socket_path = IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH,
        .shm_path = IVSHMEM_SERVER_DEFAULT_SHM_PATH,
+        .use_shm_open = true,
        .shm_size = IVSHMEM_SERVER_DEFAULT_SHM_SIZE,
        .n_vectors = IVSHMEM_SERVER_DEFAULT_N_VECTORS,
    };
    int ret = 1;

+    /*
+     * Do not remove this notice without adding proper error handling!
+     * Start with handling ivshmem_server_send_one_msg() failure.
+     */
+    printf("*** Example code, do not use in production ***\n");
+
    /* parse arguments, will exit on error */
    ivshmem_server_parse_args(&args, argc, argv);

@@ -219,7 +229,8 @@ main(int argc, char *argv[])
    }

    /* init the ivshms structure */
-    if (ivshmem_server_init(&server, args.unix_socket_path, args.shm_path,
+    if (ivshmem_server_init(&server, args.unix_socket_path,
+                            args.shm_path, args.use_shm_open,
                            args.shm_size, args.n_vectors, args.verbose) < 0) {
        fprintf(stderr, "cannot init server\n");
        goto err;
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -133,10 +133,15 @@ static void init_delay_params(SyncClocks *sc, const CPUState *cpu)
 #endif /* CONFIG USER ONLY */

 /* Execute a TB, and fix up the CPU state afterwards if necessary */
-static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, uint8_t *tb_ptr)
+static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb)
 {
    CPUArchState *env = cpu->env_ptr;
    uintptr_t next_tb;
+    uint8_t *tb_ptr = itb->tc_ptr;
+
+    qemu_log_mask_and_addr(CPU_LOG_EXEC, itb->pc,
+                           "Trace %p [" TARGET_FMT_lx "] %s\n",
+                           itb->tc_ptr, itb->pc, lookup_symbol(itb->pc));

 #if defined(DEBUG_DISAS)
    if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
@@ -167,6 +172,10 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, uint8_t *tb_ptr)
         */
        CPUClass *cc = CPU_GET_CLASS(cpu);
        TranslationBlock *tb = (TranslationBlock *)(next_tb & ~TB_EXIT_MASK);
+        qemu_log_mask_and_addr(CPU_LOG_EXEC, itb->pc,
+                               "Stopped execution of TB chain before %p ["
+                               TARGET_FMT_lx "] %s\n",
+                               itb->tc_ptr, itb->pc, lookup_symbol(itb->pc));
        if (cc->synchronize_from_tb) {
            cc->synchronize_from_tb(cpu, tb);
        } else {
@@ -202,7 +211,7 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
    cpu->current_tb = tb;
    /* execute the generated code */
    trace_exec_tb_nocache(tb, tb->pc);
-    cpu_tb_exec(cpu, tb->tc_ptr);
+    cpu_tb_exec(cpu, tb);
    cpu->current_tb = NULL;
    tb_phys_invalidate(tb, -1);
    tb_free(tb);
@@ -344,7 +353,6 @@ int cpu_exec(CPUState *cpu)
 #endif
    int ret, interrupt_request;
    TranslationBlock *tb;
-    uint8_t *tc_ptr;
    uintptr_t next_tb;
    SyncClocks sc;

@@ -500,10 +508,6 @@ int cpu_exec(CPUState *cpu)
                    next_tb = 0;
                    tcg_ctx.tb_ctx.tb_invalidated_flag = 0;
                }
-                if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
-                    qemu_log("Trace %p [" TARGET_FMT_lx "] %s\n",
-                             tb->tc_ptr, tb->pc, lookup_symbol(tb->pc));
-                }
                /* see if we can patch the calling TB. When the TB
                   spans two pages, we cannot safely do a direct
                   jump. */
@@ -515,10 +519,9 @@ int cpu_exec(CPUState *cpu)
                tb_unlock();
                if (likely(!cpu->exit_request)) {
                    trace_exec_tb(tb, tb->pc);
-                    tc_ptr = tb->tc_ptr;
                    /* execute the generated code */
                    cpu->current_tb = tb;
-                    next_tb = cpu_tb_exec(cpu, tc_ptr);
+                    next_tb = cpu_tb_exec(cpu, tb);
                    cpu->current_tb = NULL;
                    switch (next_tb & TB_EXIT_MASK) {
                    case TB_EXIT_REQUESTED:
--- a/cpus.c
+++ b/cpus.c
@@ -29,6 +29,7 @@
 #include "qapi/qmp/qerror.h"
 #include "qemu/error-report.h"
 #include "sysemu/sysemu.h"
+#include "sysemu/block-backend.h"
 #include "exec/gdbstub.h"
 #include "sysemu/dma.h"
 #include "sysemu/kvm.h"
@@ -275,7 +276,7 @@ void cpu_disable_ticks(void)
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop.  */
-#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
+#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)

 static void icount_adjust(void)
 {
@@ -326,7 +327,7 @@ static void icount_adjust_vm(void *opaque)
 {
    timer_mod(icount_vm_timer,
                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
-                   get_ticks_per_sec() / 10);
+                   NANOSECONDS_PER_SECOND / 10);
    icount_adjust();
 }

@@ -337,10 +338,18 @@ static int64_t qemu_icount_round(int64_t count)

 static void icount_warp_rt(void)
 {
+    unsigned seq;
+    int64_t warp_start;
+
    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
-    if (atomic_read(&vm_clock_warp_start) == -1) {
+    do {
+        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
+        warp_start = vm_clock_warp_start;
+    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
+
+    if (warp_start == -1) {
        return;
    }

@@ -370,9 +379,12 @@ static void icount_warp_rt(void)
    }
 }

-static void icount_dummy_timer(void *opaque)
+static void icount_timer_cb(void *opaque)
 {
-    (void)opaque;
+    /* No need for a checkpoint because the timer already synchronizes
+     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
+     */
+    icount_warp_rt();
 }

 void qtest_clock_warp(int64_t dest)
@@ -396,17 +408,12 @@ void qtest_clock_warp(int64_t dest)
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
 }

-void qemu_clock_warp(QEMUClockType type)
+void qemu_start_warp_timer(void)
 {
    int64_t clock;
    int64_t deadline;

-    /*
-     * There are too many global variables to make the "warp" behavior
-     * applicable to other clocks.  But a clock argument removes the
-     * need for if statements all over the place.
-     */
-    if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
+    if (!use_icount) {
        return;
    }

@@ -418,29 +425,17 @@ void qemu_clock_warp(QEMUClockType type)
    }

    /* warp clock deterministically in record/replay mode */
-    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP)) {
+    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
        return;
    }

-    if (icount_sleep) {
-        /*
-         * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
-         * This ensures that the deadline for the timer is computed correctly
-         * below.
-         * This also makes sure that the insn counter is synchronized before
-         * the CPU starts running, in case the CPU is woken by an event other
-         * than the earliest QEMU_CLOCK_VIRTUAL timer.
-         */
-        icount_warp_rt();
-        timer_del(icount_warp_timer);
-    }
    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount.  */
-	return;
+        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
@@ -496,6 +491,28 @@ void qemu_clock_warp(QEMUClockType type)
    }
 }

+static void qemu_account_warp_timer(void)
+{
+    if (!use_icount || !icount_sleep) {
+        return;
+    }
+
+    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
+     * do not fire, so computing the deadline does not make sense.
+     */
+    if (!runstate_is_running()) {
+        return;
+    }
+
+    /* warp clock deterministically in record/replay mode */
+    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
+        return;
+    }
+
+    timer_del(icount_warp_timer);
+    icount_warp_rt();
+}
+
 static bool icount_state_needed(void *opaque)
 {
    return use_icount;
@@ -624,13 +641,13 @@ void configure_icount(QemuOpts *opts, Error **errp)
    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
    if (icount_sleep) {
        icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
-                                         icount_dummy_timer, NULL);
+                                         icount_timer_cb, NULL);
    }

    icount_align_option = qemu_opt_get_bool(opts, "align", false);

    if (icount_align_option && !icount_sleep) {
-        error_setg(errp, "align=on and sleep=no are incompatible");
+        error_setg(errp, "align=on and sleep=off are incompatible");
    }
    if (strcmp(option, "auto") != 0) {
        errno = 0;
@@ -643,7 +660,7 @@ void configure_icount(QemuOpts *opts, Error **errp)
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    } else if (!icount_sleep) {
-        error_setg(errp, "shift=auto and sleep=no are incompatible");
+        error_setg(errp, "shift=auto and sleep=off are incompatible");
    }

    use_icount = 2;
@@ -665,7 +682,7 @@ void configure_icount(QemuOpts *opts, Error **errp)
                                        icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
-                   get_ticks_per_sec() / 10);
+                   NANOSECONDS_PER_SECOND / 10);
 }

 /***********************************************************/
@@ -726,7 +743,7 @@ static int do_vm_stop(RunState state)
    }

    bdrv_drain_all();
-    ret = bdrv_flush_all();
+    ret = blk_flush_all();

    return ret;
 }
@@ -995,9 +1012,6 @@ static void qemu_wait_io_event_common(CPUState *cpu)
 static void qemu_tcg_wait_io_event(CPUState *cpu)
 {
    while (all_cpu_threads_idle()) {
-       /* Start accounting real time to the virtual clock if the CPUs
-          are idle.  */
-        qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

@@ -1428,7 +1442,7 @@ int vm_stop_force_state(RunState state)
        bdrv_drain_all();
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
-        return bdrv_flush_all();
+        return blk_flush_all();
    }
 }

@@ -1499,7 +1513,7 @@ static void tcg_exec_all(void)
    int r;

    /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
-    qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
+    qemu_account_warp_timer();

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
--- a/cputlb.c
+++ b/cputlb.c
@@ -30,8 +30,30 @@
 #include "exec/ram_addr.h"
 #include "tcg/tcg.h"

-//#define DEBUG_TLB
-//#define DEBUG_TLB_CHECK
+/* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
+/* #define DEBUG_TLB */
+/* #define DEBUG_TLB_LOG */
+
+#ifdef DEBUG_TLB
+# define DEBUG_TLB_GATE 1
+# ifdef DEBUG_TLB_LOG
+#  define DEBUG_TLB_LOG_GATE 1
+# else
+#  define DEBUG_TLB_LOG_GATE 0
+# endif
+#else
+# define DEBUG_TLB_GATE 0
+# define DEBUG_TLB_LOG_GATE 0
+#endif
+
+#define tlb_debug(fmt, ...) do { \
+    if (DEBUG_TLB_LOG_GATE) { \
+        qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
+                      ## __VA_ARGS__); \
+    } else if (DEBUG_TLB_GATE) { \
+        fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
+    } \
+} while (0)

 /* statistics */
 int tlb_flush_count;
@@ -52,9 +74,8 @@ void tlb_flush(CPUState *cpu, int flush_global)
 {
    CPUArchState *env = cpu->env_ptr;

-#if defined(DEBUG_TLB)
-    printf("tlb_flush:\n");
-#endif
+    tlb_debug("(%d)\n", flush_global);
+
    /* must reset current TB so that interrupts cannot modify the
       links while we are modifying them */
    cpu->current_tb = NULL;
@@ -73,9 +94,7 @@ static inline void v_tlb_flush_by_mmuidx(CPUState *cpu, va_list argp)
 {
    CPUArchState *env = cpu->env_ptr;

-#if defined(DEBUG_TLB)
-    printf("tlb_flush_by_mmuidx:");
-#endif
+    tlb_debug("start\n");
    /* must reset current TB so that interrupts cannot modify the
       links while we are modifying them */
    cpu->current_tb = NULL;
@@ -87,18 +106,12 @@ static inline void v_tlb_flush_by_mmuidx(CPUState *cpu, va_list argp)
            break;
        }

-#if defined(DEBUG_TLB)
-        printf(" %d", mmu_idx);
-#endif
+        tlb_debug("%d\n", mmu_idx);

        memset(env->tlb_table[mmu_idx], -1, sizeof(env->tlb_table[0]));
        memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0]));
    }

-#if defined(DEBUG_TLB)
-    printf("\n");
-#endif
-
    memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
 }

@@ -128,16 +141,14 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
    int i;
    int mmu_idx;

-#if defined(DEBUG_TLB)
-    printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
-#endif
+    tlb_debug("page :" TARGET_FMT_lx "\n", addr);
+
    /* Check if we need to flush due to large pages.  */
    if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
-#if defined(DEBUG_TLB)
-        printf("tlb_flush_page: forced full flush ("
-               TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
-               env->tlb_flush_addr, env->tlb_flush_mask);
-#endif
+        tlb_debug("forcing full flush ("
+                  TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
+                  env->tlb_flush_addr, env->tlb_flush_mask);
+
        tlb_flush(cpu, 1);
        return;
    }
@@ -170,16 +181,14 @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...)

    va_start(argp, addr);

-#if defined(DEBUG_TLB)
-    printf("tlb_flush_page_by_mmu_idx: " TARGET_FMT_lx, addr);
-#endif
+    tlb_debug("addr "TARGET_FMT_lx"\n", addr);
+
    /* Check if we need to flush due to large pages.  */
    if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
-#if defined(DEBUG_TLB)
-        printf(" forced full flush ("
-               TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
-               env->tlb_flush_addr, env->tlb_flush_mask);
-#endif
+        tlb_debug("forced full flush ("
+                  TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
+                  env->tlb_flush_addr, env->tlb_flush_mask);
+
        v_tlb_flush_by_mmuidx(cpu, argp);
        va_end(argp);
        return;
@@ -198,9 +207,7 @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...)
            break;
        }

-#if defined(DEBUG_TLB)
-        printf(" %d", mmu_idx);
-#endif
+        tlb_debug("idx %d\n", mmu_idx);

        tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);

@@ -211,10 +218,6 @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...)
    }
    va_end(argp);

-#if defined(DEBUG_TLB)
-    printf("\n");
-#endif
-
    tb_flush_jmp_cache(cpu, addr);
 }

@@ -367,12 +370,9 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
    section = address_space_translate_for_iotlb(cpu, asidx, paddr, &xlat, &sz);
    assert(sz >= TARGET_PAGE_SIZE);

-#if defined(DEBUG_TLB)
-    qemu_log_mask(CPU_LOG_MMU,
-           "tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
-           " prot=%x idx=%d\n",
-           vaddr, paddr, prot, mmu_idx);
-#endif
+    tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
+              " prot=%x idx=%d\n",
+              vaddr, paddr, prot, mmu_idx);

    address = vaddr;
    if (!memory_region_is_ram(section->mr) && !memory_region_is_romd(section->mr)) {
@@ -416,8 +416,8 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
            /* Write access calls the I/O callback.  */
            te->addr_write = address | TLB_MMIO;
        } else if (memory_region_is_ram(section->mr)
-                   && cpu_physical_memory_is_clean(section->mr->ram_addr
-                                                   + xlat)) {
+                   && cpu_physical_memory_is_clean(
+                        memory_region_get_ram_addr(section->mr) + xlat)) {
            te->addr_write = address | TLB_NOTDIRTY;
        } else {
            te->addr_write = address;
--- a/crypto/Makefile.objs
+++ b/crypto/Makefile.objs
@@ -8,6 +8,23 @@ crypto-obj-y += tlscredsanon.o
 crypto-obj-y += tlscredsx509.o
 crypto-obj-y += tlssession.o
 crypto-obj-y += secret.o
+crypto-obj-$(CONFIG_GCRYPT) += random-gcrypt.o
+crypto-obj-$(if $(CONFIG_GCRYPT),n,$(CONFIG_GNUTLS_RND)) += random-gnutls.o
+crypto-obj-y += pbkdf.o
+crypto-obj-$(CONFIG_NETTLE_KDF) += pbkdf-nettle.o
+crypto-obj-$(if $(CONFIG_NETTLE_KDF),n,$(CONFIG_GCRYPT_KDF)) += pbkdf-gcrypt.o
+crypto-obj-y += ivgen.o
+crypto-obj-y += ivgen-essiv.o
+crypto-obj-y += ivgen-plain.o
+crypto-obj-y += ivgen-plain64.o
+crypto-obj-y += afsplit.o
+crypto-obj-y += xts.o
+crypto-obj-y += block.o
+crypto-obj-y += block-qcow.o
+crypto-obj-y += block-luks.o

 # Let the userspace emulators avoid linking gnutls/etc
 crypto-aes-obj-y = aes.o
+
+stub-obj-y += random-stub.o
+stub-obj-y += pbkdf-stub.o
--- a/crypto/afsplit.c
+++ b/crypto/afsplit.c
@@ -0,0 +1,158 @@
+/*
+ * QEMU Crypto anti forensic information splitter
+ *
+ * Copyright (c) 2015-2016 Red Hat, Inc.
+ *
+ * Derived from cryptsetup package lib/luks1/af.c
+ *
+ * Copyright (C) 2004, Clemens Fruhwirth <clemens@endorphin.org>
+ * Copyright (C) 2009-2012, Red Hat, Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "crypto/afsplit.h"
+#include "crypto/random.h"
+
+
+static void qcrypto_afsplit_xor(size_t blocklen,
+                                const uint8_t *in1,
+                                const uint8_t *in2,
+                                uint8_t *out)
+{
+    size_t i;
+    for (i = 0; i < blocklen; i++) {
+        out[i] = in1[i] ^ in2[i];
+    }
+}
+
+
+static int qcrypto_afsplit_hash(QCryptoHashAlgorithm hash,
+                                size_t blocklen,
+                                uint8_t *block,
+                                Error **errp)
+{
+    size_t digestlen = qcrypto_hash_digest_len(hash);
+
+    size_t hashcount = blocklen / digestlen;
+    size_t finallen = blocklen % digestlen;
+    uint32_t i;
+
+    if (finallen) {
+        hashcount++;
+    } else {
+        finallen = digestlen;
+    }
+
+    for (i = 0; i < hashcount; i++) {
+        uint8_t *out = NULL;
+        size_t outlen = 0;
+        uint32_t iv = cpu_to_be32(i);
+        struct iovec in[] = {
+            { .iov_base = &iv,
+              .iov_len = sizeof(iv) },
+            { .iov_base = block + (i * digestlen),
+              .iov_len = (i == (hashcount - 1)) ? finallen : digestlen },
+        };
+
+        if (qcrypto_hash_bytesv(hash,
+                                in,
+                                G_N_ELEMENTS(in),
+                                &out, &outlen,
+                                errp) < 0) {
+            return -1;
+        }
+
+        assert(outlen == digestlen);
+        memcpy(block + (i * digestlen), out,
+               (i == (hashcount - 1)) ? finallen : digestlen);
+        g_free(out);
+    }
+
+    return 0;
+}
+
+
+int qcrypto_afsplit_encode(QCryptoHashAlgorithm hash,
+                           size_t blocklen,
+                           uint32_t stripes,
+                           const uint8_t *in,
+                           uint8_t *out,
+                           Error **errp)
+{
+    uint8_t *block = g_new0(uint8_t, blocklen);
+    size_t i;
+    int ret = -1;
+
+    for (i = 0; i < (stripes - 1); i++) {
+        if (qcrypto_random_bytes(out + (i * blocklen), blocklen, errp) < 0) {
+            goto cleanup;
+        }
+
+        qcrypto_afsplit_xor(blocklen,
+                            out + (i * blocklen),
+                            block,
+                            block);
+        if (qcrypto_afsplit_hash(hash, blocklen, block,
+                                 errp) < 0) {
+            goto cleanup;
+        }
+    }
+    qcrypto_afsplit_xor(blocklen,
+                        in,
+                        block,
+                        out + (i * blocklen));
+    ret = 0;
+
+ cleanup:
+    g_free(block);
+    return ret;
+}
+
+
+int qcrypto_afsplit_decode(QCryptoHashAlgorithm hash,
+                           size_t blocklen,
+                           uint32_t stripes,
+                           const uint8_t *in,
+                           uint8_t *out,
+                           Error **errp)
+{
+    uint8_t *block = g_new0(uint8_t, blocklen);
+    size_t i;
+    int ret = -1;
+
+    for (i = 0; i < (stripes - 1); i++) {
+        qcrypto_afsplit_xor(blocklen,
+                            in + (i * blocklen),
+                            block,
+                            block);
+        if (qcrypto_afsplit_hash(hash, blocklen, block,
+                                 errp) < 0) {
+            goto cleanup;
+        }
+    }
+
+    qcrypto_afsplit_xor(blocklen,
+                        in + (i * blocklen),
+                        block,
+                        out);
+
+    ret = 0;
+
+ cleanup:
+    g_free(block);
+    return ret;
+}
--- a/crypto/block-luks.c
+++ b/crypto/block-luks.c
--- a/crypto/block-luks.h
+++ b/crypto/block-luks.h
@@ -0,0 +1,28 @@
+/*
+ * QEMU Crypto block device encryption LUKS format
+ *
+ * Copyright (c) 2015-2016 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef QCRYPTO_BLOCK_LUKS_H__
+#define QCRYPTO_BLOCK_LUKS_H__
+
+#include "crypto/blockpriv.h"
+
+extern const QCryptoBlockDriver qcrypto_block_driver_luks;
+
+#endif /* QCRYPTO_BLOCK_LUKS_H__ */
--- a/crypto/block-qcow.c
+++ b/crypto/block-qcow.c
@@ -0,0 +1,174 @@
+/*
+ * QEMU Crypto block device encryption QCow/QCow2 AES-CBC format
+ *
+ * Copyright (c) 2015-2016 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+/*
+ * Note that the block encryption implemented in this file is broken
+ * by design. This exists only to allow data to be liberated from
+ * existing qcow[2] images and should not be used in any new areas.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+
+#include "crypto/block-qcow.h"
+#include "crypto/secret.h"
+
+#define QCRYPTO_BLOCK_QCOW_SECTOR_SIZE 512
+
+
+static bool
+qcrypto_block_qcow_has_format(const uint8_t *buf G_GNUC_UNUSED,
+                              size_t buf_size G_GNUC_UNUSED)
+{
+    return false;
+}
+
+
+static int
+qcrypto_block_qcow_init(QCryptoBlock *block,
+                        const char *keysecret,
+                        Error **errp)
+{
+    char *password;
+    int ret;
+    uint8_t keybuf[16];
+    int len;
+
+    memset(keybuf, 0, 16);
+
+    password = qcrypto_secret_lookup_as_utf8(keysecret, errp);
+    if (!password) {
+        return -1;
+    }
+
+    len = strlen(password);
+    memcpy(keybuf, password, MIN(len, sizeof(keybuf)));
+    g_free(password);
+
+    block->niv = qcrypto_cipher_get_iv_len(QCRYPTO_CIPHER_ALG_AES_128,
+                                           QCRYPTO_CIPHER_MODE_CBC);
+    block->ivgen = qcrypto_ivgen_new(QCRYPTO_IVGEN_ALG_PLAIN64,
+                                     0, 0, NULL, 0, errp);
+    if (!block->ivgen) {
+        ret = -ENOTSUP;
+        goto fail;
+    }
+
+    block->cipher = qcrypto_cipher_new(QCRYPTO_CIPHER_ALG_AES_128,
+                                       QCRYPTO_CIPHER_MODE_CBC,
+                                       keybuf, G_N_ELEMENTS(keybuf),
+                                       errp);
+    if (!block->cipher) {
+        ret = -ENOTSUP;
+        goto fail;
+    }
+
+    block->payload_offset = 0;
+
+    return 0;
+
+ fail:
+    qcrypto_cipher_free(block->cipher);
+    qcrypto_ivgen_free(block->ivgen);
+    return ret;
+}
+
+
+static int
+qcrypto_block_qcow_open(QCryptoBlock *block,
+                        QCryptoBlockOpenOptions *options,
+                        QCryptoBlockReadFunc readfunc G_GNUC_UNUSED,
+                        void *opaque G_GNUC_UNUSED,
+                        unsigned int flags,
+                        Error **errp)
+{
+    if (flags & QCRYPTO_BLOCK_OPEN_NO_IO) {
+        return 0;
+    } else {
+        if (!options->u.qcow.key_secret) {
+            error_setg(errp,
+                       "Parameter 'key-secret' is required for cipher");
+            return -1;
+        }
+        return qcrypto_block_qcow_init(block,
+                                       options->u.qcow.key_secret, errp);
+    }
+}
+
+
+static int
+qcrypto_block_qcow_create(QCryptoBlock *block,
+                          QCryptoBlockCreateOptions *options,
+                          QCryptoBlockInitFunc initfunc G_GNUC_UNUSED,
+                          QCryptoBlockWriteFunc writefunc G_GNUC_UNUSED,
+                          void *opaque G_GNUC_UNUSED,
+                          Error **errp)
+{
+    if (!options->u.qcow.key_secret) {
+        error_setg(errp, "Parameter 'key-secret' is required for cipher");
+        return -1;
+    }
+    /* QCow2 has no special header, since everything is hardwired */
+    return qcrypto_block_qcow_init(block, options->u.qcow.key_secret, errp);
+}
+
+
+static void
+qcrypto_block_qcow_cleanup(QCryptoBlock *block)
+{
+}
+
+
+static int
+qcrypto_block_qcow_decrypt(QCryptoBlock *block,
+                           uint64_t startsector,
+                           uint8_t *buf,
+                           size_t len,
+                           Error **errp)
+{
+    return qcrypto_block_decrypt_helper(block->cipher,
+                                        block->niv, block->ivgen,
+                                        QCRYPTO_BLOCK_QCOW_SECTOR_SIZE,
+                                        startsector, buf, len, errp);
+}
+
+
+static int
+qcrypto_block_qcow_encrypt(QCryptoBlock *block,
+                           uint64_t startsector,
+                           uint8_t *buf,
+                           size_t len,
+                           Error **errp)
+{
+    return qcrypto_block_encrypt_helper(block->cipher,
+                                        block->niv, block->ivgen,
+                                        QCRYPTO_BLOCK_QCOW_SECTOR_SIZE,
+                                        startsector, buf, len, errp);
+}
+
+
+const QCryptoBlockDriver qcrypto_block_driver_qcow = {
+    .open = qcrypto_block_qcow_open,
+    .create = qcrypto_block_qcow_create,
+    .cleanup = qcrypto_block_qcow_cleanup,
+    .decrypt = qcrypto_block_qcow_decrypt,
+    .encrypt = qcrypto_block_qcow_encrypt,
+    .has_format = qcrypto_block_qcow_has_format,
+};
--- a/crypto/block-qcow.h
+++ b/crypto/block-qcow.h
@@ -0,0 +1,28 @@
+/*
+ * QEMU Crypto block device encryption QCow/QCow2 AES-CBC format
+ *
+ * Copyright (c) 2015-2016 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef QCRYPTO_BLOCK_QCOW_H__
+#define QCRYPTO_BLOCK_QCOW_H__
+
+#include "crypto/blockpriv.h"
+
+extern const QCryptoBlockDriver qcrypto_block_driver_qcow;
+
+#endif /* QCRYPTO_BLOCK_QCOW_H__ */
--- a/crypto/block.c
+++ b/crypto/block.c
@@ -0,0 +1,261 @@
+/*
+ * QEMU Crypto block device encryption
+ *
+ * Copyright (c) 2015-2016 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "crypto/blockpriv.h"
+#include "crypto/block-qcow.h"
+#include "crypto/block-luks.h"
+
+static const QCryptoBlockDriver *qcrypto_block_drivers[] = {
+    [Q_CRYPTO_BLOCK_FORMAT_QCOW] = &qcrypto_block_driver_qcow,
+    [Q_CRYPTO_BLOCK_FORMAT_LUKS] = &qcrypto_block_driver_luks,
+};
+
+
+bool qcrypto_block_has_format(QCryptoBlockFormat format,
+                              const uint8_t *buf,
+                              size_t len)
+{
+    const QCryptoBlockDriver *driver;
+
+    if (format >= G_N_ELEMENTS(qcrypto_block_drivers) ||
+        !qcrypto_block_drivers[format]) {
+        return false;
+    }
+
+    driver = qcrypto_block_drivers[format];
+
+    return driver->has_format(buf, len);
+}
+
+
+QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options,
+                                 QCryptoBlockReadFunc readfunc,
+                                 void *opaque,
+                                 unsigned int flags,
+                                 Error **errp)
+{
+    QCryptoBlock *block = g_new0(QCryptoBlock, 1);
+
+    block->format = options->format;
+
+    if (options->format >= G_N_ELEMENTS(qcrypto_block_drivers) ||
+        !qcrypto_block_drivers[options->format]) {
+        error_setg(errp, "Unsupported block driver %d", options->format);
+        g_free(block);
+        return NULL;
+    }
+
+    block->driver = qcrypto_block_drivers[options->format];
+
+    if (block->driver->open(block, options,
+                            readfunc, opaque, flags, errp) < 0) {
+        g_free(block);
+        return NULL;
+    }
+
+    return block;
+}
+
+
+QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options,
+                                   QCryptoBlockInitFunc initfunc,
+                                   QCryptoBlockWriteFunc writefunc,
+                                   void *opaque,
+                                   Error **errp)
+{
+    QCryptoBlock *block = g_new0(QCryptoBlock, 1);
+
+    block->format = options->format;
+
+    if (options->format >= G_N_ELEMENTS(qcrypto_block_drivers) ||
+        !qcrypto_block_drivers[options->format]) {
+        error_setg(errp, "Unsupported block driver %d", options->format);
+        g_free(block);
+        return NULL;
+    }
+
+    block->driver = qcrypto_block_drivers[options->format];
+
+    if (block->driver->create(block, options, initfunc,
+                              writefunc, opaque, errp) < 0) {
+        g_free(block);
+        return NULL;
+    }
+
+    return block;
+}
+
+
+int qcrypto_block_decrypt(QCryptoBlock *block,
+                          uint64_t startsector,
+                          uint8_t *buf,
+                          size_t len,
+                          Error **errp)
+{
+    return block->driver->decrypt(block, startsector, buf, len, errp);
+}
+
+
+int qcrypto_block_encrypt(QCryptoBlock *block,
+                          uint64_t startsector,
+                          uint8_t *buf,
+                          size_t len,
+                          Error **errp)
+{
+    return block->driver->encrypt(block, startsector, buf, len, errp);
+}
+
+
+QCryptoCipher *qcrypto_block_get_cipher(QCryptoBlock *block)
+{
+    return block->cipher;
+}
+
+
+QCryptoIVGen *qcrypto_block_get_ivgen(QCryptoBlock *block)
+{
+    return block->ivgen;
+}
+
+
+QCryptoHashAlgorithm qcrypto_block_get_kdf_hash(QCryptoBlock *block)
+{
+    return block->kdfhash;
+}
+
+
+uint64_t qcrypto_block_get_payload_offset(QCryptoBlock *block)
+{
+    return block->payload_offset;
+}
+
+
+void qcrypto_block_free(QCryptoBlock *block)
+{
+    if (!block) {
+        return;
+    }
+
+    block->driver->cleanup(block);
+
+    qcrypto_cipher_free(block->cipher);
+    qcrypto_ivgen_free(block->ivgen);
+    g_free(block);
+}
+
+
+int qcrypto_block_decrypt_helper(QCryptoCipher *cipher,
+                                 size_t niv,
+                                 QCryptoIVGen *ivgen,
+                                 int sectorsize,
+                                 uint64_t startsector,
+                                 uint8_t *buf,
+                                 size_t len,
+                                 Error **errp)
+{
+    uint8_t *iv;
+    int ret = -1;
+
+    iv = niv ? g_new0(uint8_t, niv) : NULL;
+
+    while (len > 0) {
+        size_t nbytes;
+        if (niv) {
+            if (qcrypto_ivgen_calculate(ivgen,
+                                        startsector,
+                                        iv, niv,
+                                        errp) < 0) {
+                goto cleanup;
+            }
+
+            if (qcrypto_cipher_setiv(cipher,
+                                     iv, niv,
+                                     errp) < 0) {
+                goto cleanup;
+            }
+        }
+
+        nbytes = len > sectorsize ? sectorsize : len;
+        if (qcrypto_cipher_decrypt(cipher, buf, buf,
+                                   nbytes, errp) < 0) {
+            goto cleanup;
+        }
+
+        startsector++;
+        buf += nbytes;
+        len -= nbytes;
+    }
+
+    ret = 0;
+ cleanup:
+    g_free(iv);
+    return ret;
+}
+
+
+int qcrypto_block_encrypt_helper(QCryptoCipher *cipher,
+                                 size_t niv,
+                                 QCryptoIVGen *ivgen,
+                                 int sectorsize,
+                                 uint64_t startsector,
+                                 uint8_t *buf,
+                                 size_t len,
+                                 Error **errp)
+{
+    uint8_t *iv;
+    int ret = -1;
+
+    iv = niv ? g_new0(uint8_t, niv) : NULL;
+
+    while (len > 0) {
+        size_t nbytes;
+        if (niv) {
+            if (qcrypto_ivgen_calculate(ivgen,
+                                        startsector,
+                                        iv, niv,
+                                        errp) < 0) {
+                goto cleanup;
+            }
+
+            if (qcrypto_cipher_setiv(cipher,
+                                     iv, niv,
+                                     errp) < 0) {
+                goto cleanup;
+            }
+        }
+
+        nbytes = len > sectorsize ? sectorsize : len;
+        if (qcrypto_cipher_encrypt(cipher, buf, buf,
+                                   nbytes, errp) < 0) {
+            goto cleanup;
+        }
+
+        startsector++;
+        buf += nbytes;
+        len -= nbytes;
+    }
+
+    ret = 0;
+ cleanup:
+    g_free(iv);
+    return ret;
+}
--- a/crypto/blockpriv.h
+++ b/crypto/blockpriv.h
@@ -0,0 +1,92 @@
+/*
+ * QEMU Crypto block device encryption
+ *
+ * Copyright (c) 2015-2016 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef QCRYPTO_BLOCK_PRIV_H__
+#define QCRYPTO_BLOCK_PRIV_H__
+
+#include "crypto/block.h"
+
+typedef struct QCryptoBlockDriver QCryptoBlockDriver;
+
+struct QCryptoBlock {
+    QCryptoBlockFormat format;
+
+    const QCryptoBlockDriver *driver;
+    void *opaque;
+
+    QCryptoCipher *cipher;
+    QCryptoIVGen *ivgen;
+    QCryptoHashAlgorithm kdfhash;
+    size_t niv;
+    uint64_t payload_offset; /* In bytes */
+};
+
+struct QCryptoBlockDriver {
+    int (*open)(QCryptoBlock *block,
+                QCryptoBlockOpenOptions *options,
+                QCryptoBlockReadFunc readfunc,
+                void *opaque,
+                unsigned int flags,
+                Error **errp);
+
+    int (*create)(QCryptoBlock *block,
+                  QCryptoBlockCreateOptions *options,
+                  QCryptoBlockInitFunc initfunc,
+                  QCryptoBlockWriteFunc writefunc,
+                  void *opaque,
+                  Error **errp);
+
+    void (*cleanup)(QCryptoBlock *block);
+
+    int (*encrypt)(QCryptoBlock *block,
+                   uint64_t startsector,
+                   uint8_t *buf,
+                   size_t len,
+                   Error **errp);
+    int (*decrypt)(QCryptoBlock *block,
+                   uint64_t startsector,
+                   uint8_t *buf,
+                   size_t len,
+                   Error **errp);
+
+    bool (*has_format)(const uint8_t *buf,
+                       size_t buflen);
+};
+
+
+int qcrypto_block_decrypt_helper(QCryptoCipher *cipher,
+                                 size_t niv,
+                                 QCryptoIVGen *ivgen,
+                                 int sectorsize,
+                                 uint64_t startsector,
+                                 uint8_t *buf,
+                                 size_t len,
+                                 Error **errp);
+
+int qcrypto_block_encrypt_helper(QCryptoCipher *cipher,
+                                 size_t niv,
+                                 QCryptoIVGen *ivgen,
+                                 int sectorsize,
+                                 uint64_t startsector,
+                                 uint8_t *buf,
+                                 size_t len,
+                                 Error **errp);
+
+#endif /* QCRYPTO_BLOCK_PRIV_H__ */
--- a/crypto/cipher-builtin.c
+++ b/crypto/cipher-builtin.c
@@ -21,11 +21,17 @@
 #include "qemu/osdep.h"
 #include "crypto/aes.h"
 #include "crypto/desrfb.h"
+#include "crypto/xts.h"

+typedef struct QCryptoCipherBuiltinAESContext QCryptoCipherBuiltinAESContext;
+struct QCryptoCipherBuiltinAESContext {
+    AES_KEY enc;
+    AES_KEY dec;
+};
 typedef struct QCryptoCipherBuiltinAES QCryptoCipherBuiltinAES;
 struct QCryptoCipherBuiltinAES {
-    AES_KEY encrypt_key;
-    AES_KEY decrypt_key;
+    QCryptoCipherBuiltinAESContext key;
+    QCryptoCipherBuiltinAESContext key_tweak;
    uint8_t iv[AES_BLOCK_SIZE];
 };
 typedef struct QCryptoCipherBuiltinDESRFB QCryptoCipherBuiltinDESRFB;
@@ -67,6 +73,82 @@ static void qcrypto_cipher_free_aes(QCryptoCipher *cipher)
 }


+static void qcrypto_cipher_aes_ecb_encrypt(AES_KEY *key,
+                                           const void *in,
+                                           void *out,
+                                           size_t len)
+{
+    const uint8_t *inptr = in;
+    uint8_t *outptr = out;
+    while (len) {
+        if (len > AES_BLOCK_SIZE) {
+            AES_encrypt(inptr, outptr, key);
+            inptr += AES_BLOCK_SIZE;
+            outptr += AES_BLOCK_SIZE;
+            len -= AES_BLOCK_SIZE;
+        } else {
+            uint8_t tmp1[AES_BLOCK_SIZE], tmp2[AES_BLOCK_SIZE];
+            memcpy(tmp1, inptr, len);
+            /* Fill with 0 to avoid valgrind uninitialized reads */
+            memset(tmp1 + len, 0, sizeof(tmp1) - len);
+            AES_encrypt(tmp1, tmp2, key);
+            memcpy(outptr, tmp2, len);
+            len = 0;
+        }
+    }
+}
+
+
+static void qcrypto_cipher_aes_ecb_decrypt(AES_KEY *key,
+                                           const void *in,
+                                           void *out,
+                                           size_t len)
+{
+    const uint8_t *inptr = in;
+    uint8_t *outptr = out;
+    while (len) {
+        if (len > AES_BLOCK_SIZE) {
+            AES_decrypt(inptr, outptr, key);
+            inptr += AES_BLOCK_SIZE;
+            outptr += AES_BLOCK_SIZE;
+            len -= AES_BLOCK_SIZE;
+        } else {
+            uint8_t tmp1[AES_BLOCK_SIZE], tmp2[AES_BLOCK_SIZE];
+            memcpy(tmp1, inptr, len);
+            /* Fill with 0 to avoid valgrind uninitialized reads */
+            memset(tmp1 + len, 0, sizeof(tmp1) - len);
+            AES_decrypt(tmp1, tmp2, key);
+            memcpy(outptr, tmp2, len);
+            len = 0;
+        }
+    }
+}
+
+
+static void qcrypto_cipher_aes_xts_encrypt(const void *ctx,
+                                           size_t length,
+                                           uint8_t *dst,
+                                           const uint8_t *src)
+{
+    const QCryptoCipherBuiltinAESContext *aesctx = ctx;
+
+    qcrypto_cipher_aes_ecb_encrypt((AES_KEY *)&aesctx->enc,
+                                   src, dst, length);
+}
+
+
+static void qcrypto_cipher_aes_xts_decrypt(const void *ctx,
+                                           size_t length,
+                                           uint8_t *dst,
+                                           const uint8_t *src)
+{
+    const QCryptoCipherBuiltinAESContext *aesctx = ctx;
+
+    qcrypto_cipher_aes_ecb_decrypt((AES_KEY *)&aesctx->dec,
+                                   src, dst, length);
+}
+
+
 static int qcrypto_cipher_encrypt_aes(QCryptoCipher *cipher,
                                      const void *in,
                                      void *out,
@@ -75,29 +157,26 @@ static int qcrypto_cipher_encrypt_aes(QCryptoCipher *cipher,
 {
    QCryptoCipherBuiltin *ctxt = cipher->opaque;

-    if (cipher->mode == QCRYPTO_CIPHER_MODE_ECB) {
-        const uint8_t *inptr = in;
-        uint8_t *outptr = out;
-        while (len) {
-            if (len > AES_BLOCK_SIZE) {
-                AES_encrypt(inptr, outptr, &ctxt->state.aes.encrypt_key);
-                inptr += AES_BLOCK_SIZE;
-                outptr += AES_BLOCK_SIZE;
-                len -= AES_BLOCK_SIZE;
-            } else {
-                uint8_t tmp1[AES_BLOCK_SIZE], tmp2[AES_BLOCK_SIZE];
-                memcpy(tmp1, inptr, len);
-                /* Fill with 0 to avoid valgrind uninitialized reads */
-                memset(tmp1 + len, 0, sizeof(tmp1) - len);
-                AES_encrypt(tmp1, tmp2, &ctxt->state.aes.encrypt_key);
-                memcpy(outptr, tmp2, len);
-                len = 0;
-            }
-        }
-    } else {
+    switch (cipher->mode) {
+    case QCRYPTO_CIPHER_MODE_ECB:
+        qcrypto_cipher_aes_ecb_encrypt(&ctxt->state.aes.key.enc,
+                                       in, out, len);
+        break;
+    case QCRYPTO_CIPHER_MODE_CBC:
        AES_cbc_encrypt(in, out, len,
-                        &ctxt->state.aes.encrypt_key,
+                        &ctxt->state.aes.key.enc,
                        ctxt->state.aes.iv, 1);
+        break;
+    case QCRYPTO_CIPHER_MODE_XTS:
+        xts_encrypt(&ctxt->state.aes.key,
+                    &ctxt->state.aes.key_tweak,
+                    qcrypto_cipher_aes_xts_encrypt,
+                    qcrypto_cipher_aes_xts_decrypt,
+                    ctxt->state.aes.iv,
+                    len, out, in);
+        break;
+    default:
+        g_assert_not_reached();
    }

    return 0;
@@ -112,29 +191,26 @@ static int qcrypto_cipher_decrypt_aes(QCryptoCipher *cipher,
 {
    QCryptoCipherBuiltin *ctxt = cipher->opaque;

-    if (cipher->mode == QCRYPTO_CIPHER_MODE_ECB) {
-        const uint8_t *inptr = in;
-        uint8_t *outptr = out;
-        while (len) {
-            if (len > AES_BLOCK_SIZE) {
-                AES_decrypt(inptr, outptr, &ctxt->state.aes.decrypt_key);
-                inptr += AES_BLOCK_SIZE;
-                outptr += AES_BLOCK_SIZE;
-                len -= AES_BLOCK_SIZE;
-            } else {
-                uint8_t tmp1[AES_BLOCK_SIZE], tmp2[AES_BLOCK_SIZE];
-                memcpy(tmp1, inptr, len);
-                /* Fill with 0 to avoid valgrind uninitialized reads */
-                memset(tmp1 + len, 0, sizeof(tmp1) - len);
-                AES_decrypt(tmp1, tmp2, &ctxt->state.aes.decrypt_key);
-                memcpy(outptr, tmp2, len);
-                len = 0;
-            }
-        }
-    } else {
+    switch (cipher->mode) {
+    case QCRYPTO_CIPHER_MODE_ECB:
+        qcrypto_cipher_aes_ecb_decrypt(&ctxt->state.aes.key.dec,
+                                       in, out, len);
+        break;
+    case QCRYPTO_CIPHER_MODE_CBC:
        AES_cbc_encrypt(in, out, len,
-                        &ctxt->state.aes.decrypt_key,
+                        &ctxt->state.aes.key.dec,
                        ctxt->state.aes.iv, 0);
+        break;
+    case QCRYPTO_CIPHER_MODE_XTS:
+        xts_decrypt(&ctxt->state.aes.key,
+                    &ctxt->state.aes.key_tweak,
+                    qcrypto_cipher_aes_xts_encrypt,
+                    qcrypto_cipher_aes_xts_decrypt,
+                    ctxt->state.aes.iv,
+                    len, out, in);
+        break;
+    default:
+        g_assert_not_reached();
    }

    return 0;
@@ -166,21 +242,46 @@ static int qcrypto_cipher_init_aes(QCryptoCipher *cipher,
    QCryptoCipherBuiltin *ctxt;

    if (cipher->mode != QCRYPTO_CIPHER_MODE_CBC &&
-        cipher->mode != QCRYPTO_CIPHER_MODE_ECB) {
+        cipher->mode != QCRYPTO_CIPHER_MODE_ECB &&
+        cipher->mode != QCRYPTO_CIPHER_MODE_XTS) {
        error_setg(errp, "Unsupported cipher mode %d", cipher->mode);
        return -1;
    }

    ctxt = g_new0(QCryptoCipherBuiltin, 1);

-    if (AES_set_encrypt_key(key, nkey * 8, &ctxt->state.aes.encrypt_key) != 0) {
-        error_setg(errp, "Failed to set encryption key");
-        goto error;
-    }
+    if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
+        if (AES_set_encrypt_key(key, nkey * 4, &ctxt->state.aes.key.enc) != 0) {
+            error_setg(errp, "Failed to set encryption key");
+            goto error;
+        }

-    if (AES_set_decrypt_key(key, nkey * 8, &ctxt->state.aes.decrypt_key) != 0) {
-        error_setg(errp, "Failed to set decryption key");
-        goto error;
+        if (AES_set_decrypt_key(key, nkey * 4, &ctxt->state.aes.key.dec) != 0) {
+            error_setg(errp, "Failed to set decryption key");
+            goto error;
+        }
+
+        if (AES_set_encrypt_key(key + (nkey / 2), nkey * 4,
+                                &ctxt->state.aes.key_tweak.enc) != 0) {
+            error_setg(errp, "Failed to set encryption key");
+            goto error;
+        }
+
+        if (AES_set_decrypt_key(key + (nkey / 2), nkey * 4,
+                                &ctxt->state.aes.key_tweak.dec) != 0) {
+            error_setg(errp, "Failed to set decryption key");
+            goto error;
+        }
+    } else {
+        if (AES_set_encrypt_key(key, nkey * 8, &ctxt->state.aes.key.enc) != 0) {
+            error_setg(errp, "Failed to set encryption key");
+            goto error;
+        }
+
+        if (AES_set_decrypt_key(key, nkey * 8, &ctxt->state.aes.key.dec) != 0) {
+            error_setg(errp, "Failed to set decryption key");
+            goto error;
+        }
    }

    ctxt->blocksize = AES_BLOCK_SIZE;
@@ -322,7 +423,7 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
    cipher->alg = alg;
    cipher->mode = mode;

-    if (!qcrypto_cipher_validate_key_length(alg, nkey, errp)) {
+    if (!qcrypto_cipher_validate_key_length(alg, mode, nkey, errp)) {
        goto error;
    }

--- a/crypto/cipher-gcrypt.c
+++ b/crypto/cipher-gcrypt.c
@@ -19,6 +19,8 @@
 */

 #include "qemu/osdep.h"
+#include "crypto/xts.h"
+
 #include <gcrypt.h>


@@ -29,6 +31,12 @@ bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg)
    case QCRYPTO_CIPHER_ALG_AES_128:
    case QCRYPTO_CIPHER_ALG_AES_192:
    case QCRYPTO_CIPHER_ALG_AES_256:
+    case QCRYPTO_CIPHER_ALG_CAST5_128:
+    case QCRYPTO_CIPHER_ALG_SERPENT_128:
+    case QCRYPTO_CIPHER_ALG_SERPENT_192:
+    case QCRYPTO_CIPHER_ALG_SERPENT_256:
+    case QCRYPTO_CIPHER_ALG_TWOFISH_128:
+    case QCRYPTO_CIPHER_ALG_TWOFISH_256:
        return true;
    default:
        return false;
@@ -38,7 +46,9 @@ bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg)
 typedef struct QCryptoCipherGcrypt QCryptoCipherGcrypt;
 struct QCryptoCipherGcrypt {
    gcry_cipher_hd_t handle;
+    gcry_cipher_hd_t tweakhandle;
    size_t blocksize;
+    uint8_t *iv;
 };

 QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
@@ -53,6 +63,7 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,

    switch (mode) {
    case QCRYPTO_CIPHER_MODE_ECB:
+    case QCRYPTO_CIPHER_MODE_XTS:
        gcrymode = GCRY_CIPHER_MODE_ECB;
        break;
    case QCRYPTO_CIPHER_MODE_CBC:
@@ -63,7 +74,7 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
        return NULL;
    }

-    if (!qcrypto_cipher_validate_key_length(alg, nkey, errp)) {
+    if (!qcrypto_cipher_validate_key_length(alg, mode, nkey, errp)) {
        return NULL;
    }

@@ -84,6 +95,30 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
        gcryalg = GCRY_CIPHER_AES256;
        break;

+    case QCRYPTO_CIPHER_ALG_CAST5_128:
+        gcryalg = GCRY_CIPHER_CAST5;
+        break;
+
+    case QCRYPTO_CIPHER_ALG_SERPENT_128:
+        gcryalg = GCRY_CIPHER_SERPENT128;
+        break;
+
+    case QCRYPTO_CIPHER_ALG_SERPENT_192:
+        gcryalg = GCRY_CIPHER_SERPENT192;
+        break;
+
+    case QCRYPTO_CIPHER_ALG_SERPENT_256:
+        gcryalg = GCRY_CIPHER_SERPENT256;
+        break;
+
+    case QCRYPTO_CIPHER_ALG_TWOFISH_128:
+        gcryalg = GCRY_CIPHER_TWOFISH128;
+        break;
+
+    case QCRYPTO_CIPHER_ALG_TWOFISH_256:
+        gcryalg = GCRY_CIPHER_TWOFISH;
+        break;
+
    default:
        error_setg(errp, "Unsupported cipher algorithm %d", alg);
        return NULL;
@@ -101,6 +136,14 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
                   gcry_strerror(err));
        goto error;
    }
+    if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
+        err = gcry_cipher_open(&ctx->tweakhandle, gcryalg, gcrymode, 0);
+        if (err != 0) {
+            error_setg(errp, "Cannot initialize cipher: %s",
+                       gcry_strerror(err));
+            goto error;
+        }
+    }

    if (cipher->alg == QCRYPTO_CIPHER_ALG_DES_RFB) {
        /* We're using standard DES cipher from gcrypt, so we need
@@ -112,13 +155,44 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
        g_free(rfbkey);
        ctx->blocksize = 8;
    } else {
-        err = gcry_cipher_setkey(ctx->handle, key, nkey);
-        ctx->blocksize = 16;
+        if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
+            nkey /= 2;
+            err = gcry_cipher_setkey(ctx->handle, key, nkey);
+            if (err != 0) {
+                error_setg(errp, "Cannot set key: %s",
+                           gcry_strerror(err));
+                goto error;
+            }
+            err = gcry_cipher_setkey(ctx->tweakhandle, key + nkey, nkey);
+        } else {
+            err = gcry_cipher_setkey(ctx->handle, key, nkey);
+        }
+        if (err != 0) {
+            error_setg(errp, "Cannot set key: %s",
+                       gcry_strerror(err));
+            goto error;
+        }
+        switch (cipher->alg) {
+        case QCRYPTO_CIPHER_ALG_AES_128:
+        case QCRYPTO_CIPHER_ALG_AES_192:
+        case QCRYPTO_CIPHER_ALG_AES_256:
+        case QCRYPTO_CIPHER_ALG_SERPENT_128:
+        case QCRYPTO_CIPHER_ALG_SERPENT_192:
+        case QCRYPTO_CIPHER_ALG_SERPENT_256:
+        case QCRYPTO_CIPHER_ALG_TWOFISH_128:
+        case QCRYPTO_CIPHER_ALG_TWOFISH_256:
+            ctx->blocksize = 16;
+            break;
+        case QCRYPTO_CIPHER_ALG_CAST5_128:
+            ctx->blocksize = 8;
+            break;
+        default:
+            g_assert_not_reached();
+        }
    }
-    if (err != 0) {
-        error_setg(errp, "Cannot set key: %s",
-                   gcry_strerror(err));
-        goto error;
+
+    if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
+        ctx->iv = g_new0(uint8_t, ctx->blocksize);
    }

    cipher->opaque = ctx;
@@ -126,6 +200,9 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,

 error:
    gcry_cipher_close(ctx->handle);
+    if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
+        gcry_cipher_close(ctx->tweakhandle);
+    }
    g_free(ctx);
    g_free(cipher);
    return NULL;
@@ -140,11 +217,35 @@ void qcrypto_cipher_free(QCryptoCipher *cipher)
    }
    ctx = cipher->opaque;
    gcry_cipher_close(ctx->handle);
+    if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
+        gcry_cipher_close(ctx->tweakhandle);
+    }
+    g_free(ctx->iv);
    g_free(ctx);
    g_free(cipher);
 }


+static void qcrypto_gcrypt_xts_encrypt(const void *ctx,
+                                       size_t length,
+                                       uint8_t *dst,
+                                       const uint8_t *src)
+{
+    gcry_error_t err;
+    err = gcry_cipher_encrypt((gcry_cipher_hd_t)ctx, dst, length, src, length);
+    g_assert(err == 0);
+}
+
+static void qcrypto_gcrypt_xts_decrypt(const void *ctx,
+                                       size_t length,
+                                       uint8_t *dst,
+                                       const uint8_t *src)
+{
+    gcry_error_t err;
+    err = gcry_cipher_decrypt((gcry_cipher_hd_t)ctx, dst, length, src, length);
+    g_assert(err == 0);
+}
+
 int qcrypto_cipher_encrypt(QCryptoCipher *cipher,
                           const void *in,
                           void *out,
@@ -160,13 +261,20 @@ int qcrypto_cipher_encrypt(QCryptoCipher *cipher,
        return -1;
    }

-    err = gcry_cipher_encrypt(ctx->handle,
-                              out, len,
-                              in, len);
-    if (err != 0) {
-        error_setg(errp, "Cannot encrypt data: %s",
-                   gcry_strerror(err));
-        return -1;
+    if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
+        xts_encrypt(ctx->handle, ctx->tweakhandle,
+                    qcrypto_gcrypt_xts_encrypt,
+                    qcrypto_gcrypt_xts_decrypt,
+                    ctx->iv, len, out, in);
+    } else {
+        err = gcry_cipher_encrypt(ctx->handle,
+                                  out, len,
+                                  in, len);
+        if (err != 0) {
+            error_setg(errp, "Cannot encrypt data: %s",
+                       gcry_strerror(err));
+            return -1;
+        }
    }

    return 0;
@@ -188,13 +296,20 @@ int qcrypto_cipher_decrypt(QCryptoCipher *cipher,
        return -1;
    }

-    err = gcry_cipher_decrypt(ctx->handle,
-                              out, len,
-                              in, len);
-    if (err != 0) {
-        error_setg(errp, "Cannot decrypt data: %s",
-                   gcry_strerror(err));
-        return -1;
+    if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
+        xts_decrypt(ctx->handle, ctx->tweakhandle,
+                    qcrypto_gcrypt_xts_encrypt,
+                    qcrypto_gcrypt_xts_decrypt,
+                    ctx->iv, len, out, in);
+    } else {
+        err = gcry_cipher_decrypt(ctx->handle,
+                                  out, len,
+                                  in, len);
+        if (err != 0) {
+            error_setg(errp, "Cannot decrypt data: %s",
+                       gcry_strerror(err));
+            return -1;
+        }
    }

    return 0;
@@ -213,12 +328,16 @@ int qcrypto_cipher_setiv(QCryptoCipher *cipher,
        return -1;
    }

-    gcry_cipher_reset(ctx->handle);
-    err = gcry_cipher_setiv(ctx->handle, iv, niv);
-    if (err != 0) {
-        error_setg(errp, "Cannot set IV: %s",
+    if (ctx->iv) {
+        memcpy(ctx->iv, iv, niv);
+    } else {
+        gcry_cipher_reset(ctx->handle);
+        err = gcry_cipher_setiv(ctx->handle, iv, niv);
+        if (err != 0) {
+            error_setg(errp, "Cannot set IV: %s",
                   gcry_strerror(err));
-        return -1;
+            return -1;
+        }
    }

    return 0;
--- a/crypto/cipher-nettle.c
+++ b/crypto/cipher-nettle.c
@@ -19,56 +19,174 @@
 */

 #include "qemu/osdep.h"
+#include "crypto/xts.h"
+
 #include <nettle/nettle-types.h>
 #include <nettle/aes.h>
 #include <nettle/des.h>
 #include <nettle/cbc.h>
+#include <nettle/cast128.h>
+#include <nettle/serpent.h>
+#include <nettle/twofish.h>
+
+typedef void (*QCryptoCipherNettleFuncWrapper)(const void *ctx,
+                                               size_t length,
+                                               uint8_t *dst,
+                                               const uint8_t *src);

 #if CONFIG_NETTLE_VERSION_MAJOR < 3
-typedef nettle_crypt_func nettle_cipher_func;
-
+typedef nettle_crypt_func * QCryptoCipherNettleFuncNative;
 typedef void *       cipher_ctx_t;
 typedef unsigned     cipher_length_t;
+
+#define cast5_set_key cast128_set_key
 #else
+typedef nettle_cipher_func * QCryptoCipherNettleFuncNative;
 typedef const void * cipher_ctx_t;
 typedef size_t       cipher_length_t;
 #endif

-static nettle_cipher_func aes_encrypt_wrapper;
-static nettle_cipher_func aes_decrypt_wrapper;
-static nettle_cipher_func des_encrypt_wrapper;
-static nettle_cipher_func des_decrypt_wrapper;
+typedef struct QCryptoNettleAES {
+    struct aes_ctx enc;
+    struct aes_ctx dec;
+} QCryptoNettleAES;

-static void aes_encrypt_wrapper(cipher_ctx_t ctx, cipher_length_t length,
-                                uint8_t *dst, const uint8_t *src)
+static void aes_encrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+                               uint8_t *dst, const uint8_t *src)
 {
-    aes_encrypt(ctx, length, dst, src);
+    const QCryptoNettleAES *aesctx = ctx;
+    aes_encrypt(&aesctx->enc, length, dst, src);
 }

-static void aes_decrypt_wrapper(cipher_ctx_t ctx, cipher_length_t length,
-                                uint8_t *dst, const uint8_t *src)
+static void aes_decrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+                               uint8_t *dst, const uint8_t *src)
 {
-    aes_decrypt(ctx, length, dst, src);
+    const QCryptoNettleAES *aesctx = ctx;
+    aes_decrypt(&aesctx->dec, length, dst, src);
 }

-static void des_encrypt_wrapper(cipher_ctx_t ctx, cipher_length_t length,
+static void des_encrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+                               uint8_t *dst, const uint8_t *src)
+{
+    des_encrypt(ctx, length, dst, src);
+}
+
+static void des_decrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+                               uint8_t *dst, const uint8_t *src)
+{
+    des_decrypt(ctx, length, dst, src);
+}
+
+static void cast128_encrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+                                   uint8_t *dst, const uint8_t *src)
+{
+    cast128_encrypt(ctx, length, dst, src);
+}
+
+static void cast128_decrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+                                   uint8_t *dst, const uint8_t *src)
+{
+    cast128_decrypt(ctx, length, dst, src);
+}
+
+static void serpent_encrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+                                   uint8_t *dst, const uint8_t *src)
+{
+    serpent_encrypt(ctx, length, dst, src);
+}
+
+static void serpent_decrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+                                   uint8_t *dst, const uint8_t *src)
+{
+    serpent_decrypt(ctx, length, dst, src);
+}
+
+static void twofish_encrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+                                   uint8_t *dst, const uint8_t *src)
+{
+    twofish_encrypt(ctx, length, dst, src);
+}
+
+static void twofish_decrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+                                   uint8_t *dst, const uint8_t *src)
+{
+    twofish_decrypt(ctx, length, dst, src);
+}
+
+static void aes_encrypt_wrapper(const void *ctx, size_t length,
+                                uint8_t *dst, const uint8_t *src)
+{
+    const QCryptoNettleAES *aesctx = ctx;
+    aes_encrypt(&aesctx->enc, length, dst, src);
+}
+
+static void aes_decrypt_wrapper(const void *ctx, size_t length,
+                                uint8_t *dst, const uint8_t *src)
+{
+    const QCryptoNettleAES *aesctx = ctx;
+    aes_decrypt(&aesctx->dec, length, dst, src);
+}
+
+static void des_encrypt_wrapper(const void *ctx, size_t length,
                                uint8_t *dst, const uint8_t *src)
 {
    des_encrypt(ctx, length, dst, src);
 }

-static void des_decrypt_wrapper(cipher_ctx_t ctx, cipher_length_t length,
+static void des_decrypt_wrapper(const void *ctx, size_t length,
                                uint8_t *dst, const uint8_t *src)
 {
    des_decrypt(ctx, length, dst, src);
 }

+static void cast128_encrypt_wrapper(const void *ctx, size_t length,
+                                    uint8_t *dst, const uint8_t *src)
+{
+    cast128_encrypt(ctx, length, dst, src);
+}
+
+static void cast128_decrypt_wrapper(const void *ctx, size_t length,
+                                    uint8_t *dst, const uint8_t *src)
+{
+    cast128_decrypt(ctx, length, dst, src);
+}
+
+static void serpent_encrypt_wrapper(const void *ctx, size_t length,
+                                    uint8_t *dst, const uint8_t *src)
+{
+    serpent_encrypt(ctx, length, dst, src);
+}
+
+static void serpent_decrypt_wrapper(const void *ctx, size_t length,
+                                    uint8_t *dst, const uint8_t *src)
+{
+    serpent_decrypt(ctx, length, dst, src);
+}
+
+static void twofish_encrypt_wrapper(const void *ctx, size_t length,
+                                    uint8_t *dst, const uint8_t *src)
+{
+    twofish_encrypt(ctx, length, dst, src);
+}
+
+static void twofish_decrypt_wrapper(const void *ctx, size_t length,
+                                    uint8_t *dst, const uint8_t *src)
+{
+    twofish_decrypt(ctx, length, dst, src);
+}
+
 typedef struct QCryptoCipherNettle QCryptoCipherNettle;
 struct QCryptoCipherNettle {
-    void *ctx_encrypt;
-    void *ctx_decrypt;
-    nettle_cipher_func *alg_encrypt;
-    nettle_cipher_func *alg_decrypt;
+    /* Primary cipher context for all modes */
+    void *ctx;
+    /* Second cipher context for XTS mode only */
+    void *ctx_tweak;
+    /* Cipher callbacks for both contexts */
+    QCryptoCipherNettleFuncNative alg_encrypt_native;
+    QCryptoCipherNettleFuncNative alg_decrypt_native;
+    QCryptoCipherNettleFuncWrapper alg_encrypt_wrapper;
+    QCryptoCipherNettleFuncWrapper alg_decrypt_wrapper;
+
    uint8_t *iv;
    size_t blocksize;
 };
@@ -80,6 +198,13 @@ bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg)
    case QCRYPTO_CIPHER_ALG_AES_128:
    case QCRYPTO_CIPHER_ALG_AES_192:
    case QCRYPTO_CIPHER_ALG_AES_256:
+    case QCRYPTO_CIPHER_ALG_CAST5_128:
+    case QCRYPTO_CIPHER_ALG_SERPENT_128:
+    case QCRYPTO_CIPHER_ALG_SERPENT_192:
+    case QCRYPTO_CIPHER_ALG_SERPENT_256:
+    case QCRYPTO_CIPHER_ALG_TWOFISH_128:
+    case QCRYPTO_CIPHER_ALG_TWOFISH_192:
+    case QCRYPTO_CIPHER_ALG_TWOFISH_256:
        return true;
    default:
        return false;
@@ -99,13 +224,14 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
    switch (mode) {
    case QCRYPTO_CIPHER_MODE_ECB:
    case QCRYPTO_CIPHER_MODE_CBC:
+    case QCRYPTO_CIPHER_MODE_XTS:
        break;
    default:
        error_setg(errp, "Unsupported cipher mode %d", mode);
        return NULL;
    }

-    if (!qcrypto_cipher_validate_key_length(alg, nkey, errp)) {
+    if (!qcrypto_cipher_validate_key_length(alg, mode, nkey, errp)) {
        return NULL;
    }

@@ -117,14 +243,15 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,

    switch (alg) {
    case QCRYPTO_CIPHER_ALG_DES_RFB:
-        ctx->ctx_encrypt = g_new0(struct des_ctx, 1);
-        ctx->ctx_decrypt = NULL; /* 1 ctx can do both */
+        ctx->ctx = g_new0(struct des_ctx, 1);
        rfbkey = qcrypto_cipher_munge_des_rfb_key(key, nkey);
-        des_set_key(ctx->ctx_encrypt, rfbkey);
+        des_set_key(ctx->ctx, rfbkey);
        g_free(rfbkey);

-        ctx->alg_encrypt = des_encrypt_wrapper;
-        ctx->alg_decrypt = des_decrypt_wrapper;
+        ctx->alg_encrypt_native = des_encrypt_native;
+        ctx->alg_decrypt_native = des_decrypt_native;
+        ctx->alg_encrypt_wrapper = des_encrypt_wrapper;
+        ctx->alg_decrypt_wrapper = des_decrypt_wrapper;

        ctx->blocksize = DES_BLOCK_SIZE;
        break;
@@ -132,17 +259,103 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
    case QCRYPTO_CIPHER_ALG_AES_128:
    case QCRYPTO_CIPHER_ALG_AES_192:
    case QCRYPTO_CIPHER_ALG_AES_256:
-        ctx->ctx_encrypt = g_new0(struct aes_ctx, 1);
-        ctx->ctx_decrypt = g_new0(struct aes_ctx, 1);
+        ctx->ctx = g_new0(QCryptoNettleAES, 1);

-        aes_set_encrypt_key(ctx->ctx_encrypt, nkey, key);
-        aes_set_decrypt_key(ctx->ctx_decrypt, nkey, key);
+        if (mode == QCRYPTO_CIPHER_MODE_XTS) {
+            ctx->ctx_tweak = g_new0(QCryptoNettleAES, 1);

-        ctx->alg_encrypt = aes_encrypt_wrapper;
-        ctx->alg_decrypt = aes_decrypt_wrapper;
+            nkey /= 2;
+            aes_set_encrypt_key(&((QCryptoNettleAES *)ctx->ctx)->enc,
+                                nkey, key);
+            aes_set_decrypt_key(&((QCryptoNettleAES *)ctx->ctx)->dec,
+                                nkey, key);
+
+            aes_set_encrypt_key(&((QCryptoNettleAES *)ctx->ctx_tweak)->enc,
+                                nkey, key + nkey);
+            aes_set_decrypt_key(&((QCryptoNettleAES *)ctx->ctx_tweak)->dec,
+                                nkey, key + nkey);
+        } else {
+            aes_set_encrypt_key(&((QCryptoNettleAES *)ctx->ctx)->enc,
+                                nkey, key);
+            aes_set_decrypt_key(&((QCryptoNettleAES *)ctx->ctx)->dec,
+                                nkey, key);
+        }
+
+        ctx->alg_encrypt_native = aes_encrypt_native;
+        ctx->alg_decrypt_native = aes_decrypt_native;
+        ctx->alg_encrypt_wrapper = aes_encrypt_wrapper;
+        ctx->alg_decrypt_wrapper = aes_decrypt_wrapper;

        ctx->blocksize = AES_BLOCK_SIZE;
        break;
+
+    case QCRYPTO_CIPHER_ALG_CAST5_128:
+        ctx->ctx = g_new0(struct cast128_ctx, 1);
+
+        if (mode == QCRYPTO_CIPHER_MODE_XTS) {
+            ctx->ctx_tweak = g_new0(struct cast128_ctx, 1);
+
+            nkey /= 2;
+            cast5_set_key(ctx->ctx, nkey, key);
+            cast5_set_key(ctx->ctx_tweak, nkey, key + nkey);
+        } else {
+            cast5_set_key(ctx->ctx, nkey, key);
+        }
+
+        ctx->alg_encrypt_native = cast128_encrypt_native;
+        ctx->alg_decrypt_native = cast128_decrypt_native;
+        ctx->alg_encrypt_wrapper = cast128_encrypt_wrapper;
+        ctx->alg_decrypt_wrapper = cast128_decrypt_wrapper;
+
+        ctx->blocksize = CAST128_BLOCK_SIZE;
+        break;
+
+    case QCRYPTO_CIPHER_ALG_SERPENT_128:
+    case QCRYPTO_CIPHER_ALG_SERPENT_192:
+    case QCRYPTO_CIPHER_ALG_SERPENT_256:
+        ctx->ctx = g_new0(struct serpent_ctx, 1);
+
+        if (mode == QCRYPTO_CIPHER_MODE_XTS) {
+            ctx->ctx_tweak = g_new0(struct serpent_ctx, 1);
+
+            nkey /= 2;
+            serpent_set_key(ctx->ctx, nkey, key);
+            serpent_set_key(ctx->ctx_tweak, nkey, key + nkey);
+        } else {
+            serpent_set_key(ctx->ctx, nkey, key);
+        }
+
+        ctx->alg_encrypt_native = serpent_encrypt_native;
+        ctx->alg_decrypt_native = serpent_decrypt_native;
+        ctx->alg_encrypt_wrapper = serpent_encrypt_wrapper;
+        ctx->alg_decrypt_wrapper = serpent_decrypt_wrapper;
+
+        ctx->blocksize = SERPENT_BLOCK_SIZE;
+        break;
+
+    case QCRYPTO_CIPHER_ALG_TWOFISH_128:
+    case QCRYPTO_CIPHER_ALG_TWOFISH_192:
+    case QCRYPTO_CIPHER_ALG_TWOFISH_256:
+        ctx->ctx = g_new0(struct twofish_ctx, 1);
+
+        if (mode == QCRYPTO_CIPHER_MODE_XTS) {
+            ctx->ctx_tweak = g_new0(struct twofish_ctx, 1);
+
+            nkey /= 2;
+            twofish_set_key(ctx->ctx, nkey, key);
+            twofish_set_key(ctx->ctx_tweak, nkey, key + nkey);
+        } else {
+            twofish_set_key(ctx->ctx, nkey, key);
+        }
+
+        ctx->alg_encrypt_native = twofish_encrypt_native;
+        ctx->alg_decrypt_native = twofish_decrypt_native;
+        ctx->alg_encrypt_wrapper = twofish_encrypt_wrapper;
+        ctx->alg_decrypt_wrapper = twofish_decrypt_wrapper;
+
+        ctx->blocksize = TWOFISH_BLOCK_SIZE;
+        break;
+
    default:
        error_setg(errp, "Unsupported cipher algorithm %d", alg);
        goto error;
@@ -170,8 +383,8 @@ void qcrypto_cipher_free(QCryptoCipher *cipher)

    ctx = cipher->opaque;
    g_free(ctx->iv);
-    g_free(ctx->ctx_encrypt);
-    g_free(ctx->ctx_decrypt);
+    g_free(ctx->ctx);
+    g_free(ctx->ctx_tweak);
    g_free(ctx);
    g_free(cipher);
 }
@@ -193,14 +406,21 @@ int qcrypto_cipher_encrypt(QCryptoCipher *cipher,

    switch (cipher->mode) {
    case QCRYPTO_CIPHER_MODE_ECB:
-        ctx->alg_encrypt(ctx->ctx_encrypt, len, out, in);
+        ctx->alg_encrypt_wrapper(ctx->ctx, len, out, in);
        break;

    case QCRYPTO_CIPHER_MODE_CBC:
-        cbc_encrypt(ctx->ctx_encrypt, ctx->alg_encrypt,
+        cbc_encrypt(ctx->ctx, ctx->alg_encrypt_native,
                    ctx->blocksize, ctx->iv,
                    len, out, in);
        break;
+
+    case QCRYPTO_CIPHER_MODE_XTS:
+        xts_encrypt(ctx->ctx, ctx->ctx_tweak,
+                    ctx->alg_encrypt_wrapper, ctx->alg_encrypt_wrapper,
+                    ctx->iv, len, out, in);
+        break;
+
    default:
        error_setg(errp, "Unsupported cipher algorithm %d",
                   cipher->alg);
@@ -226,15 +446,26 @@ int qcrypto_cipher_decrypt(QCryptoCipher *cipher,

    switch (cipher->mode) {
    case QCRYPTO_CIPHER_MODE_ECB:
-        ctx->alg_decrypt(ctx->ctx_decrypt ? ctx->ctx_decrypt : ctx->ctx_encrypt,
-                         len, out, in);
+        ctx->alg_decrypt_wrapper(ctx->ctx, len, out, in);
        break;

    case QCRYPTO_CIPHER_MODE_CBC:
-        cbc_decrypt(ctx->ctx_decrypt ? ctx->ctx_decrypt : ctx->ctx_encrypt,
-                    ctx->alg_decrypt, ctx->blocksize, ctx->iv,
+        cbc_decrypt(ctx->ctx, ctx->alg_decrypt_native,
+                    ctx->blocksize, ctx->iv,
                    len, out, in);
        break;
+
+    case QCRYPTO_CIPHER_MODE_XTS:
+        if (ctx->blocksize != XTS_BLOCK_SIZE) {
+            error_setg(errp, "Block size must be %d not %zu",
+                       XTS_BLOCK_SIZE, ctx->blocksize);
+            return -1;
+        }
+        xts_decrypt(ctx->ctx, ctx->ctx_tweak,
+                    ctx->alg_encrypt_wrapper, ctx->alg_decrypt_wrapper,
+                    ctx->iv, len, out, in);
+        break;
+
    default:
        error_setg(errp, "Unsupported cipher algorithm %d",
                   cipher->alg);
--- a/crypto/cipher.c
+++ b/crypto/cipher.c
@@ -19,6 +19,7 @@
 */

 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "crypto/cipher.h"


@@ -27,6 +28,13 @@ static size_t alg_key_len[QCRYPTO_CIPHER_ALG__MAX] = {
    [QCRYPTO_CIPHER_ALG_AES_192] = 24,
    [QCRYPTO_CIPHER_ALG_AES_256] = 32,
    [QCRYPTO_CIPHER_ALG_DES_RFB] = 8,
+    [QCRYPTO_CIPHER_ALG_CAST5_128] = 16,
+    [QCRYPTO_CIPHER_ALG_SERPENT_128] = 16,
+    [QCRYPTO_CIPHER_ALG_SERPENT_192] = 24,
+    [QCRYPTO_CIPHER_ALG_SERPENT_256] = 32,
+    [QCRYPTO_CIPHER_ALG_TWOFISH_128] = 16,
+    [QCRYPTO_CIPHER_ALG_TWOFISH_192] = 24,
+    [QCRYPTO_CIPHER_ALG_TWOFISH_256] = 32,
 };

 static size_t alg_block_len[QCRYPTO_CIPHER_ALG__MAX] = {
@@ -34,11 +42,19 @@ static size_t alg_block_len[QCRYPTO_CIPHER_ALG__MAX] = {
    [QCRYPTO_CIPHER_ALG_AES_192] = 16,
    [QCRYPTO_CIPHER_ALG_AES_256] = 16,
    [QCRYPTO_CIPHER_ALG_DES_RFB] = 8,
+    [QCRYPTO_CIPHER_ALG_CAST5_128] = 8,
+    [QCRYPTO_CIPHER_ALG_SERPENT_128] = 16,
+    [QCRYPTO_CIPHER_ALG_SERPENT_192] = 16,
+    [QCRYPTO_CIPHER_ALG_SERPENT_256] = 16,
+    [QCRYPTO_CIPHER_ALG_TWOFISH_128] = 16,
+    [QCRYPTO_CIPHER_ALG_TWOFISH_192] = 16,
+    [QCRYPTO_CIPHER_ALG_TWOFISH_256] = 16,
 };

 static bool mode_need_iv[QCRYPTO_CIPHER_MODE__MAX] = {
    [QCRYPTO_CIPHER_MODE_ECB] = false,
    [QCRYPTO_CIPHER_MODE_CBC] = true,
+    [QCRYPTO_CIPHER_MODE_XTS] = true,
 };


@@ -79,6 +95,7 @@ size_t qcrypto_cipher_get_iv_len(QCryptoCipherAlgorithm alg,

 static bool
 qcrypto_cipher_validate_key_length(QCryptoCipherAlgorithm alg,
+                                   QCryptoCipherMode mode,
                                   size_t nkey,
                                   Error **errp)
 {
@@ -88,10 +105,27 @@ qcrypto_cipher_validate_key_length(QCryptoCipherAlgorithm alg,
        return false;
    }

-    if (alg_key_len[alg] != nkey) {
-        error_setg(errp, "Cipher key length %zu should be %zu",
-                   nkey, alg_key_len[alg]);
-        return false;
+    if (mode == QCRYPTO_CIPHER_MODE_XTS) {
+        if (alg == QCRYPTO_CIPHER_ALG_DES_RFB) {
+            error_setg(errp, "XTS mode not compatible with DES-RFB");
+            return false;
+        }
+        if (nkey % 2) {
+            error_setg(errp, "XTS cipher key length should be a multiple of 2");
+            return false;
+        }
+
+        if (alg_key_len[alg] != (nkey / 2)) {
+            error_setg(errp, "Cipher key length %zu should be %zu",
+                       nkey, alg_key_len[alg] * 2);
+            return false;
+        }
+    } else {
+        if (alg_key_len[alg] != nkey) {
+            error_setg(errp, "Cipher key length %zu should be %zu",
+                       nkey, alg_key_len[alg]);
+            return false;
+        }
    }
    return true;
 }
--- a/crypto/hash.c
+++ b/crypto/hash.c
@@ -19,6 +19,7 @@
 */

 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "crypto/hash.h"

 #ifdef CONFIG_GNUTLS_HASH
--- a/crypto/init.c
+++ b/crypto/init.c
@@ -20,6 +20,7 @@

 #include "qemu/osdep.h"
 #include "crypto/init.h"
+#include "qapi/error.h"
 #include "qemu/thread.h"

 #ifdef CONFIG_GNUTLS
--- a/crypto/ivgen-essiv.c
+++ b/crypto/ivgen-essiv.c
@@ -0,0 +1,120 @@
+/*
+ * QEMU Crypto block IV generator - essiv
+ *
+ * Copyright (c) 2015-2016 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/bswap.h"
+#include "crypto/ivgen-essiv.h"
+
+typedef struct QCryptoIVGenESSIV QCryptoIVGenESSIV;
+struct QCryptoIVGenESSIV {
+    QCryptoCipher *cipher;
+};
+
+static int qcrypto_ivgen_essiv_init(QCryptoIVGen *ivgen,
+                                    const uint8_t *key, size_t nkey,
+                                    Error **errp)
+{
+    uint8_t *salt;
+    size_t nhash;
+    size_t nsalt;
+    QCryptoIVGenESSIV *essiv = g_new0(QCryptoIVGenESSIV, 1);
+
+    /* Not necessarily the same as nkey */
+    nsalt = qcrypto_cipher_get_key_len(ivgen->cipher);
+
+    nhash = qcrypto_hash_digest_len(ivgen->hash);
+    /* Salt must be larger of hash size or key size */
+    salt = g_new0(uint8_t, MAX(nhash, nsalt));
+
+    if (qcrypto_hash_bytes(ivgen->hash, (const gchar *)key, nkey,
+                           &salt, &nhash,
+                           errp) < 0) {
+        g_free(essiv);
+        return -1;
+    }
+
+    /* Now potentially truncate salt to match cipher key len */
+    essiv->cipher = qcrypto_cipher_new(ivgen->cipher,
+                                       QCRYPTO_CIPHER_MODE_ECB,
+                                       salt, MIN(nhash, nsalt),
+                                       errp);
+    if (!essiv->cipher) {
+        g_free(essiv);
+        g_free(salt);
+        return -1;
+    }
+
+    g_free(salt);
+    ivgen->private = essiv;
+
+    return 0;
+}
+
+static int qcrypto_ivgen_essiv_calculate(QCryptoIVGen *ivgen,
+                                         uint64_t sector,
+                                         uint8_t *iv, size_t niv,
+                                         Error **errp)
+{
+    QCryptoIVGenESSIV *essiv = ivgen->private;
+    size_t ndata = qcrypto_cipher_get_block_len(ivgen->cipher);
+    uint8_t *data = g_new(uint8_t, ndata);
+
+    sector = cpu_to_le64(sector);
+    memcpy(data, (uint8_t *)&sector, ndata);
+    if (sizeof(sector) < ndata) {
+        memset(data + sizeof(sector), 0, ndata - sizeof(sector));
+    }
+
+    if (qcrypto_cipher_encrypt(essiv->cipher,
+                               data,
+                               data,
+                               ndata,
+                               errp) < 0) {
+        g_free(data);
+        return -1;
+    }
+
+    if (ndata > niv) {
+        ndata = niv;
+    }
+    memcpy(iv, data, ndata);
+    if (ndata < niv) {
+        memset(iv + ndata, 0, niv - ndata);
+    }
+    g_free(data);
+    return 0;
+}
+
+static void qcrypto_ivgen_essiv_cleanup(QCryptoIVGen *ivgen)
+{
+    QCryptoIVGenESSIV *essiv = ivgen->private;
+
+    qcrypto_cipher_free(essiv->cipher);
+    g_free(essiv);
+}
+
+
+struct QCryptoIVGenDriver qcrypto_ivgen_essiv = {
+    .init = qcrypto_ivgen_essiv_init,
+    .calculate = qcrypto_ivgen_essiv_calculate,
+    .cleanup = qcrypto_ivgen_essiv_cleanup,
+};
+
--- a/crypto/ivgen-essiv.h
+++ b/crypto/ivgen-essiv.h
@@ -0,0 +1,28 @@
+/*
+ * QEMU Crypto block IV generator - essiv
+ *
+ * Copyright (c) 2015-2016 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "crypto/ivgenpriv.h"
+
+#ifndef QCRYPTO_IVGEN_ESSIV_H__
+#define QCRYPTO_IVGEN_ESSIV_H__
+
+extern struct QCryptoIVGenDriver qcrypto_ivgen_essiv;
+
+#endif /* QCRYPTO_IVGEN_ESSIV_H__ */
--- a/crypto/ivgen-plain.c
+++ b/crypto/ivgen-plain.c
@@ -0,0 +1,61 @@
+/*
+ * QEMU Crypto block IV generator - plain
+ *
+ * Copyright (c) 2015-2016 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/bswap.h"
+#include "crypto/ivgen-plain.h"
+
+static int qcrypto_ivgen_plain_init(QCryptoIVGen *ivgen,
+                                    const uint8_t *key, size_t nkey,
+                                    Error **errp)
+{
+    return 0;
+}
+
+static int qcrypto_ivgen_plain_calculate(QCryptoIVGen *ivgen,
+                                         uint64_t sector,
+                                         uint8_t *iv, size_t niv,
+                                         Error **errp)
+{
+    size_t ivprefix;
+    uint32_t shortsector = cpu_to_le32((sector & 0xffffffff));
+    ivprefix = sizeof(shortsector);
+    if (ivprefix > niv) {
+        ivprefix = niv;
+    }
+    memcpy(iv, &shortsector, ivprefix);
+    if (ivprefix < niv) {
+        memset(iv + ivprefix, 0, niv - ivprefix);
+    }
+    return 0;
+}
+
+static void qcrypto_ivgen_plain_cleanup(QCryptoIVGen *ivgen)
+{
+}
+
+
+struct QCryptoIVGenDriver qcrypto_ivgen_plain = {
+    .init = qcrypto_ivgen_plain_init,
+    .calculate = qcrypto_ivgen_plain_calculate,
+    .cleanup = qcrypto_ivgen_plain_cleanup,
+};
+
--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .5.50
 .5.94