vnc: fix qemu crash because of SIGSEGV

The backtrace is: 0x00007f0b75cdf880 in pixman_image_get_stride () from /lib64/libpixman-1.so.0 0x00007f0b77bcb3cf in vnc_server_fb_stride (vd=0x7f0b7a1a2bb0) at ui/vnc.c:680 vnc_dpy_copy (dcl=0x7f0b7a1a2c00, src_x=224, src_y=263, dst_x=319, dst_y=363, w=1, h=1) at ui/vnc.c:915 0x00007f0b77bbcc35 in dpy_gfx_copy (con=0x7f0b7a146210, src_x=src_x@entry=224, src_y=src_y@entry=263, dst_x=dst_x@entry=319, dst_y=dst_y@entry=363, w=1, h=1) at ui/console.c:1575 0x00007f0b77bbda4e in qemu_console_copy (con=<optimized out>, src_x=src_x@entry=224, src_y=src_y@entry=263, dst_x=dst_x@entry=319, dst_y=dst_y@entry=363, w=<optimized out>, h=<optimized out>) at ui/console.c:2111 0x00007f0b77ac0980 in cirrus_do_copy (h=<optimized out>, w=<optimized out>, src=<optimized out>, dst=<optimized out>, s=0x7f0b7b086090) at hw/display/cirrus_vga.c:774 cirrus_bitblt_videotovideo_copy (s=0x7f0b7b086090) at hw/display/cirrus_vga.c:793 cirrus_bitblt_videotovideo (s=0x7f0b7b086090) at hw/display/cirrus_vga.c:915 cirrus_bitblt_start (s=0x7f0b7b086090) at hw/display/cirrus_vga.c:1056 0x00007f0b77965cfb in memory_region_write_accessor (mr=0x7f0b7b096e40, addr=320, value=<optimized out>, size=1, shift=<optimized out>,mask=<optimized out>, attrs=...) at /root/rpmbuild/BUILD/master/qemu/memory.c:525 0x00007f0b77963f59 in access_with_adjusted_size (addr=addr@entry=320, value=value@entry=0x7f0b69a268d8, size=size@entry=4, access_size_min=<optimized out>, access_size_max=<optimized out>, access=access@entry=0x7f0b77965c80 <memory_region_write_accessor>, mr=mr@entry=0x7f0b7b096e40, attrs=attrs@entry=...) at /root/rpmbuild/BUILD/master/qemu/memory.c:591 0x00007f0b77968315 in memory_region_dispatch_write (mr=mr@entry=0x7f0b7b096e40, addr=addr@entry=320, data=18446744073709551362, size=size@entry=4, attrs=attrs@entry=...) at /root/rpmbuild/BUILD/master/qemu/memory.c:1262 0x00007f0b779256a9 in address_space_write_continue (mr=0x7f0b7b096e40, l=4, addr1=320, len=4, buf=0x7f0b77713028 "\002\377\377\377", attrs=..., addr=4273930560, as=0x7f0b7827d280 <address_space_memory>) at /root/rpmbuild/BUILD/master/qemu/exec.c:2544 address_space_write (as=<optimized out>, addr=<optimized out>, attrs=..., buf=<optimized out>, len=<optimized out>) at /root/rpmbuild/BUILD/master/qemu/exec.c:2601 0x00007f0b77925c1d in address_space_rw (as=<optimized out>, addr=<optimized out>, attrs=..., attrs@entry=..., buf=buf@entry=0x7f0b77713028 "\002\377\377\377", len=<optimized out>, is_write=<optimized out>) at /root/rpmbuild/BUILD/master/qemu/exec.c:2703 0x00007f0b77962f53 in kvm_cpu_exec (cpu=cpu@entry=0x7f0b79fcc2d0) at /root/rpmbuild/BUILD/master/qemu/kvm-all.c:1965 0x00007f0b77950cc6 in qemu_kvm_cpu_thread_fn (arg=0x7f0b79fcc2d0) at /root/rpmbuild/BUILD/master/qemu/cpus.c:1078 0x00007f0b744b3dc5 in start_thread (arg=0x7f0b69a27700) at pthread_create.c:308 0x00007f0b70d3d66d in clone () from /lib64/libc.so.6 The code path while meeting segfault: vnc_dpy_copy vnc_update_client vnc_disconnect_finish [while vnc_disconnect_start() is invoked because somethins wrong] vnc_update_server_surface vd->server = NULL; vnc_server_fb_stride pixman_image_get_stride(vd->server) Let's add a non-NULL check before calling vnc_server_fb_stride() to avoid segmentation fault. Cc: Gerd Hoffmann <kraxel@redhat.com> Cc: Daniel P. Berrange <berrange@redhat.com> Reported-by: Yanying Zhuang <ann.zhuangyanying@huawei.com> Signed-off-by: Gonglei <arei.gonglei@huawei.com> Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com> Message-id: 1472788698-120964-1-git-send-email-arei.gonglei@huawei.com Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
qemu-options.hx: correct spice options streaming-video default document value to 'off'
2016-09-13 08:01:39 +02:00 · 2016-09-13 08:01:39 +02:00 · 2016-09-13 08:01:39 +02:00 · 2016-09-13 08:01:39 +02:00 · 2016-09-12 15:09:47 +01:00 · 2016-09-12 12:48:47 +01:00
1357 changed files with 39509 additions and 16754 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -95,6 +95,10 @@
 /pc-bios/optionrom/linuxboot.bin
 /pc-bios/optionrom/linuxboot.raw
 /pc-bios/optionrom/linuxboot.img
+/pc-bios/optionrom/linuxboot_dma.asm
+/pc-bios/optionrom/linuxboot_dma.bin
+/pc-bios/optionrom/linuxboot_dma.raw
+/pc-bios/optionrom/linuxboot_dma.img
 /pc-bios/optionrom/multiboot.asm
 /pc-bios/optionrom/multiboot.bin
 /pc-bios/optionrom/multiboot.raw
--- a/.travis.yml
+++ b/.travis.yml
@@ -34,10 +34,13 @@ addons:
      - sparse
      - uuid-dev

+# The channel name "irc.oftc.net#qemu" is encrypted against qemu/qemu
+# to prevent IRC notifications from forks. This was created using:
+# $ travis encrypt -r "qemu/qemu" "irc.oftc.net#qemu"
 notifications:
  irc:
    channels:
-      - "irc.oftc.net#qemu"
+      - secure: "F7GDRgjuOo5IUyRLqSkmDL7kvdU4UcH3Lm/W2db2JnDHTGCqgEdaYEYKciyCLZ57vOTsTsOgesN8iUT7hNHBd1KWKjZe9KDTZWppWRYVwAwQMzVeSOsbbU4tRoJ6Pp+3qhH1Z0eGYR9ZgKYAoTumDFgSAYRp4IscKS8jkoedOqM="
    on_success: change
    on_failure: always
 env:
--- a/8
+++ b/8
@@ -31,7 +31,11 @@ Do not leave whitespace dangling off the ends of lines.

 2. Line width

-Lines are 80 characters; not longer.
+Lines should be 80 characters; try not to make them longer.
+
+Sometimes it is hard to do, especially when dealing with QEMU subsystems
+that use long function or symbol names.  Even in that case, do not make
+lines much longer than 80 characters.

 Rationale:
 - Some people like to tile their 24" screens with a 6x4 matrix of 80x24
@@ -39,6 +43,8 @@ Rationale:
   let them keep doing it.
 - Code and especially patches is much more readable if limited to a sane
   line length.  Eighty is traditional.
+ - The four-space indentation makes the most common excuse ("But look
+   at all that white space on the left!") moot.
 - It is the QEMU coding style.

 3. Naming
--- a/4
+++ b/4
@@ -158,6 +158,10 @@ painful. These are:
 * you may assume that right shift of a signed integer duplicates
   the sign bit (ie it is an arithmetic shift, not a logical shift)

+In addition, QEMU assumes that the compiler does not use the latitude
+given in C99 and C11 to treat aspects of signed '<<' as undefined, as
+documented in the GNU Compiler Collection manual starting at version 4.0.
+
 7. Error handling and reporting

 7.1 Reporting errors to the human user
--- a/39
+++ b/39
@@ -449,23 +449,23 @@ S: Maintained
 F: hw/*/versatile*

 Xilinx Zynq
+M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
 M: Alistair Francis <alistair.francis@xilinx.com>
-M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
 L: qemu-arm@nongnu.org
 S: Maintained
-F: hw/arm/xilinx_zynq.c
-F: hw/misc/zynq_slcr.c
+F: hw/*/xilinx_*
 F: hw/*/cadence_*
-F: hw/ssi/xilinx_spips.c
+F: hw/misc/zynq_slcr.c
+F: include/hw/xilinx.h
+X: hw/ssi/xilinx_*

 Xilinx ZynqMP
 M: Alistair Francis <alistair.francis@xilinx.com>
-M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
+M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
 L: qemu-arm@nongnu.org
 S: Maintained
-F: hw/arm/xlnx-zynqmp.c
-F: hw/arm/xlnx-ep108.c
-F: include/hw/arm/xlnx-zynqmp.h
+F: hw/*/xlnx*.c
+F: include/hw/*/xlnx*.c

 ARM ACPI Subsystem
 M: Shannon Zhao <zhaoshenglong@huawei.com>
@@ -948,14 +948,6 @@ S: Supported
 F: hw/scsi/megasas.c
 F: hw/scsi/mfi.h

-Xilinx EDK
-M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
-M: Alistair Francis <alistair.francis@xilinx.com>
-M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
-S: Maintained
-F: hw/*/xilinx_*
-F: include/hw/xilinx.h
-
 Network packet abstractions
 M: Dmitry Fleytman <dmitry@daynix.com>
 S: Maintained
@@ -971,7 +963,6 @@ F: hw/net/vmxnet*
 F: hw/scsi/vmw_pvscsi*

 Rocker
-M: Scott Feldman <sfeldma@gmail.com>
 M: Jiri Pirko <jiri@resnulli.us>
 S: Maintained
 F: hw/net/rocker/
@@ -1027,6 +1018,7 @@ F: async.c
 F: aio-*.c
 F: block/io.c
 F: migration/block*
+F: include/block/aio.h
 T: git git://github.com/stefanha/qemu.git block

 Block Jobs
@@ -1177,6 +1169,13 @@ F: numa.c
 F: include/sysemu/numa.h
 T: git git://github.com/ehabkost/qemu.git numa

+Host Memory Backends
+M: Eduardo Habkost <ehabkost@redhat.com>
+M: Igor Mammedov <imammedo@redhat.com>
+S: Maintained
+F: backends/hostmem*.c
+F: include/sysemu/hostmem.h
+
 QAPI
 M: Markus Armbruster <armbru@redhat.com>
 M: Michael Roth <mdroth@linux.vnet.ibm.com>
@@ -1243,6 +1242,12 @@ F: docs/*qmp-*
 F: scripts/qmp/
 T: git git://repo.or.cz/qemu/armbru.git qapi-next

+Register API
+M: Alistair Francis <alistair.francis@xilinx.com>
+S: Maintained
+F: hw/core/register.c
+F: include/hw/register.h
+
 SLIRP
 M: Samuel Thibault <samuel.thibault@ens-lyon.org>
 M: Jan Kiszka <jan.kiszka@siemens.com>
--- a/13
+++ b/13
@@ -30,7 +30,7 @@ CONFIG_ALL=y
 -include config-all-devices.mak
 -include config-all-disas.mak

-config-host.mak: $(SRC_PATH)/configure
+config-host.mak: $(SRC_PATH)/configure $(SRC_PATH)/pc-bios
 	@echo $@ is out-of-date, running configure
 	@# TODO: The next lines include code which supports a smooth
 	@# transition from old configurations without config.status.
@@ -185,7 +185,7 @@ qemu-version.h: FORCE
 				printf '""\n'; \
 			fi; \
 		fi) > $@.tmp)
-	$(call quiet-command, cmp --quiet $@ $@.tmp || mv $@.tmp $@)
+	$(call quiet-command, cmp -s $@ $@.tmp || mv $@.tmp $@)

 config-host.h: config-host.h-timestamp
 config-host.h-timestamp: config-host.mak
@@ -225,8 +225,9 @@ dtc/%:
 $(SUBDIR_RULES): libqemuutil.a libqemustub.a $(common-obj-y) $(qom-obj-y) $(crypto-aes-obj-$(CONFIG_USER_ONLY))

 ROMSUBDIR_RULES=$(patsubst %,romsubdir-%, $(ROMS))
+# Only keep -O and -g cflags
 romsubdir-%:
-	$(call quiet-command,$(MAKE) $(SUBDIR_MAKEFLAGS) -C pc-bios/$* V="$(V)" TARGET_DIR="$*/",)
+	$(call quiet-command,$(MAKE) $(SUBDIR_MAKEFLAGS) -C pc-bios/$* V="$(V)" TARGET_DIR="$*/" CFLAGS="$(filter -O% -g%,$(CFLAGS))",)

 ALL_SUBDIRS=$(TARGET_DIRS) $(patsubst %,pc-bios/%, $(ROMS))

@@ -408,8 +409,7 @@ common  de-ch  es     fo  fr-ca  hu     ja  mk  nl-be      pt  sl     tr \
 bepo    cz

 ifdef INSTALL_BLOBS
-BLOBS=bios.bin bios-256k.bin bios-fast.bin \
-sgabios.bin vgabios.bin vgabios-cirrus.bin \
+BLOBS=bios.bin bios-256k.bin sgabios.bin vgabios.bin vgabios-cirrus.bin \
 vgabios-stdvga.bin vgabios-vmware.bin vgabios-qxl.bin vgabios-virtio.bin \
 acpi-dsdt.aml \
 ppc_rom.bin openbios-sparc32 openbios-sparc64 openbios-ppc QEMU,tcx.bin QEMU,cgthree.bin \
@@ -417,9 +417,10 @@ pxe-e1000.rom pxe-eepro100.rom pxe-ne2k_pci.rom \
 pxe-pcnet.rom pxe-rtl8139.rom pxe-virtio.rom \
 efi-e1000.rom efi-eepro100.rom efi-ne2k_pci.rom \
 efi-pcnet.rom efi-rtl8139.rom efi-virtio.rom \
+efi-e1000e.rom efi-vmxnet3.rom \
 qemu-icon.bmp qemu_logo_no_text.svg \
 bamboo.dtb petalogix-s3adsp1800.dtb petalogix-ml605.dtb \
-multiboot.bin linuxboot.bin kvmvapic.bin \
+multiboot.bin linuxboot.bin linuxboot_dma.bin kvmvapic.bin \
 s390-ccw.img \
 spapr-rtas.bin slof.bin \
 palcode-clipper \
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -153,6 +153,7 @@ trace-events-y += hw/alpha/trace-events
 trace-events-y += ui/trace-events
 trace-events-y += audio/trace-events
 trace-events-y += net/trace-events
+trace-events-y += target-i386/trace-events
 trace-events-y += target-sparc/trace-events
 trace-events-y += target-s390x/trace-events
 trace-events-y += target-ppc/trace-events
--- a/Makefile.target
+++ b/Makefile.target
@@ -212,7 +212,7 @@ hmp-commands-info.h: $(SRC_PATH)/hmp-commands-info.hx $(SRC_PATH)/scripts/hxtool
 qmp-commands-old.h: $(SRC_PATH)/qmp-commands.hx $(SRC_PATH)/scripts/hxtool
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@,"  GEN   $(TARGET_DIR)$@")

-clean:
+clean: clean-target
 	rm -f *.a *~ $(PROGS)
 	rm -f $(shell find . -name '*.[od]')
 	rm -f hmp-commands.h qmp-commands-old.h gdbstub-xml.c
--- a/2
+++ b/2
@@ -1 +1 @@
-2.6.50
+2.7.50
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -485,12 +485,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
    return progress;
 }

-void aio_context_setup(AioContext *ctx, Error **errp)
+void aio_context_setup(AioContext *ctx)
 {
 #ifdef CONFIG_EPOLL_CREATE1
    assert(!ctx->epollfd);
    ctx->epollfd = epoll_create1(EPOLL_CLOEXEC);
    if (ctx->epollfd == -1) {
+        fprintf(stderr, "Failed to create epoll instance: %s", strerror(errno));
        ctx->epoll_available = false;
    } else {
        ctx->epoll_available = true;
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -371,6 +371,6 @@ bool aio_poll(AioContext *ctx, bool blocking)
    return progress;
 }

-void aio_context_setup(AioContext *ctx, Error **errp)
+void aio_context_setup(AioContext *ctx)
 {
 }
--- a/async.c
+++ b/async.c
@@ -29,6 +29,7 @@
 #include "block/thread-pool.h"
 #include "qemu/main-loop.h"
 #include "qemu/atomic.h"
+#include "block/raw-aio.h"

 /***********************************************************/
 /* bottom halves (can be seen as timers which expire ASAP) */
@@ -217,7 +218,7 @@ aio_ctx_check(GSource *source)
    for (bh = ctx->first_bh; bh; bh = bh->next) {
        if (!bh->deleted && bh->scheduled) {
            return true;
-	}
+        }
    }
    return aio_pending(ctx) || (timerlistgroup_deadline_ns(&ctx->tlg) == 0);
 }
@@ -242,6 +243,14 @@ aio_ctx_finalize(GSource     *source)
    qemu_bh_delete(ctx->notify_dummy_bh);
    thread_pool_free(ctx->thread_pool);

+#ifdef CONFIG_LINUX_AIO
+    if (ctx->linux_aio) {
+        laio_detach_aio_context(ctx->linux_aio, ctx);
+        laio_cleanup(ctx->linux_aio);
+        ctx->linux_aio = NULL;
+    }
+#endif
+
    qemu_mutex_lock(&ctx->bh_lock);
    while (ctx->first_bh) {
        QEMUBH *next = ctx->first_bh->next;
@@ -282,6 +291,17 @@ ThreadPool *aio_get_thread_pool(AioContext *ctx)
    return ctx->thread_pool;
 }

+#ifdef CONFIG_LINUX_AIO
+LinuxAioState *aio_get_linux_aio(AioContext *ctx)
+{
+    if (!ctx->linux_aio) {
+        ctx->linux_aio = laio_init();
+        laio_attach_aio_context(ctx->linux_aio, ctx);
+    }
+    return ctx->linux_aio;
+}
+#endif
+
 void aio_notify(AioContext *ctx)
 {
    /* Write e.g. bh->scheduled before reading ctx->notify_me.  Pairs
@@ -327,14 +347,10 @@ AioContext *aio_context_new(Error **errp)
 {
    int ret;
    AioContext *ctx;
-    Error *local_err = NULL;

    ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
-    aio_context_setup(ctx, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        goto fail;
-    }
+    aio_context_setup(ctx);
+
    ret = event_notifier_init(&ctx->notifier, false);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Failed to initialize event notifier");
@@ -345,6 +361,9 @@ AioContext *aio_context_new(Error **errp)
                           false,
                           (EventNotifierHandler *)
                           event_notifier_dummy_cb);
+#ifdef CONFIG_LINUX_AIO
+    ctx->linux_aio = NULL;
+#endif
    ctx->thread_pool = NULL;
    qemu_mutex_init(&ctx->bh_lock);
    rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx);
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -1739,13 +1739,21 @@ static void audio_vm_change_state_handler (void *opaque, int running,
    audio_reset_timer (s);
 }

-static void audio_atexit (void)
+static bool is_cleaning_up;
+
+bool audio_is_cleaning_up(void)
+{
+    return is_cleaning_up;
+}
+
+void audio_cleanup(void)
 {
    AudioState *s = &glob_audio_state;
-    HWVoiceOut *hwo = NULL;
-    HWVoiceIn *hwi = NULL;
+    HWVoiceOut *hwo, *hwon;
+    HWVoiceIn *hwi, *hwin;

-    while ((hwo = audio_pcm_hw_find_any_out (hwo))) {
+    is_cleaning_up = true;
+    QLIST_FOREACH_SAFE(hwo, &glob_audio_state.hw_head_out, entries, hwon) {
        SWVoiceCap *sc;

        if (hwo->enabled) {
@@ -1761,17 +1769,20 @@ static void audio_atexit (void)
                cb->ops.destroy (cb->opaque);
            }
        }
+        QLIST_REMOVE(hwo, entries);
    }

-    while ((hwi = audio_pcm_hw_find_any_in (hwi))) {
+    QLIST_FOREACH_SAFE(hwi, &glob_audio_state.hw_head_in, entries, hwin) {
        if (hwi->enabled) {
            hwi->pcm_ops->ctl_in (hwi, VOICE_DISABLE);
        }
        hwi->pcm_ops->fini_in (hwi);
+        QLIST_REMOVE(hwi, entries);
    }

    if (s->drv) {
        s->drv->fini (s->drv_opaque);
+        s->drv = NULL;
    }
 }

@@ -1799,7 +1810,7 @@ static void audio_init (void)
    QLIST_INIT (&s->hw_head_out);
    QLIST_INIT (&s->hw_head_in);
    QLIST_INIT (&s->cap_head);
-    atexit (audio_atexit);
+    atexit(audio_cleanup);

    s->ts = timer_new_ns(QEMU_CLOCK_VIRTUAL, audio_timer, s);

@@ -1966,8 +1977,7 @@ CaptureVoiceOut *AUD_add_capture (
        QLIST_INSERT_HEAD (&s->cap_head, cap, entries);
        QLIST_INSERT_HEAD (&cap->cb_head, cb, entries);

-        hw = NULL;
-        while ((hw = audio_pcm_hw_find_any_out (hw))) {
+        QLIST_FOREACH(hw, &glob_audio_state.hw_head_out, entries) {
            audio_attach_capture (hw);
        }
        return cap;
--- a/audio/audio.h
+++ b/audio/audio.h
@@ -21,6 +21,7 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
+
 #ifndef QEMU_AUDIO_H
 #define QEMU_AUDIO_H

@@ -162,4 +163,7 @@ static inline void *advance (void *p, int incr)
 int wav_start_capture (CaptureState *s, const char *path, int freq,
                       int bits, int nchannels);

-#endif  /* audio.h */
+bool audio_is_cleaning_up(void);
+void audio_cleanup(void);
+
+#endif /* QEMU_AUDIO_H */
--- a/audio/audio_int.h
+++ b/audio/audio_int.h
@@ -21,6 +21,7 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
+
 #ifndef QEMU_AUDIO_INT_H
 #define QEMU_AUDIO_INT_H

@@ -257,4 +258,4 @@ static inline int audio_ring_dist (int dst, int src, int len)
 #define AUDIO_FUNC __FILE__ ":" AUDIO_STRINGIFY (__LINE__)
 #endif

-#endif /* audio_int.h */
+#endif /* QEMU_AUDIO_INT_H */
--- a/audio/audio_pt_int.h
+++ b/audio/audio_pt_int.h
@@ -19,4 +19,4 @@ int audio_pt_wait (struct audio_pt *, const char *);
 int audio_pt_unlock_and_signal (struct audio_pt *, const char *);
 int audio_pt_join (struct audio_pt *, void **, const char *);

-#endif /* audio_pt_int.h */
+#endif /* QEMU_AUDIO_PT_INT_H */
--- a/audio/coreaudio.c
+++ b/audio/coreaudio.c
@@ -36,8 +36,6 @@
 #define MAC_OS_X_VERSION_10_6 1060
 #endif

-static int isAtexit;
-
 typedef struct {
    int buffer_frames;
    int nbuffers;
@@ -378,11 +376,6 @@ static inline UInt32 isPlaying (AudioDeviceID outputDeviceID)
    return result;
 }

-static void coreaudio_atexit (void)
-{
-    isAtexit = 1;
-}
-
 static int coreaudio_lock (coreaudioVoiceOut *core, const char *fn_name)
 {
    int err;
@@ -630,7 +623,7 @@ static void coreaudio_fini_out (HWVoiceOut *hw)
    int err;
    coreaudioVoiceOut *core = (coreaudioVoiceOut *) hw;

-    if (!isAtexit) {
+    if (!audio_is_cleaning_up()) {
        /* stop playback */
        if (isPlaying(core->outputDeviceID)) {
            status = AudioDeviceStop(core->outputDeviceID, core->ioprocid);
@@ -673,7 +666,7 @@ static int coreaudio_ctl_out (HWVoiceOut *hw, int cmd, ...)

    case VOICE_DISABLE:
        /* stop playback */
-        if (!isAtexit) {
+        if (!audio_is_cleaning_up()) {
            if (isPlaying(core->outputDeviceID)) {
                status = AudioDeviceStop(core->outputDeviceID,
                                         core->ioprocid);
@@ -697,7 +690,6 @@ static void *coreaudio_audio_init (void)
    CoreaudioConf *conf = g_malloc(sizeof(CoreaudioConf));
    *conf = glob_conf;

-    atexit(coreaudio_atexit);
    return conf;
 }

--- a/audio/mixeng.h
+++ b/audio/mixeng.h
@@ -21,6 +21,7 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
+
 #ifndef QEMU_MIXENG_H
 #define QEMU_MIXENG_H

@@ -48,4 +49,4 @@ void st_rate_stop (void *opaque);
 void mixeng_clear (struct st_sample *buf, int len);
 void mixeng_volume (struct st_sample *buf, int len, struct mixeng_volume *vol);

-#endif  /* mixeng.h */
+#endif /* QEMU_MIXENG_H */
--- a/audio/trace-events
+++ b/audio/trace-events
@@ -1,4 +1,4 @@
-# See docs/trace-events.txt for syntax documentation.
+# See docs/tracing.txt for syntax documentation.

 # audio/alsaaudio.c
 alsa_revents(int revents) "revents = %d"
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -64,6 +64,14 @@ out:
    error_propagate(errp, local_err);
 }

+static uint16List **host_memory_append_node(uint16List **node,
+                                            unsigned long value)
+{
+     *node = g_malloc0(sizeof(**node));
+     (*node)->value = value;
+     return &(*node)->next;
+}
+
 static void
 host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
                                   void *opaque, Error **errp)
@@ -74,13 +82,12 @@ host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
    unsigned long value;

    value = find_first_bit(backend->host_nodes, MAX_NODES);
-    if (value == MAX_NODES) {
-        return;
-    }

-    *node = g_malloc0(sizeof(**node));
-    (*node)->value = value;
-    node = &(*node)->next;
+    node = host_memory_append_node(node, value);
+
+    if (value == MAX_NODES) {
+        goto out;
+    }

    do {
        value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
@@ -88,11 +95,10 @@ host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
            break;
        }

-        *node = g_malloc0(sizeof(**node));
-        (*node)->value = value;
-        node = &(*node)->next;
+        node = host_memory_append_node(node, value);
    } while (true);

+out:
    visit_type_uint16List(v, name, &host_nodes, errp);
 }

@@ -197,6 +203,7 @@ static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
 static void host_memory_backend_set_prealloc(Object *obj, bool value,
                                             Error **errp)
 {
+    Error *local_err = NULL;
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    if (backend->force_prealloc) {
@@ -217,7 +224,11 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value,
        void *ptr = memory_region_get_ram_ptr(&backend->mr);
        uint64_t sz = memory_region_size(&backend->mr);

-        os_mem_prealloc(fd, ptr, sz);
+        os_mem_prealloc(fd, ptr, sz, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
        backend->prealloc = true;
    }
 }
@@ -258,6 +269,16 @@ host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp)
    return memory_region_size(&backend->mr) ? &backend->mr : NULL;
 }

+void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
+{
+    backend->is_mapped = mapped;
+}
+
+bool host_memory_backend_is_mapped(HostMemoryBackend *backend)
+{
+    return backend->is_mapped;
+}
+
 static void
 host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
 {
@@ -270,8 +291,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
    if (bc->alloc) {
        bc->alloc(backend, &local_err);
        if (local_err) {
-            error_propagate(errp, local_err);
-            return;
+            goto out;
        }

        ptr = memory_region_get_ram_ptr(&backend->mr);
@@ -327,18 +347,21 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
         * specified NUMA policy in place.
         */
        if (backend->prealloc) {
-            os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz);
+            os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
+                            &local_err);
+            if (local_err) {
+                goto out;
+            }
        }
    }
+out:
+    error_propagate(errp, local_err);
 }

 static bool
 host_memory_backend_can_be_deleted(UserCreatable *uc, Error **errp)
 {
-    MemoryRegion *mr;
-
-    mr = host_memory_backend_get_memory(MEMORY_BACKEND(uc), errp);
-    if (memory_region_is_mapped(mr)) {
+    if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) {
        return false;
    } else {
        return true;
--- a/backends/msmouse.c
+++ b/backends/msmouse.c
@@ -25,16 +25,51 @@
 #include "qemu-common.h"
 #include "sysemu/char.h"
 #include "ui/console.h"
+#include "ui/input.h"

 #define MSMOUSE_LO6(n) ((n) & 0x3f)
 #define MSMOUSE_HI2(n) (((n) & 0xc0) >> 6)

-static void msmouse_event(void *opaque,
-                          int dx, int dy, int dz, int buttons_state)
-{
-    CharDriverState *chr = (CharDriverState *)opaque;
+typedef struct {
+    CharDriverState *chr;
+    QemuInputHandlerState *hs;
+    int axis[INPUT_AXIS__MAX];
+    bool btns[INPUT_BUTTON__MAX];
+    bool btnc[INPUT_BUTTON__MAX];
+    uint8_t outbuf[32];
+    int outlen;
+} MouseState;

+static void msmouse_chr_accept_input(CharDriverState *chr)
+{
+    MouseState *mouse = chr->opaque;
+    int len;
+
+    len = qemu_chr_be_can_write(chr);
+    if (len > mouse->outlen) {
+        len = mouse->outlen;
+    }
+    if (!len) {
+        return;
+    }
+
+    qemu_chr_be_write(chr, mouse->outbuf, len);
+    mouse->outlen -= len;
+    if (mouse->outlen) {
+        memmove(mouse->outbuf, mouse->outbuf + len, mouse->outlen);
+    }
+}
+
+static void msmouse_queue_event(MouseState *mouse)
+{
    unsigned char bytes[4] = { 0x40, 0x00, 0x00, 0x00 };
+    int dx, dy, count = 3;
+
+    dx = mouse->axis[INPUT_AXIS_X];
+    mouse->axis[INPUT_AXIS_X] = 0;
+
+    dy = mouse->axis[INPUT_AXIS_Y];
+    mouse->axis[INPUT_AXIS_Y] = 0;

    /* Movement deltas */
    bytes[0] |= (MSMOUSE_HI2(dy) << 2) | MSMOUSE_HI2(dx);
@@ -42,14 +77,54 @@ static void msmouse_event(void *opaque,
    bytes[2] |= MSMOUSE_LO6(dy);

    /* Buttons */
-    bytes[0] |= (buttons_state & 0x01 ? 0x20 : 0x00);
-    bytes[0] |= (buttons_state & 0x02 ? 0x10 : 0x00);
-    bytes[3] |= (buttons_state & 0x04 ? 0x20 : 0x00);
+    bytes[0] |= (mouse->btns[INPUT_BUTTON_LEFT]   ? 0x20 : 0x00);
+    bytes[0] |= (mouse->btns[INPUT_BUTTON_RIGHT]  ? 0x10 : 0x00);
+    if (mouse->btns[INPUT_BUTTON_MIDDLE] ||
+        mouse->btnc[INPUT_BUTTON_MIDDLE]) {
+        bytes[3] |= (mouse->btns[INPUT_BUTTON_MIDDLE] ? 0x20 : 0x00);
+        mouse->btnc[INPUT_BUTTON_MIDDLE] = false;
+        count = 4;
+    }

-    /* We always send the packet of, so that we do not have to keep track
-       of previous state of the middle button. This can potentially confuse
-       some very old drivers for two button mice though. */
-    qemu_chr_be_write(chr, bytes, 4);
+    if (mouse->outlen <= sizeof(mouse->outbuf) - count) {
+        memcpy(mouse->outbuf + mouse->outlen, bytes, count);
+        mouse->outlen += count;
+    } else {
+        /* queue full -> drop event */
+    }
+}
+
+static void msmouse_input_event(DeviceState *dev, QemuConsole *src,
+                                InputEvent *evt)
+{
+    MouseState *mouse = (MouseState *)dev;
+    InputMoveEvent *move;
+    InputBtnEvent *btn;
+
+    switch (evt->type) {
+    case INPUT_EVENT_KIND_REL:
+        move = evt->u.rel.data;
+        mouse->axis[move->axis] += move->value;
+        break;
+
+    case INPUT_EVENT_KIND_BTN:
+        btn = evt->u.btn.data;
+        mouse->btns[btn->button] = btn->down;
+        mouse->btnc[btn->button] = true;
+        break;
+
+    default:
+        /* keep gcc happy */
+        break;
+    }
+}
+
+static void msmouse_input_sync(DeviceState *dev)
+{
+    MouseState *mouse = (MouseState *)dev;
+
+    msmouse_queue_event(mouse);
+    msmouse_chr_accept_input(mouse->chr);
 }

 static int msmouse_chr_write (struct CharDriverState *s, const uint8_t *buf, int len)
@@ -60,26 +135,41 @@ static int msmouse_chr_write (struct CharDriverState *s, const uint8_t *buf, int

 static void msmouse_chr_close (struct CharDriverState *chr)
 {
-    g_free (chr);
+    MouseState *mouse = chr->opaque;
+
+    qemu_input_handler_unregister(mouse->hs);
+    g_free(mouse);
+    g_free(chr);
 }

+static QemuInputHandler msmouse_handler = {
+    .name  = "QEMU Microsoft Mouse",
+    .mask  = INPUT_EVENT_MASK_BTN | INPUT_EVENT_MASK_REL,
+    .event = msmouse_input_event,
+    .sync  = msmouse_input_sync,
+};
+
 static CharDriverState *qemu_chr_open_msmouse(const char *id,
                                              ChardevBackend *backend,
                                              ChardevReturn *ret,
                                              Error **errp)
 {
    ChardevCommon *common = backend->u.msmouse.data;
+    MouseState *mouse;
    CharDriverState *chr;

    chr = qemu_chr_alloc(common, errp);
-    if (!chr) {
-        return NULL;
-    }
    chr->chr_write = msmouse_chr_write;
    chr->chr_close = msmouse_chr_close;
+    chr->chr_accept_input = msmouse_chr_accept_input;
    chr->explicit_be_open = true;

-    qemu_add_mouse_event_handler(msmouse_event, chr, 0, "QEMU Microsoft Mouse");
+    mouse = g_new0(MouseState, 1);
+    mouse->hs = qemu_input_handler_register((DeviceState *)mouse,
+                                            &msmouse_handler);
+
+    mouse->chr = chr;
+    chr->opaque = mouse;

    return chr;
 }
--- a/block.c
+++ b/block.c
@@ -25,6 +25,7 @@
 #include "trace.h"
 #include "block/block_int.h"
 #include "block/blockjob.h"
+#include "block/nbd.h"
 #include "qemu/error-report.h"
 #include "qemu/module.h"
 #include "qapi/qmp/qerror.h"
@@ -234,6 +235,8 @@ BlockDriverState *bdrv_new(void)
    bs->refcnt = 1;
    bs->aio_context = qemu_get_aio_context();

+    qemu_co_queue_init(&bs->flush_queue);
+
    QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);

    return bs;
@@ -329,8 +332,8 @@ int bdrv_create(BlockDriver *drv, const char* filename,
        /* Fast-path if already in coroutine context */
        bdrv_create_co_entry(&cco);
    } else {
-        co = qemu_coroutine_create(bdrv_create_co_entry);
-        qemu_coroutine_enter(co, &cco);
+        co = qemu_coroutine_create(bdrv_create_co_entry, &cco);
+        qemu_coroutine_enter(co);
        while (cco.ret == NOT_DONE) {
            aio_poll(qemu_get_aio_context(), true);
        }
@@ -536,9 +539,10 @@ BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
    return drv;
 }

-static int find_image_format(BlockDriverState *bs, const char *filename,
+static int find_image_format(BdrvChild *file, const char *filename,
                             BlockDriver **pdrv, Error **errp)
 {
+    BlockDriverState *bs = file->bs;
    BlockDriver *drv;
    uint8_t buf[BLOCK_PROBE_BUF_SIZE];
    int ret = 0;
@@ -549,7 +553,7 @@ static int find_image_format(BlockDriverState *bs, const char *filename,
        return ret;
    }

-    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
+    ret = bdrv_pread(file, 0, buf, sizeof(buf));
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not read image for determining its "
                         "format");
@@ -937,7 +941,6 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
        goto fail_opts;
    }

-    bs->request_alignment = drv->bdrv_co_preadv ? 1 : 512;
    bs->read_only = !(bs->open_flags & BDRV_O_RDWR);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
@@ -1017,7 +1020,7 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,

    assert(bdrv_opt_mem_align(bs) != 0);
    assert(bdrv_min_mem_align(bs) != 0);
-    assert(is_power_of_2(bs->request_alignment) || bdrv_is_sg(bs));
+    assert(is_power_of_2(bs->bl.request_alignment));

    qemu_opts_del(opts);
    return 0;
@@ -1653,7 +1656,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
    /* Image format probing */
    bs->probed = !drv;
    if (!drv && file) {
-        ret = find_image_format(file->bs, filename, &drv, &local_err);
+        ret = find_image_format(file, filename, &drv, &local_err);
        if (ret < 0) {
            goto fail;
        }
@@ -2184,9 +2187,9 @@ static void bdrv_close(BlockDriverState *bs)
        bs->backing_file[0] = '\0';
        bs->backing_format[0] = '\0';
        bs->total_sectors = 0;
-        bs->encrypted = 0;
-        bs->valid_key = 0;
-        bs->sg = 0;
+        bs->encrypted = false;
+        bs->valid_key = false;
+        bs->sg = false;
        QDECREF(bs->options);
        QDECREF(bs->explicit_options);
        bs->options = NULL;
@@ -2204,6 +2207,7 @@ static void bdrv_close(BlockDriverState *bs)
 void bdrv_close_all(void)
 {
    block_job_cancel_sync_all();
+    nbd_export_close_all();

    /* Drop references from requests still in flight, such as canceled block
     * jobs whose AIO context has not been polled yet */
@@ -2323,116 +2327,6 @@ int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
    return bs->drv->bdrv_check(bs, res, fix);
 }

-#define COMMIT_BUF_SECTORS 2048
-
-/* commit COW file into the raw image */
-int bdrv_commit(BlockDriverState *bs)
-{
-    BlockDriver *drv = bs->drv;
-    int64_t sector, total_sectors, length, backing_length;
-    int n, ro, open_flags;
-    int ret = 0;
-    uint8_t *buf = NULL;
-
-    if (!drv)
-        return -ENOMEDIUM;
-
-    if (!bs->backing) {
-        return -ENOTSUP;
-    }
-
-    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
-        bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
-        return -EBUSY;
-    }
-
-    ro = bs->backing->bs->read_only;
-    open_flags =  bs->backing->bs->open_flags;
-
-    if (ro) {
-        if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
-            return -EACCES;
-        }
-    }
-
-    length = bdrv_getlength(bs);
-    if (length < 0) {
-        ret = length;
-        goto ro_cleanup;
-    }
-
-    backing_length = bdrv_getlength(bs->backing->bs);
-    if (backing_length < 0) {
-        ret = backing_length;
-        goto ro_cleanup;
-    }
-
-    /* If our top snapshot is larger than the backing file image,
-     * grow the backing file image if possible.  If not possible,
-     * we must return an error */
-    if (length > backing_length) {
-        ret = bdrv_truncate(bs->backing->bs, length);
-        if (ret < 0) {
-            goto ro_cleanup;
-        }
-    }
-
-    total_sectors = length >> BDRV_SECTOR_BITS;
-
-    /* qemu_try_blockalign() for bs will choose an alignment that works for
-     * bs->backing->bs as well, so no need to compare the alignment manually. */
-    buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
-    if (buf == NULL) {
-        ret = -ENOMEM;
-        goto ro_cleanup;
-    }
-
-    for (sector = 0; sector < total_sectors; sector += n) {
-        ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
-        if (ret < 0) {
-            goto ro_cleanup;
-        }
-        if (ret) {
-            ret = bdrv_read(bs, sector, buf, n);
-            if (ret < 0) {
-                goto ro_cleanup;
-            }
-
-            ret = bdrv_write(bs->backing->bs, sector, buf, n);
-            if (ret < 0) {
-                goto ro_cleanup;
-            }
-        }
-    }
-
-    if (drv->bdrv_make_empty) {
-        ret = drv->bdrv_make_empty(bs);
-        if (ret < 0) {
-            goto ro_cleanup;
-        }
-        bdrv_flush(bs);
-    }
-
-    /*
-     * Make sure all data we wrote to the backing device is actually
-     * stable on disk.
-     */
-    if (bs->backing) {
-        bdrv_flush(bs->backing->bs);
-    }
-
-    ret = 0;
-ro_cleanup:
-    qemu_vfree(buf);
-
-    if (ro) {
-        /* ignoring error return here */
-        bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
-    }
-
-    return ret;
-}
-
 /*
 * Return values:
 * 0        - success
@@ -2582,6 +2476,7 @@ int bdrv_truncate(BlockDriverState *bs, int64_t offset)
        ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
        bdrv_dirty_bitmap_truncate(bs);
        bdrv_parent_cb_resize(bs);
+        ++bs->write_gen;
    }
    return ret;
 }
@@ -2644,30 +2539,30 @@ void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
    *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
 }

-int bdrv_is_read_only(BlockDriverState *bs)
+bool bdrv_is_read_only(BlockDriverState *bs)
 {
    return bs->read_only;
 }

-int bdrv_is_sg(BlockDriverState *bs)
+bool bdrv_is_sg(BlockDriverState *bs)
 {
    return bs->sg;
 }

-int bdrv_is_encrypted(BlockDriverState *bs)
+bool bdrv_is_encrypted(BlockDriverState *bs)
 {
    if (bs->backing && bs->backing->bs->encrypted) {
-        return 1;
+        return true;
    }
    return bs->encrypted;
 }

-int bdrv_key_required(BlockDriverState *bs)
+bool bdrv_key_required(BlockDriverState *bs)
 {
    BdrvChild *backing = bs->backing;

    if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
-        return 1;
+        return true;
    }
    return (bs->encrypted && !bs->valid_key);
 }
@@ -2689,10 +2584,10 @@ int bdrv_set_key(BlockDriverState *bs, const char *key)
    }
    ret = bs->drv->bdrv_set_key(bs, key);
    if (ret < 0) {
-        bs->valid_key = 0;
+        bs->valid_key = false;
    } else if (!bs->valid_key) {
        /* call the change callback now, we skipped it on open */
-        bs->valid_key = 1;
+        bs->valid_key = true;
        bdrv_parent_cb_change_media(bs, true);
    }
    return ret;
@@ -2944,7 +2839,7 @@ bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
 {
    BlockDriverInfo bdi;

-    if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) {
+    if (!(bs->open_flags & BDRV_O_UNMAP)) {
        return false;
    }

--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -9,7 +9,7 @@ block-obj-y += block-backend.o snapshot.o qapi.o
 block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
 block-obj-$(CONFIG_POSIX) += raw-posix.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
-block-obj-y += null.o mirror.o io.o
+block-obj-y += null.o mirror.o commit.o io.o
 block-obj-y += throttle-groups.o

 block-obj-y += nbd.o nbd-client.o sheepdog.o
@@ -26,7 +26,6 @@ block-obj-y += write-threshold.o
 block-obj-y += crypto.o

 common-obj-y += stream.o
-common-obj-y += commit.o
 common-obj-y += backup.o

 iscsi.o-cflags     := $(LIBISCSI_CFLAGS)
--- a/block/backup.c
+++ b/block/backup.c
@@ -47,6 +47,7 @@ typedef struct BackupBlockJob {
    uint64_t sectors_read;
    unsigned long *done_bitmap;
    int64_t cluster_size;
+    bool compress;
    NotifierWithReturn before_write;
    QLIST_HEAD(, CowRequest) inflight_reqs;
 } BackupBlockJob;
@@ -154,7 +155,8 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
                                       bounce_qiov.size, BDRV_REQ_MAY_UNMAP);
        } else {
            ret = blk_co_pwritev(job->target, start * job->cluster_size,
-                                 bounce_qiov.size, &bounce_qiov, 0);
+                                 bounce_qiov.size, &bounce_qiov,
+                                 job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
        }
        if (ret < 0) {
            trace_backup_do_cow_write_fail(job, start, ret);
@@ -474,9 +476,10 @@ static void coroutine_fn backup_run(void *opaque)
    block_job_defer_to_main_loop(&job->common, backup_complete, data);
 }

-void backup_start(BlockDriverState *bs, BlockDriverState *target,
-                  int64_t speed, MirrorSyncMode sync_mode,
-                  BdrvDirtyBitmap *sync_bitmap,
+void backup_start(const char *job_id, BlockDriverState *bs,
+                  BlockDriverState *target, int64_t speed,
+                  MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
+                  bool compress,
                  BlockdevOnError on_source_error,
                  BlockdevOnError on_target_error,
                  BlockCompletionFunc *cb, void *opaque,
@@ -507,6 +510,12 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
        return;
    }

+    if (compress && target->drv->bdrv_co_pwritev_compressed == NULL) {
+        error_setg(errp, "Compression is not supported for this drive %s",
+                   bdrv_get_device_name(target));
+        return;
+    }
+
    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
        return;
    }
@@ -541,7 +550,8 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
        goto error;
    }

-    job = block_job_create(&backup_job_driver, bs, speed, cb, opaque, errp);
+    job = block_job_create(job_id, &backup_job_driver, bs, speed,
+                           cb, opaque, errp);
    if (!job) {
        goto error;
    }
@@ -554,6 +564,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
    job->sync_mode = sync_mode;
    job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ?
                       sync_bitmap : NULL;
+    job->compress = compress;

    /* If there is no backing file on the target, we cannot rely on COW if our
     * backup cluster size is smaller than the target cluster size. Even for
@@ -575,9 +586,9 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,

    bdrv_op_block_all(target, job->common.blocker);
    job->common.len = len;
-    job->common.co = qemu_coroutine_create(backup_run);
+    job->common.co = qemu_coroutine_create(backup_run, job);
    block_job_txn_add_job(txn, &job->common);
-    qemu_coroutine_enter(job->common.co, job);
+    qemu_coroutine_enter(job->common.co);
    return;

 error:
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -37,6 +37,10 @@
 typedef struct BDRVBlkdebugState {
    int state;
    int new_state;
+    int align;
+
+    /* For blkdebug_refresh_filename() */
+    char *config_file;

    QLIST_HEAD(, BlkdebugRule) rules[BLKDBG__MAX];
    QSIMPLEQ_HEAD(, BlkdebugRule) active_rules;
@@ -350,7 +354,6 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
    BDRVBlkdebugState *s = bs->opaque;
    QemuOpts *opts;
    Error *local_err = NULL;
-    const char *config;
    uint64_t align;
    int ret;

@@ -363,8 +366,8 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
    }

    /* Read rules from config file or command line options */
-    config = qemu_opt_get(opts, "config");
-    ret = read_config(s, config, options, errp);
+    s->config_file = g_strdup(qemu_opt_get(opts, "config"));
+    ret = read_config(s, s->config_file, options, errp);
    if (ret) {
        goto out;
    }
@@ -382,10 +385,10 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
    }

    /* Set request alignment */
-    align = qemu_opt_get_size(opts, "align", bs->request_alignment);
-    if (align > 0 && align < INT_MAX && !(align & (align - 1))) {
-        bs->request_alignment = align;
-    } else {
+    align = qemu_opt_get_size(opts, "align", 0);
+    if (align < INT_MAX && is_power_of_2(align)) {
+        s->align = align;
+    } else if (align) {
        error_setg(errp, "Invalid alignment");
        ret = -EINVAL;
        goto fail_unref;
@@ -397,6 +400,9 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
 fail_unref:
    bdrv_unref_child(bs, bs->file);
 out:
+    if (ret < 0) {
+        g_free(s->config_file);
+    }
    qemu_opts_del(opts);
    return ret;
 }
@@ -456,7 +462,7 @@ static BlockAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
        return inject_error(bs, cb, opaque, rule);
    }

-    return bdrv_aio_readv(bs->file->bs, sector_num, qiov, nb_sectors,
+    return bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors,
                          cb, opaque);
 }

@@ -479,7 +485,7 @@ static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
        return inject_error(bs, cb, opaque, rule);
    }

-    return bdrv_aio_writev(bs->file->bs, sector_num, qiov, nb_sectors,
+    return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
                           cb, opaque);
 }

@@ -514,6 +520,8 @@ static void blkdebug_close(BlockDriverState *bs)
            remove_rule(rule);
        }
    }
+
+    g_free(s->config_file);
 }

 static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule)
@@ -620,7 +628,7 @@ static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag)

    QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, next) {
        if (!strcmp(r->tag, tag)) {
-            qemu_coroutine_enter(r->co, NULL);
+            qemu_coroutine_enter(r->co);
            return 0;
        }
    }
@@ -646,7 +654,7 @@ static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs,
    }
    QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, r_next) {
        if (!strcmp(r->tag, tag)) {
-            qemu_coroutine_enter(r->co, NULL);
+            qemu_coroutine_enter(r->co);
            ret = 0;
        }
    }
@@ -678,6 +686,7 @@ static int blkdebug_truncate(BlockDriverState *bs, int64_t offset)

 static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
 {
+    BDRVBlkdebugState *s = bs->opaque;
    QDict *opts;
    const QDictEntry *e;
    bool force_json = false;
@@ -699,8 +708,7 @@ static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)

    if (!force_json && bs->file->bs->exact_filename[0]) {
        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "blkdebug:%s:%s",
-                 qdict_get_try_str(options, "config") ?: "",
+                 "blkdebug:%s:%s", s->config_file ?: "",
                 bs->file->bs->exact_filename);
    }

@@ -720,6 +728,15 @@ static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
    bs->full_open_options = opts;
 }

+static void blkdebug_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+    BDRVBlkdebugState *s = bs->opaque;
+
+    if (s->align) {
+        bs->bl.request_alignment = s->align;
+    }
+}
+
 static int blkdebug_reopen_prepare(BDRVReopenState *reopen_state,
                                   BlockReopenQueue *queue, Error **errp)
 {
@@ -738,6 +755,7 @@ static BlockDriver bdrv_blkdebug = {
    .bdrv_getlength         = blkdebug_getlength,
    .bdrv_truncate          = blkdebug_truncate,
    .bdrv_refresh_filename  = blkdebug_refresh_filename,
+    .bdrv_refresh_limits    = blkdebug_refresh_limits,

    .bdrv_aio_readv         = blkdebug_aio_readv,
    .bdrv_aio_writev        = blkdebug_aio_writev,
--- a/block/blkreplay.c
+++ b/block/blkreplay.c
@@ -65,7 +65,7 @@ static int64_t blkreplay_getlength(BlockDriverState *bs)
 static void blkreplay_bh_cb(void *opaque)
 {
    Request *req = opaque;
-    qemu_coroutine_enter(req->co, NULL);
+    qemu_coroutine_enter(req->co);
    qemu_bh_delete(req->bh);
    g_free(req);
 }
@@ -81,22 +81,22 @@ static void block_request_create(uint64_t reqid, BlockDriverState *bs,
    replay_block_event(req->bh, reqid);
 }

-static int coroutine_fn blkreplay_co_readv(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+static int coroutine_fn blkreplay_co_preadv(BlockDriverState *bs,
+    uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
    uint64_t reqid = request_id++;
-    int ret = bdrv_co_readv(bs->file->bs, sector_num, nb_sectors, qiov);
+    int ret = bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
    block_request_create(reqid, bs, qemu_coroutine_self());
    qemu_coroutine_yield();

    return ret;
 }

-static int coroutine_fn blkreplay_co_writev(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+static int coroutine_fn blkreplay_co_pwritev(BlockDriverState *bs,
+    uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
    uint64_t reqid = request_id++;
-    int ret = bdrv_co_writev(bs->file->bs, sector_num, nb_sectors, qiov);
+    int ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
    block_request_create(reqid, bs, qemu_coroutine_self());
    qemu_coroutine_yield();

@@ -107,18 +107,18 @@ static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs,
    int64_t offset, int count, BdrvRequestFlags flags)
 {
    uint64_t reqid = request_id++;
-    int ret = bdrv_co_pwrite_zeroes(bs->file->bs, offset, count, flags);
+    int ret = bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
    block_request_create(reqid, bs, qemu_coroutine_self());
    qemu_coroutine_yield();

    return ret;
 }

-static int coroutine_fn blkreplay_co_discard(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors)
+static int coroutine_fn blkreplay_co_pdiscard(BlockDriverState *bs,
+                                              int64_t offset, int count)
 {
    uint64_t reqid = request_id++;
-    int ret = bdrv_co_discard(bs->file->bs, sector_num, nb_sectors);
+    int ret = bdrv_co_pdiscard(bs->file->bs, offset, count);
    block_request_create(reqid, bs, qemu_coroutine_self());
    qemu_coroutine_yield();

@@ -144,11 +144,11 @@ static BlockDriver bdrv_blkreplay = {
    .bdrv_close             = blkreplay_close,
    .bdrv_getlength         = blkreplay_getlength,

-    .bdrv_co_readv          = blkreplay_co_readv,
-    .bdrv_co_writev         = blkreplay_co_writev,
+    .bdrv_co_preadv         = blkreplay_co_preadv,
+    .bdrv_co_pwritev        = blkreplay_co_pwritev,

    .bdrv_co_pwrite_zeroes  = blkreplay_co_pwrite_zeroes,
-    .bdrv_co_discard        = blkreplay_co_discard,
+    .bdrv_co_pdiscard       = blkreplay_co_pdiscard,
    .bdrv_co_flush          = blkreplay_co_flush,
 };

--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -247,9 +247,9 @@ static BlockAIOCB *blkverify_aio_readv(BlockDriverState *bs,
    qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
    qemu_iovec_clone(&acb->raw_qiov, qiov, acb->buf);

-    bdrv_aio_readv(s->test_file->bs, sector_num, qiov, nb_sectors,
+    bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
                   blkverify_aio_cb, acb);
-    bdrv_aio_readv(bs->file->bs, sector_num, &acb->raw_qiov, nb_sectors,
+    bdrv_aio_readv(bs->file, sector_num, &acb->raw_qiov, nb_sectors,
                   blkverify_aio_cb, acb);
    return &acb->common;
 }
@@ -262,9 +262,9 @@ static BlockAIOCB *blkverify_aio_writev(BlockDriverState *bs,
    BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov,
                                            nb_sectors, cb, opaque);

-    bdrv_aio_writev(s->test_file->bs, sector_num, qiov, nb_sectors,
+    bdrv_aio_writev(s->test_file, sector_num, qiov, nb_sectors,
                    blkverify_aio_cb, acb);
-    bdrv_aio_writev(bs->file->bs, sector_num, qiov, nb_sectors,
+    bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
                    blkverify_aio_cb, acb);
    return &acb->common;
 }
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -409,6 +409,22 @@ bool bdrv_has_blk(BlockDriverState *bs)
    return bdrv_first_blk(bs) != NULL;
 }

+/*
+ * Returns true if @bs has only BlockBackends as parents.
+ */
+bool bdrv_is_root_node(BlockDriverState *bs)
+{
+    BdrvChild *c;
+
+    QLIST_FOREACH(c, &bs->parents, next_parent) {
+        if (c->role != &child_root) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
 /*
 * Return @blk's DriveInfo if any, else null.
 */
@@ -727,21 +743,6 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
    return 0;
 }

-static int blk_check_request(BlockBackend *blk, int64_t sector_num,
-                             int nb_sectors)
-{
-    if (sector_num < 0 || sector_num > INT64_MAX / BDRV_SECTOR_SIZE) {
-        return -EIO;
-    }
-
-    if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
-        return -EIO;
-    }
-
-    return blk_check_byte_request(blk, sector_num * BDRV_SECTOR_SIZE,
-                                  nb_sectors * BDRV_SECTOR_SIZE);
-}
-
 int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
                               unsigned int bytes, QEMUIOVector *qiov,
                               BdrvRequestFlags flags)
@@ -760,7 +761,7 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
        throttle_group_co_io_limits_intercept(blk, bytes, false);
    }

-    return bdrv_co_preadv(blk_bs(blk), offset, bytes, qiov, flags);
+    return bdrv_co_preadv(blk->root, offset, bytes, qiov, flags);
 }

 int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
@@ -785,7 +786,7 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
        flags |= BDRV_REQ_FUA;
    }

-    return bdrv_co_pwritev(blk_bs(blk), offset, bytes, qiov, flags);
+    return bdrv_co_pwritev(blk->root, offset, bytes, qiov, flags);
 }

 typedef struct BlkRwCo {
@@ -836,8 +837,8 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
        .ret    = NOT_DONE,
    };

-    co = qemu_coroutine_create(co_entry);
-    qemu_coroutine_enter(co, &rwco);
+    co = qemu_coroutine_create(co_entry, &rwco);
+    qemu_coroutine_enter(co);

    aio_context = blk_get_aio_context(blk);
    while (rwco.ret == NOT_DONE) {
@@ -870,6 +871,11 @@ int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
                   flags | BDRV_REQ_ZERO_WRITE);
 }

+int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
+{
+    return bdrv_make_zero(blk->root, flags);
+}
+
 static void error_callback_bh(void *opaque)
 {
    struct BlockBackendAIOCB *acb = opaque;
@@ -945,8 +951,8 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
    acb->bh = NULL;
    acb->has_returned = false;

-    co = qemu_coroutine_create(co_entry);
-    qemu_coroutine_enter(co, acb);
+    co = qemu_coroutine_create(co_entry, acb);
+    qemu_coroutine_enter(co);

    acb->has_returned = true;
    if (acb->rwco.ret != NOT_DONE) {
@@ -1060,16 +1066,16 @@ BlockAIOCB *blk_aio_flush(BlockBackend *blk,
    return bdrv_aio_flush(blk_bs(blk), cb, opaque);
 }

-BlockAIOCB *blk_aio_discard(BlockBackend *blk,
-                            int64_t sector_num, int nb_sectors,
-                            BlockCompletionFunc *cb, void *opaque)
+BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
+                             int64_t offset, int count,
+                             BlockCompletionFunc *cb, void *opaque)
 {
-    int ret = blk_check_request(blk, sector_num, nb_sectors);
+    int ret = blk_check_byte_request(blk, offset, count);
    if (ret < 0) {
        return blk_abort_aio_request(blk, cb, opaque, ret);
    }

-    return bdrv_aio_discard(blk_bs(blk), sector_num, nb_sectors, cb, opaque);
+    return bdrv_aio_pdiscard(blk_bs(blk), offset, count, cb, opaque);
 }

 void blk_aio_cancel(BlockAIOCB *acb)
@@ -1101,14 +1107,14 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
    return bdrv_aio_ioctl(blk_bs(blk), req, buf, cb, opaque);
 }

-int blk_co_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
+int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int count)
 {
-    int ret = blk_check_request(blk, sector_num, nb_sectors);
+    int ret = blk_check_byte_request(blk, offset, count);
    if (ret < 0) {
        return ret;
    }

-    return bdrv_co_discard(blk_bs(blk), sector_num, nb_sectors);
+    return bdrv_co_pdiscard(blk_bs(blk), offset, count);
 }

 int blk_co_flush(BlockBackend *blk)
@@ -1168,6 +1174,7 @@ BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
        return BLOCK_ERROR_ACTION_REPORT;
    case BLOCKDEV_ON_ERROR_IGNORE:
        return BLOCK_ERROR_ACTION_IGNORE;
+    case BLOCKDEV_ON_ERROR_AUTO:
    default:
        abort();
    }
@@ -1303,15 +1310,16 @@ int blk_get_flags(BlockBackend *blk)
    }
 }

-int blk_get_max_transfer_length(BlockBackend *blk)
+/* Returns the maximum transfer length, in bytes; guaranteed nonzero */
+uint32_t blk_get_max_transfer(BlockBackend *blk)
 {
    BlockDriverState *bs = blk_bs(blk);
+    uint32_t max = 0;

    if (bs) {
-        return bs->bl.max_transfer_length;
-    } else {
-        return 0;
+        max = bs->bl.max_transfer;
    }
+    return MIN_NON_ZERO(max, INT_MAX);
 }

 int blk_get_max_iov(BlockBackend *blk)
@@ -1477,15 +1485,11 @@ int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
                          flags | BDRV_REQ_ZERO_WRITE);
 }

-int blk_write_compressed(BlockBackend *blk, int64_t sector_num,
-                         const uint8_t *buf, int nb_sectors)
+int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
+                          int count)
 {
-    int ret = blk_check_request(blk, sector_num, nb_sectors);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return bdrv_write_compressed(blk_bs(blk), sector_num, buf, nb_sectors);
+    return blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
+                   BDRV_REQ_WRITE_COMPRESSED);
 }

 int blk_truncate(BlockBackend *blk, int64_t offset)
@@ -1497,14 +1501,14 @@ int blk_truncate(BlockBackend *blk, int64_t offset)
    return bdrv_truncate(blk_bs(blk), offset);
 }

-int blk_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
+int blk_pdiscard(BlockBackend *blk, int64_t offset, int count)
 {
-    int ret = blk_check_request(blk, sector_num, nb_sectors);
+    int ret = blk_check_byte_request(blk, offset, count);
    if (ret < 0) {
        return ret;
    }

-    return bdrv_discard(blk_bs(blk), sector_num, nb_sectors);
+    return bdrv_pdiscard(blk_bs(blk), offset, count);
 }

 int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -104,10 +104,9 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
    struct bochs_header bochs;
    int ret;

-    bs->read_only = 1; // no write support yet
-    bs->request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O supported */
+    bs->read_only = true; /* no write support yet */

-    ret = bdrv_pread(bs->file->bs, 0, &bochs, sizeof(bochs));
+    ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs));
    if (ret < 0) {
        return ret;
    }
@@ -141,7 +140,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
        return -ENOMEM;
    }

-    ret = bdrv_pread(bs->file->bs, le32_to_cpu(bochs.header), s->catalog_bitmap,
+    ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_bitmap,
                     s->catalog_size * 4);
    if (ret < 0) {
        goto fail;
@@ -189,6 +188,11 @@ fail:
    return ret;
 }

+static void bochs_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+    bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */
+}
+
 static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
 {
    BDRVBochsState *s = bs->opaque;
@@ -210,7 +214,7 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
        (s->extent_blocks + s->bitmap_blocks));

    /* read in bitmap for current extent */
-    ret = bdrv_pread(bs->file->bs, bitmap_offset + (extent_offset / 8),
+    ret = bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8),
                     &bitmap_entry, 1);
    if (ret < 0) {
        return ret;
@@ -251,7 +255,7 @@ bochs_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
        qemu_iovec_concat(&local_qiov, qiov, bytes_done, 512);

        if (block_offset > 0) {
-            ret = bdrv_co_preadv(bs->file->bs, block_offset, 512,
+            ret = bdrv_co_preadv(bs->file, block_offset, 512,
                                 &local_qiov, 0);
            if (ret < 0) {
                goto fail;
@@ -283,6 +287,7 @@ static BlockDriver bdrv_bochs = {
    .instance_size	= sizeof(BDRVBochsState),
    .bdrv_probe		= bochs_probe,
    .bdrv_open		= bochs_open,
+    .bdrv_refresh_limits = bochs_refresh_limits,
    .bdrv_co_preadv = bochs_co_preadv,
    .bdrv_close		= bochs_close,
 };
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -66,11 +66,10 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
    uint32_t offsets_size, max_compressed_block_size = 1, i;
    int ret;

-    bs->read_only = 1;
-    bs->request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O supported */
+    bs->read_only = true;

    /* read header */
-    ret = bdrv_pread(bs->file->bs, 128, &s->block_size, 4);
+    ret = bdrv_pread(bs->file, 128, &s->block_size, 4);
    if (ret < 0) {
        return ret;
    }
@@ -96,7 +95,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
        return -EINVAL;
    }

-    ret = bdrv_pread(bs->file->bs, 128 + 4, &s->n_blocks, 4);
+    ret = bdrv_pread(bs->file, 128 + 4, &s->n_blocks, 4);
    if (ret < 0) {
        return ret;
    }
@@ -127,7 +126,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
        return -ENOMEM;
    }

-    ret = bdrv_pread(bs->file->bs, 128 + 4 + 4, s->offsets, offsets_size);
+    ret = bdrv_pread(bs->file, 128 + 4 + 4, s->offsets, offsets_size);
    if (ret < 0) {
        goto fail;
    }
@@ -199,6 +198,11 @@ fail:
    return ret;
 }

+static void cloop_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+    bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */
+}
+
 static inline int cloop_read_block(BlockDriverState *bs, int block_num)
 {
    BDRVCloopState *s = bs->opaque;
@@ -207,7 +211,7 @@ static inline int cloop_read_block(BlockDriverState *bs, int block_num)
        int ret;
        uint32_t bytes = s->offsets[block_num + 1] - s->offsets[block_num];

-        ret = bdrv_pread(bs->file->bs, s->offsets[block_num],
+        ret = bdrv_pread(bs->file, s->offsets[block_num],
                         s->compressed_block, bytes);
        if (ret != bytes) {
            return -1;
@@ -280,6 +284,7 @@ static BlockDriver bdrv_cloop = {
    .instance_size  = sizeof(BDRVCloopState),
    .bdrv_probe     = cloop_probe,
    .bdrv_open      = cloop_open,
+    .bdrv_refresh_limits = cloop_refresh_limits,
    .bdrv_co_preadv = cloop_co_preadv,
    .bdrv_close     = cloop_close,
 };
--- a/block/commit.c
+++ b/block/commit.c
@@ -113,6 +113,7 @@ static void coroutine_fn commit_run(void *opaque)
    CommitBlockJob *s = opaque;
    CommitCompleteData *data;
    int64_t sector_num, end;
+    uint64_t delay_ns = 0;
    int ret = 0;
    int n = 0;
    void *buf = NULL;
@@ -142,10 +143,8 @@ static void coroutine_fn commit_run(void *opaque)
    buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE);

    for (sector_num = 0; sector_num < end; sector_num += n) {
-        uint64_t delay_ns = 0;
        bool copy;

-wait:
        /* Note that even when no rate limit is applied we need to yield
         * with no pending I/O here so that bdrv_drain_all() returns.
         */
@@ -161,19 +160,13 @@ wait:
        copy = (ret == 1);
        trace_commit_one_iteration(s, sector_num, n, ret);
        if (copy) {
-            if (s->common.speed) {
-                delay_ns = ratelimit_calculate_delay(&s->limit, n);
-                if (delay_ns > 0) {
-                    goto wait;
-                }
-            }
            ret = commit_populate(s->top, s->base, sector_num, n, buf);
            bytes_written += n * BDRV_SECTOR_SIZE;
        }
        if (ret < 0) {
-            if (s->on_error == BLOCKDEV_ON_ERROR_STOP ||
-                s->on_error == BLOCKDEV_ON_ERROR_REPORT||
-                (s->on_error == BLOCKDEV_ON_ERROR_ENOSPC && ret == -ENOSPC)) {
+            BlockErrorAction action =
+                block_job_error_action(&s->common, false, s->on_error, -ret);
+            if (action == BLOCK_ERROR_ACTION_REPORT) {
                goto out;
            } else {
                n = 0;
@@ -182,6 +175,10 @@ wait:
        }
        /* Publish progress */
        s->common.offset += n * BDRV_SECTOR_SIZE;
+
+        if (copy && s->common.speed) {
+            delay_ns = ratelimit_calculate_delay(&s->limit, n);
+        }
    }

    ret = 0;
@@ -211,8 +208,8 @@ static const BlockJobDriver commit_job_driver = {
    .set_speed     = commit_set_speed,
 };

-void commit_start(BlockDriverState *bs, BlockDriverState *base,
-                  BlockDriverState *top, int64_t speed,
+void commit_start(const char *job_id, BlockDriverState *bs,
+                  BlockDriverState *base, BlockDriverState *top, int64_t speed,
                  BlockdevOnError on_error, BlockCompletionFunc *cb,
                  void *opaque, const char *backing_file_str, Error **errp)
 {
@@ -236,7 +233,8 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
        return;
    }

-    s = block_job_create(&commit_job_driver, bs, speed, cb, opaque, errp);
+    s = block_job_create(job_id, &commit_job_driver, bs, speed,
+                         cb, opaque, errp);
    if (!s) {
        return;
    }
@@ -277,8 +275,129 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
    s->backing_file_str = g_strdup(backing_file_str);

    s->on_error = on_error;
-    s->common.co = qemu_coroutine_create(commit_run);
+    s->common.co = qemu_coroutine_create(commit_run, s);

    trace_commit_start(bs, base, top, s, s->common.co, opaque);
-    qemu_coroutine_enter(s->common.co, s);
+    qemu_coroutine_enter(s->common.co);
+}
+
+
+#define COMMIT_BUF_SECTORS 2048
+
+/* commit COW file into the raw image */
+int bdrv_commit(BlockDriverState *bs)
+{
+    BlockBackend *src, *backing;
+    BlockDriver *drv = bs->drv;
+    int64_t sector, total_sectors, length, backing_length;
+    int n, ro, open_flags;
+    int ret = 0;
+    uint8_t *buf = NULL;
+
+    if (!drv)
+        return -ENOMEDIUM;
+
+    if (!bs->backing) {
+        return -ENOTSUP;
+    }
+
+    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
+        bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
+        return -EBUSY;
+    }
+
+    ro = bs->backing->bs->read_only;
+    open_flags =  bs->backing->bs->open_flags;
+
+    if (ro) {
+        if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
+            return -EACCES;
+        }
+    }
+
+    src = blk_new();
+    blk_insert_bs(src, bs);
+
+    backing = blk_new();
+    blk_insert_bs(backing, bs->backing->bs);
+
+    length = blk_getlength(src);
+    if (length < 0) {
+        ret = length;
+        goto ro_cleanup;
+    }
+
+    backing_length = blk_getlength(backing);
+    if (backing_length < 0) {
+        ret = backing_length;
+        goto ro_cleanup;
+    }
+
+    /* If our top snapshot is larger than the backing file image,
+     * grow the backing file image if possible.  If not possible,
+     * we must return an error */
+    if (length > backing_length) {
+        ret = blk_truncate(backing, length);
+        if (ret < 0) {
+            goto ro_cleanup;
+        }
+    }
+
+    total_sectors = length >> BDRV_SECTOR_BITS;
+
+    /* blk_try_blockalign() for src will choose an alignment that works for
+     * backing as well, so no need to compare the alignment manually. */
+    buf = blk_try_blockalign(src, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
+    if (buf == NULL) {
+        ret = -ENOMEM;
+        goto ro_cleanup;
+    }
+
+    for (sector = 0; sector < total_sectors; sector += n) {
+        ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
+        if (ret < 0) {
+            goto ro_cleanup;
+        }
+        if (ret) {
+            ret = blk_pread(src, sector * BDRV_SECTOR_SIZE, buf,
+                            n * BDRV_SECTOR_SIZE);
+            if (ret < 0) {
+                goto ro_cleanup;
+            }
+
+            ret = blk_pwrite(backing, sector * BDRV_SECTOR_SIZE, buf,
+                             n * BDRV_SECTOR_SIZE, 0);
+            if (ret < 0) {
+                goto ro_cleanup;
+            }
+        }
+    }
+
+    if (drv->bdrv_make_empty) {
+        ret = drv->bdrv_make_empty(bs);
+        if (ret < 0) {
+            goto ro_cleanup;
+        }
+        blk_flush(src);
+    }
+
+    /*
+     * Make sure all data we wrote to the backing device is actually
+     * stable on disk.
+     */
+    blk_flush(backing);
+
+    ret = 0;
+ro_cleanup:
+    qemu_vfree(buf);
+
+    blk_unref(src);
+    blk_unref(backing);
+
+    if (ro) {
+        /* ignoring error return here */
+        bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
+    }
+
+    return ret;
 }
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -64,7 +64,7 @@ static ssize_t block_crypto_read_func(QCryptoBlock *block,
    BlockDriverState *bs = opaque;
    ssize_t ret;

-    ret = bdrv_pread(bs->file->bs, offset, buf, buflen);
+    ret = bdrv_pread(bs->file, offset, buf, buflen);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not read encryption header");
        return ret;
@@ -193,17 +193,16 @@ block_crypto_open_opts_init(QCryptoBlockFormat format,
                            QemuOpts *opts,
                            Error **errp)
 {
-    OptsVisitor *ov;
+    Visitor *v;
    QCryptoBlockOpenOptions *ret = NULL;
    Error *local_err = NULL;

    ret = g_new0(QCryptoBlockOpenOptions, 1);
    ret->format = format;

-    ov = opts_visitor_new(opts);
+    v = opts_visitor_new(opts);

-    visit_start_struct(opts_get_visitor(ov),
-                       NULL, NULL, 0, &local_err);
+    visit_start_struct(v, NULL, NULL, 0, &local_err);
    if (local_err) {
        goto out;
    }
@@ -211,7 +210,7 @@ block_crypto_open_opts_init(QCryptoBlockFormat format,
    switch (format) {
    case Q_CRYPTO_BLOCK_FORMAT_LUKS:
        visit_type_QCryptoBlockOptionsLUKS_members(
-            opts_get_visitor(ov), &ret->u.luks, &local_err);
+            v, &ret->u.luks, &local_err);
        break;

    default:
@@ -219,10 +218,10 @@ block_crypto_open_opts_init(QCryptoBlockFormat format,
        break;
    }
    if (!local_err) {
-        visit_check_struct(opts_get_visitor(ov), &local_err);
+        visit_check_struct(v, &local_err);
    }

-    visit_end_struct(opts_get_visitor(ov));
+    visit_end_struct(v, NULL);

 out:
    if (local_err) {
@@ -230,7 +229,7 @@ block_crypto_open_opts_init(QCryptoBlockFormat format,
        qapi_free_QCryptoBlockOpenOptions(ret);
        ret = NULL;
    }
-    opts_visitor_cleanup(ov);
+    visit_free(v);
    return ret;
 }

@@ -240,17 +239,16 @@ block_crypto_create_opts_init(QCryptoBlockFormat format,
                              QemuOpts *opts,
                              Error **errp)
 {
-    OptsVisitor *ov;
+    Visitor *v;
    QCryptoBlockCreateOptions *ret = NULL;
    Error *local_err = NULL;

    ret = g_new0(QCryptoBlockCreateOptions, 1);
    ret->format = format;

-    ov = opts_visitor_new(opts);
+    v = opts_visitor_new(opts);

-    visit_start_struct(opts_get_visitor(ov),
-                       NULL, NULL, 0, &local_err);
+    visit_start_struct(v, NULL, NULL, 0, &local_err);
    if (local_err) {
        goto out;
    }
@@ -258,7 +256,7 @@ block_crypto_create_opts_init(QCryptoBlockFormat format,
    switch (format) {
    case Q_CRYPTO_BLOCK_FORMAT_LUKS:
        visit_type_QCryptoBlockCreateOptionsLUKS_members(
-            opts_get_visitor(ov), &ret->u.luks, &local_err);
+            v, &ret->u.luks, &local_err);
        break;

    default:
@@ -266,10 +264,10 @@ block_crypto_create_opts_init(QCryptoBlockFormat format,
        break;
    }
    if (!local_err) {
-        visit_check_struct(opts_get_visitor(ov), &local_err);
+        visit_check_struct(v, &local_err);
    }

-    visit_end_struct(opts_get_visitor(ov));
+    visit_end_struct(v, NULL);

 out:
    if (local_err) {
@@ -277,7 +275,7 @@ block_crypto_create_opts_init(QCryptoBlockFormat format,
        qapi_free_QCryptoBlockCreateOptions(ret);
        ret = NULL;
    }
-    opts_visitor_cleanup(ov);
+    visit_free(v);
    return ret;
 }

@@ -322,8 +320,8 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
        goto cleanup;
    }

-    bs->encrypted = 1;
-    bs->valid_key = 1;
+    bs->encrypted = true;
+    bs->valid_key = true;

    ret = 0;
 cleanup:
@@ -428,7 +426,7 @@ block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num,
        qemu_iovec_reset(&hd_qiov);
        qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512);

-        ret = bdrv_co_readv(bs->file->bs,
+        ret = bdrv_co_readv(bs->file,
                            payload_offset + sector_num,
                            cur_nr_sectors, &hd_qiov);
        if (ret < 0) {
@@ -507,7 +505,7 @@ block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num,
        qemu_iovec_reset(&hd_qiov);
        qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512);

-        ret = bdrv_co_writev(bs->file->bs,
+        ret = bdrv_co_writev(bs->file,
                             payload_offset + sector_num,
                             cur_nr_sectors, &hd_qiov);
        if (ret < 0) {
@@ -565,6 +563,53 @@ static int block_crypto_create_luks(const char *filename,
                                       filename, opts, errp);
 }

+static int block_crypto_get_info_luks(BlockDriverState *bs,
+                                      BlockDriverInfo *bdi)
+{
+    BlockDriverInfo subbdi;
+    int ret;
+
+    ret = bdrv_get_info(bs->file->bs, &subbdi);
+    if (ret != 0) {
+        return ret;
+    }
+
+    bdi->unallocated_blocks_are_zero = false;
+    bdi->can_write_zeroes_with_unmap = false;
+    bdi->cluster_size = subbdi.cluster_size;
+
+    return 0;
+}
+
+static ImageInfoSpecific *
+block_crypto_get_specific_info_luks(BlockDriverState *bs)
+{
+    BlockCrypto *crypto = bs->opaque;
+    ImageInfoSpecific *spec_info;
+    QCryptoBlockInfo *info;
+
+    info = qcrypto_block_get_info(crypto->block, NULL);
+    if (!info) {
+        return NULL;
+    }
+    if (info->format != Q_CRYPTO_BLOCK_FORMAT_LUKS) {
+        qapi_free_QCryptoBlockInfo(info);
+        return NULL;
+    }
+
+    spec_info = g_new(ImageInfoSpecific, 1);
+    spec_info->type = IMAGE_INFO_SPECIFIC_KIND_LUKS;
+    spec_info->u.luks.data = g_new(QCryptoBlockInfoLUKS, 1);
+    *spec_info->u.luks.data = info->u.luks;
+
+    /* Blank out pointers we've just stolen to avoid double free */
+    memset(&info->u.luks, 0, sizeof(info->u.luks));
+
+    qapi_free_QCryptoBlockInfo(info);
+
+    return spec_info;
+}
+
 BlockDriver bdrv_crypto_luks = {
    .format_name        = "luks",
    .instance_size      = sizeof(BlockCrypto),
@@ -578,6 +623,8 @@ BlockDriver bdrv_crypto_luks = {
    .bdrv_co_readv      = block_crypto_co_readv,
    .bdrv_co_writev     = block_crypto_co_writev,
    .bdrv_getlength     = block_crypto_getlength,
+    .bdrv_get_info      = block_crypto_get_info_luks,
+    .bdrv_get_specific_info = block_crypto_get_specific_info_luks,
 };

 static void block_crypto_init(void)
--- a/block/curl.c
+++ b/block/curl.c
@@ -169,7 +169,7 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
    state->sock_fd = fd;
    s = state->s;

-    DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd);
+    DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, (int)fd);
    switch (action) {
        case CURL_POLL_IN:
            aio_set_fd_handler(s->aio_context, fd, false,
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -326,14 +326,14 @@ void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
 }

 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
-                           int64_t cur_sector, int nr_sectors)
+                           int64_t cur_sector, int64_t nr_sectors)
 {
    assert(bdrv_dirty_bitmap_enabled(bitmap));
    hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
 }

 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
-                             int64_t cur_sector, int nr_sectors)
+                             int64_t cur_sector, int64_t nr_sectors)
 {
    assert(bdrv_dirty_bitmap_enabled(bitmap));
    hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
@@ -361,7 +361,7 @@ void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
 }

 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
-                    int nr_sectors)
+                    int64_t nr_sectors)
 {
    BdrvDirtyBitmap *bitmap;
    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -86,7 +86,7 @@ static int read_uint64(BlockDriverState *bs, int64_t offset, uint64_t *result)
    uint64_t buffer;
    int ret;

-    ret = bdrv_pread(bs->file->bs, offset, &buffer, 8);
+    ret = bdrv_pread(bs->file, offset, &buffer, 8);
    if (ret < 0) {
        return ret;
    }
@@ -100,7 +100,7 @@ static int read_uint32(BlockDriverState *bs, int64_t offset, uint32_t *result)
    uint32_t buffer;
    int ret;

-    ret = bdrv_pread(bs->file->bs, offset, &buffer, 4);
+    ret = bdrv_pread(bs->file, offset, &buffer, 4);
    if (ret < 0) {
        return ret;
    }
@@ -153,8 +153,9 @@ static void update_max_chunk_size(BDRVDMGState *s, uint32_t chunk,
    }
 }

-static int64_t dmg_find_koly_offset(BlockDriverState *file_bs, Error **errp)
+static int64_t dmg_find_koly_offset(BdrvChild *file, Error **errp)
 {
+    BlockDriverState *file_bs = file->bs;
    int64_t length;
    int64_t offset = 0;
    uint8_t buffer[515];
@@ -178,7 +179,7 @@ static int64_t dmg_find_koly_offset(BlockDriverState *file_bs, Error **errp)
        offset = length - 511 - 512;
    }
    length = length < 515 ? length : 515;
-    ret = bdrv_pread(file_bs, offset, buffer, length);
+    ret = bdrv_pread(file, offset, buffer, length);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Failed while reading UDIF trailer");
        return ret;
@@ -355,7 +356,7 @@ static int dmg_read_resource_fork(BlockDriverState *bs, DmgHeaderState *ds,
        offset += 4;

        buffer = g_realloc(buffer, count);
-        ret = bdrv_pread(bs->file->bs, offset, buffer, count);
+        ret = bdrv_pread(bs->file, offset, buffer, count);
        if (ret < 0) {
            goto fail;
        }
@@ -392,7 +393,7 @@ static int dmg_read_plist_xml(BlockDriverState *bs, DmgHeaderState *ds,

    buffer = g_malloc(info_length + 1);
    buffer[info_length] = '\0';
-    ret = bdrv_pread(bs->file->bs, info_begin, buffer, info_length);
+    ret = bdrv_pread(bs->file, info_begin, buffer, info_length);
    if (ret != info_length) {
        ret = -EINVAL;
        goto fail;
@@ -438,8 +439,7 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
    int64_t offset;
    int ret;

-    bs->read_only = 1;
-    bs->request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O supported */
+    bs->read_only = true;

    s->n_chunks = 0;
    s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL;
@@ -449,7 +449,7 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
    ds.max_sectors_per_chunk = 1;

    /* locate the UDIF trailer */
-    offset = dmg_find_koly_offset(bs->file->bs, errp);
+    offset = dmg_find_koly_offset(bs->file, errp);
    if (offset < 0) {
        ret = offset;
        goto fail;
@@ -547,6 +547,11 @@ fail:
    return ret;
 }

+static void dmg_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+    bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */
+}
+
 static inline int is_sector_in_chunk(BDRVDMGState* s,
                uint32_t chunk_num, uint64_t sector_num)
 {
@@ -595,7 +600,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
        case 0x80000005: { /* zlib compressed */
            /* we need to buffer, because only the chunk as whole can be
             * inflated. */
-            ret = bdrv_pread(bs->file->bs, s->offsets[chunk],
+            ret = bdrv_pread(bs->file, s->offsets[chunk],
                             s->compressed_chunk, s->lengths[chunk]);
            if (ret != s->lengths[chunk]) {
                return -1;
@@ -619,7 +624,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
        case 0x80000006: /* bzip2 compressed */
            /* we need to buffer, because only the chunk as whole can be
             * inflated. */
-            ret = bdrv_pread(bs->file->bs, s->offsets[chunk],
+            ret = bdrv_pread(bs->file, s->offsets[chunk],
                             s->compressed_chunk, s->lengths[chunk]);
            if (ret != s->lengths[chunk]) {
                return -1;
@@ -644,7 +649,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
            break;
 #endif /* CONFIG_BZIP2 */
        case 1: /* copy */
-            ret = bdrv_pread(bs->file->bs, s->offsets[chunk],
+            ret = bdrv_pread(bs->file, s->offsets[chunk],
                             s->uncompressed_chunk, s->lengths[chunk]);
            if (ret != s->lengths[chunk]) {
                return -1;
@@ -720,6 +725,7 @@ static BlockDriver bdrv_dmg = {
    .instance_size  = sizeof(BDRVDMGState),
    .bdrv_probe     = dmg_probe,
    .bdrv_open      = dmg_open,
+    .bdrv_refresh_limits = dmg_refresh_limits,
    .bdrv_co_preadv = dmg_co_preadv,
    .bdrv_close     = dmg_close,
 };
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -11,7 +11,27 @@
 #include <glusterfs/api/glfs.h>
 #include "block/block_int.h"
 #include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
 #include "qemu/uri.h"
+#include "qemu/error-report.h"
+
+#define GLUSTER_OPT_FILENAME        "filename"
+#define GLUSTER_OPT_VOLUME          "volume"
+#define GLUSTER_OPT_PATH            "path"
+#define GLUSTER_OPT_TYPE            "type"
+#define GLUSTER_OPT_SERVER_PATTERN  "server."
+#define GLUSTER_OPT_HOST            "host"
+#define GLUSTER_OPT_PORT            "port"
+#define GLUSTER_OPT_TO              "to"
+#define GLUSTER_OPT_IPV4            "ipv4"
+#define GLUSTER_OPT_IPV6            "ipv6"
+#define GLUSTER_OPT_SOCKET          "socket"
+#define GLUSTER_OPT_DEBUG           "debug"
+#define GLUSTER_DEFAULT_PORT        24007
+#define GLUSTER_DEBUG_DEFAULT       4
+#define GLUSTER_DEBUG_MAX           9
+
+#define GERR_INDEX_HINT "hint: check in 'server' array index '%d'\n"

 typedef struct GlusterAIOCB {
    int64_t size;
@@ -28,27 +48,141 @@ typedef struct BDRVGlusterState {
    int debug_level;
 } BDRVGlusterState;

-typedef struct GlusterConf {
-    char *server;
-    int port;
-    char *volname;
-    char *image;
-    char *transport;
-    int debug_level;
-} GlusterConf;
+typedef struct BDRVGlusterReopenState {
+    struct glfs *glfs;
+    struct glfs_fd *fd;
+} BDRVGlusterReopenState;

-static void qemu_gluster_gconf_free(GlusterConf *gconf)
-{
-    if (gconf) {
-        g_free(gconf->server);
-        g_free(gconf->volname);
-        g_free(gconf->image);
-        g_free(gconf->transport);
-        g_free(gconf);
+
+static QemuOptsList qemu_gluster_create_opts = {
+    .name = "qemu-gluster-create-opts",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_gluster_create_opts.head),
+    .desc = {
+        {
+            .name = BLOCK_OPT_SIZE,
+            .type = QEMU_OPT_SIZE,
+            .help = "Virtual disk size"
+        },
+        {
+            .name = BLOCK_OPT_PREALLOC,
+            .type = QEMU_OPT_STRING,
+            .help = "Preallocation mode (allowed values: off, full)"
+        },
+        {
+            .name = GLUSTER_OPT_DEBUG,
+            .type = QEMU_OPT_NUMBER,
+            .help = "Gluster log level, valid range is 0-9",
+        },
+        { /* end of list */ }
    }
-}
+};

-static int parse_volume_options(GlusterConf *gconf, char *path)
+static QemuOptsList runtime_opts = {
+    .name = "gluster",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+    .desc = {
+        {
+            .name = GLUSTER_OPT_FILENAME,
+            .type = QEMU_OPT_STRING,
+            .help = "URL to the gluster image",
+        },
+        {
+            .name = GLUSTER_OPT_DEBUG,
+            .type = QEMU_OPT_NUMBER,
+            .help = "Gluster log level, valid range is 0-9",
+        },
+        { /* end of list */ }
+    },
+};
+
+static QemuOptsList runtime_json_opts = {
+    .name = "gluster_json",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_json_opts.head),
+    .desc = {
+        {
+            .name = GLUSTER_OPT_VOLUME,
+            .type = QEMU_OPT_STRING,
+            .help = "name of gluster volume where VM image resides",
+        },
+        {
+            .name = GLUSTER_OPT_PATH,
+            .type = QEMU_OPT_STRING,
+            .help = "absolute path to image file in gluster volume",
+        },
+        {
+            .name = GLUSTER_OPT_DEBUG,
+            .type = QEMU_OPT_NUMBER,
+            .help = "Gluster log level, valid range is 0-9",
+        },
+        { /* end of list */ }
+    },
+};
+
+static QemuOptsList runtime_type_opts = {
+    .name = "gluster_type",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_type_opts.head),
+    .desc = {
+        {
+            .name = GLUSTER_OPT_TYPE,
+            .type = QEMU_OPT_STRING,
+            .help = "tcp|unix",
+        },
+        { /* end of list */ }
+    },
+};
+
+static QemuOptsList runtime_unix_opts = {
+    .name = "gluster_unix",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_unix_opts.head),
+    .desc = {
+        {
+            .name = GLUSTER_OPT_SOCKET,
+            .type = QEMU_OPT_STRING,
+            .help = "socket file path)",
+        },
+        { /* end of list */ }
+    },
+};
+
+static QemuOptsList runtime_tcp_opts = {
+    .name = "gluster_tcp",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_tcp_opts.head),
+    .desc = {
+        {
+            .name = GLUSTER_OPT_TYPE,
+            .type = QEMU_OPT_STRING,
+            .help = "tcp|unix",
+        },
+        {
+            .name = GLUSTER_OPT_HOST,
+            .type = QEMU_OPT_STRING,
+            .help = "host address (hostname/ipv4/ipv6 addresses)",
+        },
+        {
+            .name = GLUSTER_OPT_PORT,
+            .type = QEMU_OPT_NUMBER,
+            .help = "port number on which glusterd is listening (default 24007)",
+        },
+        {
+            .name = "to",
+            .type = QEMU_OPT_NUMBER,
+            .help = "max port number, not supported by gluster",
+        },
+        {
+            .name = "ipv4",
+            .type = QEMU_OPT_BOOL,
+            .help = "ipv4 bool value, not supported by gluster",
+        },
+        {
+            .name = "ipv6",
+            .type = QEMU_OPT_BOOL,
+            .help = "ipv6 bool value, not supported by gluster",
+        },
+        { /* end of list */ }
+    },
+};
+
+static int parse_volume_options(BlockdevOptionsGluster *gconf, char *path)
 {
    char *p, *q;

@@ -62,31 +196,29 @@ static int parse_volume_options(GlusterConf *gconf, char *path)
    if (*p == '\0') {
        return -EINVAL;
    }
-    gconf->volname = g_strndup(q, p - q);
+    gconf->volume = g_strndup(q, p - q);

-    /* image */
+    /* path */
    p += strspn(p, "/");
    if (*p == '\0') {
        return -EINVAL;
    }
-    gconf->image = g_strdup(p);
+    gconf->path = g_strdup(p);
    return 0;
 }

 /*
- * file=gluster[+transport]://[server[:port]]/volname/image[?socket=...]
+ * file=gluster[+transport]://[host[:port]]/volume/path[?socket=...]
 *
 * 'gluster' is the protocol.
 *
 * 'transport' specifies the transport type used to connect to gluster
 * management daemon (glusterd). Valid transport types are
- * tcp, unix and rdma. If a transport type isn't specified, then tcp
- * type is assumed.
+ * tcp or unix. If a transport type isn't specified, then tcp type is assumed.
 *
- * 'server' specifies the server where the volume file specification for
- * the given volume resides. This can be either hostname, ipv4 address
- * or ipv6 address. ipv6 address needs to be within square brackets [ ].
- * If transport type is 'unix', then 'server' field should not be specified.
+ * 'host' specifies the host where the volume file specification for
+ * the given volume resides. This can be either hostname or ipv4 address.
+ * If transport type is 'unix', then 'host' field should not be specified.
 * The 'socket' field needs to be populated with the path to unix domain
 * socket.
 *
@@ -95,23 +227,22 @@ static int parse_volume_options(GlusterConf *gconf, char *path)
 * default port. If the transport type is unix, then 'port' should not be
 * specified.
 *
- * 'volname' is the name of the gluster volume which contains the VM image.
+ * 'volume' is the name of the gluster volume which contains the VM image.
 *
- * 'image' is the path to the actual VM image that resides on gluster volume.
+ * 'path' is the path to the actual VM image that resides on gluster volume.
 *
 * Examples:
 *
 * file=gluster://1.2.3.4/testvol/a.img
 * file=gluster+tcp://1.2.3.4/testvol/a.img
 * file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img
- * file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img
- * file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img
- * file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img
+ * file=gluster+tcp://host.domain.com:24007/testvol/dir/a.img
 * file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket
- * file=gluster+rdma://1.2.3.4:24007/testvol/a.img
 */
-static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
+static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf,
+                                  const char *filename)
 {
+    GlusterServer *gsconf;
    URI *uri;
    QueryParams *qp = NULL;
    bool is_unix = false;
@@ -122,16 +253,21 @@ static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
        return -EINVAL;
    }

+    gconf->server = g_new0(GlusterServerList, 1);
+    gconf->server->value = gsconf = g_new0(GlusterServer, 1);
+
    /* transport */
    if (!uri->scheme || !strcmp(uri->scheme, "gluster")) {
-        gconf->transport = g_strdup("tcp");
+        gsconf->type = GLUSTER_TRANSPORT_TCP;
    } else if (!strcmp(uri->scheme, "gluster+tcp")) {
-        gconf->transport = g_strdup("tcp");
+        gsconf->type = GLUSTER_TRANSPORT_TCP;
    } else if (!strcmp(uri->scheme, "gluster+unix")) {
-        gconf->transport = g_strdup("unix");
+        gsconf->type = GLUSTER_TRANSPORT_UNIX;
        is_unix = true;
    } else if (!strcmp(uri->scheme, "gluster+rdma")) {
-        gconf->transport = g_strdup("rdma");
+        gsconf->type = GLUSTER_TRANSPORT_TCP;
+        error_report("Warning: rdma feature is not supported, falling "
+                     "back to tcp");
    } else {
        ret = -EINVAL;
        goto out;
@@ -157,10 +293,14 @@ static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
            ret = -EINVAL;
            goto out;
        }
-        gconf->server = g_strdup(qp->p[0].value);
+        gsconf->u.q_unix.path = g_strdup(qp->p[0].value);
    } else {
-        gconf->server = g_strdup(uri->server ? uri->server : "localhost");
-        gconf->port = uri->port;
+        gsconf->u.tcp.host = g_strdup(uri->server ? uri->server : "localhost");
+        if (uri->port) {
+            gsconf->u.tcp.port = g_strdup_printf("%d", uri->port);
+        } else {
+            gsconf->u.tcp.port = g_strdup_printf("%d", GLUSTER_DEFAULT_PORT);
+        }
    }

 out:
@@ -171,30 +311,34 @@ out:
    return ret;
 }

-static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename,
-                                      Error **errp)
+static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
+                                           Error **errp)
 {
-    struct glfs *glfs = NULL;
+    struct glfs *glfs;
    int ret;
    int old_errno;
+    GlusterServerList *server;

-    ret = qemu_gluster_parseuri(gconf, filename);
-    if (ret < 0) {
-        error_setg(errp, "Usage: file=gluster[+transport]://[server[:port]]/"
-                   "volname/image[?socket=...]");
-        errno = -ret;
-        goto out;
-    }
-
-    glfs = glfs_new(gconf->volname);
+    glfs = glfs_new(gconf->volume);
    if (!glfs) {
        goto out;
    }

-    ret = glfs_set_volfile_server(glfs, gconf->transport, gconf->server,
-            gconf->port);
-    if (ret < 0) {
-        goto out;
+    for (server = gconf->server; server; server = server->next) {
+        if (server->value->type  == GLUSTER_TRANSPORT_UNIX) {
+            ret = glfs_set_volfile_server(glfs,
+                                   GlusterTransport_lookup[server->value->type],
+                                   server->value->u.q_unix.path, 0);
+        } else {
+            ret = glfs_set_volfile_server(glfs,
+                                   GlusterTransport_lookup[server->value->type],
+                                   server->value->u.tcp.host,
+                                   atoi(server->value->u.tcp.port));
+        }
+
+        if (ret < 0) {
+            goto out;
+        }
    }

    ret = glfs_set_logging(glfs, "-", gconf->debug_level);
@@ -204,15 +348,25 @@ static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename,

    ret = glfs_init(glfs);
    if (ret) {
-        error_setg_errno(errp, errno,
-                         "Gluster connection failed for server=%s port=%d "
-                         "volume=%s image=%s transport=%s", gconf->server,
-                         gconf->port, gconf->volname, gconf->image,
-                         gconf->transport);
+        error_setg(errp, "Gluster connection for volume %s, path %s failed"
+                         " to connect", gconf->volume, gconf->path);
+        for (server = gconf->server; server; server = server->next) {
+            if (server->value->type  == GLUSTER_TRANSPORT_UNIX) {
+                error_append_hint(errp, "hint: failed on socket %s ",
+                                  server->value->u.q_unix.path);
+            } else {
+                error_append_hint(errp, "hint: failed on host %s and port %s ",
+                                  server->value->u.tcp.host,
+                                  server->value->u.tcp.port);
+            }
+        }
+
+        error_append_hint(errp, "Please refer to gluster logs for more info\n");

        /* glfs_init sometimes doesn't set errno although docs suggest that */
-        if (errno == 0)
+        if (errno == 0) {
            errno = EINVAL;
+        }

        goto out;
    }
@@ -227,13 +381,233 @@ out:
    return NULL;
 }

+static int qapi_enum_parse(const char *opt)
+{
+    int i;
+
+    if (!opt) {
+        return GLUSTER_TRANSPORT__MAX;
+    }
+
+    for (i = 0; i < GLUSTER_TRANSPORT__MAX; i++) {
+        if (!strcmp(opt, GlusterTransport_lookup[i])) {
+            return i;
+        }
+    }
+
+    return i;
+}
+
+/*
+ * Convert the json formatted command line into qapi.
+*/
+static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
+                                  QDict *options, Error **errp)
+{
+    QemuOpts *opts;
+    GlusterServer *gsconf;
+    GlusterServerList *curr = NULL;
+    QDict *backing_options = NULL;
+    Error *local_err = NULL;
+    char *str = NULL;
+    const char *ptr;
+    size_t num_servers;
+    int i;
+
+    /* create opts info from runtime_json_opts list */
+    opts = qemu_opts_create(&runtime_json_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (local_err) {
+        goto out;
+    }
+
+    num_servers = qdict_array_entries(options, GLUSTER_OPT_SERVER_PATTERN);
+    if (num_servers < 1) {
+        error_setg(&local_err, QERR_MISSING_PARAMETER, "server");
+        goto out;
+    }
+
+    ptr = qemu_opt_get(opts, GLUSTER_OPT_VOLUME);
+    if (!ptr) {
+        error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_VOLUME);
+        goto out;
+    }
+    gconf->volume = g_strdup(ptr);
+
+    ptr = qemu_opt_get(opts, GLUSTER_OPT_PATH);
+    if (!ptr) {
+        error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_PATH);
+        goto out;
+    }
+    gconf->path = g_strdup(ptr);
+    qemu_opts_del(opts);
+
+    for (i = 0; i < num_servers; i++) {
+        str = g_strdup_printf(GLUSTER_OPT_SERVER_PATTERN"%d.", i);
+        qdict_extract_subqdict(options, &backing_options, str);
+
+        /* create opts info from runtime_type_opts list */
+        opts = qemu_opts_create(&runtime_type_opts, NULL, 0, &error_abort);
+        qemu_opts_absorb_qdict(opts, backing_options, &local_err);
+        if (local_err) {
+            goto out;
+        }
+
+        ptr = qemu_opt_get(opts, GLUSTER_OPT_TYPE);
+        gsconf = g_new0(GlusterServer, 1);
+        gsconf->type = qapi_enum_parse(ptr);
+        if (!ptr) {
+            error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_TYPE);
+            error_append_hint(&local_err, GERR_INDEX_HINT, i);
+            goto out;
+
+        }
+        if (gsconf->type == GLUSTER_TRANSPORT__MAX) {
+            error_setg(&local_err, QERR_INVALID_PARAMETER_VALUE,
+                       GLUSTER_OPT_TYPE, "tcp or unix");
+            error_append_hint(&local_err, GERR_INDEX_HINT, i);
+            goto out;
+        }
+        qemu_opts_del(opts);
+
+        if (gsconf->type == GLUSTER_TRANSPORT_TCP) {
+            /* create opts info from runtime_tcp_opts list */
+            opts = qemu_opts_create(&runtime_tcp_opts, NULL, 0, &error_abort);
+            qemu_opts_absorb_qdict(opts, backing_options, &local_err);
+            if (local_err) {
+                goto out;
+            }
+
+            ptr = qemu_opt_get(opts, GLUSTER_OPT_HOST);
+            if (!ptr) {
+                error_setg(&local_err, QERR_MISSING_PARAMETER,
+                           GLUSTER_OPT_HOST);
+                error_append_hint(&local_err, GERR_INDEX_HINT, i);
+                goto out;
+            }
+            gsconf->u.tcp.host = g_strdup(ptr);
+            ptr = qemu_opt_get(opts, GLUSTER_OPT_PORT);
+            if (!ptr) {
+                error_setg(&local_err, QERR_MISSING_PARAMETER,
+                           GLUSTER_OPT_PORT);
+                error_append_hint(&local_err, GERR_INDEX_HINT, i);
+                goto out;
+            }
+            gsconf->u.tcp.port = g_strdup(ptr);
+
+            /* defend for unsupported fields in InetSocketAddress,
+             * i.e. @ipv4, @ipv6  and @to
+             */
+            ptr = qemu_opt_get(opts, GLUSTER_OPT_TO);
+            if (ptr) {
+                gsconf->u.tcp.has_to = true;
+            }
+            ptr = qemu_opt_get(opts, GLUSTER_OPT_IPV4);
+            if (ptr) {
+                gsconf->u.tcp.has_ipv4 = true;
+            }
+            ptr = qemu_opt_get(opts, GLUSTER_OPT_IPV6);
+            if (ptr) {
+                gsconf->u.tcp.has_ipv6 = true;
+            }
+            if (gsconf->u.tcp.has_to) {
+                error_setg(&local_err, "Parameter 'to' not supported");
+                goto out;
+            }
+            if (gsconf->u.tcp.has_ipv4 || gsconf->u.tcp.has_ipv6) {
+                error_setg(&local_err, "Parameters 'ipv4/ipv6' not supported");
+                goto out;
+            }
+            qemu_opts_del(opts);
+        } else {
+            /* create opts info from runtime_unix_opts list */
+            opts = qemu_opts_create(&runtime_unix_opts, NULL, 0, &error_abort);
+            qemu_opts_absorb_qdict(opts, backing_options, &local_err);
+            if (local_err) {
+                goto out;
+            }
+
+            ptr = qemu_opt_get(opts, GLUSTER_OPT_SOCKET);
+            if (!ptr) {
+                error_setg(&local_err, QERR_MISSING_PARAMETER,
+                           GLUSTER_OPT_SOCKET);
+                error_append_hint(&local_err, GERR_INDEX_HINT, i);
+                goto out;
+            }
+            gsconf->u.q_unix.path = g_strdup(ptr);
+            qemu_opts_del(opts);
+        }
+
+        if (gconf->server == NULL) {
+            gconf->server = g_new0(GlusterServerList, 1);
+            gconf->server->value = gsconf;
+            curr = gconf->server;
+        } else {
+            curr->next = g_new0(GlusterServerList, 1);
+            curr->next->value = gsconf;
+            curr = curr->next;
+        }
+
+        qdict_del(backing_options, str);
+        g_free(str);
+        str = NULL;
+    }
+
+    return 0;
+
+out:
+    error_propagate(errp, local_err);
+    qemu_opts_del(opts);
+    if (str) {
+        qdict_del(backing_options, str);
+        g_free(str);
+    }
+    errno = EINVAL;
+    return -errno;
+}
+
+static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf,
+                                      const char *filename,
+                                      QDict *options, Error **errp)
+{
+    int ret;
+    if (filename) {
+        ret = qemu_gluster_parse_uri(gconf, filename);
+        if (ret < 0) {
+            error_setg(errp, "invalid URI");
+            error_append_hint(errp, "Usage: file=gluster[+transport]://"
+                                    "[host[:port]]/volume/path[?socket=...]\n");
+            errno = -ret;
+            return NULL;
+        }
+    } else {
+        ret = qemu_gluster_parse_json(gconf, options, errp);
+        if (ret < 0) {
+            error_append_hint(errp, "Usage: "
+                             "-drive driver=qcow2,file.driver=gluster,"
+                             "file.volume=testvol,file.path=/path/a.qcow2"
+                             "[,file.debug=9],file.server.0.type=tcp,"
+                             "file.server.0.host=1.2.3.4,"
+                             "file.server.0.port=24007,"
+                             "file.server.1.transport=unix,"
+                             "file.server.1.socket=/var/run/glusterd.socket ..."
+                             "\n");
+            errno = -ret;
+            return NULL;
+        }
+
+    }
+
+    return qemu_gluster_glfs_init(gconf, errp);
+}
+
 static void qemu_gluster_complete_aio(void *opaque)
 {
    GlusterAIOCB *acb = (GlusterAIOCB *)opaque;

    qemu_bh_delete(acb->bh);
    acb->bh = NULL;
-    qemu_coroutine_enter(acb->coroutine, NULL);
+    qemu_coroutine_enter(acb->coroutine);
 }

 /*
@@ -255,30 +629,6 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
    qemu_bh_schedule(acb->bh);
 }

-#define GLUSTER_OPT_FILENAME "filename"
-#define GLUSTER_OPT_DEBUG "debug"
-#define GLUSTER_DEBUG_DEFAULT 4
-#define GLUSTER_DEBUG_MAX 9
-
-/* TODO Convert to fine grained options */
-static QemuOptsList runtime_opts = {
-    .name = "gluster",
-    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
-    .desc = {
-        {
-            .name = GLUSTER_OPT_FILENAME,
-            .type = QEMU_OPT_STRING,
-            .help = "URL to the gluster image",
-        },
-        {
-            .name = GLUSTER_OPT_DEBUG,
-            .type = QEMU_OPT_NUMBER,
-            .help = "Gluster log level, valid range is 0-9",
-        },
-        { /* end of list */ }
-    },
-};
-
 static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags)
 {
    assert(open_flags != NULL);
@@ -324,7 +674,7 @@ static int qemu_gluster_open(BlockDriverState *bs,  QDict *options,
    BDRVGlusterState *s = bs->opaque;
    int open_flags = 0;
    int ret = 0;
-    GlusterConf *gconf = g_new0(GlusterConf, 1);
+    BlockdevOptionsGluster *gconf = NULL;
    QemuOpts *opts;
    Error *local_err = NULL;
    const char *filename;
@@ -347,8 +697,10 @@ static int qemu_gluster_open(BlockDriverState *bs,  QDict *options,
        s->debug_level = GLUSTER_DEBUG_MAX;
    }

+    gconf = g_new0(BlockdevOptionsGluster, 1);
    gconf->debug_level = s->debug_level;
-    s->glfs = qemu_gluster_init(gconf, filename, errp);
+    gconf->has_debug_level = true;
+    s->glfs = qemu_gluster_init(gconf, filename, options, errp);
    if (!s->glfs) {
        ret = -errno;
        goto out;
@@ -373,7 +725,7 @@ static int qemu_gluster_open(BlockDriverState *bs,  QDict *options,

    qemu_gluster_parse_flags(bdrv_flags, &open_flags);

-    s->fd = glfs_open(s->glfs, gconf->image, open_flags);
+    s->fd = glfs_open(s->glfs, gconf->path, open_flags);
    if (!s->fd) {
        ret = -errno;
    }
@@ -382,7 +734,7 @@ static int qemu_gluster_open(BlockDriverState *bs,  QDict *options,

 out:
    qemu_opts_del(opts);
-    qemu_gluster_gconf_free(gconf);
+    qapi_free_BlockdevOptionsGluster(gconf);
    if (!ret) {
        return ret;
    }
@@ -395,19 +747,13 @@ out:
    return ret;
 }

-typedef struct BDRVGlusterReopenState {
-    struct glfs *glfs;
-    struct glfs_fd *fd;
-} BDRVGlusterReopenState;
-
-
 static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
                                       BlockReopenQueue *queue, Error **errp)
 {
    int ret = 0;
    BDRVGlusterState *s;
    BDRVGlusterReopenState *reop_s;
-    GlusterConf *gconf = NULL;
+    BlockdevOptionsGluster *gconf;
    int open_flags = 0;

    assert(state != NULL);
@@ -420,10 +766,10 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state,

    qemu_gluster_parse_flags(state->flags, &open_flags);

-    gconf = g_new0(GlusterConf, 1);
-
+    gconf = g_new0(BlockdevOptionsGluster, 1);
    gconf->debug_level = s->debug_level;
-    reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, errp);
+    gconf->has_debug_level = true;
+    reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, NULL, errp);
    if (reop_s->glfs == NULL) {
        ret = -errno;
        goto exit;
@@ -439,7 +785,7 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
    }
 #endif

-    reop_s->fd = glfs_open(reop_s->glfs, gconf->image, open_flags);
+    reop_s->fd = glfs_open(reop_s->glfs, gconf->path, open_flags);
    if (reop_s->fd == NULL) {
        /* reops->glfs will be cleaned up in _abort */
        ret = -errno;
@@ -448,7 +794,7 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state,

 exit:
    /* state->opaque will be freed in either the _abort or _commit */
-    qemu_gluster_gconf_free(gconf);
+    qapi_free_BlockdevOptionsGluster(gconf);
    return ret;
 }

@@ -501,7 +847,9 @@ static void qemu_gluster_reopen_abort(BDRVReopenState *state)

 #ifdef CONFIG_GLUSTERFS_ZEROFILL
 static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs,
-        int64_t offset, int size, BdrvRequestFlags flags)
+                                                      int64_t offset,
+                                                      int size,
+                                                      BdrvRequestFlags flags)
 {
    int ret;
    GlusterAIOCB acb;
@@ -527,7 +875,7 @@ static inline bool gluster_supports_zerofill(void)
 }

 static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
-        int64_t size)
+                                        int64_t size)
 {
    return glfs_zerofill(fd, offset, size);
 }
@@ -539,7 +887,7 @@ static inline bool gluster_supports_zerofill(void)
 }

 static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
-        int64_t size)
+                                        int64_t size)
 {
    return 0;
 }
@@ -548,14 +896,15 @@ static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
 static int qemu_gluster_create(const char *filename,
                               QemuOpts *opts, Error **errp)
 {
+    BlockdevOptionsGluster *gconf;
    struct glfs *glfs;
    struct glfs_fd *fd;
    int ret = 0;
    int prealloc = 0;
    int64_t total_size = 0;
    char *tmp = NULL;
-    GlusterConf *gconf = g_new0(GlusterConf, 1);

+    gconf = g_new0(BlockdevOptionsGluster, 1);
    gconf->debug_level = qemu_opt_get_number_del(opts, GLUSTER_OPT_DEBUG,
                                                 GLUSTER_DEBUG_DEFAULT);
    if (gconf->debug_level < 0) {
@@ -563,8 +912,9 @@ static int qemu_gluster_create(const char *filename,
    } else if (gconf->debug_level > GLUSTER_DEBUG_MAX) {
        gconf->debug_level = GLUSTER_DEBUG_MAX;
    }
+    gconf->has_debug_level = true;

-    glfs = qemu_gluster_init(gconf, filename, errp);
+    glfs = qemu_gluster_init(gconf, filename, NULL, errp);
    if (!glfs) {
        ret = -errno;
        goto out;
@@ -576,19 +926,17 @@ static int qemu_gluster_create(const char *filename,
    tmp = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
    if (!tmp || !strcmp(tmp, "off")) {
        prealloc = 0;
-    } else if (!strcmp(tmp, "full") &&
-               gluster_supports_zerofill()) {
+    } else if (!strcmp(tmp, "full") && gluster_supports_zerofill()) {
        prealloc = 1;
    } else {
        error_setg(errp, "Invalid preallocation mode: '%s'"
-            " or GlusterFS doesn't support zerofill API",
-            tmp);
+                         " or GlusterFS doesn't support zerofill API", tmp);
        ret = -EINVAL;
        goto out;
    }

-    fd = glfs_creat(glfs, gconf->image,
-        O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR);
+    fd = glfs_creat(glfs, gconf->path,
+                    O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR);
    if (!fd) {
        ret = -errno;
    } else {
@@ -606,7 +954,7 @@ static int qemu_gluster_create(const char *filename,
    }
 out:
    g_free(tmp);
-    qemu_gluster_gconf_free(gconf);
+    qapi_free_BlockdevOptionsGluster(gconf);
    if (glfs) {
        glfs_fini(glfs);
    }
@@ -614,7 +962,8 @@ out:
 }

 static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int write)
+                                           int64_t sector_num, int nb_sectors,
+                                           QEMUIOVector *qiov, int write)
 {
    int ret;
    GlusterAIOCB acb;
@@ -629,10 +978,10 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,

    if (write) {
        ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
-            gluster_finish_aiocb, &acb);
+                                 gluster_finish_aiocb, &acb);
    } else {
        ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0,
-            gluster_finish_aiocb, &acb);
+                                gluster_finish_aiocb, &acb);
    }

    if (ret < 0) {
@@ -657,13 +1006,17 @@ static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset)
 }

 static coroutine_fn int qemu_gluster_co_readv(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+                                              int64_t sector_num,
+                                              int nb_sectors,
+                                              QEMUIOVector *qiov)
 {
    return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 0);
 }

 static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+                                               int64_t sector_num,
+                                               int nb_sectors,
+                                               QEMUIOVector *qiov)
 {
    return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1);
 }
@@ -724,14 +1077,12 @@ error:
 }

 #ifdef CONFIG_GLUSTERFS_DISCARD
-static coroutine_fn int qemu_gluster_co_discard(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors)
+static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs,
+                                                 int64_t offset, int size)
 {
    int ret;
    GlusterAIOCB acb;
    BDRVGlusterState *s = bs->opaque;
-    size_t size = nb_sectors * BDRV_SECTOR_SIZE;
-    off_t offset = sector_num * BDRV_SECTOR_SIZE;

    acb.size = 0;
    acb.ret = 0;
@@ -934,34 +1285,11 @@ static int64_t coroutine_fn qemu_gluster_co_get_block_status(
 }


-static QemuOptsList qemu_gluster_create_opts = {
-    .name = "qemu-gluster-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(qemu_gluster_create_opts.head),
-    .desc = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Virtual disk size"
-        },
-        {
-            .name = BLOCK_OPT_PREALLOC,
-            .type = QEMU_OPT_STRING,
-            .help = "Preallocation mode (allowed values: off, full)"
-        },
-        {
-            .name = GLUSTER_OPT_DEBUG,
-            .type = QEMU_OPT_NUMBER,
-            .help = "Gluster log level, valid range is 0-9",
-        },
-        { /* end of list */ }
-    }
-};
-
 static BlockDriver bdrv_gluster = {
    .format_name                  = "gluster",
    .protocol_name                = "gluster",
    .instance_size                = sizeof(BDRVGlusterState),
-    .bdrv_needs_filename          = true,
+    .bdrv_needs_filename          = false,
    .bdrv_file_open               = qemu_gluster_open,
    .bdrv_reopen_prepare          = qemu_gluster_reopen_prepare,
    .bdrv_reopen_commit           = qemu_gluster_reopen_commit,
@@ -976,7 +1304,7 @@ static BlockDriver bdrv_gluster = {
    .bdrv_co_flush_to_disk        = qemu_gluster_co_flush_to_disk,
    .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
 #ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_co_discard              = qemu_gluster_co_discard,
+    .bdrv_co_pdiscard             = qemu_gluster_co_pdiscard,
 #endif
 #ifdef CONFIG_GLUSTERFS_ZEROFILL
    .bdrv_co_pwrite_zeroes        = qemu_gluster_co_pwrite_zeroes,
@@ -989,7 +1317,7 @@ static BlockDriver bdrv_gluster_tcp = {
    .format_name                  = "gluster",
    .protocol_name                = "gluster+tcp",
    .instance_size                = sizeof(BDRVGlusterState),
-    .bdrv_needs_filename          = true,
+    .bdrv_needs_filename          = false,
    .bdrv_file_open               = qemu_gluster_open,
    .bdrv_reopen_prepare          = qemu_gluster_reopen_prepare,
    .bdrv_reopen_commit           = qemu_gluster_reopen_commit,
@@ -1004,7 +1332,7 @@ static BlockDriver bdrv_gluster_tcp = {
    .bdrv_co_flush_to_disk        = qemu_gluster_co_flush_to_disk,
    .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
 #ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_co_discard              = qemu_gluster_co_discard,
+    .bdrv_co_pdiscard             = qemu_gluster_co_pdiscard,
 #endif
 #ifdef CONFIG_GLUSTERFS_ZEROFILL
    .bdrv_co_pwrite_zeroes        = qemu_gluster_co_pwrite_zeroes,
@@ -1032,7 +1360,7 @@ static BlockDriver bdrv_gluster_unix = {
    .bdrv_co_flush_to_disk        = qemu_gluster_co_flush_to_disk,
    .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
 #ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_co_discard              = qemu_gluster_co_discard,
+    .bdrv_co_pdiscard             = qemu_gluster_co_pdiscard,
 #endif
 #ifdef CONFIG_GLUSTERFS_ZEROFILL
    .bdrv_co_pwrite_zeroes        = qemu_gluster_co_pwrite_zeroes,
@@ -1041,6 +1369,12 @@ static BlockDriver bdrv_gluster_unix = {
    .create_opts                  = &qemu_gluster_create_opts,
 };

+/* rdma is deprecated (actually never supported for volfile fetch).
+ * Let's maintain it for the protocol compatibility, to make sure things
+ * won't break immediately. For now, gluster+rdma will fall back to gluster+tcp
+ * protocol with a warning.
+ * TODO: remove gluster+rdma interface support
+ */
 static BlockDriver bdrv_gluster_rdma = {
    .format_name                  = "gluster",
    .protocol_name                = "gluster+rdma",
@@ -1060,7 +1394,7 @@ static BlockDriver bdrv_gluster_rdma = {
    .bdrv_co_flush_to_disk        = qemu_gluster_co_flush_to_disk,
    .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
 #ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_co_discard              = qemu_gluster_co_discard,
+    .bdrv_co_pdiscard             = qemu_gluster_co_pdiscard,
 #endif
 #ifdef CONFIG_GLUSTERFS_ZEROFILL
    .bdrv_co_pwrite_zeroes        = qemu_gluster_co_pwrite_zeroes,
--- a/block/io.c
+++ b/block/io.c
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -2,7 +2,7 @@
 * QEMU Block driver for iSCSI images
 *
 * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
- * Copyright (c) 2012-2015 Peter Lieven <pl@kamp.de>
+ * Copyright (c) 2012-2016 Peter Lieven <pl@kamp.de>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -46,7 +46,6 @@

 #ifdef __linux__
 #include <scsi/sg.h>
-#include <block/scsi.h>
 #endif

 typedef struct IscsiLun {
@@ -62,7 +61,23 @@ typedef struct IscsiLun {
    struct scsi_inquiry_logical_block_provisioning lbp;
    struct scsi_inquiry_block_limits bl;
    unsigned char *zeroblock;
-    unsigned long *allocationmap;
+    /* The allocmap tracks which clusters (pages) on the iSCSI target are
+     * allocated and which are not. In case a target returns zeros for
+     * unallocated pages (iscsilun->lprz) we can directly return zeros instead
+     * of reading zeros over the wire if a read request falls within an
+     * unallocated block. As there are 3 possible states we need 2 bitmaps to
+     * track. allocmap_valid keeps track if QEMU's information about a page is
+     * valid. allocmap tracks if a page is allocated or not. In case QEMU has no
+     * valid information about a page the corresponding allocmap entry should be
+     * switched to unallocated as well to force a new lookup of the allocation
+     * status as lookups are generally skipped if a page is suspect to be
+     * allocated. If a iSCSI target is opened with cache.direct = on the
+     * allocmap_valid does not exist turning all cached information invalid so
+     * that a fresh lookup is made for any page even if allocmap entry returns
+     * it's unallocated. */
+    unsigned long *allocmap;
+    unsigned long *allocmap_valid;
+    long allocmap_size;
    int cluster_sectors;
    bool use_16_for_rw;
    bool write_protected;
@@ -153,7 +168,7 @@ static void iscsi_co_generic_bh_cb(void *opaque)
    struct IscsiTask *iTask = opaque;
    iTask->complete = 1;
    qemu_bh_delete(iTask->bh);
-    qemu_coroutine_enter(iTask->co, NULL);
+    qemu_coroutine_enter(iTask->co);
 }

 static void iscsi_retry_timer_expired(void *opaque)
@@ -161,7 +176,7 @@ static void iscsi_retry_timer_expired(void *opaque)
    struct IscsiTask *iTask = opaque;
    iTask->complete = 1;
    if (iTask->co) {
-        qemu_coroutine_enter(iTask->co, NULL);
+        qemu_coroutine_enter(iTask->co);
    }
 }

@@ -423,37 +438,135 @@ static bool is_sector_request_lun_aligned(int64_t sector_num, int nb_sectors,
                                       iscsilun);
 }

-static unsigned long *iscsi_allocationmap_init(IscsiLun *iscsilun)
+static void iscsi_allocmap_free(IscsiLun *iscsilun)
 {
-    return bitmap_try_new(DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks,
-                                                       iscsilun),
-                                       iscsilun->cluster_sectors));
+    g_free(iscsilun->allocmap);
+    g_free(iscsilun->allocmap_valid);
+    iscsilun->allocmap = NULL;
+    iscsilun->allocmap_valid = NULL;
 }

-static void iscsi_allocationmap_set(IscsiLun *iscsilun, int64_t sector_num,
-                                    int nb_sectors)
+
+static int iscsi_allocmap_init(IscsiLun *iscsilun, int open_flags)
 {
-    if (iscsilun->allocationmap == NULL) {
-        return;
+    iscsi_allocmap_free(iscsilun);
+
+    iscsilun->allocmap_size =
+        DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks, iscsilun),
+                     iscsilun->cluster_sectors);
+
+    iscsilun->allocmap = bitmap_try_new(iscsilun->allocmap_size);
+    if (!iscsilun->allocmap) {
+        return -ENOMEM;
    }
-    bitmap_set(iscsilun->allocationmap,
-               sector_num / iscsilun->cluster_sectors,
-               DIV_ROUND_UP(nb_sectors, iscsilun->cluster_sectors));
+
+    if (open_flags & BDRV_O_NOCACHE) {
+        /* in case that cache.direct = on all allocmap entries are
+         * treated as invalid to force a relookup of the block
+         * status on every read request */
+        return 0;
+    }
+
+    iscsilun->allocmap_valid = bitmap_try_new(iscsilun->allocmap_size);
+    if (!iscsilun->allocmap_valid) {
+        /* if we are under memory pressure free the allocmap as well */
+        iscsi_allocmap_free(iscsilun);
+        return -ENOMEM;
+    }
+
+    return 0;
 }

-static void iscsi_allocationmap_clear(IscsiLun *iscsilun, int64_t sector_num,
-                                      int nb_sectors)
+static void
+iscsi_allocmap_update(IscsiLun *iscsilun, int64_t sector_num,
+                      int nb_sectors, bool allocated, bool valid)
 {
-    int64_t cluster_num, nb_clusters;
-    if (iscsilun->allocationmap == NULL) {
+    int64_t cl_num_expanded, nb_cls_expanded, cl_num_shrunk, nb_cls_shrunk;
+
+    if (iscsilun->allocmap == NULL) {
        return;
    }
-    cluster_num = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
-    nb_clusters = (sector_num + nb_sectors) / iscsilun->cluster_sectors
-                  - cluster_num;
-    if (nb_clusters > 0) {
-        bitmap_clear(iscsilun->allocationmap, cluster_num, nb_clusters);
+    /* expand to entirely contain all affected clusters */
+    cl_num_expanded = sector_num / iscsilun->cluster_sectors;
+    nb_cls_expanded = DIV_ROUND_UP(sector_num + nb_sectors,
+                                   iscsilun->cluster_sectors) - cl_num_expanded;
+    /* shrink to touch only completely contained clusters */
+    cl_num_shrunk = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
+    nb_cls_shrunk = (sector_num + nb_sectors) / iscsilun->cluster_sectors
+                      - cl_num_shrunk;
+    if (allocated) {
+        bitmap_set(iscsilun->allocmap, cl_num_expanded, nb_cls_expanded);
+    } else {
+        bitmap_clear(iscsilun->allocmap, cl_num_shrunk, nb_cls_shrunk);
    }
+
+    if (iscsilun->allocmap_valid == NULL) {
+        return;
+    }
+    if (valid) {
+        bitmap_set(iscsilun->allocmap_valid, cl_num_shrunk, nb_cls_shrunk);
+    } else {
+        bitmap_clear(iscsilun->allocmap_valid, cl_num_expanded,
+                     nb_cls_expanded);
+    }
+}
+
+static void
+iscsi_allocmap_set_allocated(IscsiLun *iscsilun, int64_t sector_num,
+                             int nb_sectors)
+{
+    iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, true, true);
+}
+
+static void
+iscsi_allocmap_set_unallocated(IscsiLun *iscsilun, int64_t sector_num,
+                               int nb_sectors)
+{
+    /* Note: if cache.direct=on the fifth argument to iscsi_allocmap_update
+     * is ignored, so this will in effect be an iscsi_allocmap_set_invalid.
+     */
+    iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, false, true);
+}
+
+static void iscsi_allocmap_set_invalid(IscsiLun *iscsilun, int64_t sector_num,
+                                       int nb_sectors)
+{
+    iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, false, false);
+}
+
+static void iscsi_allocmap_invalidate(IscsiLun *iscsilun)
+{
+    if (iscsilun->allocmap) {
+        bitmap_zero(iscsilun->allocmap, iscsilun->allocmap_size);
+    }
+    if (iscsilun->allocmap_valid) {
+        bitmap_zero(iscsilun->allocmap_valid, iscsilun->allocmap_size);
+    }
+}
+
+static inline bool
+iscsi_allocmap_is_allocated(IscsiLun *iscsilun, int64_t sector_num,
+                            int nb_sectors)
+{
+    unsigned long size;
+    if (iscsilun->allocmap == NULL) {
+        return true;
+    }
+    size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
+    return !(find_next_bit(iscsilun->allocmap, size,
+                           sector_num / iscsilun->cluster_sectors) == size);
+}
+
+static inline bool iscsi_allocmap_is_valid(IscsiLun *iscsilun,
+                                           int64_t sector_num, int nb_sectors)
+{
+    unsigned long size;
+    if (iscsilun->allocmap_valid == NULL) {
+        return false;
+    }
+    size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
+    return (find_next_zero_bit(iscsilun->allocmap_valid, size,
+                               sector_num / iscsilun->cluster_sectors) == size);
 }

 static int coroutine_fn
@@ -473,10 +586,8 @@ iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
        return -EINVAL;
    }

-    if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) {
-        error_report("iSCSI Error: Write of %d sectors exceeds max_xfer_len "
-                     "of %d sectors", nb_sectors, bs->bl.max_transfer_length);
-        return -EINVAL;
+    if (bs->bl.max_transfer) {
+        assert(nb_sectors << BDRV_SECTOR_BITS <= bs->bl.max_transfer);
    }

    lba = sector_qemu2lun(sector_num, iscsilun);
@@ -515,26 +626,16 @@ retry:
    }

    if (iTask.status != SCSI_STATUS_GOOD) {
+        iscsi_allocmap_set_invalid(iscsilun, sector_num, nb_sectors);
        return iTask.err_code;
    }

-    iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
+    iscsi_allocmap_set_allocated(iscsilun, sector_num, nb_sectors);

    return 0;
 }


-static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun,
-                                             int64_t sector_num, int nb_sectors)
-{
-    unsigned long size;
-    if (iscsilun->allocationmap == NULL) {
-        return true;
-    }
-    size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
-    return !(find_next_bit(iscsilun->allocationmap, size,
-                           sector_num / iscsilun->cluster_sectors) == size);
-}

 static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
                                                  int64_t sector_num,
@@ -619,9 +720,9 @@ retry:
    }

    if (ret & BDRV_BLOCK_ZERO) {
-        iscsi_allocationmap_clear(iscsilun, sector_num, *pnum);
+        iscsi_allocmap_set_unallocated(iscsilun, sector_num, *pnum);
    } else {
-        iscsi_allocationmap_set(iscsilun, sector_num, *pnum);
+        iscsi_allocmap_set_allocated(iscsilun, sector_num, *pnum);
    }

    if (*pnum > nb_sectors) {
@@ -650,23 +751,36 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
        return -EINVAL;
    }

-    if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) {
-        error_report("iSCSI Error: Read of %d sectors exceeds max_xfer_len "
-                     "of %d sectors", nb_sectors, bs->bl.max_transfer_length);
-        return -EINVAL;
+    if (bs->bl.max_transfer) {
+        assert(nb_sectors << BDRV_SECTOR_BITS <= bs->bl.max_transfer);
    }

-    if (iscsilun->lbprz && nb_sectors >= ISCSI_CHECKALLOC_THRES &&
-        !iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
-        int64_t ret;
+    /* if cache.direct is off and we have a valid entry in our allocation map
+     * we can skip checking the block status and directly return zeroes if
+     * the request falls within an unallocated area */
+    if (iscsi_allocmap_is_valid(iscsilun, sector_num, nb_sectors) &&
+        !iscsi_allocmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
+            qemu_iovec_memset(iov, 0, 0x00, iov->size);
+            return 0;
+    }
+
+    if (nb_sectors >= ISCSI_CHECKALLOC_THRES &&
+        !iscsi_allocmap_is_valid(iscsilun, sector_num, nb_sectors) &&
+        !iscsi_allocmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
        int pnum;
        BlockDriverState *file;
-        ret = iscsi_co_get_block_status(bs, sector_num,
-                                        BDRV_REQUEST_MAX_SECTORS, &pnum, &file);
+        /* check the block status from the beginning of the cluster
+         * containing the start sector */
+        int64_t ret = iscsi_co_get_block_status(bs,
+                          sector_num - sector_num % iscsilun->cluster_sectors,
+                          BDRV_REQUEST_MAX_SECTORS, &pnum, &file);
        if (ret < 0) {
            return ret;
        }
-        if (ret & BDRV_BLOCK_ZERO && pnum >= nb_sectors) {
+        /* if the whole request falls into an unallocated area we can avoid
+         * to read and directly return zeroes instead */
+        if (ret & BDRV_BLOCK_ZERO &&
+            pnum >= nb_sectors + sector_num % iscsilun->cluster_sectors) {
            qemu_iovec_memset(iov, 0, 0x00, iov->size);
            return 0;
        }
@@ -928,29 +1042,26 @@ iscsi_getlength(BlockDriverState *bs)
 }

 static int
-coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num,
-                                   int nb_sectors)
+coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
 {
    IscsiLun *iscsilun = bs->opaque;
    struct IscsiTask iTask;
    struct unmap_list list;

-    if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
-        return -EINVAL;
-    }
+    assert(is_byte_request_lun_aligned(offset, count, iscsilun));

    if (!iscsilun->lbp.lbpu) {
        /* UNMAP is not supported by the target */
        return 0;
    }

-    list.lba = sector_qemu2lun(sector_num, iscsilun);
-    list.num = sector_qemu2lun(nb_sectors, iscsilun);
+    list.lba = offset / iscsilun->block_size;
+    list.num = count / iscsilun->block_size;

    iscsi_co_init_iscsitask(iscsilun, &iTask);
 retry:
    if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
-                     iscsi_co_generic_cb, &iTask) == NULL) {
+                         iscsi_co_generic_cb, &iTask) == NULL) {
        return -ENOMEM;
    }

@@ -980,7 +1091,8 @@ retry:
        return iTask.err_code;
    }

-    iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
+    iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
+                               count >> BDRV_SECTOR_BITS);

    return 0;
 }
@@ -1070,15 +1182,17 @@ retry:
    }

    if (iTask.status != SCSI_STATUS_GOOD) {
+        iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
+                                   count >> BDRV_SECTOR_BITS);
        return iTask.err_code;
    }

    if (flags & BDRV_REQ_MAY_UNMAP) {
-        iscsi_allocationmap_clear(iscsilun, offset >> BDRV_SECTOR_BITS,
-                                  count >> BDRV_SECTOR_BITS);
+        iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
+                                   count >> BDRV_SECTOR_BITS);
    } else {
-        iscsi_allocationmap_set(iscsilun, offset >> BDRV_SECTOR_BITS,
-                                count >> BDRV_SECTOR_BITS);
+        iscsi_allocmap_set_allocated(iscsilun, offset >> BDRV_SECTOR_BITS,
+                                     count >> BDRV_SECTOR_BITS);
    }

    return 0;
@@ -1589,14 +1703,13 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
        goto out;
    }
    bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
-    bs->request_alignment = iscsilun->block_size;

    /* We don't have any emulation for devices other than disks and CD-ROMs, so
     * this must be sg ioctl compatible. We force it to be sg, otherwise qemu
     * will try to read from the device to guess the image format.
     */
    if (iscsilun->type != TYPE_DISK && iscsilun->type != TYPE_ROM) {
-        bs->sg = 1;
+        bs->sg = true;
    }

    task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
@@ -1652,10 +1765,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
        iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran *
                                     iscsilun->block_size) >> BDRV_SECTOR_BITS;
        if (iscsilun->lbprz) {
-            iscsilun->allocationmap = iscsi_allocationmap_init(iscsilun);
-            if (iscsilun->allocationmap == NULL) {
-                ret = -ENOMEM;
-            }
+            ret = iscsi_allocmap_init(iscsilun, bs->open_flags);
        }
    }

@@ -1692,38 +1802,37 @@ static void iscsi_close(BlockDriverState *bs)
    }
    iscsi_destroy_context(iscsi);
    g_free(iscsilun->zeroblock);
-    g_free(iscsilun->allocationmap);
+    iscsi_allocmap_free(iscsilun);
    memset(iscsilun, 0, sizeof(IscsiLun));
 }

-static int sector_limits_lun2qemu(int64_t sector, IscsiLun *iscsilun)
-{
-    return MIN(sector_lun2qemu(sector, iscsilun), INT_MAX / 2 + 1);
-}
-
 static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
 {
    /* We don't actually refresh here, but just return data queried in
     * iscsi_open(): iscsi targets don't change their limits. */

    IscsiLun *iscsilun = bs->opaque;
-    uint32_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff;
+    uint64_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff;
+
+    bs->bl.request_alignment = iscsilun->block_size;

    if (iscsilun->bl.max_xfer_len) {
        max_xfer_len = MIN(max_xfer_len, iscsilun->bl.max_xfer_len);
    }

-    bs->bl.max_transfer_length = sector_limits_lun2qemu(max_xfer_len, iscsilun);
+    if (max_xfer_len * iscsilun->block_size < INT_MAX) {
+        bs->bl.max_transfer = max_xfer_len * iscsilun->block_size;
+    }

    if (iscsilun->lbp.lbpu) {
-        if (iscsilun->bl.max_unmap < 0xffffffff) {
-            bs->bl.max_discard =
-                sector_limits_lun2qemu(iscsilun->bl.max_unmap, iscsilun);
+        if (iscsilun->bl.max_unmap < 0xffffffff / iscsilun->block_size) {
+            bs->bl.max_pdiscard =
+                iscsilun->bl.max_unmap * iscsilun->block_size;
        }
-        bs->bl.discard_alignment =
-            sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun);
+        bs->bl.pdiscard_alignment =
+            iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
    } else {
-        bs->bl.discard_alignment = iscsilun->block_size >> BDRV_SECTOR_BITS;
+        bs->bl.pdiscard_alignment = iscsilun->block_size;
    }

    if (iscsilun->bl.max_ws_len < 0xffffffff / iscsilun->block_size) {
@@ -1736,8 +1845,11 @@ static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
    } else {
        bs->bl.pwrite_zeroes_alignment = iscsilun->block_size;
    }
-    bs->bl.opt_transfer_length =
-        sector_limits_lun2qemu(iscsilun->bl.opt_xfer_len, iscsilun);
+    if (iscsilun->bl.opt_xfer_len &&
+        iscsilun->bl.opt_xfer_len < INT_MAX / iscsilun->block_size) {
+        bs->bl.opt_transfer = pow2floor(iscsilun->bl.opt_xfer_len *
+                                        iscsilun->block_size);
+    }
 }

 /* Note that this will not re-establish a connection with an iSCSI target - it
@@ -1754,6 +1866,16 @@ static int iscsi_reopen_prepare(BDRVReopenState *state,
    return 0;
 }

+static void iscsi_reopen_commit(BDRVReopenState *reopen_state)
+{
+    IscsiLun *iscsilun = reopen_state->bs->opaque;
+
+    /* the cache.direct status might have changed */
+    if (iscsilun->allocmap != NULL) {
+        iscsi_allocmap_init(iscsilun, reopen_state->flags);
+    }
+}
+
 static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
 {
    IscsiLun *iscsilun = bs->opaque;
@@ -1773,9 +1895,8 @@ static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
        return -EINVAL;
    }

-    if (iscsilun->allocationmap != NULL) {
-        g_free(iscsilun->allocationmap);
-        iscsilun->allocationmap = iscsi_allocationmap_init(iscsilun);
+    if (iscsilun->allocmap != NULL) {
+        iscsi_allocmap_init(iscsilun, bs->open_flags);
    }

    return 0;
@@ -1835,6 +1956,13 @@ static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
    return 0;
 }

+static void iscsi_invalidate_cache(BlockDriverState *bs,
+                                   Error **errp)
+{
+    IscsiLun *iscsilun = bs->opaque;
+    iscsi_allocmap_invalidate(iscsilun);
+}
+
 static QemuOptsList iscsi_create_opts = {
    .name = "iscsi-create-opts",
    .head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head),
@@ -1858,7 +1986,9 @@ static BlockDriver bdrv_iscsi = {
    .bdrv_close      = iscsi_close,
    .bdrv_create     = iscsi_create,
    .create_opts     = &iscsi_create_opts,
-    .bdrv_reopen_prepare  = iscsi_reopen_prepare,
+    .bdrv_reopen_prepare   = iscsi_reopen_prepare,
+    .bdrv_reopen_commit    = iscsi_reopen_commit,
+    .bdrv_invalidate_cache = iscsi_invalidate_cache,

    .bdrv_getlength  = iscsi_getlength,
    .bdrv_get_info   = iscsi_get_info,
@@ -1866,7 +1996,7 @@ static BlockDriver bdrv_iscsi = {
    .bdrv_refresh_limits = iscsi_refresh_limits,

    .bdrv_co_get_block_status = iscsi_co_get_block_status,
-    .bdrv_co_discard      = iscsi_co_discard,
+    .bdrv_co_pdiscard      = iscsi_co_pdiscard,
    .bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
    .bdrv_co_readv         = iscsi_co_readv,
    .bdrv_co_writev_flags  = iscsi_co_writev_flags,
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -28,8 +28,6 @@
 */
 #define MAX_EVENTS 128

-#define MAX_QUEUED_IO  128
-
 struct qemu_laiocb {
    BlockAIOCB common;
    Coroutine *co;
@@ -44,12 +42,15 @@ struct qemu_laiocb {

 typedef struct {
    int plugged;
-    unsigned int n;
+    unsigned int in_queue;
+    unsigned int in_flight;
    bool blocked;
    QSIMPLEQ_HEAD(, qemu_laiocb) pending;
 } LaioQueue;

 struct LinuxAioState {
+    AioContext *aio_context;
+
    io_context_t ctx;
    EventNotifier e;

@@ -87,14 +88,14 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
                qemu_iovec_memset(laiocb->qiov, ret, 0,
                    laiocb->qiov->size - ret);
            } else {
-                ret = -EINVAL;
+                ret = -ENOSPC;
            }
        }
    }

    laiocb->ret = ret;
    if (laiocb->co) {
-        qemu_coroutine_enter(laiocb->co, NULL);
+        qemu_coroutine_enter(laiocb->co);
    } else {
        laiocb->common.cb(laiocb->common.opaque, ret);
        qemu_aio_unref(laiocb);
@@ -129,6 +130,7 @@ static void qemu_laio_completion_bh(void *opaque)
            s->event_max = 0;
            return; /* no more events */
        }
+        s->io_q.in_flight -= s->event_max;
    }

    /* Reschedule so nested event loops see currently pending completions */
@@ -190,7 +192,8 @@ static void ioq_init(LaioQueue *io_q)
 {
    QSIMPLEQ_INIT(&io_q->pending);
    io_q->plugged = 0;
-    io_q->n = 0;
+    io_q->in_queue = 0;
+    io_q->in_flight = 0;
    io_q->blocked = false;
 }

@@ -198,14 +201,17 @@ static void ioq_submit(LinuxAioState *s)
 {
    int ret, len;
    struct qemu_laiocb *aiocb;
-    struct iocb *iocbs[MAX_QUEUED_IO];
+    struct iocb *iocbs[MAX_EVENTS];
    QSIMPLEQ_HEAD(, qemu_laiocb) completed;

    do {
+        if (s->io_q.in_flight >= MAX_EVENTS) {
+            break;
+        }
        len = 0;
        QSIMPLEQ_FOREACH(aiocb, &s->io_q.pending, next) {
            iocbs[len++] = &aiocb->iocb;
-            if (len == MAX_QUEUED_IO) {
+            if (s->io_q.in_flight + len >= MAX_EVENTS) {
                break;
            }
        }
@@ -215,27 +221,33 @@ static void ioq_submit(LinuxAioState *s)
            break;
        }
        if (ret < 0) {
-            abort();
+            /* Fail the first request, retry the rest */
+            aiocb = QSIMPLEQ_FIRST(&s->io_q.pending);
+            QSIMPLEQ_REMOVE_HEAD(&s->io_q.pending, next);
+            s->io_q.in_queue--;
+            aiocb->ret = ret;
+            qemu_laio_process_completion(aiocb);
+            continue;
        }

-        s->io_q.n -= ret;
+        s->io_q.in_flight += ret;
+        s->io_q.in_queue  -= ret;
        aiocb = container_of(iocbs[ret - 1], struct qemu_laiocb, iocb);
        QSIMPLEQ_SPLIT_AFTER(&s->io_q.pending, aiocb, next, &completed);
    } while (ret == len && !QSIMPLEQ_EMPTY(&s->io_q.pending));
-    s->io_q.blocked = (s->io_q.n > 0);
+    s->io_q.blocked = (s->io_q.in_queue > 0);
 }

 void laio_io_plug(BlockDriverState *bs, LinuxAioState *s)
 {
-    assert(!s->io_q.plugged);
-    s->io_q.plugged = 1;
+    s->io_q.plugged++;
 }

 void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s)
 {
    assert(s->io_q.plugged);
-    s->io_q.plugged = 0;
-    if (!s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
+    if (--s->io_q.plugged == 0 &&
+        !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
        ioq_submit(s);
    }
 }
@@ -263,9 +275,10 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
    io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));

    QSIMPLEQ_INSERT_TAIL(&s->io_q.pending, laiocb, next);
-    s->io_q.n++;
+    s->io_q.in_queue++;
    if (!s->io_q.blocked &&
-        (!s->io_q.plugged || s->io_q.n >= MAX_QUEUED_IO)) {
+        (!s->io_q.plugged ||
+         s->io_q.in_flight + s->io_q.in_queue >= MAX_EVENTS)) {
        ioq_submit(s);
    }

@@ -325,6 +338,7 @@ void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)

 void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)
 {
+    s->aio_context = new_context;
    s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
    aio_set_event_notifier(new_context, &s->e, false,
                           qemu_laio_completion_cb);
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -23,7 +23,9 @@

 #define SLICE_TIME    100000000ULL /* ns */
 #define MAX_IN_FLIGHT 16
-#define DEFAULT_MIRROR_BUF_SIZE   (10 << 20)
+#define MAX_IO_SECTORS ((1 << 20) >> BDRV_SECTOR_BITS) /* 1 Mb */
+#define DEFAULT_MIRROR_BUF_SIZE \
+    (MAX_IN_FLIGHT * MAX_IO_SECTORS * BDRV_SECTOR_SIZE)

 /* The mirroring buffer is a list of granularity-sized chunks.
 * Free chunks are organized in a list.
@@ -58,9 +60,10 @@ typedef struct MirrorBlockJob {
    QSIMPLEQ_HEAD(, MirrorBuffer) buf_free;
    int buf_free_count;

+    uint64_t last_pause_ns;
    unsigned long *in_flight_bitmap;
    int in_flight;
-    int sectors_in_flight;
+    int64_t sectors_in_flight;
    int ret;
    bool unmap;
    bool waiting_for_io;
@@ -121,7 +124,7 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
    g_free(op);

    if (s->waiting_for_io) {
-        qemu_coroutine_enter(s->common.co, NULL);
+        qemu_coroutine_enter(s->common.co);
    }
 }

@@ -303,8 +306,9 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s,
    s->in_flight++;
    s->sectors_in_flight += nb_sectors;
    if (is_discard) {
-        blk_aio_discard(s->target, sector_num, op->nb_sectors,
-                        mirror_write_complete, op);
+        blk_aio_pdiscard(s->target, sector_num << BDRV_SECTOR_BITS,
+                         op->nb_sectors << BDRV_SECTOR_BITS,
+                         mirror_write_complete, op);
    } else {
        blk_aio_pwrite_zeroes(s->target, sector_num * BDRV_SECTOR_SIZE,
                              op->nb_sectors * BDRV_SECTOR_SIZE,
@@ -322,6 +326,9 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
    int nb_chunks = 1;
    int64_t end = s->bdev_length / BDRV_SECTOR_SIZE;
    int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
+    bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target));
+    int max_io_sectors = MAX((s->buf_size >> BDRV_SECTOR_BITS) / MAX_IN_FLIGHT,
+                             MAX_IO_SECTORS);

    sector_num = hbitmap_iter_next(&s->hbi);
    if (sector_num < 0) {
@@ -372,7 +379,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
    bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks);
    while (nb_chunks > 0 && sector_num < end) {
        int ret;
-        int io_sectors;
+        int io_sectors, io_sectors_acct;
        BlockDriverState *file;
        enum MirrorMethod {
            MIRROR_METHOD_COPY,
@@ -385,7 +392,9 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
                                          nb_chunks * sectors_per_chunk,
                                          &io_sectors, &file);
        if (ret < 0) {
-            io_sectors = nb_chunks * sectors_per_chunk;
+            io_sectors = MIN(nb_chunks * sectors_per_chunk, max_io_sectors);
+        } else if (ret & BDRV_BLOCK_DATA) {
+            io_sectors = MIN(io_sectors, max_io_sectors);
        }

        io_sectors -= io_sectors % sectors_per_chunk;
@@ -405,16 +414,30 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
            }
        }

+        while (s->in_flight >= MAX_IN_FLIGHT) {
+            trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
+            mirror_wait_for_io(s);
+        }
+
+        if (s->ret < 0) {
+            return 0;
+        }
+
        mirror_clip_sectors(s, sector_num, &io_sectors);
        switch (mirror_method) {
        case MIRROR_METHOD_COPY:
            io_sectors = mirror_do_read(s, sector_num, io_sectors);
+            io_sectors_acct = io_sectors;
            break;
        case MIRROR_METHOD_ZERO:
-            mirror_do_zero_or_discard(s, sector_num, io_sectors, false);
-            break;
        case MIRROR_METHOD_DISCARD:
-            mirror_do_zero_or_discard(s, sector_num, io_sectors, true);
+            mirror_do_zero_or_discard(s, sector_num, io_sectors,
+                                      mirror_method == MIRROR_METHOD_DISCARD);
+            if (write_zeroes_ok) {
+                io_sectors_acct = 0;
+            } else {
+                io_sectors_acct = io_sectors;
+            }
            break;
        default:
            abort();
@@ -422,7 +445,9 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
        assert(io_sectors);
        sector_num += io_sectors;
        nb_chunks -= DIV_ROUND_UP(io_sectors, sectors_per_chunk);
-        delay_ns += ratelimit_calculate_delay(&s->limit, io_sectors);
+        if (s->common.speed) {
+            delay_ns = ratelimit_calculate_delay(&s->limit, io_sectors_acct);
+        }
    }
    return delay_ns;
 }
@@ -506,25 +531,97 @@ static void mirror_exit(BlockJob *job, void *opaque)
    block_job_completed(&s->common, data->ret);
    g_free(data);
    bdrv_drained_end(src);
-    if (qemu_get_aio_context() == bdrv_get_aio_context(src)) {
-        aio_enable_external(iohandler_get_aio_context());
-    }
    bdrv_unref(src);
 }

+static void mirror_throttle(MirrorBlockJob *s)
+{
+    int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+
+    if (now - s->last_pause_ns > SLICE_TIME) {
+        s->last_pause_ns = now;
+        block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, 0);
+    } else {
+        block_job_pause_point(&s->common);
+    }
+}
+
+static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
+{
+    int64_t sector_num, end;
+    BlockDriverState *base = s->base;
+    BlockDriverState *bs = blk_bs(s->common.blk);
+    BlockDriverState *target_bs = blk_bs(s->target);
+    int ret, n;
+
+    end = s->bdev_length / BDRV_SECTOR_SIZE;
+
+    if (base == NULL && !bdrv_has_zero_init(target_bs)) {
+        if (!bdrv_can_write_zeroes_with_unmap(target_bs)) {
+            bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, end);
+            return 0;
+        }
+
+        for (sector_num = 0; sector_num < end; ) {
+            int nb_sectors = MIN(end - sector_num,
+                QEMU_ALIGN_DOWN(INT_MAX, s->granularity) >> BDRV_SECTOR_BITS);
+
+            mirror_throttle(s);
+
+            if (block_job_is_cancelled(&s->common)) {
+                return 0;
+            }
+
+            if (s->in_flight >= MAX_IN_FLIGHT) {
+                trace_mirror_yield(s, s->in_flight, s->buf_free_count, -1);
+                mirror_wait_for_io(s);
+                continue;
+            }
+
+            mirror_do_zero_or_discard(s, sector_num, nb_sectors, false);
+            sector_num += nb_sectors;
+        }
+
+        mirror_drain(s);
+    }
+
+    /* First part, loop on the sectors and initialize the dirty bitmap.  */
+    for (sector_num = 0; sector_num < end; ) {
+        /* Just to make sure we are not exceeding int limit. */
+        int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS,
+                             end - sector_num);
+
+        mirror_throttle(s);
+
+        if (block_job_is_cancelled(&s->common)) {
+            return 0;
+        }
+
+        ret = bdrv_is_allocated_above(bs, base, sector_num, nb_sectors, &n);
+        if (ret < 0) {
+            return ret;
+        }
+
+        assert(n > 0);
+        if (ret == 1) {
+            bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n);
+        }
+        sector_num += n;
+    }
+    return 0;
+}
+
 static void coroutine_fn mirror_run(void *opaque)
 {
    MirrorBlockJob *s = opaque;
    MirrorExitData *data;
    BlockDriverState *bs = blk_bs(s->common.blk);
    BlockDriverState *target_bs = blk_bs(s->target);
-    int64_t sector_num, end, length;
-    uint64_t last_pause_ns;
+    int64_t length;
    BlockDriverInfo bdi;
    char backing_filename[2]; /* we only need 2 characters because we are only
                                 checking for a NULL string */
    int ret = 0;
-    int n;
    int target_cluster_size = BDRV_SECTOR_SIZE;

    if (block_job_is_cancelled(&s->common)) {
@@ -566,7 +663,6 @@ static void coroutine_fn mirror_run(void *opaque)
    s->target_cluster_sectors = target_cluster_size >> BDRV_SECTOR_BITS;
    s->max_iov = MIN(bs->bl.max_iov, target_bs->bl.max_iov);

-    end = s->bdev_length / BDRV_SECTOR_SIZE;
    s->buf = qemu_try_blockalign(bs, s->buf_size);
    if (s->buf == NULL) {
        ret = -ENOMEM;
@@ -575,47 +671,18 @@ static void coroutine_fn mirror_run(void *opaque)

    mirror_free_init(s);

-    last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+    s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    if (!s->is_none_mode) {
-        /* First part, loop on the sectors and initialize the dirty bitmap.  */
-        BlockDriverState *base = s->base;
-        bool mark_all_dirty = s->base == NULL && !bdrv_has_zero_init(target_bs);
-
-        for (sector_num = 0; sector_num < end; ) {
-            /* Just to make sure we are not exceeding int limit. */
-            int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS,
-                                 end - sector_num);
-            int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-
-            if (now - last_pause_ns > SLICE_TIME) {
-                last_pause_ns = now;
-                block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, 0);
-            } else {
-                block_job_pause_point(&s->common);
-            }
-
-            if (block_job_is_cancelled(&s->common)) {
-                goto immediate_exit;
-            }
-
-            ret = bdrv_is_allocated_above(bs, base, sector_num, nb_sectors, &n);
-
-            if (ret < 0) {
-                goto immediate_exit;
-            }
-
-            assert(n > 0);
-            if (ret == 1 || mark_all_dirty) {
-                bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n);
-            }
-            sector_num += n;
+        ret = mirror_dirty_init(s);
+        if (ret < 0 || block_job_is_cancelled(&s->common)) {
+            goto immediate_exit;
        }
    }

    bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
    for (;;) {
        uint64_t delay_ns = 0;
-        int64_t cnt;
+        int64_t cnt, delta;
        bool should_complete;

        if (s->ret < 0) {
@@ -638,9 +705,10 @@ static void coroutine_fn mirror_run(void *opaque)
         * We do so every SLICE_TIME nanoseconds, or when there is an error,
         * or when the source is clean, whichever comes first.
         */
-        if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - last_pause_ns < SLICE_TIME &&
+        delta = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->last_pause_ns;
+        if (delta < SLICE_TIME &&
            s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
-            if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
+            if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 ||
                (cnt == 0 && s->in_flight > 0)) {
                trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt);
                mirror_wait_for_io(s);
@@ -708,7 +776,7 @@ static void coroutine_fn mirror_run(void *opaque)
            s->common.cancelled = false;
            break;
        }
-        last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+        s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    }

 immediate_exit:
@@ -732,12 +800,6 @@ immediate_exit:
    /* Before we switch to target in mirror_exit, make sure data doesn't
     * change. */
    bdrv_drained_begin(bs);
-    if (qemu_get_aio_context() == bdrv_get_aio_context(bs)) {
-        /* FIXME: virtio host notifiers run on iohandler_ctx, therefore the
-         * above bdrv_drained_end isn't enough to quiesce it. This is ugly, we
-         * need a block layer API change to achieve this. */
-        aio_disable_external(iohandler_get_aio_context());
-    }
    block_job_defer_to_main_loop(&s->common, mirror_exit, data);
 }

@@ -761,7 +823,8 @@ static void mirror_complete(BlockJob *job, Error **errp)
    target = blk_bs(s->target);

    if (!s->synced) {
-        error_setg(errp, QERR_BLOCK_JOB_NOT_READY, job->id);
+        error_setg(errp, "The active block job '%s' cannot be completed",
+                   job->id);
        return;
    }

@@ -842,8 +905,8 @@ static const BlockJobDriver commit_active_job_driver = {
    .attached_aio_context   = mirror_attached_aio_context,
 };

-static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
-                             const char *replaces,
+static void mirror_start_job(const char *job_id, BlockDriverState *bs,
+                             BlockDriverState *target, const char *replaces,
                             int64_t speed, uint32_t granularity,
                             int64_t buf_size,
                             BlockMirrorBackingMode backing_mode,
@@ -872,7 +935,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
        buf_size = DEFAULT_MIRROR_BUF_SIZE;
    }

-    s = block_job_create(driver, bs, speed, cb, opaque, errp);
+    s = block_job_create(job_id, driver, bs, speed, cb, opaque, errp);
    if (!s) {
        return;
    }
@@ -900,13 +963,13 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,

    bdrv_op_block_all(target, s->common.blocker);

-    s->common.co = qemu_coroutine_create(mirror_run);
+    s->common.co = qemu_coroutine_create(mirror_run, s);
    trace_mirror_start(bs, s, s->common.co, opaque);
-    qemu_coroutine_enter(s->common.co, s);
+    qemu_coroutine_enter(s->common.co);
 }

-void mirror_start(BlockDriverState *bs, BlockDriverState *target,
-                  const char *replaces,
+void mirror_start(const char *job_id, BlockDriverState *bs,
+                  BlockDriverState *target, const char *replaces,
                  int64_t speed, uint32_t granularity, int64_t buf_size,
                  MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
                  BlockdevOnError on_source_error,
@@ -924,14 +987,14 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
    }
    is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
    base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
-    mirror_start_job(bs, target, replaces,
+    mirror_start_job(job_id, bs, target, replaces,
                     speed, granularity, buf_size, backing_mode,
                     on_source_error, on_target_error, unmap, cb, opaque, errp,
                     &mirror_job_driver, is_none_mode, base);
 }

-void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
-                         int64_t speed,
+void commit_active_start(const char *job_id, BlockDriverState *bs,
+                         BlockDriverState *base, int64_t speed,
                         BlockdevOnError on_error,
                         BlockCompletionFunc *cb,
                         void *opaque, Error **errp)
@@ -972,7 +1035,8 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
        }
    }

-    mirror_start_job(bs, base, NULL, speed, 0, 0, MIRROR_LEAVE_BACKING_CHAIN,
+    mirror_start_job(job_id, bs, base, NULL, speed, 0, 0,
+                     MIRROR_LEAVE_BACKING_CHAIN,
                     on_error, on_error, false, cb, opaque, &local_err,
                     &commit_active_job_driver, false, base);
    if (local_err) {
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -38,7 +38,7 @@ static void nbd_recv_coroutines_enter_all(NbdClientSession *s)

    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
        if (s->recv_coroutine[i]) {
-            qemu_coroutine_enter(s->recv_coroutine[i], NULL);
+            qemu_coroutine_enter(s->recv_coroutine[i]);
        }
    }
 }
@@ -99,7 +99,7 @@ static void nbd_reply_ready(void *opaque)
    }

    if (s->recv_coroutine[i]) {
-        qemu_coroutine_enter(s->recv_coroutine[i], NULL);
+        qemu_coroutine_enter(s->recv_coroutine[i]);
        return;
    }

@@ -111,12 +111,12 @@ static void nbd_restart_write(void *opaque)
 {
    BlockDriverState *bs = opaque;

-    qemu_coroutine_enter(nbd_get_client_session(bs)->send_coroutine, NULL);
+    qemu_coroutine_enter(nbd_get_client_session(bs)->send_coroutine);
 }

 static int nbd_co_send_request(BlockDriverState *bs,
                               struct nbd_request *request,
-                               QEMUIOVector *qiov, int offset)
+                               QEMUIOVector *qiov)
 {
    NbdClientSession *s = nbd_get_client_session(bs);
    AioContext *aio_context;
@@ -149,8 +149,8 @@ static int nbd_co_send_request(BlockDriverState *bs,
        qio_channel_set_cork(s->ioc, true);
        rc = nbd_send_request(s->ioc, request);
        if (rc >= 0) {
-            ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov,
-                               offset, request->len, 0);
+            ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, request->len,
+                               false);
            if (ret != request->len) {
                rc = -EIO;
            }
@@ -167,8 +167,9 @@ static int nbd_co_send_request(BlockDriverState *bs,
 }

 static void nbd_co_receive_reply(NbdClientSession *s,
-    struct nbd_request *request, struct nbd_reply *reply,
-    QEMUIOVector *qiov, int offset)
+                                 struct nbd_request *request,
+                                 struct nbd_reply *reply,
+                                 QEMUIOVector *qiov)
 {
    int ret;

@@ -181,8 +182,8 @@ static void nbd_co_receive_reply(NbdClientSession *s,
        reply->error = EIO;
    } else {
        if (qiov && reply->error == 0) {
-            ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov,
-                               offset, request->len, 1);
+            ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, request->len,
+                               true);
            if (ret != request->len) {
                reply->error = EIO;
            }
@@ -217,36 +218,41 @@ static void nbd_coroutine_end(NbdClientSession *s,
    }
 }

-static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num,
-                          int nb_sectors, QEMUIOVector *qiov,
-                          int offset)
+int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
+                         uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
    NbdClientSession *client = nbd_get_client_session(bs);
-    struct nbd_request request = { .type = NBD_CMD_READ };
+    struct nbd_request request = {
+        .type = NBD_CMD_READ,
+        .from = offset,
+        .len = bytes,
+    };
    struct nbd_reply reply;
    ssize_t ret;

-    request.from = sector_num * 512;
-    request.len = nb_sectors * 512;
+    assert(bytes <= NBD_MAX_BUFFER_SIZE);
+    assert(!flags);

    nbd_coroutine_start(client, &request);
-    ret = nbd_co_send_request(bs, &request, NULL, 0);
+    ret = nbd_co_send_request(bs, &request, NULL);
    if (ret < 0) {
        reply.error = -ret;
    } else {
-        nbd_co_receive_reply(client, &request, &reply, qiov, offset);
+        nbd_co_receive_reply(client, &request, &reply, qiov);
    }
    nbd_coroutine_end(client, &request);
    return -reply.error;
-
 }

-static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num,
-                           int nb_sectors, QEMUIOVector *qiov,
-                           int offset, int flags)
+int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
+                          uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
    NbdClientSession *client = nbd_get_client_session(bs);
-    struct nbd_request request = { .type = NBD_CMD_WRITE };
+    struct nbd_request request = {
+        .type = NBD_CMD_WRITE,
+        .from = offset,
+        .len = bytes,
+    };
    struct nbd_reply reply;
    ssize_t ret;

@@ -255,59 +261,19 @@ static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num,
        request.type |= NBD_CMD_FLAG_FUA;
    }

-    request.from = sector_num * 512;
-    request.len = nb_sectors * 512;
+    assert(bytes <= NBD_MAX_BUFFER_SIZE);

    nbd_coroutine_start(client, &request);
-    ret = nbd_co_send_request(bs, &request, qiov, offset);
+    ret = nbd_co_send_request(bs, &request, qiov);
    if (ret < 0) {
        reply.error = -ret;
    } else {
-        nbd_co_receive_reply(client, &request, &reply, NULL, 0);
+        nbd_co_receive_reply(client, &request, &reply, NULL);
    }
    nbd_coroutine_end(client, &request);
    return -reply.error;
 }

-/* qemu-nbd has a limit of slightly less than 1M per request.  Try to
- * remain aligned to 4K. */
-#define NBD_MAX_SECTORS 2040
-
-int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num,
-                        int nb_sectors, QEMUIOVector *qiov)
-{
-    int offset = 0;
-    int ret;
-    while (nb_sectors > NBD_MAX_SECTORS) {
-        ret = nbd_co_readv_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
-        if (ret < 0) {
-            return ret;
-        }
-        offset += NBD_MAX_SECTORS * 512;
-        sector_num += NBD_MAX_SECTORS;
-        nb_sectors -= NBD_MAX_SECTORS;
-    }
-    return nbd_co_readv_1(bs, sector_num, nb_sectors, qiov, offset);
-}
-
-int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num,
-                         int nb_sectors, QEMUIOVector *qiov, int flags)
-{
-    int offset = 0;
-    int ret;
-    while (nb_sectors > NBD_MAX_SECTORS) {
-        ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset,
-                              flags);
-        if (ret < 0) {
-            return ret;
-        }
-        offset += NBD_MAX_SECTORS * 512;
-        sector_num += NBD_MAX_SECTORS;
-        nb_sectors -= NBD_MAX_SECTORS;
-    }
-    return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset, flags);
-}
-
 int nbd_client_co_flush(BlockDriverState *bs)
 {
    NbdClientSession *client = nbd_get_client_session(bs);
@@ -323,36 +289,37 @@ int nbd_client_co_flush(BlockDriverState *bs)
    request.len = 0;

    nbd_coroutine_start(client, &request);
-    ret = nbd_co_send_request(bs, &request, NULL, 0);
+    ret = nbd_co_send_request(bs, &request, NULL);
    if (ret < 0) {
        reply.error = -ret;
    } else {
-        nbd_co_receive_reply(client, &request, &reply, NULL, 0);
+        nbd_co_receive_reply(client, &request, &reply, NULL);
    }
    nbd_coroutine_end(client, &request);
    return -reply.error;
 }

-int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num,
-                          int nb_sectors)
+int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
 {
    NbdClientSession *client = nbd_get_client_session(bs);
-    struct nbd_request request = { .type = NBD_CMD_TRIM };
+    struct nbd_request request = {
+        .type = NBD_CMD_TRIM,
+        .from = offset,
+        .len = count,
+    };
    struct nbd_reply reply;
    ssize_t ret;

    if (!(client->nbdflags & NBD_FLAG_SEND_TRIM)) {
        return 0;
    }
-    request.from = sector_num * 512;
-    request.len = nb_sectors * 512;

    nbd_coroutine_start(client, &request);
-    ret = nbd_co_send_request(bs, &request, NULL, 0);
+    ret = nbd_co_send_request(bs, &request, NULL);
    if (ret < 0) {
        reply.error = -ret;
    } else {
-        nbd_co_receive_reply(client, &request, &reply, NULL, 0);
+        nbd_co_receive_reply(client, &request, &reply, NULL);
    }
    nbd_coroutine_end(client, &request);
    return -reply.error;
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -20,7 +20,7 @@
 typedef struct NbdClientSession {
    QIOChannelSocket *sioc; /* The master data channel */
    QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
-    uint32_t nbdflags;
+    uint16_t nbdflags;
    off_t size;

    CoMutex send_mutex;
@@ -44,13 +44,12 @@ int nbd_client_init(BlockDriverState *bs,
                    Error **errp);
 void nbd_client_close(BlockDriverState *bs);

-int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num,
-                          int nb_sectors);
+int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count);
 int nbd_client_co_flush(BlockDriverState *bs);
-int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num,
-                         int nb_sectors, QEMUIOVector *qiov, int flags);
-int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num,
-                        int nb_sectors, QEMUIOVector *qiov);
+int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
+                          uint64_t bytes, QEMUIOVector *qiov, int flags);
+int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
+                         uint64_t bytes, QEMUIOVector *qiov, int flags);

 void nbd_client_detach_aio_context(BlockDriverState *bs);
 void nbd_client_attach_aio_context(BlockDriverState *bs,
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -42,6 +42,9 @@

 typedef struct BDRVNBDState {
    NbdClientSession client;
+
+    /* For nbd_refresh_filename() */
+    char *path, *host, *port, *export, *tlscredsid;
 } BDRVNBDState;

 static int nbd_parse_uri(const char *filename, QDict *options)
@@ -188,13 +191,15 @@ out:
    g_free(file);
 }

-static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, char **export,
-                                 Error **errp)
+static SocketAddress *nbd_config(BDRVNBDState *s, QemuOpts *opts, Error **errp)
 {
    SocketAddress *saddr;

-    if (qdict_haskey(options, "path") == qdict_haskey(options, "host")) {
-        if (qdict_haskey(options, "path")) {
+    s->path = g_strdup(qemu_opt_get(opts, "path"));
+    s->host = g_strdup(qemu_opt_get(opts, "host"));
+
+    if (!s->path == !s->host) {
+        if (s->path) {
            error_setg(errp, "path and host may not be used at the same time.");
        } else {
            error_setg(errp, "one of path and host must be specified.");
@@ -204,32 +209,28 @@ static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, char **export,

    saddr = g_new0(SocketAddress, 1);

-    if (qdict_haskey(options, "path")) {
+    if (s->path) {
        UnixSocketAddress *q_unix;
        saddr->type = SOCKET_ADDRESS_KIND_UNIX;
        q_unix = saddr->u.q_unix.data = g_new0(UnixSocketAddress, 1);
-        q_unix->path = g_strdup(qdict_get_str(options, "path"));
-        qdict_del(options, "path");
+        q_unix->path = g_strdup(s->path);
    } else {
        InetSocketAddress *inet;
+
+        s->port = g_strdup(qemu_opt_get(opts, "port"));
+
        saddr->type = SOCKET_ADDRESS_KIND_INET;
        inet = saddr->u.inet.data = g_new0(InetSocketAddress, 1);
-        inet->host = g_strdup(qdict_get_str(options, "host"));
-        if (!qdict_get_try_str(options, "port")) {
+        inet->host = g_strdup(s->host);
+        inet->port = g_strdup(s->port);
+        if (!inet->port) {
            inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT);
-        } else {
-            inet->port = g_strdup(qdict_get_str(options, "port"));
        }
-        qdict_del(options, "host");
-        qdict_del(options, "port");
    }

    s->client.is_unix = saddr->type == SOCKET_ADDRESS_KIND_UNIX;

-    *export = g_strdup(qdict_get_try_str(options, "export"));
-    if (*export) {
-        qdict_del(options, "export");
-    }
+    s->export = g_strdup(qemu_opt_get(opts, "export"));

    return saddr;
 }
@@ -292,28 +293,66 @@ static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, Error **errp)
 }


+static QemuOptsList nbd_runtime_opts = {
+    .name = "nbd",
+    .head = QTAILQ_HEAD_INITIALIZER(nbd_runtime_opts.head),
+    .desc = {
+        {
+            .name = "host",
+            .type = QEMU_OPT_STRING,
+            .help = "TCP host to connect to",
+        },
+        {
+            .name = "port",
+            .type = QEMU_OPT_STRING,
+            .help = "TCP port to connect to",
+        },
+        {
+            .name = "path",
+            .type = QEMU_OPT_STRING,
+            .help = "Unix socket path to connect to",
+        },
+        {
+            .name = "export",
+            .type = QEMU_OPT_STRING,
+            .help = "Name of the NBD export to open",
+        },
+        {
+            .name = "tls-creds",
+            .type = QEMU_OPT_STRING,
+            .help = "ID of the TLS credentials to use",
+        },
+    },
+};
+
 static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
                    Error **errp)
 {
    BDRVNBDState *s = bs->opaque;
-    char *export = NULL;
+    QemuOpts *opts = NULL;
+    Error *local_err = NULL;
    QIOChannelSocket *sioc = NULL;
-    SocketAddress *saddr;
-    const char *tlscredsid;
+    SocketAddress *saddr = NULL;
    QCryptoTLSCreds *tlscreds = NULL;
    const char *hostname = NULL;
    int ret = -EINVAL;

+    opts = qemu_opts_create(&nbd_runtime_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        goto error;
+    }
+
    /* Pop the config into our state object. Exit if invalid. */
-    saddr = nbd_config(s, options, &export, errp);
+    saddr = nbd_config(s, opts, errp);
    if (!saddr) {
        goto error;
    }

-    tlscredsid = g_strdup(qdict_get_try_str(options, "tls-creds"));
-    if (tlscredsid) {
-        qdict_del(options, "tls-creds");
-        tlscreds = nbd_get_tls_creds(tlscredsid, errp);
+    s->tlscredsid = g_strdup(qemu_opt_get(opts, "tls-creds"));
+    if (s->tlscredsid) {
+        tlscreds = nbd_get_tls_creds(s->tlscredsid, errp);
        if (!tlscreds) {
            goto error;
        }
@@ -335,7 +374,7 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
    }

    /* NBD handshake */
-    ret = nbd_client_init(bs, sioc, export,
+    ret = nbd_client_init(bs, sioc, s->export,
                          tlscreds, hostname, errp);
 error:
    if (sioc) {
@@ -344,17 +383,18 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
    if (tlscreds) {
        object_unref(OBJECT(tlscreds));
    }
+    if (ret < 0) {
+        g_free(s->path);
+        g_free(s->host);
+        g_free(s->port);
+        g_free(s->export);
+        g_free(s->tlscredsid);
+    }
    qapi_free_SocketAddress(saddr);
-    g_free(export);
+    qemu_opts_del(opts);
    return ret;
 }

-static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
-                        int nb_sectors, QEMUIOVector *qiov)
-{
-    return nbd_client_co_readv(bs, sector_num, nb_sectors, qiov);
-}
-
 static int nbd_co_flush(BlockDriverState *bs)
 {
    return nbd_client_co_flush(bs);
@@ -362,19 +402,21 @@ static int nbd_co_flush(BlockDriverState *bs)

 static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
 {
-    bs->bl.max_discard = UINT32_MAX >> BDRV_SECTOR_BITS;
-    bs->bl.max_transfer_length = UINT32_MAX >> BDRV_SECTOR_BITS;
-}
-
-static int nbd_co_discard(BlockDriverState *bs, int64_t sector_num,
-                          int nb_sectors)
-{
-    return nbd_client_co_discard(bs, sector_num, nb_sectors);
+    bs->bl.max_pdiscard = NBD_MAX_BUFFER_SIZE;
+    bs->bl.max_transfer = NBD_MAX_BUFFER_SIZE;
 }

 static void nbd_close(BlockDriverState *bs)
 {
+    BDRVNBDState *s = bs->opaque;
+
    nbd_client_close(bs);
+
+    g_free(s->path);
+    g_free(s->host);
+    g_free(s->port);
+    g_free(s->export);
+    g_free(s->tlscredsid);
 }

 static int64_t nbd_getlength(BlockDriverState *bs)
@@ -397,48 +439,45 @@ static void nbd_attach_aio_context(BlockDriverState *bs,

 static void nbd_refresh_filename(BlockDriverState *bs, QDict *options)
 {
+    BDRVNBDState *s = bs->opaque;
    QDict *opts = qdict_new();
-    const char *path   = qdict_get_try_str(options, "path");
-    const char *host   = qdict_get_try_str(options, "host");
-    const char *port   = qdict_get_try_str(options, "port");
-    const char *export = qdict_get_try_str(options, "export");
-    const char *tlscreds = qdict_get_try_str(options, "tls-creds");

    qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("nbd")));

-    if (path && export) {
+    if (s->path && s->export) {
        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd+unix:///%s?socket=%s", export, path);
-    } else if (path && !export) {
+                 "nbd+unix:///%s?socket=%s", s->export, s->path);
+    } else if (s->path && !s->export) {
        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd+unix://?socket=%s", path);
-    } else if (!path && export && port) {
+                 "nbd+unix://?socket=%s", s->path);
+    } else if (!s->path && s->export && s->port) {
        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd://%s:%s/%s", host, port, export);
-    } else if (!path && export && !port) {
+                 "nbd://%s:%s/%s", s->host, s->port, s->export);
+    } else if (!s->path && s->export && !s->port) {
        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd://%s/%s", host, export);
-    } else if (!path && !export && port) {
+                 "nbd://%s/%s", s->host, s->export);
+    } else if (!s->path && !s->export && s->port) {
        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd://%s:%s", host, port);
-    } else if (!path && !export && !port) {
+                 "nbd://%s:%s", s->host, s->port);
+    } else if (!s->path && !s->export && !s->port) {
        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd://%s", host);
+                 "nbd://%s", s->host);
    }

-    if (path) {
-        qdict_put_obj(opts, "path", QOBJECT(qstring_from_str(path)));
-    } else if (port) {
-        qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(host)));
-        qdict_put_obj(opts, "port", QOBJECT(qstring_from_str(port)));
+    if (s->path) {
+        qdict_put_obj(opts, "path", QOBJECT(qstring_from_str(s->path)));
+    } else if (s->port) {
+        qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(s->host)));
+        qdict_put_obj(opts, "port", QOBJECT(qstring_from_str(s->port)));
    } else {
-        qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(host)));
+        qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(s->host)));
    }
-    if (export) {
-        qdict_put_obj(opts, "export", QOBJECT(qstring_from_str(export)));
+    if (s->export) {
+        qdict_put_obj(opts, "export", QOBJECT(qstring_from_str(s->export)));
    }
-    if (tlscreds) {
-        qdict_put_obj(opts, "tls-creds", QOBJECT(qstring_from_str(tlscreds)));
+    if (s->tlscredsid) {
+        qdict_put_obj(opts, "tls-creds",
+                      QOBJECT(qstring_from_str(s->tlscredsid)));
    }

    bs->full_open_options = opts;
@@ -450,11 +489,11 @@ static BlockDriver bdrv_nbd = {
    .instance_size              = sizeof(BDRVNBDState),
    .bdrv_parse_filename        = nbd_parse_filename,
    .bdrv_file_open             = nbd_open,
-    .bdrv_co_readv              = nbd_co_readv,
-    .bdrv_co_writev_flags       = nbd_client_co_writev,
+    .bdrv_co_preadv             = nbd_client_co_preadv,
+    .bdrv_co_pwritev            = nbd_client_co_pwritev,
    .bdrv_close                 = nbd_close,
    .bdrv_co_flush_to_os        = nbd_co_flush,
-    .bdrv_co_discard            = nbd_co_discard,
+    .bdrv_co_pdiscard           = nbd_client_co_pdiscard,
    .bdrv_refresh_limits        = nbd_refresh_limits,
    .bdrv_getlength             = nbd_getlength,
    .bdrv_detach_aio_context    = nbd_detach_aio_context,
@@ -468,11 +507,11 @@ static BlockDriver bdrv_nbd_tcp = {
    .instance_size              = sizeof(BDRVNBDState),
    .bdrv_parse_filename        = nbd_parse_filename,
    .bdrv_file_open             = nbd_open,
-    .bdrv_co_readv              = nbd_co_readv,
-    .bdrv_co_writev_flags       = nbd_client_co_writev,
+    .bdrv_co_preadv             = nbd_client_co_preadv,
+    .bdrv_co_pwritev            = nbd_client_co_pwritev,
    .bdrv_close                 = nbd_close,
    .bdrv_co_flush_to_os        = nbd_co_flush,
-    .bdrv_co_discard            = nbd_co_discard,
+    .bdrv_co_pdiscard           = nbd_client_co_pdiscard,
    .bdrv_refresh_limits        = nbd_refresh_limits,
    .bdrv_getlength             = nbd_getlength,
    .bdrv_detach_aio_context    = nbd_detach_aio_context,
@@ -486,11 +525,11 @@ static BlockDriver bdrv_nbd_unix = {
    .instance_size              = sizeof(BDRVNBDState),
    .bdrv_parse_filename        = nbd_parse_filename,
    .bdrv_file_open             = nbd_open,
-    .bdrv_co_readv              = nbd_co_readv,
-    .bdrv_co_writev_flags       = nbd_client_co_writev,
+    .bdrv_co_preadv             = nbd_client_co_preadv,
+    .bdrv_co_pwritev            = nbd_client_co_pwritev,
    .bdrv_close                 = nbd_close,
    .bdrv_co_flush_to_os        = nbd_co_flush,
-    .bdrv_co_discard            = nbd_co_discard,
+    .bdrv_co_pdiscard           = nbd_client_co_pdiscard,
    .bdrv_refresh_limits        = nbd_refresh_limits,
    .bdrv_getlength             = nbd_getlength,
    .bdrv_detach_aio_context    = nbd_detach_aio_context,
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -104,7 +104,7 @@ static void nfs_co_generic_bh_cb(void *opaque)
    NFSRPC *task = opaque;
    task->complete = 1;
    qemu_bh_delete(task->bh);
-    qemu_coroutine_enter(task->co, NULL);
+    qemu_coroutine_enter(task->co);
 }

 static void
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -43,6 +43,7 @@
 #define HEADER_MAGIC2 "WithouFreSpacExt"
 #define HEADER_VERSION 2
 #define HEADER_INUSE_MAGIC  (0x746F6E59)
+#define MAX_PARALLELS_IMAGE_FACTOR (1ull << 32)

 #define DEFAULT_CLUSTER_SIZE 1048576        /* 1 MiB */

@@ -210,7 +211,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
        int ret;
        space += s->prealloc_size;
        if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
-            ret = bdrv_pwrite_zeroes(bs->file->bs,
+            ret = bdrv_pwrite_zeroes(bs->file,
                                     s->data_end << BDRV_SECTOR_BITS,
                                     space << BDRV_SECTOR_BITS, 0);
        } else {
@@ -250,7 +251,7 @@ static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs)
        if (off + to_write > s->header_size) {
            to_write = s->header_size - off;
        }
-        ret = bdrv_pwrite(bs->file->bs, off, (uint8_t *)s->header + off,
+        ret = bdrv_pwrite(bs->file, off, (uint8_t *)s->header + off,
                          to_write);
        if (ret < 0) {
            qemu_co_mutex_unlock(&s->lock);
@@ -311,7 +312,7 @@ static coroutine_fn int parallels_co_writev(BlockDriverState *bs,
        qemu_iovec_reset(&hd_qiov);
        qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);

-        ret = bdrv_co_writev(bs->file->bs, position, n, &hd_qiov);
+        ret = bdrv_co_writev(bs->file, position, n, &hd_qiov);
        if (ret < 0) {
            break;
        }
@@ -351,7 +352,7 @@ static coroutine_fn int parallels_co_readv(BlockDriverState *bs,
            qemu_iovec_reset(&hd_qiov);
            qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);

-            ret = bdrv_co_readv(bs->file->bs, position, n, &hd_qiov);
+            ret = bdrv_co_readv(bs->file, position, n, &hd_qiov);
            if (ret < 0) {
                break;
            }
@@ -432,7 +433,7 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
    }

    if (flush_bat) {
-        ret = bdrv_pwrite_sync(bs->file->bs, 0, s->header, s->header_size);
+        ret = bdrv_pwrite_sync(bs->file, 0, s->header, s->header_size);
        if (ret < 0) {
            res->check_errors++;
            return ret;
@@ -475,6 +476,10 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
                          BDRV_SECTOR_SIZE);
    cl_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE,
                          DEFAULT_CLUSTER_SIZE), BDRV_SECTOR_SIZE);
+    if (total_size >= MAX_PARALLELS_IMAGE_FACTOR * cl_size) {
+        error_propagate(errp, local_err);
+        return -E2BIG;
+    }

    ret = bdrv_create_file(filename, opts, &local_err);
    if (ret < 0) {
@@ -563,7 +568,7 @@ static int parallels_update_header(BlockDriverState *bs)
    if (size > s->header_size) {
        size = s->header_size;
    }
-    return bdrv_pwrite_sync(bs->file->bs, 0, s->header, size);
+    return bdrv_pwrite_sync(bs->file, 0, s->header, size);
 }

 static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
@@ -576,7 +581,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
    Error *local_err = NULL;
    char *buf;

-    ret = bdrv_pread(bs->file->bs, 0, &ph, sizeof(ph));
+    ret = bdrv_pread(bs->file, 0, &ph, sizeof(ph));
    if (ret < 0) {
        goto fail;
    }
@@ -631,7 +636,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
        s->header_size = size;
    }

-    ret = bdrv_pread(bs->file->bs, 0, s->header, s->header_size);
+    ret = bdrv_pread(bs->file, 0, s->header, s->header_size);
    if (ret < 0) {
        goto fail;
    }
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -690,16 +690,15 @@ static void dump_qdict(fprintf_function func_fprintf, void *f, int indentation,
 void bdrv_image_info_specific_dump(fprintf_function func_fprintf, void *f,
                                   ImageInfoSpecific *info_spec)
 {
-    QmpOutputVisitor *ov = qmp_output_visitor_new();
    QObject *obj, *data;
+    Visitor *v = qmp_output_visitor_new(&obj);

-    visit_type_ImageInfoSpecific(qmp_output_get_visitor(ov), NULL, &info_spec,
-                                 &error_abort);
-    obj = qmp_output_get_qobject(ov);
+    visit_type_ImageInfoSpecific(v, NULL, &info_spec, &error_abort);
+    visit_complete(v, &obj);
    assert(qobject_type(obj) == QTYPE_QDICT);
    data = qdict_get(qobject_to_qdict(obj), "data");
    dump_qobject(func_fprintf, f, 1, data);
-    qmp_output_visitor_cleanup(ov);
+    visit_free(v);
 }

 void bdrv_image_info_dump(fprintf_function func_fprintf, void *f,
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -105,7 +105,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    int ret;
    QCowHeader header;

-    ret = bdrv_pread(bs->file->bs, 0, &header, sizeof(header));
+    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
    if (ret < 0) {
        goto fail;
    }
@@ -174,7 +174,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
            goto fail;
        }

-        bs->encrypted = 1;
+        bs->encrypted = true;
    }
    s->cluster_bits = header.cluster_bits;
    s->cluster_size = 1 << s->cluster_bits;
@@ -208,7 +208,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
        goto fail;
    }

-    ret = bdrv_pread(bs->file->bs, s->l1_table_offset, s->l1_table,
+    ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
               s->l1_size * sizeof(uint64_t));
    if (ret < 0) {
        goto fail;
@@ -239,7 +239,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
            ret = -EINVAL;
            goto fail;
        }
-        ret = bdrv_pread(bs->file->bs, header.backing_file_offset,
+        ret = bdrv_pread(bs->file, header.backing_file_offset,
                   bs->backing_file, len);
        if (ret < 0) {
            goto fail;
@@ -390,7 +390,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
        /* update the L1 entry */
        s->l1_table[l1_index] = l2_offset;
        tmp = cpu_to_be64(l2_offset);
-        if (bdrv_pwrite_sync(bs->file->bs,
+        if (bdrv_pwrite_sync(bs->file,
                s->l1_table_offset + l1_index * sizeof(tmp),
                &tmp, sizeof(tmp)) < 0)
            return 0;
@@ -420,11 +420,11 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
    l2_table = s->l2_cache + (min_index << s->l2_bits);
    if (new_l2_table) {
        memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
-        if (bdrv_pwrite_sync(bs->file->bs, l2_offset, l2_table,
+        if (bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
                s->l2_size * sizeof(uint64_t)) < 0)
            return 0;
    } else {
-        if (bdrv_pread(bs->file->bs, l2_offset, l2_table,
+        if (bdrv_pread(bs->file, l2_offset, l2_table,
                       s->l2_size * sizeof(uint64_t)) !=
            s->l2_size * sizeof(uint64_t))
            return 0;
@@ -450,7 +450,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
            cluster_offset = (cluster_offset + s->cluster_size - 1) &
                ~(s->cluster_size - 1);
            /* write the cluster content */
-            if (bdrv_pwrite(bs->file->bs, cluster_offset, s->cluster_cache,
+            if (bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache,
                            s->cluster_size) !=
                s->cluster_size)
                return -1;
@@ -480,7 +480,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
                                errno = EIO;
                                return -1;
                            }
-                            if (bdrv_pwrite(bs->file->bs,
+                            if (bdrv_pwrite(bs->file,
                                            cluster_offset + i * 512,
                                            s->cluster_data, 512) != 512)
                                return -1;
@@ -495,7 +495,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
        /* update L2 table */
        tmp = cpu_to_be64(cluster_offset);
        l2_table[l2_index] = tmp;
-        if (bdrv_pwrite_sync(bs->file->bs, l2_offset + l2_index * sizeof(tmp),
+        if (bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp),
                &tmp, sizeof(tmp)) < 0)
            return 0;
    }
@@ -565,7 +565,7 @@ static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
    if (s->cluster_cache_offset != coffset) {
        csize = cluster_offset >> (63 - s->cluster_bits);
        csize &= (s->cluster_size - 1);
-        ret = bdrv_pread(bs->file->bs, coffset, s->cluster_data, csize);
+        ret = bdrv_pread(bs->file, coffset, s->cluster_data, csize);
        if (ret != csize)
            return -1;
        if (decompress_buffer(s->cluster_cache, s->cluster_size,
@@ -619,8 +619,7 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
                hd_iov.iov_len = n * 512;
                qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
                qemu_co_mutex_unlock(&s->lock);
-                ret = bdrv_co_readv(bs->backing->bs, sector_num,
-                                    n, &hd_qiov);
+                ret = bdrv_co_readv(bs->backing, sector_num, n, &hd_qiov);
                qemu_co_mutex_lock(&s->lock);
                if (ret < 0) {
                    goto fail;
@@ -644,7 +643,7 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
            hd_iov.iov_len = n * 512;
            qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
            qemu_co_mutex_unlock(&s->lock);
-            ret = bdrv_co_readv(bs->file->bs,
+            ret = bdrv_co_readv(bs->file,
                                (cluster_offset >> 9) + index_in_cluster,
                                n, &hd_qiov);
            qemu_co_mutex_lock(&s->lock);
@@ -746,7 +745,7 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
        hd_iov.iov_len = n * 512;
        qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
        qemu_co_mutex_unlock(&s->lock);
-        ret = bdrv_co_writev(bs->file->bs,
+        ret = bdrv_co_writev(bs->file,
                             (cluster_offset >> 9) + index_in_cluster,
                             n, &hd_qiov);
        qemu_co_mutex_lock(&s->lock);
@@ -900,7 +899,7 @@ static int qcow_make_empty(BlockDriverState *bs)
    int ret;

    memset(s->l1_table, 0, l1_length);
-    if (bdrv_pwrite_sync(bs->file->bs, s->l1_table_offset, s->l1_table,
+    if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
            l1_length) < 0)
        return -1;
    ret = bdrv_truncate(bs->file->bs, s->l1_table_offset + l1_length);
@@ -916,32 +915,32 @@ static int qcow_make_empty(BlockDriverState *bs)

 /* XXX: put compressed sectors first, then all the cluster aligned
   tables to avoid losing bytes in alignment */
-static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
-                                 const uint8_t *buf, int nb_sectors)
+static coroutine_fn int
+qcow_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
+                           uint64_t bytes, QEMUIOVector *qiov)
 {
    BDRVQcowState *s = bs->opaque;
+    QEMUIOVector hd_qiov;
+    struct iovec iov;
    z_stream strm;
    int ret, out_len;
-    uint8_t *out_buf;
+    uint8_t *buf, *out_buf;
    uint64_t cluster_offset;

-    if (nb_sectors != s->cluster_sectors) {
-        ret = -EINVAL;
-
-        /* Zero-pad last write if image size is not cluster aligned */
-        if (sector_num + nb_sectors == bs->total_sectors &&
-            nb_sectors < s->cluster_sectors) {
-            uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
-            memset(pad_buf, 0, s->cluster_size);
-            memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
-            ret = qcow_write_compressed(bs, sector_num,
-                                        pad_buf, s->cluster_sectors);
-            qemu_vfree(pad_buf);
+    buf = qemu_blockalign(bs, s->cluster_size);
+    if (bytes != s->cluster_size) {
+        if (bytes > s->cluster_size ||
+            offset + bytes != bs->total_sectors << BDRV_SECTOR_BITS)
+        {
+            qemu_vfree(buf);
+            return -EINVAL;
        }
-        return ret;
+        /* Zero-pad last write if image size is not cluster aligned */
+        memset(buf + bytes, 0, s->cluster_size - bytes);
    }
+    qemu_iovec_to_buf(qiov, 0, buf, qiov->size);

-    out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
+    out_buf = g_malloc(s->cluster_size);

    /* best compression, small window, no zlib header */
    memset(&strm, 0, sizeof(strm));
@@ -970,27 +969,35 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,

    if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
        /* could not compress: write normal cluster */
-        ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
-        if (ret < 0) {
-            goto fail;
-        }
-    } else {
-        cluster_offset = get_cluster_offset(bs, sector_num << 9, 2,
-                                            out_len, 0, 0);
-        if (cluster_offset == 0) {
-            ret = -EIO;
-            goto fail;
-        }
-
-        cluster_offset &= s->cluster_offset_mask;
-        ret = bdrv_pwrite(bs->file->bs, cluster_offset, out_buf, out_len);
+        ret = qcow_co_writev(bs, offset >> BDRV_SECTOR_BITS,
+                             bytes >> BDRV_SECTOR_BITS, qiov);
        if (ret < 0) {
            goto fail;
        }
+        goto success;
    }
+    qemu_co_mutex_lock(&s->lock);
+    cluster_offset = get_cluster_offset(bs, offset, 2, out_len, 0, 0);
+    qemu_co_mutex_unlock(&s->lock);
+    if (cluster_offset == 0) {
+        ret = -EIO;
+        goto fail;
+    }
+    cluster_offset &= s->cluster_offset_mask;

+    iov = (struct iovec) {
+        .iov_base   = out_buf,
+        .iov_len    = out_len,
+    };
+    qemu_iovec_init_external(&hd_qiov, &iov, 1);
+    ret = bdrv_co_pwritev(bs->file, cluster_offset, out_len, &hd_qiov, 0);
+    if (ret < 0) {
+        goto fail;
+    }
+success:
    ret = 0;
 fail:
+    qemu_vfree(buf);
    g_free(out_buf);
    return ret;
 }
@@ -1043,7 +1050,7 @@ static BlockDriver bdrv_qcow = {

    .bdrv_set_key           = qcow_set_key,
    .bdrv_make_empty        = qcow_make_empty,
-    .bdrv_write_compressed  = qcow_write_compressed,
+    .bdrv_co_pwritev_compressed = qcow_co_pwritev_compressed,
    .bdrv_get_info          = qcow_get_info,

    .create_opts            = &qcow_create_opts,
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -210,7 +210,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
        BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
    }

-    ret = bdrv_pwrite(bs->file->bs, c->entries[i].offset,
+    ret = bdrv_pwrite(bs->file, c->entries[i].offset,
                      qcow2_cache_get_table_addr(bs, c, i), s->cluster_size);
    if (ret < 0) {
        return ret;
@@ -357,7 +357,7 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
            BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
        }

-        ret = bdrv_pread(bs->file->bs, offset,
+        ret = bdrv_pread(bs->file, offset,
                         qcow2_cache_get_table_addr(bs, c, i),
                         s->cluster_size);
        if (ret < 0) {
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -65,7 +65,8 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
        }
    }

-    if (new_l1_size > INT_MAX / sizeof(uint64_t)) {
+    QEMU_BUILD_BUG_ON(QCOW_MAX_L1_SIZE > INT_MAX);
+    if (new_l1_size > QCOW_MAX_L1_SIZE / sizeof(uint64_t)) {
        return -EFBIG;
    }

@@ -108,7 +109,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
    for(i = 0; i < s->l1_size; i++)
        new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
-    ret = bdrv_pwrite_sync(bs->file->bs, new_l1_table_offset,
+    ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset,
                           new_l1_table, new_l1_size2);
    if (ret < 0)
        goto fail;
@@ -117,9 +118,9 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,

    /* set new table */
    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
-    cpu_to_be32w((uint32_t*)data, new_l1_size);
+    stl_be_p(data, new_l1_size);
    stq_be_p(data + 4, new_l1_table_offset);
-    ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, l1_size),
+    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size),
                           data, sizeof(data));
    if (ret < 0) {
        goto fail;
@@ -185,7 +186,7 @@ int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
    }

    BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
-    ret = bdrv_pwrite_sync(bs->file->bs,
+    ret = bdrv_pwrite_sync(bs->file,
                           s->l1_table_offset + 8 * l1_start_index,
                           buf, sizeof(buf));
    if (ret < 0) {
@@ -446,7 +447,7 @@ static int coroutine_fn do_perform_cow(BlockDriverState *bs,
    }

    BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
-    ret = bdrv_co_pwritev(bs->file->bs, cluster_offset + offset_in_cluster,
+    ret = bdrv_co_pwritev(bs->file, cluster_offset + offset_in_cluster,
                          bytes, &qiov, 0);
    if (ret < 0) {
        goto out;
@@ -482,8 +483,8 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
    unsigned int l2_index;
    uint64_t l1_index, l2_offset, *l2_table;
    int l1_bits, c;
-    unsigned int offset_in_cluster, nb_clusters;
-    uint64_t bytes_available, bytes_needed;
+    unsigned int offset_in_cluster;
+    uint64_t bytes_available, bytes_needed, nb_clusters;
    int ret;

    offset_in_cluster = offset_into_cluster(s, offset);
@@ -499,7 +500,6 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
    if (bytes_needed > bytes_available) {
        bytes_needed = bytes_available;
    }
-    assert(bytes_needed <= INT_MAX);

    *cluster_offset = 0;

@@ -536,8 +536,11 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
    *cluster_offset = be64_to_cpu(l2_table[l2_index]);

-    /* nb_needed <= INT_MAX, thus nb_clusters <= INT_MAX, too */
    nb_clusters = size_to_clusters(s, bytes_needed);
+    /* bytes_needed <= *bytes + offset_in_cluster, both of which are unsigned
+     * integers; the minimum cluster size is 512, so this assertion is always
+     * true */
+    assert(nb_clusters <= INT_MAX);

    ret = qcow2_get_cluster_type(*cluster_offset);
    switch (ret) {
@@ -584,13 +587,17 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,

    qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);

-    bytes_available = (c * s->cluster_size);
+    bytes_available = (int64_t)c * s->cluster_size;

 out:
    if (bytes_available > bytes_needed) {
        bytes_available = bytes_needed;
    }

+    /* bytes_available <= bytes_needed <= *bytes + offset_in_cluster;
+     * subtracting offset_in_cluster will therefore definitely yield something
+     * not exceeding UINT_MAX */
+    assert(bytes_available - offset_in_cluster <= UINT_MAX);
    *bytes = bytes_available - offset_in_cluster;

    return ret;
@@ -1408,7 +1415,7 @@ int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
        sector_offset = coffset & 511;
        csize = nb_csectors * 512 - sector_offset;
        BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
-        ret = bdrv_read(bs->file->bs, coffset >> 9, s->cluster_data,
+        ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data,
                        nb_csectors);
        if (ret < 0) {
            return ret;
@@ -1677,7 +1684,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                    (void **)&l2_table);
        } else {
            /* load inactive L2 tables from disk */
-            ret = bdrv_read(bs->file->bs, l2_offset / BDRV_SECTOR_SIZE,
+            ret = bdrv_read(bs->file, l2_offset / BDRV_SECTOR_SIZE,
                            (void *)l2_table, s->cluster_sectors);
        }
        if (ret < 0) {
@@ -1752,7 +1759,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                goto fail;
            }

-            ret = bdrv_pwrite_zeroes(bs->file->bs, offset, s->cluster_size, 0);
+            ret = bdrv_pwrite_zeroes(bs->file, offset, s->cluster_size, 0);
            if (ret < 0) {
                if (!preallocated) {
                    qcow2_free_clusters(bs, offset, s->cluster_size,
@@ -1784,7 +1791,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                    goto fail;
                }

-                ret = bdrv_write(bs->file->bs, l2_offset / BDRV_SECTOR_SIZE,
+                ret = bdrv_write(bs->file, l2_offset / BDRV_SECTOR_SIZE,
                                 (void *)l2_table, s->cluster_sectors);
                if (ret < 0) {
                    goto fail;
@@ -1859,7 +1866,7 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs,

        l1_table = g_realloc(l1_table, l1_sectors * BDRV_SECTOR_SIZE);

-        ret = bdrv_read(bs->file->bs,
+        ret = bdrv_read(bs->file,
                        s->snapshots[i].l1_table_offset / BDRV_SECTOR_SIZE,
                        (void *)l1_table, l1_sectors);
        if (ret < 0) {
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -104,7 +104,7 @@ int qcow2_refcount_init(BlockDriverState *bs)
            goto fail;
        }
        BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
-        ret = bdrv_pread(bs->file->bs, s->refcount_table_offset,
+        ret = bdrv_pread(bs->file, s->refcount_table_offset,
                         s->refcount_table, refcount_table_size2);
        if (ret < 0) {
            goto fail;
@@ -431,7 +431,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
    if (refcount_table_index < s->refcount_table_size) {
        uint64_t data64 = cpu_to_be64(new_block);
        BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_HOOKUP);
-        ret = bdrv_pwrite_sync(bs->file->bs,
+        ret = bdrv_pwrite_sync(bs->file,
            s->refcount_table_offset + refcount_table_index * sizeof(uint64_t),
            &data64, sizeof(data64));
        if (ret < 0) {
@@ -533,7 +533,7 @@ static int alloc_refcount_block(BlockDriverState *bs,

    /* Write refcount blocks to disk */
    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS);
-    ret = bdrv_pwrite_sync(bs->file->bs, meta_offset, new_blocks,
+    ret = bdrv_pwrite_sync(bs->file, meta_offset, new_blocks,
        blocks_clusters * s->cluster_size);
    g_free(new_blocks);
    new_blocks = NULL;
@@ -547,7 +547,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
    }

    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE);
-    ret = bdrv_pwrite_sync(bs->file->bs, table_offset, new_table,
+    ret = bdrv_pwrite_sync(bs->file, table_offset, new_table,
        table_size * sizeof(uint64_t));
    if (ret < 0) {
        goto fail_table;
@@ -562,10 +562,10 @@ static int alloc_refcount_block(BlockDriverState *bs,
        uint64_t d64;
        uint32_t d32;
    } data;
-    cpu_to_be64w(&data.d64, table_offset);
-    cpu_to_be32w(&data.d32, table_clusters);
+    data.d64 = cpu_to_be64(table_offset);
+    data.d32 = cpu_to_be32(table_clusters);
    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE);
-    ret = bdrv_pwrite_sync(bs->file->bs,
+    ret = bdrv_pwrite_sync(bs->file,
                           offsetof(QCowHeader, refcount_table_offset),
                           &data, sizeof(data));
    if (ret < 0) {
@@ -615,9 +615,7 @@ void qcow2_process_discards(BlockDriverState *bs, int ret)

        /* Discard is optional, ignore the return value */
        if (ret >= 0) {
-            bdrv_discard(bs->file->bs,
-                         d->offset >> BDRV_SECTOR_BITS,
-                         d->bytes >> BDRV_SECTOR_BITS);
+            bdrv_pdiscard(bs->file->bs, d->offset, d->bytes);
        }

        g_free(d);
@@ -1070,7 +1068,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
        }
        l1_allocated = true;

-        ret = bdrv_pread(bs->file->bs, l1_table_offset, l1_table, l1_size2);
+        ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
        if (ret < 0) {
            goto fail;
        }
@@ -1223,7 +1221,7 @@ fail:
            cpu_to_be64s(&l1_table[i]);
        }

-        ret = bdrv_pwrite_sync(bs->file->bs, l1_table_offset,
+        ret = bdrv_pwrite_sync(bs->file, l1_table_offset,
                               l1_table, l1_size2);

        for (i = 0; i < l1_size; i++) {
@@ -1382,7 +1380,7 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
    l2_size = s->l2_size * sizeof(uint64_t);
    l2_table = g_malloc(l2_size);

-    ret = bdrv_pread(bs->file->bs, l2_offset, l2_table, l2_size);
+    ret = bdrv_pread(bs->file, l2_offset, l2_table, l2_size);
    if (ret < 0) {
        fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n");
        res->check_errors++;
@@ -1514,7 +1512,7 @@ static int check_refcounts_l1(BlockDriverState *bs,
            res->check_errors++;
            goto fail;
        }
-        ret = bdrv_pread(bs->file->bs, l1_table_offset, l1_table, l1_size2);
+        ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
        if (ret < 0) {
            fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
            res->check_errors++;
@@ -1612,7 +1610,7 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
            }
        }

-        ret = bdrv_pread(bs->file->bs, l2_offset, l2_table,
+        ret = bdrv_pread(bs->file, l2_offset, l2_table,
                         s->l2_size * sizeof(uint64_t));
        if (ret < 0) {
            fprintf(stderr, "ERROR: Could not read L2 table: %s\n",
@@ -1664,7 +1662,7 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
                goto fail;
            }

-            ret = bdrv_pwrite(bs->file->bs, l2_offset, l2_table,
+            ret = bdrv_pwrite(bs->file, l2_offset, l2_table,
                              s->cluster_size);
            if (ret < 0) {
                fprintf(stderr, "ERROR: Could not write L2 table: %s\n",
@@ -2098,7 +2096,7 @@ write_refblocks:
        on_disk_refblock = (void *)((char *) *refcount_table +
                                    refblock_index * s->cluster_size);

-        ret = bdrv_write(bs->file->bs, refblock_offset / BDRV_SECTOR_SIZE,
+        ret = bdrv_write(bs->file, refblock_offset / BDRV_SECTOR_SIZE,
                         on_disk_refblock, s->cluster_sectors);
        if (ret < 0) {
            fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
@@ -2147,7 +2145,7 @@ write_refblocks:
    }

    assert(reftable_size < INT_MAX / sizeof(uint64_t));
-    ret = bdrv_pwrite(bs->file->bs, reftable_offset, on_disk_reftable,
+    ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable,
                      reftable_size * sizeof(uint64_t));
    if (ret < 0) {
        fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
@@ -2155,12 +2153,11 @@ write_refblocks:
    }

    /* Enter new reftable into the image header */
-    cpu_to_be64w(&reftable_offset_and_clusters.reftable_offset,
-                 reftable_offset);
-    cpu_to_be32w(&reftable_offset_and_clusters.reftable_clusters,
-                 size_to_clusters(s, reftable_size * sizeof(uint64_t)));
-    ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader,
-                                                  refcount_table_offset),
+    reftable_offset_and_clusters.reftable_offset = cpu_to_be64(reftable_offset);
+    reftable_offset_and_clusters.reftable_clusters =
+        cpu_to_be32(size_to_clusters(s, reftable_size * sizeof(uint64_t)));
+    ret = bdrv_pwrite_sync(bs->file,
+                           offsetof(QCowHeader, refcount_table_offset),
                           &reftable_offset_and_clusters,
                           sizeof(reftable_offset_and_clusters));
    if (ret < 0) {
@@ -2407,7 +2404,7 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
                return -ENOMEM;
            }

-            ret = bdrv_pread(bs->file->bs, l1_ofs, l1, l1_sz2);
+            ret = bdrv_pread(bs->file, l1_ofs, l1, l1_sz2);
            if (ret < 0) {
                g_free(l1);
                return ret;
@@ -2560,7 +2557,7 @@ static int flush_refblock(BlockDriverState *bs, uint64_t **reftable,
            return ret;
        }

-        ret = bdrv_pwrite(bs->file->bs, offset, refblock, s->cluster_size);
+        ret = bdrv_pwrite(bs->file, offset, refblock, s->cluster_size);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to write refblock");
            return ret;
@@ -2830,7 +2827,7 @@ int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
        cpu_to_be64s(&new_reftable[i]);
    }

-    ret = bdrv_pwrite(bs->file->bs, new_reftable_offset, new_reftable,
+    ret = bdrv_pwrite(bs->file, new_reftable_offset, new_reftable,
                      new_reftable_size * sizeof(uint64_t));

    for (i = 0; i < new_reftable_size; i++) {
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -67,7 +67,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)
    for(i = 0; i < s->nb_snapshots; i++) {
        /* Read statically sized part of the snapshot header */
        offset = align_offset(offset, 8);
-        ret = bdrv_pread(bs->file->bs, offset, &h, sizeof(h));
+        ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
        if (ret < 0) {
            goto fail;
        }
@@ -86,7 +86,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)
        name_size = be16_to_cpu(h.name_size);

        /* Read extra data */
-        ret = bdrv_pread(bs->file->bs, offset, &extra,
+        ret = bdrv_pread(bs->file, offset, &extra,
                         MIN(sizeof(extra), extra_data_size));
        if (ret < 0) {
            goto fail;
@@ -105,7 +105,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)

        /* Read snapshot ID */
        sn->id_str = g_malloc(id_str_size + 1);
-        ret = bdrv_pread(bs->file->bs, offset, sn->id_str, id_str_size);
+        ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
        if (ret < 0) {
            goto fail;
        }
@@ -114,7 +114,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)

        /* Read snapshot name */
        sn->name = g_malloc(name_size + 1);
-        ret = bdrv_pread(bs->file->bs, offset, sn->name, name_size);
+        ret = bdrv_pread(bs->file, offset, sn->name, name_size);
        if (ret < 0) {
            goto fail;
        }
@@ -217,25 +217,25 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
        h.name_size = cpu_to_be16(name_size);
        offset = align_offset(offset, 8);

-        ret = bdrv_pwrite(bs->file->bs, offset, &h, sizeof(h));
+        ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
        if (ret < 0) {
            goto fail;
        }
        offset += sizeof(h);

-        ret = bdrv_pwrite(bs->file->bs, offset, &extra, sizeof(extra));
+        ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra));
        if (ret < 0) {
            goto fail;
        }
        offset += sizeof(extra);

-        ret = bdrv_pwrite(bs->file->bs, offset, sn->id_str, id_str_size);
+        ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size);
        if (ret < 0) {
            goto fail;
        }
        offset += id_str_size;

-        ret = bdrv_pwrite(bs->file->bs, offset, sn->name, name_size);
+        ret = bdrv_pwrite(bs->file, offset, sn->name, name_size);
        if (ret < 0) {
            goto fail;
        }
@@ -257,7 +257,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
    header_data.nb_snapshots        = cpu_to_be32(s->nb_snapshots);
    header_data.snapshots_offset    = cpu_to_be64(snapshots_offset);

-    ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, nb_snapshots),
+    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
                           &header_data, sizeof(header_data));
    if (ret < 0) {
        goto fail;
@@ -399,7 +399,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
        goto fail;
    }

-    ret = bdrv_pwrite(bs->file->bs, sn->l1_table_offset, l1_table,
+    ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
                      s->l1_size * sizeof(uint64_t));
    if (ret < 0) {
        goto fail;
@@ -512,7 +512,7 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
        goto fail;
    }

-    ret = bdrv_pread(bs->file->bs, sn->l1_table_offset,
+    ret = bdrv_pread(bs->file, sn->l1_table_offset,
                     sn_l1_table, sn_l1_bytes);
    if (ret < 0) {
        goto fail;
@@ -530,7 +530,7 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
        goto fail;
    }

-    ret = bdrv_pwrite_sync(bs->file->bs, s->l1_table_offset, sn_l1_table,
+    ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
                           cur_l1_bytes);
    if (ret < 0) {
        goto fail;
@@ -716,7 +716,7 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs,
        return -ENOMEM;
    }

-    ret = bdrv_pread(bs->file->bs, sn->l1_table_offset,
+    ret = bdrv_pread(bs->file, sn->l1_table_offset,
                     new_l1_table, new_l1_bytes);
    if (ret < 0) {
        error_setg(errp, "Failed to read l1 table for snapshot");
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -107,7 +107,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
        printf("attempting to read extended header in offset %lu\n", offset);
 #endif

-        ret = bdrv_pread(bs->file->bs, offset, &ext, sizeof(ext));
+        ret = bdrv_pread(bs->file, offset, &ext, sizeof(ext));
        if (ret < 0) {
            error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: "
                             "pread fail from offset %" PRIu64, offset);
@@ -135,7 +135,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
                           sizeof(bs->backing_format));
                return 2;
            }
-            ret = bdrv_pread(bs->file->bs, offset, bs->backing_format, ext.len);
+            ret = bdrv_pread(bs->file, offset, bs->backing_format, ext.len);
            if (ret < 0) {
                error_setg_errno(errp, -ret, "ERROR: ext_backing_format: "
                                 "Could not read format name");
@@ -151,7 +151,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
        case QCOW2_EXT_MAGIC_FEATURE_TABLE:
            if (p_feature_table != NULL) {
                void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
-                ret = bdrv_pread(bs->file->bs, offset , feature_table, ext.len);
+                ret = bdrv_pread(bs->file, offset , feature_table, ext.len);
                if (ret < 0) {
                    error_setg_errno(errp, -ret, "ERROR: ext_feature_table: "
                                     "Could not read table");
@@ -172,7 +172,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
                uext->len = ext.len;
                QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);

-                ret = bdrv_pread(bs->file->bs, offset , uext->data, uext->len);
+                ret = bdrv_pread(bs->file, offset , uext->data, uext->len);
                if (ret < 0) {
                    error_setg_errno(errp, -ret, "ERROR: unknown extension: "
                                     "Could not read data");
@@ -249,7 +249,7 @@ int qcow2_mark_dirty(BlockDriverState *bs)
    }

    val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
-    ret = bdrv_pwrite(bs->file->bs, offsetof(QCowHeader, incompatible_features),
+    ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
                      &val, sizeof(val));
    if (ret < 0) {
        return ret;
@@ -817,7 +817,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
    uint64_t ext_end;
    uint64_t l1_vm_state_index;

-    ret = bdrv_pread(bs->file->bs, 0, &header, sizeof(header));
+    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not read qcow2 header");
        goto fail;
@@ -892,7 +892,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
    if (header.header_length > sizeof(header)) {
        s->unknown_header_fields_size = header.header_length - sizeof(header);
        s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
-        ret = bdrv_pread(bs->file->bs, sizeof(header), s->unknown_header_fields,
+        ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
                         s->unknown_header_fields_size);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not read unknown qcow2 header "
@@ -980,10 +980,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
            goto fail;
        }

-        bs->encrypted = 1;
-
-        /* Encryption works on a sector granularity */
-        bs->request_alignment = BDRV_SECTOR_SIZE;
+        bs->encrypted = true;
    }

    s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
@@ -1069,7 +1066,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
            ret = -ENOMEM;
            goto fail;
        }
-        ret = bdrv_pread(bs->file->bs, s->l1_table_offset, s->l1_table,
+        ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
                         s->l1_size * sizeof(uint64_t));
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not read L1 table");
@@ -1125,7 +1122,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
            ret = -EINVAL;
            goto fail;
        }
-        ret = bdrv_pread(bs->file->bs, header.backing_file_offset,
+        ret = bdrv_pread(bs->file, header.backing_file_offset,
                         bs->backing_file, len);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not read backing file name");
@@ -1202,6 +1199,10 @@ static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
 {
    BDRVQcow2State *s = bs->opaque;

+    if (bs->encrypted) {
+        /* Encryption works on a sector granularity */
+        bs->bl.request_alignment = BDRV_SECTOR_SIZE;
+    }
    bs->bl.pwrite_zeroes_alignment = s->cluster_size;
 }

@@ -1442,7 +1443,7 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,

                    BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
                    qemu_co_mutex_unlock(&s->lock);
-                    ret = bdrv_co_preadv(bs->backing->bs, offset, n1,
+                    ret = bdrv_co_preadv(bs->backing, offset, n1,
                                         &local_qiov, 0);
                    qemu_co_mutex_lock(&s->lock);

@@ -1505,7 +1506,7 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,

            BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
            qemu_co_mutex_unlock(&s->lock);
-            ret = bdrv_co_preadv(bs->file->bs,
+            ret = bdrv_co_preadv(bs->file,
                                 cluster_offset + offset_in_cluster,
                                 cur_bytes, &hd_qiov, 0);
            qemu_co_mutex_lock(&s->lock);
@@ -1636,7 +1637,7 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
        BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
        trace_qcow2_writev_data(qemu_coroutine_self(),
                                cluster_offset + offset_in_cluster);
-        ret = bdrv_co_pwritev(bs->file->bs,
+        ret = bdrv_co_pwritev(bs->file,
                              cluster_offset + offset_in_cluster,
                              cur_bytes, &hd_qiov, 0);
        qemu_co_mutex_lock(&s->lock);
@@ -1975,7 +1976,7 @@ int qcow2_update_header(BlockDriverState *bs)
    }

    /* Write the new header */
-    ret = bdrv_pwrite(bs->file->bs, 0, header, s->cluster_size);
+    ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size);
    if (ret < 0) {
        goto fail;
    }
@@ -2058,7 +2059,7 @@ static int preallocate(BlockDriverState *bs)
     */
    if (host_offset != 0) {
        uint8_t data = 0;
-        ret = bdrv_pwrite(bs->file->bs, (host_offset + cur_bytes) - 1,
+        ret = bdrv_pwrite(bs->file, (host_offset + cur_bytes) - 1,
                          &data, 1);
        if (ret < 0) {
            return ret;
@@ -2478,15 +2479,15 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
    return ret;
 }

-static coroutine_fn int qcow2_co_discard(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors)
+static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
+                                          int64_t offset, int count)
 {
    int ret;
    BDRVQcow2State *s = bs->opaque;

    qemu_co_mutex_lock(&s->lock);
-    ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS,
-        nb_sectors, QCOW2_DISCARD_REQUEST, false);
+    ret = qcow2_discard_clusters(bs, offset, count >> BDRV_SECTOR_BITS,
+                                 QCOW2_DISCARD_REQUEST, false);
    qemu_co_mutex_unlock(&s->lock);
    return ret;
 }
@@ -2522,7 +2523,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset)

    /* write updated header.size */
    offset = cpu_to_be64(offset);
-    ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, size),
+    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
                           &offset, sizeof(uint64_t));
    if (ret < 0) {
        return ret;
@@ -2534,39 +2535,39 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset)

 /* XXX: put compressed sectors first, then all the cluster aligned
   tables to avoid losing bytes in alignment */
-static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
-                                  const uint8_t *buf, int nb_sectors)
+static coroutine_fn int
+qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
+                            uint64_t bytes, QEMUIOVector *qiov)
 {
    BDRVQcow2State *s = bs->opaque;
+    QEMUIOVector hd_qiov;
+    struct iovec iov;
    z_stream strm;
    int ret, out_len;
-    uint8_t *out_buf;
+    uint8_t *buf, *out_buf;
    uint64_t cluster_offset;

-    if (nb_sectors == 0) {
+    if (bytes == 0) {
        /* align end of file to a sector boundary to ease reading with
           sector based I/Os */
        cluster_offset = bdrv_getlength(bs->file->bs);
        return bdrv_truncate(bs->file->bs, cluster_offset);
    }

-    if (nb_sectors != s->cluster_sectors) {
-        ret = -EINVAL;
-
-        /* Zero-pad last write if image size is not cluster aligned */
-        if (sector_num + nb_sectors == bs->total_sectors &&
-            nb_sectors < s->cluster_sectors) {
-            uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
-            memset(pad_buf, 0, s->cluster_size);
-            memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
-            ret = qcow2_write_compressed(bs, sector_num,
-                                         pad_buf, s->cluster_sectors);
-            qemu_vfree(pad_buf);
+    buf = qemu_blockalign(bs, s->cluster_size);
+    if (bytes != s->cluster_size) {
+        if (bytes > s->cluster_size ||
+            offset + bytes != bs->total_sectors << BDRV_SECTOR_BITS)
+        {
+            qemu_vfree(buf);
+            return -EINVAL;
        }
-        return ret;
+        /* Zero-pad last write if image size is not cluster aligned */
+        memset(buf + bytes, 0, s->cluster_size - bytes);
    }
+    qemu_iovec_to_buf(qiov, 0, buf, bytes);

-    out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
+    out_buf = g_malloc(s->cluster_size);

    /* best compression, small window, no zlib header */
    memset(&strm, 0, sizeof(strm));
@@ -2595,33 +2596,44 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,

    if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
        /* could not compress: write normal cluster */
-        ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
-        if (ret < 0) {
-            goto fail;
-        }
-    } else {
-        cluster_offset = qcow2_alloc_compressed_cluster_offset(bs,
-            sector_num << 9, out_len);
-        if (!cluster_offset) {
-            ret = -EIO;
-            goto fail;
-        }
-        cluster_offset &= s->cluster_offset_mask;
-
-        ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len);
-        if (ret < 0) {
-            goto fail;
-        }
-
-        BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
-        ret = bdrv_pwrite(bs->file->bs, cluster_offset, out_buf, out_len);
+        ret = qcow2_co_pwritev(bs, offset, bytes, qiov, 0);
        if (ret < 0) {
            goto fail;
        }
+        goto success;
    }

+    qemu_co_mutex_lock(&s->lock);
+    cluster_offset =
+        qcow2_alloc_compressed_cluster_offset(bs, offset, out_len);
+    if (!cluster_offset) {
+        qemu_co_mutex_unlock(&s->lock);
+        ret = -EIO;
+        goto fail;
+    }
+    cluster_offset &= s->cluster_offset_mask;
+
+    ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len);
+    qemu_co_mutex_unlock(&s->lock);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    iov = (struct iovec) {
+        .iov_base   = out_buf,
+        .iov_len    = out_len,
+    };
+    qemu_iovec_init_external(&hd_qiov, &iov, 1);
+
+    BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
+    ret = bdrv_co_pwritev(bs->file, cluster_offset, out_len, &hd_qiov, 0);
+    if (ret < 0) {
+        goto fail;
+    }
+success:
    ret = 0;
 fail:
+    qemu_vfree(buf);
    g_free(out_buf);
    return ret;
 }
@@ -2663,7 +2675,7 @@ static int make_completely_empty(BlockDriverState *bs)
    /* After this call, neither the in-memory nor the on-disk refcount
     * information accurately describe the actual references */

-    ret = bdrv_pwrite_zeroes(bs->file->bs, s->l1_table_offset,
+    ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset,
                             l1_clusters * s->cluster_size, 0);
    if (ret < 0) {
        goto fail_broken_refcounts;
@@ -2677,7 +2689,7 @@ static int make_completely_empty(BlockDriverState *bs)
     * overwrite parts of the existing refcount and L1 table, which is not
     * an issue because the dirty flag is set, complete data loss is in fact
     * desired and partial data loss is consequently fine as well */
-    ret = bdrv_pwrite_zeroes(bs->file->bs, s->cluster_size,
+    ret = bdrv_pwrite_zeroes(bs->file, s->cluster_size,
                             (2 + l1_clusters) * s->cluster_size, 0);
    /* This call (even if it failed overall) may have overwritten on-disk
     * refcount structures; in that case, the in-memory refcount information
@@ -2693,10 +2705,10 @@ static int make_completely_empty(BlockDriverState *bs)
    /* "Create" an empty reftable (one cluster) directly after the image
     * header and an empty L1 table three clusters after the image header;
     * the cluster between those two will be used as the first refblock */
-    cpu_to_be64w(&l1_ofs_rt_ofs_cls.l1_offset, 3 * s->cluster_size);
-    cpu_to_be64w(&l1_ofs_rt_ofs_cls.reftable_offset, s->cluster_size);
-    cpu_to_be32w(&l1_ofs_rt_ofs_cls.reftable_clusters, 1);
-    ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, l1_table_offset),
+    l1_ofs_rt_ofs_cls.l1_offset = cpu_to_be64(3 * s->cluster_size);
+    l1_ofs_rt_ofs_cls.reftable_offset = cpu_to_be64(s->cluster_size);
+    l1_ofs_rt_ofs_cls.reftable_clusters = cpu_to_be32(1);
+    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset),
                           &l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls));
    if (ret < 0) {
        goto fail_broken_refcounts;
@@ -2727,7 +2739,7 @@ static int make_completely_empty(BlockDriverState *bs)

    /* Enter the first refblock into the reftable */
    rt_entry = cpu_to_be64(2 * s->cluster_size);
-    ret = bdrv_pwrite_sync(bs->file->bs, s->cluster_size,
+    ret = bdrv_pwrite_sync(bs->file, s->cluster_size,
                           &rt_entry, sizeof(rt_entry));
    if (ret < 0) {
        goto fail_broken_refcounts;
@@ -3364,9 +3376,9 @@ BlockDriver bdrv_qcow2 = {
    .bdrv_co_flush_to_os    = qcow2_co_flush_to_os,

    .bdrv_co_pwrite_zeroes  = qcow2_co_pwrite_zeroes,
-    .bdrv_co_discard        = qcow2_co_discard,
+    .bdrv_co_pdiscard       = qcow2_co_pdiscard,
    .bdrv_truncate          = qcow2_truncate,
-    .bdrv_write_compressed  = qcow2_write_compressed,
+    .bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed,
    .bdrv_make_empty        = qcow2_make_empty,

    .bdrv_snapshot_create   = qcow2_snapshot_create,
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -65,7 +65,7 @@ static void qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
    read_table_cb->iov.iov_len = s->header.cluster_size * s->header.table_size,

    qemu_iovec_init_external(qiov, &read_table_cb->iov, 1);
-    bdrv_aio_readv(s->bs->file->bs, offset / BDRV_SECTOR_SIZE, qiov,
+    bdrv_aio_readv(s->bs->file, offset / BDRV_SECTOR_SIZE, qiov,
                   qiov->size / BDRV_SECTOR_SIZE,
                   qed_read_table_cb, read_table_cb);
 }
@@ -154,7 +154,7 @@ static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
    /* Adjust for offset into table */
    offset += start * sizeof(uint64_t);

-    bdrv_aio_writev(s->bs->file->bs, offset / BDRV_SECTOR_SIZE,
+    bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
                    &write_table_cb->qiov,
                    write_table_cb->qiov.size / BDRV_SECTOR_SIZE,
                    qed_write_table_cb, write_table_cb);
--- a/block/qed.c
+++ b/block/qed.c
@@ -86,7 +86,7 @@ int qed_write_header_sync(BDRVQEDState *s)
    int ret;

    qed_header_cpu_to_le(&s->header, &le);
-    ret = bdrv_pwrite(s->bs->file->bs, 0, &le, sizeof(le));
+    ret = bdrv_pwrite(s->bs->file, 0, &le, sizeof(le));
    if (ret != sizeof(le)) {
        return ret;
    }
@@ -123,7 +123,7 @@ static void qed_write_header_read_cb(void *opaque, int ret)
    /* Update header */
    qed_header_cpu_to_le(&s->header, (QEDHeader *)write_header_cb->buf);

-    bdrv_aio_writev(s->bs->file->bs, 0, &write_header_cb->qiov,
+    bdrv_aio_writev(s->bs->file, 0, &write_header_cb->qiov,
                    write_header_cb->nsectors, qed_write_header_cb,
                    write_header_cb);
 }
@@ -155,7 +155,7 @@ static void qed_write_header(BDRVQEDState *s, BlockCompletionFunc cb,
    write_header_cb->iov.iov_len = len;
    qemu_iovec_init_external(&write_header_cb->qiov, &write_header_cb->iov, 1);

-    bdrv_aio_readv(s->bs->file->bs, 0, &write_header_cb->qiov, nsectors,
+    bdrv_aio_readv(s->bs->file, 0, &write_header_cb->qiov, nsectors,
                   qed_write_header_read_cb, write_header_cb);
 }

@@ -218,7 +218,7 @@ static bool qed_is_image_size_valid(uint64_t image_size, uint32_t cluster_size,
 *
 * The string is NUL-terminated.
 */
-static int qed_read_string(BlockDriverState *file, uint64_t offset, size_t n,
+static int qed_read_string(BdrvChild *file, uint64_t offset, size_t n,
                           char *buf, size_t buflen)
 {
    int ret;
@@ -389,7 +389,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
    s->bs = bs;
    QSIMPLEQ_INIT(&s->allocating_write_reqs);

-    ret = bdrv_pread(bs->file->bs, 0, &le_header, sizeof(le_header));
+    ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header));
    if (ret < 0) {
        return ret;
    }
@@ -446,7 +446,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
            return -EINVAL;
        }

-        ret = qed_read_string(bs->file->bs, s->header.backing_filename_offset,
+        ret = qed_read_string(bs->file, s->header.backing_filename_offset,
                              s->header.backing_filename_size, bs->backing_file,
                              sizeof(bs->backing_file));
        if (ret < 0) {
@@ -708,7 +708,7 @@ static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t l
    }

    if (cb->co) {
-        qemu_coroutine_enter(cb->co, NULL);
+        qemu_coroutine_enter(cb->co);
    }
 }

@@ -800,7 +800,7 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
    qemu_iovec_concat(*backing_qiov, qiov, 0, size);

    BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
-    bdrv_aio_readv(s->bs->backing->bs, pos / BDRV_SECTOR_SIZE,
+    bdrv_aio_readv(s->bs->backing, pos / BDRV_SECTOR_SIZE,
                   *backing_qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
 }

@@ -837,7 +837,7 @@ static void qed_copy_from_backing_file_write(void *opaque, int ret)
    }

    BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE);
-    bdrv_aio_writev(s->bs->file->bs, copy_cb->offset / BDRV_SECTOR_SIZE,
+    bdrv_aio_writev(s->bs->file, copy_cb->offset / BDRV_SECTOR_SIZE,
                    &copy_cb->qiov, copy_cb->qiov.size / BDRV_SECTOR_SIZE,
                    qed_copy_from_backing_file_cb, copy_cb);
 }
@@ -1087,7 +1087,7 @@ static void qed_aio_write_main(void *opaque, int ret)
    }

    BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
-    bdrv_aio_writev(s->bs->file->bs, offset / BDRV_SECTOR_SIZE,
+    bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
                    &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
                    next_fn, acb);
 }
@@ -1319,7 +1319,7 @@ static void qed_aio_read_data(void *opaque, int ret,
    }

    BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
-    bdrv_aio_readv(bs->file->bs, offset / BDRV_SECTOR_SIZE,
+    bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE,
                   &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
                   qed_aio_next_io, acb);
    return;
@@ -1425,7 +1425,7 @@ static void coroutine_fn qed_co_pwrite_zeroes_cb(void *opaque, int ret)
    cb->done = true;
    cb->ret = ret;
    if (cb->co) {
-        qemu_coroutine_enter(cb->co, NULL);
+        qemu_coroutine_enter(cb->co);
    }
 }

@@ -1575,7 +1575,7 @@ static int bdrv_qed_change_backing_file(BlockDriverState *bs,
    }

    /* Write new header */
-    ret = bdrv_pwrite_sync(bs->file->bs, 0, buffer, buffer_len);
+    ret = bdrv_pwrite_sync(bs->file, 0, buffer, buffer_len);
    g_free(buffer);
    if (ret == 0) {
        memcpy(&s->header, &new_header, sizeof(new_header));
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -383,7 +383,7 @@ static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
            continue;
        }
        QLIST_FOREACH(item, &version->items, next) {
-            bdrv_aio_writev(s->children[item->index]->bs, acb->sector_num,
+            bdrv_aio_writev(s->children[item->index], acb->sector_num,
                            acb->qiov, acb->nb_sectors, quorum_rewrite_aio_cb,
                            acb);
        }
@@ -660,7 +660,7 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb)
    }

    for (i = 0; i < s->num_children; i++) {
-        acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i]->bs, acb->sector_num,
+        acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i], acb->sector_num,
                                            &acb->qcrs[i].qiov, acb->nb_sectors,
                                            quorum_aio_cb, &acb->qcrs[i]);
    }
@@ -678,7 +678,7 @@ static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb)
    qemu_iovec_clone(&acb->qcrs[acb->child_iter].qiov, acb->qiov,
                     acb->qcrs[acb->child_iter].buf);
    acb->qcrs[acb->child_iter].aiocb =
-        bdrv_aio_readv(s->children[acb->child_iter]->bs, acb->sector_num,
+        bdrv_aio_readv(s->children[acb->child_iter], acb->sector_num,
                       &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors,
                       quorum_aio_cb, &acb->qcrs[acb->child_iter]);

@@ -719,7 +719,7 @@ static BlockAIOCB *quorum_aio_writev(BlockDriverState *bs,
    int i;

    for (i = 0; i < s->num_children; i++) {
-        acb->qcrs[i].aiocb = bdrv_aio_writev(s->children[i]->bs, sector_num,
+        acb->qcrs[i].aiocb = bdrv_aio_writev(s->children[i], sector_num,
                                             qiov, nb_sectors, &quorum_aio_cb,
                                             &acb->qcrs[i]);
    }
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -32,7 +32,7 @@
 #include "trace.h"
 #include "block/thread-pool.h"
 #include "qemu/iov.h"
-#include "raw-aio.h"
+#include "block/raw-aio.h"
 #include "qapi/util.h"
 #include "qapi/qmp/qstring.h"

@@ -137,10 +137,6 @@ typedef struct BDRVRawState {
    int open_flags;
    size_t buf_align;

-#ifdef CONFIG_LINUX_AIO
-    int use_aio;
-    LinuxAioState *aio_ctx;
-#endif
 #ifdef CONFIG_XFS
    bool is_xfs:1;
 #endif
@@ -154,9 +150,6 @@ typedef struct BDRVRawState {
 typedef struct BDRVRawReopenState {
    int fd;
    int open_flags;
-#ifdef CONFIG_LINUX_AIO
-    int use_aio;
-#endif
 } BDRVRawReopenState;

 static int fd_open(BlockDriverState *bs);
@@ -302,22 +295,22 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
    /* For SCSI generic devices the alignment is not really used.
       With buffered I/O, we don't have any restrictions. */
    if (bdrv_is_sg(bs) || !s->needs_alignment) {
-        bs->request_alignment = 1;
+        bs->bl.request_alignment = 1;
        s->buf_align = 1;
        return;
    }

-    bs->request_alignment = 0;
+    bs->bl.request_alignment = 0;
    s->buf_align = 0;
    /* Let's try to use the logical blocksize for the alignment. */
-    if (probe_logical_blocksize(fd, &bs->request_alignment) < 0) {
-        bs->request_alignment = 0;
+    if (probe_logical_blocksize(fd, &bs->bl.request_alignment) < 0) {
+        bs->bl.request_alignment = 0;
    }
 #ifdef CONFIG_XFS
    if (s->is_xfs) {
        struct dioattr da;
        if (xfsctl(NULL, fd, XFS_IOC_DIOINFO, &da) >= 0) {
-            bs->request_alignment = da.d_miniosz;
+            bs->bl.request_alignment = da.d_miniosz;
            /* The kernel returns wrong information for d_mem */
            /* s->buf_align = da.d_mem; */
        }
@@ -337,21 +330,21 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
        qemu_vfree(buf);
    }

-    if (!bs->request_alignment) {
+    if (!bs->bl.request_alignment) {
        size_t align;
        buf = qemu_memalign(s->buf_align, max_align);
        for (align = 512; align <= max_align; align <<= 1) {
            if (raw_is_io_aligned(fd, buf, align)) {
-                bs->request_alignment = align;
+                bs->bl.request_alignment = align;
                break;
            }
        }
        qemu_vfree(buf);
    }

-    if (!s->buf_align || !bs->request_alignment) {
-        error_setg(errp, "Could not find working O_DIRECT alignment. "
-                         "Try cache.direct=off.");
+    if (!s->buf_align || !bs->bl.request_alignment) {
+        error_setg(errp, "Could not find working O_DIRECT alignment");
+        error_append_hint(errp, "Try cache.direct=off\n");
    }
 }

@@ -374,58 +367,15 @@ static void raw_parse_flags(int bdrv_flags, int *open_flags)
    }
 }

-static void raw_detach_aio_context(BlockDriverState *bs)
-{
 #ifdef CONFIG_LINUX_AIO
-    BDRVRawState *s = bs->opaque;
-
-    if (s->use_aio) {
-        laio_detach_aio_context(s->aio_ctx, bdrv_get_aio_context(bs));
-    }
-#endif
-}
-
-static void raw_attach_aio_context(BlockDriverState *bs,
-                                   AioContext *new_context)
+static bool raw_use_aio(int bdrv_flags)
 {
-#ifdef CONFIG_LINUX_AIO
-    BDRVRawState *s = bs->opaque;
-
-    if (s->use_aio) {
-        laio_attach_aio_context(s->aio_ctx, new_context);
-    }
-#endif
-}
-
-#ifdef CONFIG_LINUX_AIO
-static int raw_set_aio(LinuxAioState **aio_ctx, int *use_aio, int bdrv_flags)
-{
-    int ret = -1;
-    assert(aio_ctx != NULL);
-    assert(use_aio != NULL);
    /*
     * Currently Linux do AIO only for files opened with O_DIRECT
     * specified so check NOCACHE flag too
     */
-    if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
-                      (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) {
-
-        /* if non-NULL, laio_init() has already been run */
-        if (*aio_ctx == NULL) {
-            *aio_ctx = laio_init();
-            if (!*aio_ctx) {
-                goto error;
-            }
-        }
-        *use_aio = 1;
-    } else {
-        *use_aio = 0;
-    }
-
-    ret = 0;
-
-error:
-    return ret;
+    return (bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
+                         (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO);
 }
 #endif

@@ -494,13 +444,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
    s->fd = fd;

 #ifdef CONFIG_LINUX_AIO
-    if (raw_set_aio(&s->aio_ctx, &s->use_aio, bdrv_flags)) {
-        qemu_close(fd);
-        ret = -errno;
-        error_setg_errno(errp, -ret, "Could not set AIO state");
-        goto fail;
-    }
-    if (!s->use_aio && (bdrv_flags & BDRV_O_NATIVE_AIO)) {
+    if (!raw_use_aio(bdrv_flags) && (bdrv_flags & BDRV_O_NATIVE_AIO)) {
        error_setg(errp, "aio=native was specified, but it requires "
                         "cache.direct=on, which was not specified.");
        ret = -EINVAL;
@@ -567,8 +511,6 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
    }
 #endif

-    raw_attach_aio_context(bs, bdrv_get_aio_context(bs));
-
    ret = 0;
 fail:
    if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) {
@@ -603,18 +545,6 @@ static int raw_reopen_prepare(BDRVReopenState *state,
    state->opaque = g_new0(BDRVRawReopenState, 1);
    raw_s = state->opaque;

-#ifdef CONFIG_LINUX_AIO
-    raw_s->use_aio = s->use_aio;
-
-    /* we can use s->aio_ctx instead of a copy, because the use_aio flag is
-     * valid in the 'false' condition even if aio_ctx is set, and raw_set_aio()
-     * won't override aio_ctx if aio_ctx is non-NULL */
-    if (raw_set_aio(&s->aio_ctx, &raw_s->use_aio, state->flags)) {
-        error_setg(errp, "Could not set AIO state");
-        return -1;
-    }
-#endif
-
    if (s->type == FTYPE_CD) {
        raw_s->open_flags |= O_NONBLOCK;
    }
@@ -639,15 +569,7 @@ static int raw_reopen_prepare(BDRVReopenState *state,

    if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
        /* dup the original fd */
-        /* TODO: use qemu fcntl wrapper */
-#ifdef F_DUPFD_CLOEXEC
-        raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0);
-#else
-        raw_s->fd = dup(s->fd);
-        if (raw_s->fd != -1) {
-            qemu_set_cloexec(raw_s->fd);
-        }
-#endif
+        raw_s->fd = qemu_dup(s->fd);
        if (raw_s->fd >= 0) {
            ret = fcntl_setfl(raw_s->fd, raw_s->open_flags);
            if (ret) {
@@ -697,9 +619,6 @@ static void raw_reopen_commit(BDRVReopenState *state)

    qemu_close(s->fd);
    s->fd = raw_s->fd;
-#ifdef CONFIG_LINUX_AIO
-    s->use_aio = raw_s->use_aio;
-#endif

    g_free(state->opaque);
    state->opaque = NULL;
@@ -745,8 +664,8 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
    if (!fstat(s->fd, &st)) {
        if (S_ISBLK(st.st_mode)) {
            int ret = hdev_get_max_transfer_length(s->fd);
-            if (ret >= 0) {
-                bs->bl.max_transfer_length = ret;
+            if (ret > 0 && ret <= BDRV_REQUEST_MAX_SECTORS) {
+                bs->bl.max_transfer = pow2floor(ret << BDRV_SECTOR_BITS);
            }
        }
    }
@@ -1295,7 +1214,7 @@ static int paio_submit_co(BlockDriverState *bs, int fd,
 }

 static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        int64_t offset, QEMUIOVector *qiov, int count,
        BlockCompletionFunc *cb, void *opaque, int type)
 {
    RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
@@ -1305,8 +1224,8 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
    acb->aio_type = type;
    acb->aio_fildes = fd;

-    acb->aio_nbytes = nb_sectors * BDRV_SECTOR_SIZE;
-    acb->aio_offset = sector_num * BDRV_SECTOR_SIZE;
+    acb->aio_nbytes = count;
+    acb->aio_offset = offset;

    if (qiov) {
        acb->aio_iov = qiov->iov;
@@ -1314,7 +1233,7 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
        assert(qiov->size == acb->aio_nbytes);
    }

-    trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
+    trace_paio_submit(acb, opaque, offset, count, type);
    pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
    return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
 }
@@ -1337,9 +1256,10 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
        if (!bdrv_qiov_is_aligned(bs, qiov)) {
            type |= QEMU_AIO_MISALIGNED;
 #ifdef CONFIG_LINUX_AIO
-        } else if (s->use_aio) {
+        } else if (bs->open_flags & BDRV_O_NATIVE_AIO) {
+            LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
            assert(qiov->size == bytes);
-            return laio_co_submit(bs, s->aio_ctx, s->fd, offset, qiov, type);
+            return laio_co_submit(bs, aio, s->fd, offset, qiov, type);
 #endif
        }
    }
@@ -1365,9 +1285,9 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
 static void raw_aio_plug(BlockDriverState *bs)
 {
 #ifdef CONFIG_LINUX_AIO
-    BDRVRawState *s = bs->opaque;
-    if (s->use_aio) {
-        laio_io_plug(bs, s->aio_ctx);
+    if (bs->open_flags & BDRV_O_NATIVE_AIO) {
+        LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
+        laio_io_plug(bs, aio);
    }
 #endif
 }
@@ -1375,9 +1295,9 @@ static void raw_aio_plug(BlockDriverState *bs)
 static void raw_aio_unplug(BlockDriverState *bs)
 {
 #ifdef CONFIG_LINUX_AIO
-    BDRVRawState *s = bs->opaque;
-    if (s->use_aio) {
-        laio_io_unplug(bs, s->aio_ctx);
+    if (bs->open_flags & BDRV_O_NATIVE_AIO) {
+        LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
+        laio_io_unplug(bs, aio);
    }
 #endif
 }
@@ -1397,13 +1317,6 @@ static void raw_close(BlockDriverState *bs)
 {
    BDRVRawState *s = bs->opaque;

-    raw_detach_aio_context(bs);
-
-#ifdef CONFIG_LINUX_AIO
-    if (s->use_aio) {
-        laio_cleanup(s->aio_ctx);
-    }
-#endif
    if (s->fd >= 0) {
        qemu_close(s->fd);
        s->fd = -1;
@@ -1873,13 +1786,13 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
    return ret | BDRV_BLOCK_OFFSET_VALID | start;
 }

-static coroutine_fn BlockAIOCB *raw_aio_discard(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors,
+static coroutine_fn BlockAIOCB *raw_aio_pdiscard(BlockDriverState *bs,
+    int64_t offset, int count,
    BlockCompletionFunc *cb, void *opaque)
 {
    BDRVRawState *s = bs->opaque;

-    return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
+    return paio_submit(bs, s->fd, offset, NULL, count,
                       cb, opaque, QEMU_AIO_DISCARD);
 }

@@ -1951,7 +1864,7 @@ BlockDriver bdrv_file = {
    .bdrv_co_preadv         = raw_co_preadv,
    .bdrv_co_pwritev        = raw_co_pwritev,
    .bdrv_aio_flush = raw_aio_flush,
-    .bdrv_aio_discard = raw_aio_discard,
+    .bdrv_aio_pdiscard = raw_aio_pdiscard,
    .bdrv_refresh_limits = raw_refresh_limits,
    .bdrv_io_plug = raw_aio_plug,
    .bdrv_io_unplug = raw_aio_unplug,
@@ -1962,9 +1875,6 @@ BlockDriver bdrv_file = {
    .bdrv_get_allocated_file_size
                        = raw_get_allocated_file_size,

-    .bdrv_detach_aio_context = raw_detach_aio_context,
-    .bdrv_attach_aio_context = raw_attach_aio_context,
-
    .create_opts = &raw_create_opts,
 };

@@ -2293,8 +2203,8 @@ static int fd_open(BlockDriverState *bs)
    return -EIO;
 }

-static coroutine_fn BlockAIOCB *hdev_aio_discard(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors,
+static coroutine_fn BlockAIOCB *hdev_aio_pdiscard(BlockDriverState *bs,
+    int64_t offset, int count,
    BlockCompletionFunc *cb, void *opaque)
 {
    BDRVRawState *s = bs->opaque;
@@ -2302,7 +2212,7 @@ static coroutine_fn BlockAIOCB *hdev_aio_discard(BlockDriverState *bs,
    if (fd_open(bs) < 0) {
        return NULL;
    }
-    return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
+    return paio_submit(bs, s->fd, offset, NULL, count,
                       cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
 }

@@ -2397,7 +2307,7 @@ static BlockDriver bdrv_host_device = {
    .bdrv_co_preadv         = raw_co_preadv,
    .bdrv_co_pwritev        = raw_co_pwritev,
    .bdrv_aio_flush	= raw_aio_flush,
-    .bdrv_aio_discard   = hdev_aio_discard,
+    .bdrv_aio_pdiscard   = hdev_aio_pdiscard,
    .bdrv_refresh_limits = raw_refresh_limits,
    .bdrv_io_plug = raw_aio_plug,
    .bdrv_io_unplug = raw_aio_unplug,
@@ -2410,9 +2320,6 @@ static BlockDriver bdrv_host_device = {
    .bdrv_probe_blocksizes = hdev_probe_blocksizes,
    .bdrv_probe_geometry = hdev_probe_geometry,

-    .bdrv_detach_aio_context = raw_detach_aio_context,
-    .bdrv_attach_aio_context = raw_attach_aio_context,
-
    /* generic scsi device */
 #ifdef __linux__
    .bdrv_aio_ioctl     = hdev_aio_ioctl,
@@ -2532,9 +2439,6 @@ static BlockDriver bdrv_host_cdrom = {
    .bdrv_get_allocated_file_size
                        = raw_get_allocated_file_size,

-    .bdrv_detach_aio_context = raw_detach_aio_context,
-    .bdrv_attach_aio_context = raw_attach_aio_context,
-
    /* removable device support */
    .bdrv_is_inserted   = cdrom_is_inserted,
    .bdrv_eject         = cdrom_eject,
@@ -2665,9 +2569,6 @@ static BlockDriver bdrv_host_cdrom = {
    .bdrv_get_allocated_file_size
                        = raw_get_allocated_file_size,

-    .bdrv_detach_aio_context = raw_detach_aio_context,
-    .bdrv_attach_aio_context = raw_attach_aio_context,
-
    /* removable device support */
    .bdrv_is_inserted   = cdrom_is_inserted,
    .bdrv_eject         = cdrom_eject,
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -27,7 +27,7 @@
 #include "qemu/timer.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
-#include "raw-aio.h"
+#include "block/raw-aio.h"
 #include "trace.h"
 #include "block/thread-pool.h"
 #include "qemu/iov.h"
@@ -142,7 +142,7 @@ static int aio_worker(void *arg)
 }

 static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        int64_t offset, QEMUIOVector *qiov, int count,
        BlockCompletionFunc *cb, void *opaque, int type)
 {
    RawWin32AIOData *acb = g_new(RawWin32AIOData, 1);
@@ -155,11 +155,12 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
    if (qiov) {
        acb->aio_iov = qiov->iov;
        acb->aio_niov = qiov->niov;
+        assert(qiov->size == count);
    }
-    acb->aio_nbytes = nb_sectors * 512;
-    acb->aio_offset = sector_num * 512;
+    acb->aio_nbytes = count;
+    acb->aio_offset = offset;

-    trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
+    trace_paio_submit(acb, opaque, offset, count, type);
    pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
    return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
 }
@@ -222,7 +223,7 @@ static void raw_attach_aio_context(BlockDriverState *bs,
    }
 }

-static void raw_probe_alignment(BlockDriverState *bs)
+static void raw_probe_alignment(BlockDriverState *bs, Error **errp)
 {
    BDRVRawState *s = bs->opaque;
    DWORD sectorsPerCluster, freeClusters, totalClusters, count;
@@ -230,14 +231,14 @@ static void raw_probe_alignment(BlockDriverState *bs)
    BOOL status;

    if (s->type == FTYPE_CD) {
-        bs->request_alignment = 2048;
+        bs->bl.request_alignment = 2048;
        return;
    }
    if (s->type == FTYPE_HARDDISK) {
        status = DeviceIoControl(s->hfile, IOCTL_DISK_GET_DRIVE_GEOMETRY_EX,
                                 NULL, 0, &dg, sizeof(dg), &count, NULL);
        if (status != 0) {
-            bs->request_alignment = dg.Geometry.BytesPerSector;
+            bs->bl.request_alignment = dg.Geometry.BytesPerSector;
            return;
        }
        /* try GetDiskFreeSpace too */
@@ -247,7 +248,7 @@ static void raw_probe_alignment(BlockDriverState *bs)
        GetDiskFreeSpace(s->drive_path, &sectorsPerCluster,
                         &dg.Geometry.BytesPerSector,
                         &freeClusters, &totalClusters);
-        bs->request_alignment = dg.Geometry.BytesPerSector;
+        bs->bl.request_alignment = dg.Geometry.BytesPerSector;
    }
 }

@@ -365,7 +366,6 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
        win32_aio_attach_aio_context(s->aio, bdrv_get_aio_context(bs));
    }

-    raw_probe_alignment(bs);
    ret = 0;
 fail:
    qemu_opts_del(opts);
@@ -381,7 +381,8 @@ static BlockAIOCB *raw_aio_readv(BlockDriverState *bs,
        return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov,
                                nb_sectors, cb, opaque, QEMU_AIO_READ);
    } else {
-        return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors,
+        return paio_submit(bs, s->hfile, sector_num << BDRV_SECTOR_BITS, qiov,
+                           nb_sectors << BDRV_SECTOR_BITS,
                           cb, opaque, QEMU_AIO_READ);
    }
 }
@@ -395,7 +396,8 @@ static BlockAIOCB *raw_aio_writev(BlockDriverState *bs,
        return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov,
                                nb_sectors, cb, opaque, QEMU_AIO_WRITE);
    } else {
-        return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors,
+        return paio_submit(bs, s->hfile, sector_num << BDRV_SECTOR_BITS, qiov,
+                           nb_sectors << BDRV_SECTOR_BITS,
                           cb, opaque, QEMU_AIO_WRITE);
    }
 }
@@ -550,6 +552,7 @@ BlockDriver bdrv_file = {
    .bdrv_needs_filename = true,
    .bdrv_parse_filename = raw_parse_filename,
    .bdrv_file_open     = raw_open,
+    .bdrv_refresh_limits = raw_probe_alignment,
    .bdrv_close         = raw_close,
    .bdrv_create        = raw_create,
    .bdrv_has_zero_init = bdrv_has_zero_init_1,
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -1,6 +1,6 @@
 /* BlockDriver implementation for "raw"
 *
- * Copyright (C) 2010, 2013, Red Hat, Inc.
+ * Copyright (C) 2010-2016 Red Hat, Inc.
 * Copyright (C) 2010, Blue Swirl <blauwirbel@gmail.com>
 * Copyright (C) 2009, Anthony Liguori <aliguori@us.ibm.com>
 *
@@ -50,33 +50,30 @@ static int raw_reopen_prepare(BDRVReopenState *reopen_state,
    return 0;
 }

-static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
-                                     int nb_sectors, QEMUIOVector *qiov)
+static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
+                                      uint64_t bytes, QEMUIOVector *qiov,
+                                      int flags)
 {
    BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
-    return bdrv_co_readv(bs->file->bs, sector_num, nb_sectors, qiov);
+    return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
 }

-static int coroutine_fn
-raw_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
-                    QEMUIOVector *qiov, int flags)
+static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
+                                       uint64_t bytes, QEMUIOVector *qiov,
+                                       int flags)
 {
    void *buf = NULL;
    BlockDriver *drv;
    QEMUIOVector local_qiov;
    int ret;

-    if (bs->probed && sector_num == 0) {
-        /* As long as these conditions are true, we can't get partial writes to
-         * the probe buffer and can just directly check the request. */
+    if (bs->probed && offset < BLOCK_PROBE_BUF_SIZE && bytes) {
+        /* Handling partial writes would be a pain - so we just
+         * require that guests have 512-byte request alignment if
+         * probing occurred */
        QEMU_BUILD_BUG_ON(BLOCK_PROBE_BUF_SIZE != 512);
        QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != 512);
-
-        if (nb_sectors == 0) {
-            /* qemu_iovec_to_buf() would fail, but we want to return success
-             * instead of -EINVAL in this case. */
-            return 0;
-        }
+        assert(offset == 0 && bytes >= BLOCK_PROBE_BUF_SIZE);

        buf = qemu_try_blockalign(bs->file->bs, 512);
        if (!buf) {
@@ -105,8 +102,7 @@ raw_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
    }

    BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
-    ret = bdrv_co_pwritev(bs->file->bs, sector_num * BDRV_SECTOR_SIZE,
-                          nb_sectors * BDRV_SECTOR_SIZE, qiov, flags);
+    ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);

 fail:
    if (qiov == &local_qiov) {
@@ -131,13 +127,13 @@ static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
                                             int64_t offset, int count,
                                             BdrvRequestFlags flags)
 {
-    return bdrv_co_pwrite_zeroes(bs->file->bs, offset, count, flags);
+    return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
 }

-static int coroutine_fn raw_co_discard(BlockDriverState *bs,
-                                       int64_t sector_num, int nb_sectors)
+static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs,
+                                        int64_t offset, int count)
 {
-    return bdrv_co_discard(bs->file->bs, sector_num, nb_sectors);
+    return bdrv_co_pdiscard(bs->file->bs, offset, count);
 }

 static int64_t raw_getlength(BlockDriverState *bs)
@@ -152,7 +148,12 @@ static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)

 static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
 {
-    bs->bl = bs->file->bs->bl;
+    if (bs->probed) {
+        /* To make it easier to protect the first sector, any probed
+         * image is restricted to read-modify-write on sub-sector
+         * operations. */
+        bs->bl.request_alignment = BDRV_SECTOR_SIZE;
+    }
 }

 static int raw_truncate(BlockDriverState *bs, int64_t offset)
@@ -197,8 +198,10 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
                    Error **errp)
 {
    bs->sg = bs->file->bs->sg;
-    bs->supported_write_flags = BDRV_REQ_FUA;
-    bs->supported_zero_flags = BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP;
+    bs->supported_write_flags = BDRV_REQ_FUA &
+        bs->file->bs->supported_write_flags;
+    bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
+        bs->file->bs->supported_zero_flags;

    if (bs->probed && !bdrv_is_read_only(bs)) {
        fprintf(stderr,
@@ -243,10 +246,10 @@ BlockDriver bdrv_raw = {
    .bdrv_open            = &raw_open,
    .bdrv_close           = &raw_close,
    .bdrv_create          = &raw_create,
-    .bdrv_co_readv        = &raw_co_readv,
-    .bdrv_co_writev_flags = &raw_co_writev_flags,
+    .bdrv_co_preadv       = &raw_co_preadv,
+    .bdrv_co_pwritev      = &raw_co_pwritev,
    .bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes,
-    .bdrv_co_discard      = &raw_co_discard,
+    .bdrv_co_pdiscard     = &raw_co_pdiscard,
    .bdrv_co_get_block_status = &raw_co_get_block_status,
    .bdrv_truncate        = &raw_truncate,
    .bdrv_getlength       = &raw_getlength,
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -649,9 +649,9 @@ static int rbd_aio_flush_wrapper(rbd_image_t image,
 }

 static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
-                                 int64_t sector_num,
+                                 int64_t off,
                                 QEMUIOVector *qiov,
-                                 int nb_sectors,
+                                 int64_t size,
                                 BlockCompletionFunc *cb,
                                 void *opaque,
                                 RBDAIOCmd cmd)
@@ -659,7 +659,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
    RBDAIOCB *acb;
    RADOSCB *rcb = NULL;
    rbd_completion_t c;
-    int64_t off, size;
    char *buf;
    int r;

@@ -668,6 +667,7 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
    acb = qemu_aio_get(&rbd_aiocb_info, bs, cb, opaque);
    acb->cmd = cmd;
    acb->qiov = qiov;
+    assert(!qiov || qiov->size == size);
    if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
        acb->bounce = NULL;
    } else {
@@ -687,9 +687,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,

    buf = acb->bounce;

-    off = sector_num * BDRV_SECTOR_SIZE;
-    size = nb_sectors * BDRV_SECTOR_SIZE;
-
    rcb = g_new(RADOSCB, 1);
    rcb->acb = acb;
    rcb->buf = buf;
@@ -739,7 +736,8 @@ static BlockAIOCB *qemu_rbd_aio_readv(BlockDriverState *bs,
                                      BlockCompletionFunc *cb,
                                      void *opaque)
 {
-    return rbd_start_aio(bs, sector_num, qiov, nb_sectors, cb, opaque,
+    return rbd_start_aio(bs, sector_num << BDRV_SECTOR_BITS, qiov,
+                         nb_sectors << BDRV_SECTOR_BITS, cb, opaque,
                         RBD_AIO_READ);
 }

@@ -750,7 +748,8 @@ static BlockAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs,
                                       BlockCompletionFunc *cb,
                                       void *opaque)
 {
-    return rbd_start_aio(bs, sector_num, qiov, nb_sectors, cb, opaque,
+    return rbd_start_aio(bs, sector_num << BDRV_SECTOR_BITS, qiov,
+                         nb_sectors << BDRV_SECTOR_BITS, cb, opaque,
                         RBD_AIO_WRITE);
 }

@@ -931,13 +930,13 @@ static int qemu_rbd_snap_list(BlockDriverState *bs,
 }

 #ifdef LIBRBD_SUPPORTS_DISCARD
-static BlockAIOCB* qemu_rbd_aio_discard(BlockDriverState *bs,
-                                        int64_t sector_num,
-                                        int nb_sectors,
-                                        BlockCompletionFunc *cb,
-                                        void *opaque)
+static BlockAIOCB *qemu_rbd_aio_pdiscard(BlockDriverState *bs,
+                                         int64_t offset,
+                                         int count,
+                                         BlockCompletionFunc *cb,
+                                         void *opaque)
 {
-    return rbd_start_aio(bs, sector_num, NULL, nb_sectors, cb, opaque,
+    return rbd_start_aio(bs, offset, NULL, count, cb, opaque,
                         RBD_AIO_DISCARD);
 }
 #endif
@@ -1001,7 +1000,7 @@ static BlockDriver bdrv_rbd = {
 #endif

 #ifdef LIBRBD_SUPPORTS_DISCARD
-    .bdrv_aio_discard       = qemu_rbd_aio_discard,
+    .bdrv_aio_pdiscard      = qemu_rbd_aio_pdiscard,
 #endif

    .bdrv_snapshot_create   = qemu_rbd_snap_create,
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -495,7 +495,7 @@ static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req)

 static void coroutine_fn sd_finish_aiocb(SheepdogAIOCB *acb)
 {
-    qemu_coroutine_enter(acb->coroutine, NULL);
+    qemu_coroutine_enter(acb->coroutine);
    qemu_aio_unref(acb);
 }

@@ -636,7 +636,7 @@ static void restart_co_req(void *opaque)
 {
    Coroutine *co = opaque;

-    qemu_coroutine_enter(co, NULL);
+    qemu_coroutine_enter(co);
 }

 typedef struct SheepdogReqCo {
@@ -726,8 +726,8 @@ static int do_req(int sockfd, AioContext *aio_context, SheepdogReq *hdr,
    if (qemu_in_coroutine()) {
        do_co_req(&srco);
    } else {
-        co = qemu_coroutine_create(do_co_req);
-        qemu_coroutine_enter(co, &srco);
+        co = qemu_coroutine_create(do_co_req, &srco);
+        qemu_coroutine_enter(co);
        while (!srco.finished) {
            aio_poll(aio_context, true);
        }
@@ -925,17 +925,17 @@ static void co_read_response(void *opaque)
    BDRVSheepdogState *s = opaque;

    if (!s->co_recv) {
-        s->co_recv = qemu_coroutine_create(aio_read_response);
+        s->co_recv = qemu_coroutine_create(aio_read_response, opaque);
    }

-    qemu_coroutine_enter(s->co_recv, opaque);
+    qemu_coroutine_enter(s->co_recv);
 }

 static void co_write_request(void *opaque)
 {
    BDRVSheepdogState *s = opaque;

-    qemu_coroutine_enter(s->co_send, NULL);
+    qemu_coroutine_enter(s->co_send);
 }

 /*
@@ -2800,8 +2800,8 @@ static int sd_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
 }


-static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
-                                      int nb_sectors)
+static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset,
+                                      int count)
 {
    SheepdogAIOCB *acb;
    BDRVSheepdogState *s = bs->opaque;
@@ -2811,7 +2811,7 @@ static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
    uint32_t zero = 0;

    if (!s->discard_supported) {
-            return 0;
+        return 0;
    }

    memset(&discard_iov, 0, sizeof(discard_iov));
@@ -2820,7 +2820,10 @@ static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
    iov.iov_len = sizeof(zero);
    discard_iov.iov = &iov;
    discard_iov.niov = 1;
-    acb = sd_aio_setup(bs, &discard_iov, sector_num, nb_sectors);
+    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+    assert((count & (BDRV_SECTOR_SIZE - 1)) == 0);
+    acb = sd_aio_setup(bs, &discard_iov, offset >> BDRV_SECTOR_BITS,
+                       count >> BDRV_SECTOR_BITS);
    acb->aiocb_type = AIOCB_DISCARD_OBJ;
    acb->aio_done_func = sd_finish_aiocb;

@@ -2954,7 +2957,7 @@ static BlockDriver bdrv_sheepdog = {
    .bdrv_co_readv  = sd_co_readv,
    .bdrv_co_writev = sd_co_writev,
    .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
-    .bdrv_co_discard = sd_co_discard,
+    .bdrv_co_pdiscard = sd_co_pdiscard,
    .bdrv_co_get_block_status = sd_co_get_block_status,

    .bdrv_snapshot_create   = sd_snapshot_create,
@@ -2990,7 +2993,7 @@ static BlockDriver bdrv_sheepdog_tcp = {
    .bdrv_co_readv  = sd_co_readv,
    .bdrv_co_writev = sd_co_writev,
    .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
-    .bdrv_co_discard = sd_co_discard,
+    .bdrv_co_pdiscard = sd_co_pdiscard,
    .bdrv_co_get_block_status = sd_co_get_block_status,

    .bdrv_snapshot_create   = sd_snapshot_create,
@@ -3026,7 +3029,7 @@ static BlockDriver bdrv_sheepdog_unix = {
    .bdrv_co_readv  = sd_co_readv,
    .bdrv_co_writev = sd_co_writev,
    .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
-    .bdrv_co_discard = sd_co_discard,
+    .bdrv_co_pdiscard = sd_co_pdiscard,
    .bdrv_co_get_block_status = sd_co_get_block_status,

    .bdrv_snapshot_create   = sd_snapshot_create,
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -508,36 +508,73 @@ static int authenticate(BDRVSSHState *s, const char *user, Error **errp)
    return ret;
 }

+static QemuOptsList ssh_runtime_opts = {
+    .name = "ssh",
+    .head = QTAILQ_HEAD_INITIALIZER(ssh_runtime_opts.head),
+    .desc = {
+        {
+            .name = "host",
+            .type = QEMU_OPT_STRING,
+            .help = "Host to connect to",
+        },
+        {
+            .name = "port",
+            .type = QEMU_OPT_NUMBER,
+            .help = "Port to connect to",
+        },
+        {
+            .name = "path",
+            .type = QEMU_OPT_STRING,
+            .help = "Path of the image on the host",
+        },
+        {
+            .name = "user",
+            .type = QEMU_OPT_STRING,
+            .help = "User as which to connect",
+        },
+        {
+            .name = "host_key_check",
+            .type = QEMU_OPT_STRING,
+            .help = "Defines how and what to check the host key against",
+        },
+    },
+};
+
 static int connect_to_ssh(BDRVSSHState *s, QDict *options,
                          int ssh_flags, int creat_mode, Error **errp)
 {
    int r, ret;
+    QemuOpts *opts = NULL;
+    Error *local_err = NULL;
    const char *host, *user, *path, *host_key_check;
    int port;

-    if (!qdict_haskey(options, "host")) {
+    opts = qemu_opts_create(&ssh_runtime_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (local_err) {
+        ret = -EINVAL;
+        error_propagate(errp, local_err);
+        goto err;
+    }
+
+    host = qemu_opt_get(opts, "host");
+    if (!host) {
        ret = -EINVAL;
        error_setg(errp, "No hostname was specified");
        goto err;
    }
-    host = qdict_get_str(options, "host");

-    if (qdict_haskey(options, "port")) {
-        port = qdict_get_int(options, "port");
-    } else {
-        port = 22;
-    }
+    port = qemu_opt_get_number(opts, "port", 22);

-    if (!qdict_haskey(options, "path")) {
+    path = qemu_opt_get(opts, "path");
+    if (!path) {
        ret = -EINVAL;
        error_setg(errp, "No path was specified");
        goto err;
    }
-    path = qdict_get_str(options, "path");

-    if (qdict_haskey(options, "user")) {
-        user = qdict_get_str(options, "user");
-    } else {
+    user = qemu_opt_get(opts, "user");
+    if (!user) {
        user = g_get_user_name();
        if (!user) {
            error_setg_errno(errp, errno, "Can't get user name");
@@ -546,9 +583,8 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
        }
    }

-    if (qdict_haskey(options, "host_key_check")) {
-        host_key_check = qdict_get_str(options, "host_key_check");
-    } else {
+    host_key_check = qemu_opt_get(opts, "host_key_check");
+    if (!host_key_check) {
        host_key_check = "yes";
    }

@@ -612,21 +648,14 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
        goto err;
    }

+    qemu_opts_del(opts);
+
    r = libssh2_sftp_fstat(s->sftp_handle, &s->attrs);
    if (r < 0) {
        sftp_error_setg(errp, s, "failed to read file attributes");
        return -EINVAL;
    }

-    /* Delete the options we've used; any not deleted will cause the
-     * block layer to give an error about unused options.
-     */
-    qdict_del(options, "host");
-    qdict_del(options, "port");
-    qdict_del(options, "user");
-    qdict_del(options, "path");
-    qdict_del(options, "host_key_check");
-
    return 0;

 err:
@@ -646,6 +675,8 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
    }
    s->session = NULL;

+    qemu_opts_del(opts);
+
    return ret;
 }

@@ -777,7 +808,7 @@ static void restart_coroutine(void *opaque)

    DPRINTF("co=%p", co);

-    qemu_coroutine_enter(co, NULL);
+    qemu_coroutine_enter(co);
 }

 static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)
--- a/block/stream.c
+++ b/block/stream.c
@@ -95,6 +95,7 @@ static void coroutine_fn stream_run(void *opaque)
    BlockDriverState *base = s->base;
    int64_t sector_num = 0;
    int64_t end = -1;
+    uint64_t delay_ns = 0;
    int error = 0;
    int ret = 0;
    int n = 0;
@@ -123,10 +124,8 @@ static void coroutine_fn stream_run(void *opaque)
    }

    for (sector_num = 0; sector_num < end; sector_num += n) {
-        uint64_t delay_ns = 0;
        bool copy;

-wait:
        /* Note that even when no rate limit is applied we need to yield
         * with no pending I/O here so that bdrv_drain_all() returns.
         */
@@ -156,12 +155,6 @@ wait:
        }
        trace_stream_one_iteration(s, sector_num, n, ret);
        if (copy) {
-            if (s->common.speed) {
-                delay_ns = ratelimit_calculate_delay(&s->limit, n);
-                if (delay_ns > 0) {
-                    goto wait;
-                }
-            }
            ret = stream_populate(blk, sector_num, n, buf);
        }
        if (ret < 0) {
@@ -182,6 +175,9 @@ wait:

        /* Publish progress */
        s->common.offset += n * BDRV_SECTOR_SIZE;
+        if (copy && s->common.speed) {
+            delay_ns = ratelimit_calculate_delay(&s->limit, n);
+        }
    }

    if (!base) {
@@ -218,15 +214,15 @@ static const BlockJobDriver stream_job_driver = {
    .set_speed     = stream_set_speed,
 };

-void stream_start(BlockDriverState *bs, BlockDriverState *base,
-                  const char *backing_file_str, int64_t speed,
-                  BlockdevOnError on_error,
-                  BlockCompletionFunc *cb,
-                  void *opaque, Error **errp)
+void stream_start(const char *job_id, BlockDriverState *bs,
+                  BlockDriverState *base, const char *backing_file_str,
+                  int64_t speed, BlockdevOnError on_error,
+                  BlockCompletionFunc *cb, void *opaque, Error **errp)
 {
    StreamBlockJob *s;

-    s = block_job_create(&stream_job_driver, bs, speed, cb, opaque, errp);
+    s = block_job_create(job_id, &stream_job_driver, bs, speed,
+                         cb, opaque, errp);
    if (!s) {
        return;
    }
@@ -235,7 +231,7 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base,
    s->backing_file_str = g_strdup(backing_file_str);

    s->on_error = on_error;
-    s->common.co = qemu_coroutine_create(stream_run);
+    s->common.co = qemu_coroutine_create(stream_run, s);
    trace_stream_start(bs, base, s, s->common.co, opaque);
-    qemu_coroutine_enter(s->common.co, s);
+    qemu_coroutine_enter(s->common.co);
 }
--- a/block/trace-events
+++ b/block/trace-events
@@ -1,4 +1,4 @@
-# See docs/trace-events.txt for syntax documentation.
+# See docs/tracing.txt for syntax documentation.

 # block.c
 bdrv_open_common(void *bs, const char *filename, int flags, const char *format_name) "bs %p filename \"%s\" flags %#x format_name \"%s\""
@@ -9,7 +9,7 @@ blk_co_preadv(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags
 blk_co_pwritev(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags %x"

 # block/io.c
-bdrv_aio_discard(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
+bdrv_aio_pdiscard(void *bs, int64_t offset, int count, void *opaque) "bs %p offset %"PRId64" count %d opaque %p"
 bdrv_aio_flush(void *bs, void *opaque) "bs %p opaque %p"
 bdrv_aio_readv(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
 bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
@@ -58,7 +58,7 @@ qmp_block_stream(void *bs, void *job) "bs %p job %p"
 # block/raw-win32.c
 # block/raw-posix.c
 paio_submit_co(int64_t offset, int count, int type) "offset %"PRId64" count %d type %d"
-paio_submit(void *acb, void *opaque, int64_t sector_num, int nb_sectors, int type) "acb %p opaque %p sector_num %"PRId64" nb_sectors %d type %d"
+paio_submit(void *acb, void *opaque, int64_t offset, int count, int type) "acb %p opaque %p offset %"PRId64" count %d type %d"

 # block/qcow2.c
 qcow2_writev_start_req(void *co, int64_t offset, int bytes) "co %p offset %" PRIx64 " bytes %d"
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -403,7 +403,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,

    logout("\n");

-    ret = bdrv_read(bs->file->bs, 0, (uint8_t *)&header, 1);
+    ret = bdrv_read(bs->file, 0, (uint8_t *)&header, 1);
    if (ret < 0) {
        goto fail;
    }
@@ -500,7 +500,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
        goto fail;
    }

-    ret = bdrv_read(bs->file->bs, s->bmap_sector, (uint8_t *)s->bmap,
+    ret = bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap,
                    bmap_size);
    if (ret < 0) {
        goto fail_free_bmap;
@@ -597,7 +597,7 @@ vdi_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
            qemu_iovec_reset(&local_qiov);
            qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);

-            ret = bdrv_co_preadv(bs->file->bs, data_offset, n_bytes,
+            ret = bdrv_co_preadv(bs->file, data_offset, n_bytes,
                                 &local_qiov, 0);
        }
        logout("%u bytes read\n", n_bytes);
@@ -670,7 +670,7 @@ vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
             * acquire the lock and thus the padded cluster is written before
             * the other coroutines can write to the affected area. */
            qemu_co_mutex_lock(&s->write_lock);
-            ret = bdrv_pwrite(bs->file->bs, data_offset, block, s->block_size);
+            ret = bdrv_pwrite(bs->file, data_offset, block, s->block_size);
            qemu_co_mutex_unlock(&s->write_lock);
        } else {
            uint64_t data_offset = s->header.offset_data +
@@ -690,7 +690,7 @@ vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
            qemu_iovec_reset(&local_qiov);
            qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);

-            ret = bdrv_co_pwritev(bs->file->bs, data_offset, n_bytes,
+            ret = bdrv_co_pwritev(bs->file, data_offset, n_bytes,
                                  &local_qiov, 0);
        }

@@ -719,7 +719,7 @@ vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
        assert(VDI_IS_ALLOCATED(bmap_first));
        *header = s->header;
        vdi_header_to_le(header);
-        ret = bdrv_write(bs->file->bs, 0, block, 1);
+        ret = bdrv_write(bs->file, 0, block, 1);
        g_free(block);
        block = NULL;

@@ -737,7 +737,7 @@ vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
        base = ((uint8_t *)&s->bmap[0]) + bmap_first * SECTOR_SIZE;
        logout("will write %u block map sectors starting from entry %u\n",
               n_sectors, bmap_first);
-        ret = bdrv_write(bs->file->bs, offset, base, n_sectors);
+        ret = bdrv_write(bs->file, offset, base, n_sectors);
    }

    return ret;
--- a/block/vhdx-log.c
+++ b/block/vhdx-log.c
@@ -84,7 +84,7 @@ static int vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log,

    offset = log->offset + read;

-    ret = bdrv_pread(bs->file->bs, offset, hdr, sizeof(VHDXLogEntryHeader));
+    ret = bdrv_pread(bs->file, offset, hdr, sizeof(VHDXLogEntryHeader));
    if (ret < 0) {
        goto exit;
    }
@@ -144,7 +144,7 @@ static int vhdx_log_read_sectors(BlockDriverState *bs, VHDXLogEntries *log,
        }
        offset = log->offset + read;

-        ret = bdrv_pread(bs->file->bs, offset, buffer, VHDX_LOG_SECTOR_SIZE);
+        ret = bdrv_pread(bs->file, offset, buffer, VHDX_LOG_SECTOR_SIZE);
        if (ret < 0) {
            goto exit;
        }
@@ -194,7 +194,7 @@ static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log,
            /* full */
            break;
        }
-        ret = bdrv_pwrite(bs->file->bs, offset, buffer_tmp,
+        ret = bdrv_pwrite(bs->file, offset, buffer_tmp,
                          VHDX_LOG_SECTOR_SIZE);
        if (ret < 0) {
            goto exit;
@@ -466,7 +466,7 @@ static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,

    /* count is only > 1 if we are writing zeroes */
    for (i = 0; i < count; i++) {
-        ret = bdrv_pwrite_sync(bs->file->bs, file_offset, buffer,
+        ret = bdrv_pwrite_sync(bs->file, file_offset, buffer,
                               VHDX_LOG_SECTOR_SIZE);
        if (ret < 0) {
            goto exit;
@@ -945,7 +945,7 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,

        if (i == 0 && leading_length) {
            /* partial sector at the front of the buffer */
-            ret = bdrv_pread(bs->file->bs, file_offset, merged_sector,
+            ret = bdrv_pread(bs->file, file_offset, merged_sector,
                             VHDX_LOG_SECTOR_SIZE);
            if (ret < 0) {
                goto exit;
@@ -955,7 +955,7 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
            sector_write = merged_sector;
        } else if (i == sectors - 1 && trailing_length) {
            /* partial sector at the end of the buffer */
-            ret = bdrv_pread(bs->file->bs,
+            ret = bdrv_pread(bs->file,
                            file_offset,
                            merged_sector + trailing_length,
                            VHDX_LOG_SECTOR_SIZE - trailing_length);
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -298,9 +298,10 @@ static int vhdx_probe(const uint8_t *buf, int buf_size, const char *filename)
 * and then update the header checksum.  Header is converted to proper
 * endianness before being written to the specified file offset
 */
-static int vhdx_write_header(BlockDriverState *bs_file, VHDXHeader *hdr,
+static int vhdx_write_header(BdrvChild *file, VHDXHeader *hdr,
                             uint64_t offset, bool read)
 {
+    BlockDriverState *bs_file = file->bs;
    uint8_t *buffer = NULL;
    int ret;
    VHDXHeader *header_le;
@@ -315,7 +316,7 @@ static int vhdx_write_header(BlockDriverState *bs_file, VHDXHeader *hdr,
    buffer = qemu_blockalign(bs_file, VHDX_HEADER_SIZE);
    if (read) {
        /* if true, we can't assume the extra reserved bytes are 0 */
-        ret = bdrv_pread(bs_file, offset, buffer, VHDX_HEADER_SIZE);
+        ret = bdrv_pread(file, offset, buffer, VHDX_HEADER_SIZE);
        if (ret < 0) {
            goto exit;
        }
@@ -329,7 +330,7 @@ static int vhdx_write_header(BlockDriverState *bs_file, VHDXHeader *hdr,
    vhdx_header_le_export(hdr, header_le);
    vhdx_update_checksum(buffer, VHDX_HEADER_SIZE,
                         offsetof(VHDXHeader, checksum));
-    ret = bdrv_pwrite_sync(bs_file, offset, header_le, sizeof(VHDXHeader));
+    ret = bdrv_pwrite_sync(file, offset, header_le, sizeof(VHDXHeader));

 exit:
    qemu_vfree(buffer);
@@ -378,7 +379,7 @@ static int vhdx_update_header(BlockDriverState *bs, BDRVVHDXState *s,
        inactive_header->log_guid = *log_guid;
    }

-    ret = vhdx_write_header(bs->file->bs, inactive_header, header_offset, true);
+    ret = vhdx_write_header(bs->file, inactive_header, header_offset, true);
    if (ret < 0) {
        goto exit;
    }
@@ -430,7 +431,7 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
    /* We have to read the whole VHDX_HEADER_SIZE instead of
     * sizeof(VHDXHeader), because the checksum is over the whole
     * region */
-    ret = bdrv_pread(bs->file->bs, VHDX_HEADER1_OFFSET, buffer,
+    ret = bdrv_pread(bs->file, VHDX_HEADER1_OFFSET, buffer,
                     VHDX_HEADER_SIZE);
    if (ret < 0) {
        goto fail;
@@ -447,7 +448,7 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
        }
    }

-    ret = bdrv_pread(bs->file->bs, VHDX_HEADER2_OFFSET, buffer,
+    ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, buffer,
                     VHDX_HEADER_SIZE);
    if (ret < 0) {
        goto fail;
@@ -521,7 +522,7 @@ static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s)
     * whole block */
    buffer = qemu_blockalign(bs, VHDX_HEADER_BLOCK_SIZE);

-    ret = bdrv_pread(bs->file->bs, VHDX_REGION_TABLE_OFFSET, buffer,
+    ret = bdrv_pread(bs->file, VHDX_REGION_TABLE_OFFSET, buffer,
                     VHDX_HEADER_BLOCK_SIZE);
    if (ret < 0) {
        goto fail;
@@ -634,7 +635,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)

    buffer = qemu_blockalign(bs, VHDX_METADATA_TABLE_MAX_SIZE);

-    ret = bdrv_pread(bs->file->bs, s->metadata_rt.file_offset, buffer,
+    ret = bdrv_pread(bs->file, s->metadata_rt.file_offset, buffer,
                     VHDX_METADATA_TABLE_MAX_SIZE);
    if (ret < 0) {
        goto exit;
@@ -737,7 +738,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
        goto exit;
    }

-    ret = bdrv_pread(bs->file->bs,
+    ret = bdrv_pread(bs->file,
                     s->metadata_entries.file_parameters_entry.offset
                                         + s->metadata_rt.file_offset,
                     &s->params,
@@ -772,7 +773,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
    /* determine virtual disk size, logical sector size,
     * and phys sector size */

-    ret = bdrv_pread(bs->file->bs,
+    ret = bdrv_pread(bs->file,
                     s->metadata_entries.virtual_disk_size_entry.offset
                                           + s->metadata_rt.file_offset,
                     &s->virtual_disk_size,
@@ -780,7 +781,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
    if (ret < 0) {
        goto exit;
    }
-    ret = bdrv_pread(bs->file->bs,
+    ret = bdrv_pread(bs->file,
                     s->metadata_entries.logical_sector_size_entry.offset
                                             + s->metadata_rt.file_offset,
                     &s->logical_sector_size,
@@ -788,7 +789,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
    if (ret < 0) {
        goto exit;
    }
-    ret = bdrv_pread(bs->file->bs,
+    ret = bdrv_pread(bs->file,
                     s->metadata_entries.phys_sector_size_entry.offset
                                          + s->metadata_rt.file_offset,
                     &s->physical_sector_size,
@@ -905,7 +906,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
    QLIST_INIT(&s->regions);

    /* validate the file signature */
-    ret = bdrv_pread(bs->file->bs, 0, &signature, sizeof(uint64_t));
+    ret = bdrv_pread(bs->file, 0, &signature, sizeof(uint64_t));
    if (ret < 0) {
        goto fail;
    }
@@ -964,7 +965,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
        goto fail;
    }

-    ret = bdrv_pread(bs->file->bs, s->bat_offset, s->bat, s->bat_rt.length);
+    ret = bdrv_pread(bs->file, s->bat_offset, s->bat, s->bat_rt.length);
    if (ret < 0) {
        goto fail;
    }
@@ -1117,7 +1118,7 @@ static coroutine_fn int vhdx_co_readv(BlockDriverState *bs, int64_t sector_num,
                break;
            case PAYLOAD_BLOCK_FULLY_PRESENT:
                qemu_co_mutex_unlock(&s->lock);
-                ret = bdrv_co_readv(bs->file->bs,
+                ret = bdrv_co_readv(bs->file,
                                    sinfo.file_offset >> BDRV_SECTOR_BITS,
                                    sinfo.sectors_avail, &hd_qiov);
                qemu_co_mutex_lock(&s->lock);
@@ -1326,7 +1327,7 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
                }
                /* block exists, so we can just overwrite it */
                qemu_co_mutex_unlock(&s->lock);
-                ret = bdrv_co_writev(bs->file->bs,
+                ret = bdrv_co_writev(bs->file,
                                    sinfo.file_offset >> BDRV_SECTOR_BITS,
                                    sectors_to_write, &hd_qiov);
                qemu_co_mutex_lock(&s->lock);
@@ -1387,9 +1388,11 @@ exit:
 * There are 2 headers, and the highest sequence number will represent
 * the active header
 */
-static int vhdx_create_new_headers(BlockDriverState *bs, uint64_t image_size,
+static int vhdx_create_new_headers(BlockBackend *blk, uint64_t image_size,
                                   uint32_t log_size)
 {
+    BlockDriverState *bs = blk_bs(blk);
+    BdrvChild *child;
    int ret = 0;
    VHDXHeader *hdr = NULL;

@@ -1404,12 +1407,18 @@ static int vhdx_create_new_headers(BlockDriverState *bs, uint64_t image_size,
    vhdx_guid_generate(&hdr->file_write_guid);
    vhdx_guid_generate(&hdr->data_write_guid);

-    ret = vhdx_write_header(bs, hdr, VHDX_HEADER1_OFFSET, false);
+    /* XXX Ugly way to get blk->root, but that's a feature, not a bug. This
+     * hack makes it obvious that vhdx_write_header() bypasses the BlockBackend
+     * here, which it really shouldn't be doing. */
+    child = QLIST_FIRST(&bs->parents);
+    assert(!QLIST_NEXT(child, next_parent));
+
+    ret = vhdx_write_header(child, hdr, VHDX_HEADER1_OFFSET, false);
    if (ret < 0) {
        goto exit;
    }
    hdr->sequence_number++;
-    ret = vhdx_write_header(bs, hdr, VHDX_HEADER2_OFFSET, false);
+    ret = vhdx_write_header(child, hdr, VHDX_HEADER2_OFFSET, false);
    if (ret < 0) {
        goto exit;
    }
@@ -1442,7 +1451,7 @@ exit:
 * The first 64KB of the Metadata section is reserved for the metadata
 * header and entries; beyond that, the metadata items themselves reside.
 */
-static int vhdx_create_new_metadata(BlockDriverState *bs,
+static int vhdx_create_new_metadata(BlockBackend *blk,
                                    uint64_t image_size,
                                    uint32_t block_size,
                                    uint32_t sector_size,
@@ -1538,13 +1547,13 @@ static int vhdx_create_new_metadata(BlockDriverState *bs,
                                   VHDX_META_FLAGS_IS_VIRTUAL_DISK;
    vhdx_metadata_entry_le_export(&md_table_entry[4]);

-    ret = bdrv_pwrite(bs, metadata_offset, buffer, VHDX_HEADER_BLOCK_SIZE);
+    ret = blk_pwrite(blk, metadata_offset, buffer, VHDX_HEADER_BLOCK_SIZE, 0);
    if (ret < 0) {
        goto exit;
    }

-    ret = bdrv_pwrite(bs, metadata_offset + (64 * KiB), entry_buffer,
-                      VHDX_METADATA_ENTRY_BUFFER_SIZE);
+    ret = blk_pwrite(blk, metadata_offset + (64 * KiB), entry_buffer,
+                     VHDX_METADATA_ENTRY_BUFFER_SIZE, 0);
    if (ret < 0) {
        goto exit;
    }
@@ -1564,7 +1573,7 @@ exit:
 *  Fixed images: default state of the BAT is fully populated, with
 *                file offsets and state PAYLOAD_BLOCK_FULLY_PRESENT.
 */
-static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
+static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
                           uint64_t image_size, VHDXImageType type,
                           bool use_zero_blocks, uint64_t file_offset,
                           uint32_t length)
@@ -1588,12 +1597,12 @@ static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
    if (type == VHDX_TYPE_DYNAMIC) {
        /* All zeroes, so we can just extend the file - the end of the BAT
         * is the furthest thing we have written yet */
-        ret = bdrv_truncate(bs, data_file_offset);
+        ret = blk_truncate(blk, data_file_offset);
        if (ret < 0) {
            goto exit;
        }
    } else if (type == VHDX_TYPE_FIXED) {
-        ret = bdrv_truncate(bs, data_file_offset + image_size);
+        ret = blk_truncate(blk, data_file_offset + image_size);
        if (ret < 0) {
            goto exit;
        }
@@ -1604,7 +1613,7 @@ static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,

    if (type == VHDX_TYPE_FIXED ||
                use_zero_blocks ||
-                bdrv_has_zero_init(bs) == 0) {
+                bdrv_has_zero_init(blk_bs(blk)) == 0) {
        /* for a fixed file, the default BAT entry is not zero */
        s->bat = g_try_malloc0(length);
        if (length && s->bat == NULL) {
@@ -1620,12 +1629,12 @@ static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
            sinfo.file_offset = data_file_offset +
                                (sector_num << s->logical_sector_size_bits);
            sinfo.file_offset = ROUND_UP(sinfo.file_offset, MiB);
-            vhdx_update_bat_table_entry(bs, s, &sinfo, &unused, &unused,
+            vhdx_update_bat_table_entry(blk_bs(blk), s, &sinfo, &unused, &unused,
                                        block_state);
            cpu_to_le64s(&s->bat[sinfo.bat_idx]);
            sector_num += s->sectors_per_block;
        }
-        ret = bdrv_pwrite(bs, file_offset, s->bat, length);
+        ret = blk_pwrite(blk, file_offset, s->bat, length, 0);
        if (ret < 0) {
            goto exit;
        }
@@ -1645,7 +1654,7 @@ exit:
 * to create the BAT itself, we will also cause the BAT to be
 * created.
 */
-static int vhdx_create_new_region_table(BlockDriverState *bs,
+static int vhdx_create_new_region_table(BlockBackend *blk,
                                        uint64_t image_size,
                                        uint32_t block_size,
                                        uint32_t sector_size,
@@ -1720,21 +1729,21 @@ static int vhdx_create_new_region_table(BlockDriverState *bs,

    /* The region table gives us the data we need to create the BAT,
     * so do that now */
-    ret = vhdx_create_bat(bs, s, image_size, type, use_zero_blocks,
+    ret = vhdx_create_bat(blk, s, image_size, type, use_zero_blocks,
                          bat_file_offset, bat_length);
    if (ret < 0) {
        goto exit;
    }

    /* Now write out the region headers to disk */
-    ret = bdrv_pwrite(bs, VHDX_REGION_TABLE_OFFSET, buffer,
-                      VHDX_HEADER_BLOCK_SIZE);
+    ret = blk_pwrite(blk, VHDX_REGION_TABLE_OFFSET, buffer,
+                     VHDX_HEADER_BLOCK_SIZE, 0);
    if (ret < 0) {
        goto exit;
    }

-    ret = bdrv_pwrite(bs, VHDX_REGION_TABLE2_OFFSET, buffer,
-                      VHDX_HEADER_BLOCK_SIZE);
+    ret = blk_pwrite(blk, VHDX_REGION_TABLE2_OFFSET, buffer,
+                     VHDX_HEADER_BLOCK_SIZE, 0);
    if (ret < 0) {
        goto exit;
    }
@@ -1871,13 +1880,13 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)


    /* Creates (B),(C) */
-    ret = vhdx_create_new_headers(blk_bs(blk), image_size, log_size);
+    ret = vhdx_create_new_headers(blk, image_size, log_size);
    if (ret < 0) {
        goto delete_and_exit;
    }

    /* Creates (D),(E),(G) explicitly. (F) created as by-product */
-    ret = vhdx_create_new_region_table(blk_bs(blk), image_size, block_size, 512,
+    ret = vhdx_create_new_region_table(blk, image_size, block_size, 512,
                                       log_size, use_zero_blocks, image_type,
                                       &metadata_offset);
    if (ret < 0) {
@@ -1885,7 +1894,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
    }

    /* Creates (H) */
-    ret = vhdx_create_new_metadata(blk_bs(blk), image_size, block_size, 512,
+    ret = vhdx_create_new_metadata(blk, image_size, block_size, 512,
                                   metadata_offset, image_type);
    if (ret < 0) {
        goto delete_and_exit;
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -252,7 +252,7 @@ static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
    int ret;

    desc = g_malloc0(DESC_SIZE);
-    ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
+    ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
    if (ret < 0) {
        g_free(desc);
        return 0;
@@ -286,7 +286,7 @@ static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)

    desc = g_malloc0(DESC_SIZE);
    tmp_desc = g_malloc0(DESC_SIZE);
-    ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
+    ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
    if (ret < 0) {
        goto out;
    }
@@ -306,7 +306,7 @@ static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
        pstrcat(desc, DESC_SIZE, tmp_desc);
    }

-    ret = bdrv_pwrite_sync(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
+    ret = bdrv_pwrite_sync(bs->file, s->desc_offset, desc, DESC_SIZE);

 out:
    g_free(desc);
@@ -350,7 +350,7 @@ static int vmdk_parent_open(BlockDriverState *bs)
    int ret;

    desc = g_malloc0(DESC_SIZE + 1);
-    ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
+    ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
    if (ret < 0) {
        goto out;
    }
@@ -454,7 +454,7 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
        return -ENOMEM;
    }

-    ret = bdrv_pread(extent->file->bs,
+    ret = bdrv_pread(extent->file,
                     extent->l1_table_offset,
                     extent->l1_table,
                     l1_size);
@@ -474,7 +474,7 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
            ret = -ENOMEM;
            goto fail_l1;
        }
-        ret = bdrv_pread(extent->file->bs,
+        ret = bdrv_pread(extent->file,
                         extent->l1_backup_table_offset,
                         extent->l1_backup_table,
                         l1_size);
@@ -508,7 +508,7 @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
    VMDK3Header header;
    VmdkExtent *extent;

-    ret = bdrv_pread(file->bs, sizeof(magic), &header, sizeof(header));
+    ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "Could not read header from file '%s'",
@@ -538,14 +538,13 @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
 static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
                               QDict *options, Error **errp);

-static char *vmdk_read_desc(BlockDriverState *file, uint64_t desc_offset,
-                            Error **errp)
+static char *vmdk_read_desc(BdrvChild *file, uint64_t desc_offset, Error **errp)
 {
    int64_t size;
    char *buf;
    int ret;

-    size = bdrv_getlength(file);
+    size = bdrv_getlength(file->bs);
    if (size < 0) {
        error_setg_errno(errp, -size, "Could not access file");
        return NULL;
@@ -586,7 +585,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
    int64_t l1_backup_offset = 0;
    bool compressed;

-    ret = bdrv_pread(file->bs, sizeof(magic), &header, sizeof(header));
+    ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "Could not read header from file '%s'",
@@ -596,7 +595,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
    if (header.capacity == 0) {
        uint64_t desc_offset = le64_to_cpu(header.desc_offset);
        if (desc_offset) {
-            char *buf = vmdk_read_desc(file->bs, desc_offset << 9, errp);
+            char *buf = vmdk_read_desc(file, desc_offset << 9, errp);
            if (!buf) {
                return -EINVAL;
            }
@@ -636,7 +635,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
            } QEMU_PACKED eos_marker;
        } QEMU_PACKED footer;

-        ret = bdrv_pread(file->bs,
+        ret = bdrv_pread(file,
            bs->file->bs->total_sectors * 512 - 1536,
            &footer, sizeof(footer));
        if (ret < 0) {
@@ -874,7 +873,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
            extent->flat_start_offset = flat_offset << 9;
        } else if (!strcmp(type, "SPARSE") || !strcmp(type, "VMFSSPARSE")) {
            /* SPARSE extent and VMFSSPARSE extent are both "COWD" sparse file*/
-            char *buf = vmdk_read_desc(extent_file->bs, 0, errp);
+            char *buf = vmdk_read_desc(extent_file, 0, errp);
            if (!buf) {
                ret = -EINVAL;
            } else {
@@ -943,7 +942,7 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
    BDRVVmdkState *s = bs->opaque;
    uint32_t magic;

-    buf = vmdk_read_desc(bs->file->bs, 0, errp);
+    buf = vmdk_read_desc(bs->file, 0, errp);
    if (!buf) {
        return -EINVAL;
    }
@@ -1046,14 +1045,14 @@ static int get_whole_cluster(BlockDriverState *bs,
    /* Read backing data before skip range */
    if (skip_start_bytes > 0) {
        if (bs->backing) {
-            ret = bdrv_pread(bs->backing->bs, offset, whole_grain,
+            ret = bdrv_pread(bs->backing, offset, whole_grain,
                             skip_start_bytes);
            if (ret < 0) {
                ret = VMDK_ERROR;
                goto exit;
            }
        }
-        ret = bdrv_pwrite(extent->file->bs, cluster_offset, whole_grain,
+        ret = bdrv_pwrite(extent->file, cluster_offset, whole_grain,
                          skip_start_bytes);
        if (ret < 0) {
            ret = VMDK_ERROR;
@@ -1063,7 +1062,7 @@ static int get_whole_cluster(BlockDriverState *bs,
    /* Read backing data after skip range */
    if (skip_end_bytes < cluster_bytes) {
        if (bs->backing) {
-            ret = bdrv_pread(bs->backing->bs, offset + skip_end_bytes,
+            ret = bdrv_pread(bs->backing, offset + skip_end_bytes,
                             whole_grain + skip_end_bytes,
                             cluster_bytes - skip_end_bytes);
            if (ret < 0) {
@@ -1071,7 +1070,7 @@ static int get_whole_cluster(BlockDriverState *bs,
                goto exit;
            }
        }
-        ret = bdrv_pwrite(extent->file->bs, cluster_offset + skip_end_bytes,
+        ret = bdrv_pwrite(extent->file, cluster_offset + skip_end_bytes,
                          whole_grain + skip_end_bytes,
                          cluster_bytes - skip_end_bytes);
        if (ret < 0) {
@@ -1091,8 +1090,7 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
 {
    offset = cpu_to_le32(offset);
    /* update L2 table */
-    if (bdrv_pwrite_sync(
-                extent->file->bs,
+    if (bdrv_pwrite_sync(extent->file,
                ((int64_t)m_data->l2_offset * 512)
                    + (m_data->l2_index * sizeof(offset)),
                &offset, sizeof(offset)) < 0) {
@@ -1101,8 +1099,7 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
    /* update backup L2 table */
    if (extent->l1_backup_table_offset != 0) {
        m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
-        if (bdrv_pwrite_sync(
-                    extent->file->bs,
+        if (bdrv_pwrite_sync(extent->file,
                    ((int64_t)m_data->l2_offset * 512)
                        + (m_data->l2_index * sizeof(offset)),
                    &offset, sizeof(offset)) < 0) {
@@ -1191,8 +1188,7 @@ static int get_cluster_offset(BlockDriverState *bs,
        }
    }
    l2_table = extent->l2_cache + (min_index * extent->l2_size);
-    if (bdrv_pread(
-                extent->file->bs,
+    if (bdrv_pread(extent->file,
                (int64_t)l2_offset * 512,
                l2_table,
                extent->l2_size * sizeof(uint32_t)
@@ -1206,13 +1202,6 @@ static int get_cluster_offset(BlockDriverState *bs,
    l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
    cluster_sector = le32_to_cpu(l2_table[l2_index]);

-    if (m_data) {
-        m_data->valid = 1;
-        m_data->l1_index = l1_index;
-        m_data->l2_index = l2_index;
-        m_data->l2_offset = l2_offset;
-        m_data->l2_cache_entry = &l2_table[l2_index];
-    }
    if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
        zeroed = true;
    }
@@ -1235,6 +1224,13 @@ static int get_cluster_offset(BlockDriverState *bs,
        if (ret) {
            return ret;
        }
+        if (m_data) {
+            m_data->valid = 1;
+            m_data->l1_index = l1_index;
+            m_data->l2_index = l2_index;
+            m_data->l2_offset = l2_offset;
+            m_data->l2_cache_entry = &l2_table[l2_index];
+        }
    }
    *cluster_offset = cluster_sector << BDRV_SECTOR_BITS;
    return VMDK_OK;
@@ -1373,7 +1369,7 @@ static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
    }

    write_offset = cluster_offset + offset_in_cluster,
-    ret = bdrv_co_pwritev(extent->file->bs, write_offset, n_bytes,
+    ret = bdrv_co_pwritev(extent->file, write_offset, n_bytes,
                          &local_qiov, 0);

    write_end_sector = DIV_ROUND_UP(write_offset + n_bytes, BDRV_SECTOR_SIZE);
@@ -1411,7 +1407,7 @@ static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,


    if (!extent->compressed) {
-        ret = bdrv_co_preadv(extent->file->bs,
+        ret = bdrv_co_preadv(extent->file,
                             cluster_offset + offset_in_cluster, bytes,
                             qiov, 0);
        if (ret < 0) {
@@ -1424,7 +1420,7 @@ static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
    buf_bytes = cluster_bytes * 2;
    cluster_buf = g_malloc(buf_bytes);
    uncomp_buf = g_malloc(cluster_bytes);
-    ret = bdrv_pread(extent->file->bs,
+    ret = bdrv_pread(extent->file,
                cluster_offset,
                cluster_buf, buf_bytes);
    if (ret < 0) {
@@ -1501,7 +1497,7 @@ vmdk_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
                qemu_iovec_reset(&local_qiov);
                qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);

-                ret = bdrv_co_preadv(bs->backing->bs, offset, n_bytes,
+                ret = bdrv_co_preadv(bs->backing, offset, n_bytes,
                                     &local_qiov, 0);
                if (ret < 0) {
                    goto fail;
@@ -1649,56 +1645,11 @@ vmdk_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
    return ret;
 }

-typedef struct VmdkWriteCompressedCo {
-    BlockDriverState *bs;
-    int64_t sector_num;
-    const uint8_t *buf;
-    int nb_sectors;
-    int ret;
-} VmdkWriteCompressedCo;
-
-static void vmdk_co_write_compressed(void *opaque)
+static int coroutine_fn
+vmdk_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
+                           uint64_t bytes, QEMUIOVector *qiov)
 {
-    VmdkWriteCompressedCo *co = opaque;
-    QEMUIOVector local_qiov;
-    uint64_t offset = co->sector_num * BDRV_SECTOR_SIZE;
-    uint64_t bytes = co->nb_sectors * BDRV_SECTOR_SIZE;
-
-    struct iovec iov = (struct iovec) {
-        .iov_base   = (uint8_t*) co->buf,
-        .iov_len    = bytes,
-    };
-    qemu_iovec_init_external(&local_qiov, &iov, 1);
-
-    co->ret = vmdk_pwritev(co->bs, offset, bytes, &local_qiov, false, false);
-}
-
-static int vmdk_write_compressed(BlockDriverState *bs,
-                                 int64_t sector_num,
-                                 const uint8_t *buf,
-                                 int nb_sectors)
-{
-    BDRVVmdkState *s = bs->opaque;
-
-    if (s->num_extents == 1 && s->extents[0].compressed) {
-        Coroutine *co;
-        AioContext *aio_context = bdrv_get_aio_context(bs);
-        VmdkWriteCompressedCo data = {
-            .bs         = bs,
-            .sector_num = sector_num,
-            .buf        = buf,
-            .nb_sectors = nb_sectors,
-            .ret        = -EINPROGRESS,
-        };
-        co = qemu_coroutine_create(vmdk_co_write_compressed);
-        qemu_coroutine_enter(co, &data);
-        while (data.ret == -EINPROGRESS) {
-            aio_poll(aio_context, true);
-        }
-        return data.ret;
-    } else {
-        return -ENOTSUP;
-    }
+    return vmdk_co_pwritev(bs, offset, bytes, qiov, 0);
 }

 static int coroutine_fn vmdk_co_pwrite_zeroes(BlockDriverState *bs,
@@ -2397,7 +2348,7 @@ static BlockDriver bdrv_vmdk = {
    .bdrv_reopen_prepare          = vmdk_reopen_prepare,
    .bdrv_co_preadv               = vmdk_co_preadv,
    .bdrv_co_pwritev              = vmdk_co_pwritev,
-    .bdrv_write_compressed        = vmdk_write_compressed,
+    .bdrv_co_pwritev_compressed   = vmdk_co_pwritev_compressed,
    .bdrv_co_pwrite_zeroes        = vmdk_co_pwrite_zeroes,
    .bdrv_close                   = vmdk_close,
    .bdrv_create                  = vmdk_create,
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -237,7 +237,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
        goto fail;
    }

-    ret = bdrv_pread(bs->file->bs, 0, s->footer_buf, HEADER_SIZE);
+    ret = bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE);
    if (ret < 0) {
        error_setg(errp, "Unable to read VHD header");
        goto fail;
@@ -257,7 +257,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
        }

        /* If a fixed disk, the footer is found only at the end of the file */
-        ret = bdrv_pread(bs->file->bs, offset-HEADER_SIZE, s->footer_buf,
+        ret = bdrv_pread(bs->file, offset-HEADER_SIZE, s->footer_buf,
                         HEADER_SIZE);
        if (ret < 0) {
            goto fail;
@@ -328,7 +328,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
    }

    if (disk_type == VHD_DYNAMIC) {
-        ret = bdrv_pread(bs->file->bs, be64_to_cpu(footer->data_offset), buf,
+        ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf,
                         HEADER_SIZE);
        if (ret < 0) {
            error_setg(errp, "Error reading dynamic VHD header");
@@ -385,7 +385,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,

        s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);

-        ret = bdrv_pread(bs->file->bs, s->bat_offset, s->pagetable,
+        ret = bdrv_pread(bs->file, s->bat_offset, s->pagetable,
                         pagetable_size);
        if (ret < 0) {
            error_setg(errp, "Error reading pagetable");
@@ -481,7 +481,7 @@ static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,

        s->last_bitmap_offset = bitmap_offset;
        memset(bitmap, 0xff, s->bitmap_size);
-        bdrv_pwrite_sync(bs->file->bs, bitmap_offset, bitmap, s->bitmap_size);
+        bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
    }

    return block_offset;
@@ -505,7 +505,7 @@ static int rewrite_footer(BlockDriverState* bs)
    BDRVVPCState *s = bs->opaque;
    int64_t offset = s->free_data_block_offset;

-    ret = bdrv_pwrite_sync(bs->file->bs, offset, s->footer_buf, HEADER_SIZE);
+    ret = bdrv_pwrite_sync(bs->file, offset, s->footer_buf, HEADER_SIZE);
    if (ret < 0)
        return ret;

@@ -539,7 +539,7 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t offset)

    /* Initialize the block's bitmap */
    memset(bitmap, 0xff, s->bitmap_size);
-    ret = bdrv_pwrite_sync(bs->file->bs, s->free_data_block_offset, bitmap,
+    ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap,
        s->bitmap_size);
    if (ret < 0) {
        return ret;
@@ -554,7 +554,7 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t offset)
    /* Write BAT entry to disk */
    bat_offset = s->bat_offset + (4 * index);
    bat_value = cpu_to_be32(s->pagetable[index]);
-    ret = bdrv_pwrite_sync(bs->file->bs, bat_offset, &bat_value, 4);
+    ret = bdrv_pwrite_sync(bs->file, bat_offset, &bat_value, 4);
    if (ret < 0)
        goto fail;

@@ -591,7 +591,7 @@ vpc_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
    QEMUIOVector local_qiov;

    if (be32_to_cpu(footer->type) == VHD_FIXED) {
-        return bdrv_co_preadv(bs->file->bs, offset, bytes, qiov, 0);
+        return bdrv_co_preadv(bs->file, offset, bytes, qiov, 0);
    }

    qemu_co_mutex_lock(&s->lock);
@@ -607,7 +607,7 @@ vpc_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
            qemu_iovec_reset(&local_qiov);
            qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);

-            ret = bdrv_co_preadv(bs->file->bs, image_offset, n_bytes,
+            ret = bdrv_co_preadv(bs->file, image_offset, n_bytes,
                                 &local_qiov, 0);
            if (ret < 0) {
                goto fail;
@@ -640,7 +640,7 @@ vpc_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
    QEMUIOVector local_qiov;

    if (be32_to_cpu(footer->type) == VHD_FIXED) {
-        return bdrv_co_pwritev(bs->file->bs, offset, bytes, qiov, 0);
+        return bdrv_co_pwritev(bs->file, offset, bytes, qiov, 0);
    }

    qemu_co_mutex_lock(&s->lock);
@@ -661,7 +661,7 @@ vpc_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
        qemu_iovec_reset(&local_qiov);
        qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);

-        ret = bdrv_co_pwritev(bs->file->bs, image_offset, n_bytes,
+        ret = bdrv_co_pwritev(bs->file, image_offset, n_bytes,
                              &local_qiov, 0);
        if (ret < 0) {
            goto fail;
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -341,9 +341,8 @@ typedef struct BDRVVVFATState {
    unsigned int current_cluster;

    /* write support */
-    BlockDriverState* write_target;
    char* qcow_filename;
-    BlockDriverState* qcow;
+    BdrvChild* qcow;
    void* fat2;
    char* used_clusters;
    array_t commits;
@@ -981,7 +980,7 @@ static int init_directories(BDRVVVFATState* s,
 static BDRVVVFATState *vvv = NULL;
 #endif

-static int enable_write_target(BDRVVVFATState *s, Error **errp);
+static int enable_write_target(BlockDriverState *bs, Error **errp);
 static int is_consistent(BDRVVVFATState *s);

 static QemuOptsList runtime_opts = {
@@ -1158,8 +1157,8 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
    s->current_cluster=0xffffffff;

    /* read only is the default for safety */
-    bs->read_only = 1;
-    s->qcow = s->write_target = NULL;
+    bs->read_only = true;
+    s->qcow = NULL;
    s->qcow_filename = NULL;
    s->fat2 = NULL;
    s->downcase_short_names = 1;
@@ -1170,14 +1169,13 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
    s->sector_count = cyls * heads * secs - (s->first_sectors_number - 1);

    if (qemu_opt_get_bool(opts, "rw", false)) {
-        ret = enable_write_target(s, errp);
+        ret = enable_write_target(bs, errp);
        if (ret < 0) {
            goto fail;
        }
-        bs->read_only = 0;
+        bs->read_only = false;
    }

-    bs->request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O supported */
    bs->total_sectors = cyls * heads * secs;

    if (init_directories(s, dirname, heads, secs, errp)) {
@@ -1209,6 +1207,11 @@ fail:
    return ret;
 }

+static void vvfat_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+    bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */
+}
+
 static inline void vvfat_close_current_file(BDRVVVFATState *s)
 {
    if(s->current_mapping) {
@@ -1387,9 +1390,10 @@ static int vvfat_read(BlockDriverState *bs, int64_t sector_num,
 	   return -1;
 	if (s->qcow) {
 	    int n;
-            if (bdrv_is_allocated(s->qcow, sector_num, nb_sectors-i, &n)) {
-DLOG(fprintf(stderr, "sectors %d+%d allocated\n", (int)sector_num, n));
-                if (bdrv_read(s->qcow, sector_num, buf + i*0x200, n)) {
+            if (bdrv_is_allocated(s->qcow->bs, sector_num, nb_sectors-i, &n)) {
+                DLOG(fprintf(stderr, "sectors %d+%d allocated\n",
+                             (int)sector_num, n));
+                if (bdrv_read(s->qcow, sector_num, buf + i * 0x200, n)) {
                    return -1;
                }
                i += n - 1;
@@ -1665,12 +1669,15 @@ static inline int cluster_was_modified(BDRVVVFATState* s, uint32_t cluster_num)
    int was_modified = 0;
    int i, dummy;

-    if (s->qcow == NULL)
-	return 0;
+    if (s->qcow == NULL) {
+        return 0;
+    }

-    for (i = 0; !was_modified && i < s->sectors_per_cluster; i++)
-	was_modified = bdrv_is_allocated(s->qcow,
-		cluster2sector(s, cluster_num) + i, 1, &dummy);
+    for (i = 0; !was_modified && i < s->sectors_per_cluster; i++) {
+        was_modified = bdrv_is_allocated(s->qcow->bs,
+                                         cluster2sector(s, cluster_num) + i,
+                                         1, &dummy);
+    }

    return was_modified;
 }
@@ -1819,11 +1826,16 @@ static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s,

 		vvfat_close_current_file(s);
                for (i = 0; i < s->sectors_per_cluster; i++) {
-                    if (!bdrv_is_allocated(s->qcow, offset + i, 1, &dummy)) {
-                        if (vvfat_read(s->bs, offset, s->cluster_buffer, 1)) {
+                    int res;
+
+                    res = bdrv_is_allocated(s->qcow->bs, offset + i, 1, &dummy);
+                    if (!res) {
+                        res = vvfat_read(s->bs, offset, s->cluster_buffer, 1);
+                        if (res) {
                            return -1;
                        }
-                        if (bdrv_write(s->qcow, offset, s->cluster_buffer, 1)) {
+                        res = bdrv_write(s->qcow, offset, s->cluster_buffer, 1);
+                        if (res) {
                            return -2;
                        }
                    }
@@ -2779,8 +2791,8 @@ static int do_commit(BDRVVVFATState* s)
 	return ret;
    }

-    if (s->qcow->drv->bdrv_make_empty) {
-        s->qcow->drv->bdrv_make_empty(s->qcow);
+    if (s->qcow->bs->drv->bdrv_make_empty) {
+        s->qcow->bs->drv->bdrv_make_empty(s->qcow->bs);
    }

    memset(s->used_clusters, 0, sector2cluster(s, s->sector_count));
@@ -2946,7 +2958,7 @@ write_target_commit(BlockDriverState *bs, uint64_t offset, uint64_t bytes,

 static void write_target_close(BlockDriverState *bs) {
    BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque);
-    bdrv_unref(s->qcow);
+    bdrv_unref_child(s->bs, s->qcow);
    g_free(s->qcow_filename);
 }

@@ -2956,8 +2968,19 @@ static BlockDriver vvfat_write_target = {
    .bdrv_close         = write_target_close,
 };

-static int enable_write_target(BDRVVVFATState *s, Error **errp)
+static void vvfat_qcow_options(int *child_flags, QDict *child_options,
+                               int parent_flags, QDict *parent_options)
 {
+    *child_flags = BDRV_O_RDWR | BDRV_O_NO_FLUSH;
+}
+
+static const BdrvChildRole child_vvfat_qcow = {
+    .inherit_options    = vvfat_qcow_options,
+};
+
+static int enable_write_target(BlockDriverState *bs, Error **errp)
+{
+    BDRVVVFATState *s = bs->opaque;
    BlockDriver *bdrv_qcow = NULL;
    BlockDriverState *backing;
    QemuOpts *opts = NULL;
@@ -2995,9 +3018,10 @@ static int enable_write_target(BDRVVVFATState *s, Error **errp)
    }

    options = qdict_new();
-    qdict_put(options, "driver", qstring_from_str("qcow"));
-    s->qcow = bdrv_open(s->qcow_filename, NULL, options,
-                        BDRV_O_RDWR | BDRV_O_NO_FLUSH, errp);
+    qdict_put(options, "write-target.driver", qstring_from_str("qcow"));
+    s->qcow = bdrv_open_child(s->qcow_filename, options, "write-target", bs,
+                              &child_vvfat_qcow, false, errp);
+    QDECREF(options);
    if (!s->qcow) {
        ret = -EINVAL;
        goto err;
@@ -3046,6 +3070,7 @@ static BlockDriver bdrv_vvfat = {

    .bdrv_parse_filename    = vvfat_parse_filename,
    .bdrv_file_open         = vvfat_open,
+    .bdrv_refresh_limits    = vvfat_refresh_limits,
    .bdrv_close             = vvfat_close,

    .bdrv_co_preadv         = vvfat_co_preadv,
--- a/block/win32-aio.c
+++ b/block/win32-aio.c
@@ -27,7 +27,7 @@
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "block/aio.h"
-#include "raw-aio.h"
+#include "block/raw-aio.h"
 #include "qemu/event_notifier.h"
 #include "qemu/iov.h"
 #include <windows.h>
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -145,7 +145,8 @@ void qmp_nbd_server_start(SocketAddress *addr,
 void qmp_nbd_server_add(const char *device, bool has_writable, bool writable,
                        Error **errp)
 {
-    BlockBackend *blk;
+    BlockDriverState *bs = NULL;
+    BlockBackend *on_eject_blk;
    NBDExport *exp;

    if (!nbd_server) {
@@ -158,26 +159,22 @@ void qmp_nbd_server_add(const char *device, bool has_writable, bool writable,
        return;
    }

-    blk = blk_by_name(device);
-    if (!blk) {
-        error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
-                  "Device '%s' not found", device);
-        return;
-    }
-    if (!blk_is_inserted(blk)) {
-        error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
+    on_eject_blk = blk_by_name(device);
+
+    bs = bdrv_lookup_bs(device, device, errp);
+    if (!bs) {
        return;
    }

    if (!has_writable) {
        writable = false;
    }
-    if (blk_is_read_only(blk)) {
+    if (bdrv_is_read_only(bs)) {
        writable = false;
    }

-    exp = nbd_export_new(blk, 0, -1, writable ? 0 : NBD_FLAG_READ_ONLY, NULL,
-                         errp);
+    exp = nbd_export_new(bs, 0, -1, writable ? 0 : NBD_FLAG_READ_ONLY,
+                         NULL, false, on_eject_blk, errp);
    if (!exp) {
        return;
    }
--- a/blockdev.c
+++ b/blockdev.c
--- a/blockjob.c
+++ b/blockjob.c
@@ -33,6 +33,7 @@
 #include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qjson.h"
 #include "qemu/coroutine.h"
+#include "qemu/id.h"
 #include "qmp-commands.h"
 #include "qemu/timer.h"
 #include "qapi-event.h"
@@ -60,6 +61,19 @@ BlockJob *block_job_next(BlockJob *job)
    return QLIST_NEXT(job, job_list);
 }

+BlockJob *block_job_get(const char *id)
+{
+    BlockJob *job;
+
+    QLIST_FOREACH(job, &block_jobs, job_list) {
+        if (!strcmp(id, job->id)) {
+            return job;
+        }
+    }
+
+    return NULL;
+}
+
 /* Normally the job runs in its BlockBackend's AioContext.  The exception is
 * block_job_defer_to_main_loop() where it runs in the QEMU main loop.  Code
 * that supports both cases uses this helper function.
@@ -103,9 +117,9 @@ static void block_job_detach_aio_context(void *opaque)
    block_job_unref(job);
 }

-void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
-                       int64_t speed, BlockCompletionFunc *cb,
-                       void *opaque, Error **errp)
+void *block_job_create(const char *job_id, const BlockJobDriver *driver,
+                       BlockDriverState *bs, int64_t speed,
+                       BlockCompletionFunc *cb, void *opaque, Error **errp)
 {
    BlockBackend *blk;
    BlockJob *job;
@@ -116,6 +130,24 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
        return NULL;
    }

+    if (job_id == NULL) {
+        job_id = bdrv_get_device_name(bs);
+        if (!*job_id) {
+            error_setg(errp, "An explicit job ID is required for this node");
+            return NULL;
+        }
+    }
+
+    if (!id_wellformed(job_id)) {
+        error_setg(errp, "Invalid job ID '%s'", job_id);
+        return NULL;
+    }
+
+    if (block_job_get(job_id)) {
+        error_setg(errp, "Job ID '%s' already in use", job_id);
+        return NULL;
+    }
+
    blk = blk_new();
    blk_insert_bs(blk, bs);

@@ -126,7 +158,7 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
    bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);

    job->driver        = driver;
-    job->id            = g_strdup(bdrv_get_device_name(bs));
+    job->id            = g_strdup(job_id);
    job->blk           = blk;
    job->cb            = cb;
    job->opaque        = opaque;
@@ -290,7 +322,8 @@ void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
 void block_job_complete(BlockJob *job, Error **errp)
 {
    if (job->pause_count || job->cancelled || !job->driver->complete) {
-        error_setg(errp, QERR_BLOCK_JOB_NOT_READY, job->id);
+        error_setg(errp, "The active block job '%s' cannot be completed",
+                   job->id);
        return;
    }

@@ -346,7 +379,7 @@ void block_job_resume(BlockJob *job)
 void block_job_enter(BlockJob *job)
 {
    if (job->co && !job->busy) {
-        qemu_coroutine_enter(job->co, NULL);
+        qemu_coroutine_enter(job->co);
    }
 }

@@ -524,6 +557,7 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err,

    switch (on_err) {
    case BLOCKDEV_ON_ERROR_ENOSPC:
+    case BLOCKDEV_ON_ERROR_AUTO:
        action = (error == ENOSPC) ?
                 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
        break;
--- a/bsd-user/i386/target_syscall.h
+++ b/bsd-user/i386/target_syscall.h
@@ -162,4 +162,4 @@ struct target_vm86plus_struct {

 #define UNAME_MACHINE "i386"

-#endif  /* TARGET_SYSCALL_H */
+#endif /* TARGET_SYSCALL_H */
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -17,10 +17,12 @@
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
 #include "qemu/osdep.h"
+#include "qemu-version.h"
 #include <machine/trap.h>

 #include "qapi/error.h"
 #include "qemu.h"
+#include "qemu/config-file.h"
 #include "qemu/path.h"
 #include "qemu/help_option.h"
 /* For tb_lock */
@@ -30,6 +32,8 @@
 #include "qemu/timer.h"
 #include "qemu/envlist.h"
 #include "exec/log.h"
+#include "trace/control.h"
+#include "glib-compat.h"

 int singlestep;
 unsigned long mmap_min_addr;
@@ -168,7 +172,7 @@ void cpu_loop(CPUX86State *env)
    //target_siginfo_t info;

    for(;;) {
-        trapnr = cpu_x86_exec(cs);
+        trapnr = cpu_exec(cs);
        switch(trapnr) {
        case 0x80:
            /* syscall from int $0x80 */
@@ -509,7 +513,7 @@ void cpu_loop(CPUSPARCState *env)
    //target_siginfo_t info;

    while (1) {
-        trapnr = cpu_sparc_exec(cs);
+        trapnr = cpu_exec(cs);

        switch (trapnr) {
 #ifndef TARGET_SPARC64
@@ -664,7 +668,8 @@ void cpu_loop(CPUSPARCState *env)

 static void usage(void)
 {
-    printf("qemu-" TARGET_NAME " version " QEMU_VERSION ", Copyright (c) 2003-2008 Fabrice Bellard\n"
+    printf("qemu-" TARGET_NAME " version " QEMU_VERSION QEMU_PKGVERSION
+           ", " QEMU_COPYRIGHT "\n"
           "usage: qemu-" TARGET_NAME " [options] program [arguments...]\n"
           "BSD CPU emulator (compiled for %s emulation)\n"
           "\n"
@@ -687,6 +692,8 @@ static void usage(void)
           "-p pagesize       set the host page size to 'pagesize'\n"
           "-singlestep       always run in singlestep mode\n"
           "-strace           log system calls\n"
+           "-trace            [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
+           "                  specify tracing options\n"
           "\n"
           "Environment variables:\n"
           "QEMU_STRACE       Print system calls and arguments similar to the\n"
@@ -735,6 +742,7 @@ int main(int argc, char **argv)
    int gdbstub_port = 0;
    char **target_environ, **wrk;
    envlist_t *envlist = NULL;
+    char *trace_file = NULL;
    bsd_type = target_openbsd;

    if (argc <= 1)
@@ -754,8 +762,10 @@ int main(int argc, char **argv)

    cpu_model = NULL;

+    qemu_add_opts(&qemu_trace_opts);
+
    optind = 1;
-    for(;;) {
+    for (;;) {
        if (optind >= argc)
            break;
        r = argv[optind];
@@ -840,8 +850,10 @@ int main(int argc, char **argv)
            singlestep = 1;
        } else if (!strcmp(r, "strace")) {
            do_strace = 1;
-        } else
-        {
+        } else if (!strcmp(r, "trace")) {
+            g_free(trace_file);
+            trace_file = trace_opt_parse(optarg);
+        } else {
            usage();
        }
    }
@@ -865,6 +877,11 @@ int main(int argc, char **argv)
    }
    filename = argv[optind];

+    if (!trace_init_backends()) {
+        exit(1);
+    }
+    trace_init_file(trace_file);
+
    /* Zero out regs */
    memset(regs, 0, sizeof(struct target_pt_regs));

@@ -1116,6 +1133,7 @@ int main(int argc, char **argv)
        gdbserver_start (gdbstub_port);
        gdb_handlesig(cpu, 0);
    }
+    trace_init_vcpu_events();
    cpu_loop(env);
    /* never exits */
    return 0;
--- a/bsd-user/qemu.h
+++ b/bsd-user/qemu.h
@@ -209,8 +209,6 @@ abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_addr);
 int target_msync(abi_ulong start, abi_ulong len, int flags);
 extern unsigned long last_brk;
-void cpu_list_lock(void);
-void cpu_list_unlock(void);
 #if defined(CONFIG_USE_NPTL)
 void mmap_fork_start(void);
 void mmap_fork_end(int child);
@@ -358,7 +356,7 @@ static inline void *lock_user(int type, abi_ulong guest_addr, long len, int copy
 #ifdef DEBUG_REMAP
    {
        void *addr;
-        addr = malloc(len);
+        addr = g_malloc(len);
        if (copy)
            memcpy(addr, g2h(guest_addr), len);
        else
@@ -384,7 +382,7 @@ static inline void unlock_user(void *host_ptr, abi_ulong guest_addr,
        return;
    if (len > 0)
        memcpy(g2h(guest_addr), host_ptr, len);
-    free(host_ptr);
+    g_free(host_ptr);
 #endif
 }

--- a/bsd-user/sparc/target_syscall.h
+++ b/bsd-user/sparc/target_syscall.h
@@ -11,4 +11,4 @@ struct target_pt_regs {

 #define UNAME_MACHINE "sun4"

-#endif  /* TARGET_SYSCALL_H */
+#endif /* TARGET_SYSCALL_H */
--- a/bsd-user/sparc64/target_syscall.h
+++ b/bsd-user/sparc64/target_syscall.h
@@ -12,4 +12,4 @@ struct target_pt_regs {

 #define UNAME_MACHINE "sun4u"

-#endif  /* TARGET_SYSCALL_H */
+#endif /* TARGET_SYSCALL_H */
--- a/bsd-user/x86_64/target_syscall.h
+++ b/bsd-user/x86_64/target_syscall.h
@@ -118,4 +118,4 @@ struct target_msqid64_ds {
 #define TARGET_ARCH_GET_FS 0x1003
 #define TARGET_ARCH_GET_GS 0x1004

-#endif  /* TARGET_SYSCALL_H */
+#endif /* TARGET_SYSCALL_H */
--- a/96
+++ b/96
@@ -229,6 +229,7 @@ xfs=""

 vhost_net="no"
 vhost_scsi="no"
+vhost_vsock="no"
 kvm="no"
 rdma=""
 gprof="no"
@@ -305,8 +306,8 @@ archipelago="no"
 gtk=""
 gtkabi=""
 gtk_gl="no"
+tls_priority="NORMAL"
 gnutls=""
-gnutls_hash=""
 gnutls_rnd=""
 nettle=""
 nettle_kdf="no"
@@ -369,6 +370,7 @@ fi

 ar="${AR-${cross_prefix}ar}"
 as="${AS-${cross_prefix}as}"
+ccas="${CCAS-$cc}"
 cpp="${CPP-$cc -E}"
 objcopy="${OBJCOPY-${cross_prefix}objcopy}"
 ld="${LD-${cross_prefix}ld}"
@@ -673,6 +675,7 @@ Haiku)
  kvm="yes"
  vhost_net="yes"
  vhost_scsi="yes"
+  vhost_vsock="yes"
  QEMU_INCLUDES="-I\$(SRC_PATH)/linux-headers -I$(pwd)/linux-headers $QEMU_INCLUDES"
 ;;
 esac
@@ -1016,6 +1019,10 @@ for opt do
  ;;
  --enable-vhost-scsi) vhost_scsi="yes"
  ;;
+  --disable-vhost-vsock) vhost_vsock="no"
+  ;;
+  --enable-vhost-vsock) vhost_vsock="yes"
+  ;;
  --disable-opengl) opengl="no"
  ;;
  --enable-opengl) opengl="yes"
@@ -1097,6 +1104,8 @@ for opt do
  ;;
  --enable-gtk) gtk="yes"
  ;;
+  --tls-priority=*) tls_priority="$optarg"
+  ;;
  --disable-gnutls) gnutls="no"
  ;;
  --enable-gnutls) gnutls="yes"
@@ -1308,6 +1317,7 @@ Advanced options (experts only):
  --disable-blobs          disable installing provided firmware blobs
  --with-vss-sdk=SDK-path  enable Windows VSS support in QEMU Guest Agent
  --with-win-sdk=SDK-path  path to Windows Platform SDK (to build VSS .tlb)
+  --tls-priority           default TLS protocol/cipher priority string

 Optional features, enabled with --enable-FEATURE and
 disabled with --disable-FEATURE, default is enabled if available:
@@ -1448,7 +1458,7 @@ fi
 gcc_flags="-Wold-style-declaration -Wold-style-definition -Wtype-limits"
 gcc_flags="-Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers $gcc_flags"
 gcc_flags="-Wmissing-include-dirs -Wempty-body -Wnested-externs $gcc_flags"
-gcc_flags="-Wendif-labels $gcc_flags"
+gcc_flags="-Wendif-labels -Wno-shift-negative-value $gcc_flags"
 gcc_flags="-Wno-initializer-overrides $gcc_flags"
 gcc_flags="-Wno-string-plus-int $gcc_flags"
 # Note that we do not add -Werror to gcc_flags here, because that would
@@ -1784,7 +1794,9 @@ fi
 ##########################################
 # avx2 optimization requirement check

-cat > $TMPC << EOF
+
+if test "$static" = "no" ; then
+  cat > $TMPC << EOF
 #pragma GCC push_options
 #pragma GCC target("avx2")
 #include <cpuid.h>
@@ -1797,12 +1809,13 @@ static void *bar_ifunc(void) {return (void*) bar;}
 int foo(void *a) __attribute__((ifunc("bar_ifunc")));
 int main(int argc, char *argv[]) { return foo(argv[0]);}
 EOF
-if compile_object "" ; then
-    if has readelf; then
-        if readelf --syms $TMPO 2>/dev/null |grep -q "IFUNC.*foo"; then
-            avx2_opt="yes"
-        fi
-    fi
+  if compile_object "" ; then
+      if has readelf; then
+          if readelf --syms $TMPO 2>/dev/null |grep -q "IFUNC.*foo"; then
+              avx2_opt="yes"
+          fi
+      fi
+  fi
 fi

 #########################################
@@ -2218,13 +2231,6 @@ if test "$gnutls" != "no"; then
 	QEMU_CFLAGS="$QEMU_CFLAGS $gnutls_cflags"
        gnutls="yes"

-	# gnutls_hash_init requires >= 2.9.10
-	if $pkg_config --exists "gnutls >= 2.9.10"; then
-            gnutls_hash="yes"
-	else
-	    gnutls_hash="no"
-	fi
-
 	# gnutls_rnd requires >= 2.11.0
 	if $pkg_config --exists "gnutls >= 2.11.0"; then
 	    gnutls_rnd="yes"
@@ -2258,11 +2264,9 @@ if test "$gnutls" != "no"; then
 	feature_not_found "gnutls" "Install gnutls devel"
    else
        gnutls="no"
-        gnutls_hash="no"
        gnutls_rnd="no"
    fi
 else
-    gnutls_hash="no"
    gnutls_rnd="no"
 fi

@@ -3126,6 +3130,7 @@ else
      if test "$found" = "no"; then
        LIBS="$pthread_lib $LIBS"
      fi
+      PTHREAD_LIB="$pthread_lib"
      break
    fi
  done
@@ -4055,13 +4060,13 @@ fi

 if test "$mingw32" = "yes" -a "$guest_agent" != "no" -a "$vss_win32_sdk" != "no" ; then
  case "$vss_win32_sdk" in
-    "")   vss_win32_include="-I$source_path" ;;
+    "")   vss_win32_include="-isystem $source_path" ;;
    *\ *) # The SDK is installed in "Program Files" by default, but we cannot
          # handle path with spaces. So we symlink the headers into ".sdk/vss".
-          vss_win32_include="-I$source_path/.sdk/vss"
+          vss_win32_include="-isystem $source_path/.sdk/vss"
 	  symlink "$vss_win32_sdk/inc" "$source_path/.sdk/vss/inc"
 	  ;;
-    *)    vss_win32_include="-I$vss_win32_sdk"
+    *)    vss_win32_include="-isystem $vss_win32_sdk"
  esac
  cat > $TMPC << EOF
 #define __MIDL_user_allocate_free_DEFINED__
@@ -4192,6 +4197,18 @@ if compile_prog "" "" ; then
    posix_madvise=yes
 fi

+##########################################
+# check if we have posix_syslog
+
+posix_syslog=no
+cat > $TMPC << EOF
+#include <syslog.h>
+int main(void) { openlog("qemu", LOG_PID, LOG_DAEMON); syslog(LOG_INFO, "configure"); return 0; }
+EOF
+if compile_prog "" "" ; then
+    posix_syslog=yes
+fi
+
 ##########################################
 # check if trace backend exists

@@ -4700,7 +4717,16 @@ roms=
 if test \( "$cpu" = "i386" -o "$cpu" = "x86_64" \) -a \
        "$targetos" != "Darwin" -a "$targetos" != "SunOS" -a \
        "$softmmu" = yes ; then
-  roms="optionrom"
+    # Different host OS linkers have different ideas about the name of the ELF
+    # emulation. Linux and OpenBSD use 'elf_i386'; FreeBSD uses the _fbsd
+    # variant; and Windows uses i386pe.
+    for emu in elf_i386 elf_i386_fbsd i386pe; do
+        if "$ld" -verbose 2>&1 | grep -q "^[[:space:]]*$emu[[:space:]]*$"; then
+            ld_i386_emulation="$emu"
+            roms="optionrom"
+            break
+        fi
+    done
 fi
 if test "$cpu" = "ppc64" -a "$targetos" != "Darwin" ; then
  roms="$roms spapr-rtas"
@@ -4812,8 +4838,8 @@ echo "SDL support       $sdl $(echo_version $sdl $sdlversion)"
 echo "GTK support       $gtk $(echo_version $gtk $gtk_version)"
 echo "GTK GL support    $gtk_gl"
 echo "VTE support       $vte $(echo_version $vte $vteversion)"
+echo "TLS priority      $tls_priority"
 echo "GNUTLS support    $gnutls"
-echo "GNUTLS hash       $gnutls_hash"
 echo "GNUTLS rnd        $gnutls_rnd"
 echo "libgcrypt         $gcrypt"
 echo "libgcrypt kdf     $gcrypt_kdf"
@@ -4863,6 +4889,7 @@ echo "uuid support      $uuid"
 echo "libcap-ng support $cap_ng"
 echo "vhost-net support $vhost_net"
 echo "vhost-scsi support $vhost_scsi"
+echo "vhost-vsock support $vhost_vsock"
 echo "Trace backends    $trace_backends"
 if have_backend "simple"; then
 echo "Trace output file $trace_file-<pid>"
@@ -5176,12 +5203,10 @@ if test "$gtk" = "yes" ; then
    echo "CONFIG_GTK_GL=y" >> $config_host_mak
  fi
 fi
+echo "CONFIG_TLS_PRIORITY=\"$tls_priority\"" >> $config_host_mak
 if test "$gnutls" = "yes" ; then
  echo "CONFIG_GNUTLS=y" >> $config_host_mak
 fi
-if test "$gnutls_hash" = "yes" ; then
-  echo "CONFIG_GNUTLS_HASH=y" >> $config_host_mak
-fi
 if test "$gnutls_rnd" = "yes" ; then
  echo "CONFIG_GNUTLS_RND=y" >> $config_host_mak
 fi
@@ -5246,6 +5271,9 @@ fi
 if test "$vhost_net" = "yes" ; then
  echo "CONFIG_VHOST_NET_USED=y" >> $config_host_mak
 fi
+if test "$vhost_vsock" = "yes" ; then
+  echo "CONFIG_VHOST_VSOCK=y" >> $config_host_mak
+fi
 if test "$blobs" = "yes" ; then
  echo "INSTALL_BLOBS=yes" >> $config_host_mak
 fi
@@ -5462,6 +5490,13 @@ if have_backend "ftrace"; then
    feature_not_found "ftrace(trace backend)" "ftrace requires Linux"
  fi
 fi
+if have_backend "syslog"; then
+  if test "$posix_syslog" = "yes" ; then
+    echo "CONFIG_TRACE_SYSLOG=y" >> $config_host_mak
+  else
+    feature_not_found "syslog(trace backend)" "syslog not available"
+  fi
+fi
 echo "CONFIG_TRACE_FILE=$trace_file" >> $config_host_mak

 if test "$rdma" = "yes" ; then
@@ -5517,6 +5552,7 @@ echo "OBJCC=$objcc" >> $config_host_mak
 echo "AR=$ar" >> $config_host_mak
 echo "ARFLAGS=$ARFLAGS" >> $config_host_mak
 echo "AS=$as" >> $config_host_mak
+echo "CCAS=$ccas" >> $config_host_mak
 echo "CPP=$cpp" >> $config_host_mak
 echo "OBJCOPY=$objcopy" >> $config_host_mak
 echo "LD=$ld" >> $config_host_mak
@@ -5541,8 +5577,10 @@ fi
 echo "LDFLAGS=$LDFLAGS" >> $config_host_mak
 echo "LDFLAGS_NOPIE=$LDFLAGS_NOPIE" >> $config_host_mak
 echo "LD_REL_FLAGS=$LD_REL_FLAGS" >> $config_host_mak
+echo "LD_I386_EMULATION=$ld_i386_emulation" >> $config_host_mak
 echo "LIBS+=$LIBS" >> $config_host_mak
 echo "LIBS_TOOLS+=$libs_tools" >> $config_host_mak
+echo "PTHREAD_LIB=$PTHREAD_LIB" >> $config_host_mak
 echo "EXESUF=$EXESUF" >> $config_host_mak
 echo "DSOSUF=$DSOSUF" >> $config_host_mak
 echo "LDFLAGS_SHARED=$LDFLAGS_SHARED" >> $config_host_mak
@@ -5990,6 +6028,7 @@ for rom in seabios vgabios ; do
    echo "# Automatically generated by configure - do not modify" > $config_mak
    echo "SRC_PATH=$source_path/roms/$rom" >> $config_mak
    echo "AS=$as" >> $config_mak
+    echo "CCAS=$ccas" >> $config_mak
    echo "CC=$cc" >> $config_mak
    echo "BCC=bcc" >> $config_mak
    echo "CPP=$cpp" >> $config_mak
@@ -5998,6 +6037,11 @@ for rom in seabios vgabios ; do
    echo "LD=$ld" >> $config_mak
 done

+# set up tests data directory
+if [ ! -e tests/data ]; then
+    symlink "$source_path/tests/data" tests/data
+fi
+
 # set up qemu-iotests in this build directory
 iotests_common_env="tests/qemu-iotests/common.env"
 iotests_check="tests/qemu-iotests/check"
--- a/contrib/ivshmem-client/ivshmem-client.h
+++ b/contrib/ivshmem-client/ivshmem-client.h
@@ -6,8 +6,8 @@
 * top-level directory.
 */

-#ifndef _IVSHMEM_CLIENT_H_
-#define _IVSHMEM_CLIENT_H_
+#ifndef IVSHMEM_CLIENT_H
+#define IVSHMEM_CLIENT_H

 /**
 * This file provides helper to implement an ivshmem client. It is used
@@ -209,4 +209,4 @@ ivshmem_client_search_peer(IvshmemClient *client, int64_t peer_id);
 */
 void ivshmem_client_dump(const IvshmemClient *client);

-#endif /* _IVSHMEM_CLIENT_H_ */
+#endif /* IVSHMEM_CLIENT_H */
--- a/contrib/ivshmem-server/ivshmem-server.h
+++ b/contrib/ivshmem-server/ivshmem-server.h
@@ -6,8 +6,8 @@
 * top-level directory.
 */

-#ifndef _IVSHMEM_SERVER_H_
-#define _IVSHMEM_SERVER_H_
+#ifndef IVSHMEM_SERVER_H
+#define IVSHMEM_SERVER_H

 /**
 * The ivshmem server is a daemon that creates a unix socket in listen
@@ -163,4 +163,4 @@ ivshmem_server_search_peer(IvshmemServer *server, int64_t peer_id);
 */
 void ivshmem_server_dump(const IvshmemServer *server);

-#endif /* _IVSHMEM_SERVER_H_ */
+#endif /* IVSHMEM_SERVER_H */
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -608,17 +608,16 @@ int cpu_exec(CPUState *cpu)
    init_delay_params(&sc, cpu);

    for(;;) {
-        TranslationBlock *tb, *last_tb;
-        int tb_exit = 0;
-
        /* prepare setjmp context for exception handling */
        if (sigsetjmp(cpu->jmp_env, 0) == 0) {
+            TranslationBlock *tb, *last_tb = NULL;
+            int tb_exit = 0;
+
            /* if an exception is pending, we execute it here */
            if (cpu_handle_exception(cpu, &ret)) {
                break;
            }

-            last_tb = NULL; /* forget the last executed TB after exception */
            cpu->tb_flushed = false; /* reset before first TB lookup */
            for(;;) {
                cpu_handle_interrupt(cpu, &last_tb);
--- a/cputlb.c
+++ b/cputlb.c
@@ -498,6 +498,35 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
    return qemu_ram_addr_from_host_nofail(p);
 }

+/* Return true if ADDR is present in the victim tlb, and has been copied
+   back to the main tlb.  */
+static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
+                           size_t elt_ofs, target_ulong page)
+{
+    size_t vidx;
+    for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
+        CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx];
+        target_ulong cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
+
+        if (cmp == page) {
+            /* Found entry in victim tlb, swap tlb and iotlb.  */
+            CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index];
+            CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index];
+            CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx];
+
+            tmptlb = *tlb; *tlb = *vtlb; *vtlb = tmptlb;
+            tmpio = *io; *io = *vio; *vio = tmpio;
+            return true;
+        }
+    }
+    return false;
+}
+
+/* Macro to call the above, with local variables from the use context.  */
+#define VICTIM_TLB_HIT(TY, ADDR) \
+  victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
+                 (ADDR) & TARGET_PAGE_MASK)
+
 #define MMUSUFFIX _mmu

 #define SHIFT 0
--- a/crypto/Makefile.objs
+++ b/crypto/Makefile.objs
@@ -1,5 +1,8 @@
 crypto-obj-y = init.o
 crypto-obj-y += hash.o
+crypto-obj-$(CONFIG_NETTLE) += hash-nettle.o
+crypto-obj-$(if $(CONFIG_NETTLE),n,$(CONFIG_GCRYPT)) += hash-gcrypt.o
+crypto-obj-$(if $(CONFIG_NETTLE),n,$(if $(CONFIG_GCRYPT),n,y)) += hash-glib.o
 crypto-obj-y += aes.o
 crypto-obj-y += desrfb.o
 crypto-obj-y += cipher.o
@@ -10,6 +13,7 @@ crypto-obj-y += tlssession.o
 crypto-obj-y += secret.o
 crypto-obj-$(CONFIG_GCRYPT) += random-gcrypt.o
 crypto-obj-$(if $(CONFIG_GCRYPT),n,$(CONFIG_GNUTLS_RND)) += random-gnutls.o
+crypto-obj-$(if $(CONFIG_GCRYPT),n,$(if $(CONFIG_GNUTLS_RND),n,y)) += random-platform.o
 crypto-obj-y += pbkdf.o
 crypto-obj-$(CONFIG_NETTLE_KDF) += pbkdf-nettle.o
 crypto-obj-$(if $(CONFIG_NETTLE_KDF),n,$(CONFIG_GCRYPT_KDF)) += pbkdf-gcrypt.o
@@ -26,5 +30,4 @@ crypto-obj-y += block-luks.o
 # Let the userspace emulators avoid linking gnutls/etc
 crypto-aes-obj-y = aes.o

-stub-obj-y += random-stub.o
 stub-obj-y += pbkdf-stub.o
--- a/crypto/block-luks.c
+++ b/crypto/block-luks.c
@@ -201,6 +201,15 @@ QEMU_BUILD_BUG_ON(sizeof(struct QCryptoBlockLUKSHeader) != 592);

 struct QCryptoBlockLUKS {
    QCryptoBlockLUKSHeader header;
+
+    /* Cache parsed versions of what's in header fields,
+     * as we can't rely on QCryptoBlock.cipher being
+     * non-NULL */
+    QCryptoCipherAlgorithm cipher_alg;
+    QCryptoCipherMode cipher_mode;
+    QCryptoIVGenAlgorithm ivgen_alg;
+    QCryptoHashAlgorithm ivgen_hash_alg;
+    QCryptoHashAlgorithm hash_alg;
 };


@@ -776,6 +785,11 @@ qcrypto_block_luks_open(QCryptoBlock *block,
    }

    if (ivalg == QCRYPTO_IVGEN_ALG_ESSIV) {
+        if (!ivhash_name) {
+            ret = -EINVAL;
+            error_setg(errp, "Missing IV generator hash specification");
+            goto fail;
+        }
        ivcipheralg = qcrypto_block_luks_essiv_cipher(cipheralg,
                                                      ivhash,
                                                      &local_err);
@@ -785,6 +799,13 @@ qcrypto_block_luks_open(QCryptoBlock *block,
            goto fail;
        }
    } else {
+        /* Note we parsed the ivhash_name earlier in the cipher_mode
+         * spec string even with plain/plain64 ivgens, but we
+         * will ignore it, since it is irrelevant for these ivgens.
+         * This is for compat with dm-crypt which will silently
+         * ignore hash names with these ivgens rather than report
+         * an error about the invalid usage
+         */
        ivcipheralg = cipheralg;
    }

@@ -835,6 +856,12 @@ qcrypto_block_luks_open(QCryptoBlock *block,
    block->payload_offset = luks->header.payload_offset *
        QCRYPTO_BLOCK_LUKS_SECTOR_SIZE;

+    luks->cipher_alg = cipheralg;
+    luks->cipher_mode = ciphermode;
+    luks->ivgen_alg = ivalg;
+    luks->ivgen_hash_alg = ivhash;
+    luks->hash_alg = hash;
+
    g_free(masterkey);
    g_free(password);

@@ -904,6 +931,15 @@ qcrypto_block_luks_create(QCryptoBlock *block,
    if (!luks_opts.has_hash_alg) {
        luks_opts.hash_alg = QCRYPTO_HASH_ALG_SHA256;
    }
+    if (luks_opts.ivgen_alg == QCRYPTO_IVGEN_ALG_ESSIV) {
+        if (!luks_opts.has_ivgen_hash_alg) {
+            luks_opts.ivgen_hash_alg = QCRYPTO_HASH_ALG_SHA256;
+            luks_opts.has_ivgen_hash_alg = true;
+        }
+    }
+    /* Note we're allowing ivgen_hash_alg to be set even for
+     * non-essiv iv generators that don't need a hash. It will
+     * be silently ignored, for compatibility with dm-crypt */

    if (!options->u.luks.key_secret) {
        error_setg(errp, "Parameter 'key-secret' is required for cipher");
@@ -1250,6 +1286,12 @@ qcrypto_block_luks_create(QCryptoBlock *block,
        goto error;
    }

+    luks->cipher_alg = luks_opts.cipher_alg;
+    luks->cipher_mode = luks_opts.cipher_mode;
+    luks->ivgen_alg = luks_opts.ivgen_alg;
+    luks->ivgen_hash_alg = luks_opts.ivgen_hash_alg;
+    luks->hash_alg = luks_opts.hash_alg;
+
    memset(masterkey, 0, luks->header.key_bytes);
    g_free(masterkey);
    memset(slotkey, 0, luks->header.key_bytes);
@@ -1284,6 +1326,51 @@ qcrypto_block_luks_create(QCryptoBlock *block,
 }


+static int qcrypto_block_luks_get_info(QCryptoBlock *block,
+                                       QCryptoBlockInfo *info,
+                                       Error **errp)
+{
+    QCryptoBlockLUKS *luks = block->opaque;
+    QCryptoBlockInfoLUKSSlot *slot;
+    QCryptoBlockInfoLUKSSlotList *slots = NULL, **prev = &info->u.luks.slots;
+    size_t i;
+
+    info->u.luks.cipher_alg = luks->cipher_alg;
+    info->u.luks.cipher_mode = luks->cipher_mode;
+    info->u.luks.ivgen_alg = luks->ivgen_alg;
+    if (info->u.luks.ivgen_alg == QCRYPTO_IVGEN_ALG_ESSIV) {
+        info->u.luks.has_ivgen_hash_alg = true;
+        info->u.luks.ivgen_hash_alg = luks->ivgen_hash_alg;
+    }
+    info->u.luks.hash_alg = luks->hash_alg;
+    info->u.luks.payload_offset = block->payload_offset;
+    info->u.luks.master_key_iters = luks->header.master_key_iterations;
+    info->u.luks.uuid = g_strndup((const char *)luks->header.uuid,
+                                  sizeof(luks->header.uuid));
+
+    for (i = 0; i < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS; i++) {
+        slots = g_new0(QCryptoBlockInfoLUKSSlotList, 1);
+        *prev = slots;
+
+        slots->value = slot = g_new0(QCryptoBlockInfoLUKSSlot, 1);
+        slot->active = luks->header.key_slots[i].active ==
+            QCRYPTO_BLOCK_LUKS_KEY_SLOT_ENABLED;
+        slot->key_offset = luks->header.key_slots[i].key_offset
+             * QCRYPTO_BLOCK_LUKS_SECTOR_SIZE;
+        if (slot->active) {
+            slot->has_iters = true;
+            slot->iters = luks->header.key_slots[i].iterations;
+            slot->has_stripes = true;
+            slot->stripes = luks->header.key_slots[i].stripes;
+        }
+
+        prev = &slots->next;
+    }
+
+    return 0;
+}
+
+
 static void qcrypto_block_luks_cleanup(QCryptoBlock *block)
 {
    g_free(block->opaque);
@@ -1321,6 +1408,7 @@ qcrypto_block_luks_encrypt(QCryptoBlock *block,
 const QCryptoBlockDriver qcrypto_block_driver_luks = {
    .open = qcrypto_block_luks_open,
    .create = qcrypto_block_luks_create,
+    .get_info = qcrypto_block_luks_get_info,
    .cleanup = qcrypto_block_luks_cleanup,
    .decrypt = qcrypto_block_luks_decrypt,
    .encrypt = qcrypto_block_luks_encrypt,
--- a/crypto/block-luks.h
+++ b/crypto/block-luks.h
@@ -18,11 +18,11 @@
 *
 */

-#ifndef QCRYPTO_BLOCK_LUKS_H__
-#define QCRYPTO_BLOCK_LUKS_H__
+#ifndef QCRYPTO_BLOCK_LUKS_H
+#define QCRYPTO_BLOCK_LUKS_H

 #include "crypto/blockpriv.h"

 extern const QCryptoBlockDriver qcrypto_block_driver_luks;

-#endif /* QCRYPTO_BLOCK_LUKS_H__ */
+#endif /* QCRYPTO_BLOCK_LUKS_H */
--- a/crypto/block-qcow.h
+++ b/crypto/block-qcow.h
@@ -18,11 +18,11 @@
 *
 */

-#ifndef QCRYPTO_BLOCK_QCOW_H__
-#define QCRYPTO_BLOCK_QCOW_H__
+#ifndef QCRYPTO_BLOCK_QCOW_H
+#define QCRYPTO_BLOCK_QCOW_H

 #include "crypto/blockpriv.h"

 extern const QCryptoBlockDriver qcrypto_block_driver_qcow;

-#endif /* QCRYPTO_BLOCK_QCOW_H__ */
+#endif /* QCRYPTO_BLOCK_QCOW_H */
--- a/crypto/block.c
+++ b/crypto/block.c
@@ -59,7 +59,8 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options,

    if (options->format >= G_N_ELEMENTS(qcrypto_block_drivers) ||
        !qcrypto_block_drivers[options->format]) {
-        error_setg(errp, "Unsupported block driver %d", options->format);
+        error_setg(errp, "Unsupported block driver %s",
+                   QCryptoBlockFormat_lookup[options->format]);
        g_free(block);
        return NULL;
    }
@@ -88,7 +89,8 @@ QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options,

    if (options->format >= G_N_ELEMENTS(qcrypto_block_drivers) ||
        !qcrypto_block_drivers[options->format]) {
-        error_setg(errp, "Unsupported block driver %d", options->format);
+        error_setg(errp, "Unsupported block driver %s",
+                   QCryptoBlockFormat_lookup[options->format]);
        g_free(block);
        return NULL;
    }
@@ -105,6 +107,23 @@ QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options,
 }


+QCryptoBlockInfo *qcrypto_block_get_info(QCryptoBlock *block,
+                                         Error **errp)
+{
+    QCryptoBlockInfo *info = g_new0(QCryptoBlockInfo, 1);
+
+    info->format = block->format;
+
+    if (block->driver->get_info &&
+        block->driver->get_info(block, info, errp) < 0) {
+        g_free(info);
+        return NULL;
+    }
+
+    return info;
+}
+
+
 int qcrypto_block_decrypt(QCryptoBlock *block,
                          uint64_t startsector,
                          uint8_t *buf,
--- a/crypto/blockpriv.h
+++ b/crypto/blockpriv.h
@@ -18,8 +18,8 @@
 *
 */

-#ifndef QCRYPTO_BLOCK_PRIV_H__
-#define QCRYPTO_BLOCK_PRIV_H__
+#ifndef QCRYPTO_BLOCKPRIV_H
+#define QCRYPTO_BLOCKPRIV_H

 #include "crypto/block.h"

@@ -53,6 +53,10 @@ struct QCryptoBlockDriver {
                  void *opaque,
                  Error **errp);

+    int (*get_info)(QCryptoBlock *block,
+                    QCryptoBlockInfo *info,
+                    Error **errp);
+
    void (*cleanup)(QCryptoBlock *block);

    int (*encrypt)(QCryptoBlock *block,
@@ -89,4 +93,4 @@ int qcrypto_block_encrypt_helper(QCryptoCipher *cipher,
                                 size_t len,
                                 Error **errp);

-#endif /* QCRYPTO_BLOCK_PRIV_H__ */
+#endif /* QCRYPTO_BLOCKPRIV_H */
--- a/crypto/cipher-builtin.c
+++ b/crypto/cipher-builtin.c
@@ -244,7 +244,8 @@ static int qcrypto_cipher_init_aes(QCryptoCipher *cipher,
    if (cipher->mode != QCRYPTO_CIPHER_MODE_CBC &&
        cipher->mode != QCRYPTO_CIPHER_MODE_ECB &&
        cipher->mode != QCRYPTO_CIPHER_MODE_XTS) {
-        error_setg(errp, "Unsupported cipher mode %d", cipher->mode);
+        error_setg(errp, "Unsupported cipher mode %s",
+                   QCryptoCipherMode_lookup[cipher->mode]);
        return -1;
    }

@@ -376,7 +377,8 @@ static int qcrypto_cipher_init_des_rfb(QCryptoCipher *cipher,
    QCryptoCipherBuiltin *ctxt;

    if (cipher->mode != QCRYPTO_CIPHER_MODE_ECB) {
-        error_setg(errp, "Unsupported cipher mode %d", cipher->mode);
+        error_setg(errp, "Unsupported cipher mode %s",
+                   QCryptoCipherMode_lookup[cipher->mode]);
        return -1;
    }

@@ -442,7 +444,8 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
        break;
    default:
        error_setg(errp,
-                   "Unsupported cipher algorithm %d", cipher->alg);
+                   "Unsupported cipher algorithm %s",
+                   QCryptoCipherAlgorithm_lookup[cipher->alg]);
        goto error;
    }

--- a/crypto/cipher-gcrypt.c
+++ b/crypto/cipher-gcrypt.c
@@ -70,7 +70,8 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
        gcrymode = GCRY_CIPHER_MODE_CBC;
        break;
    default:
-        error_setg(errp, "Unsupported cipher mode %d", mode);
+        error_setg(errp, "Unsupported cipher mode %s",
+                   QCryptoCipherMode_lookup[mode]);
        return NULL;
    }

@@ -120,7 +121,8 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
        break;

    default:
-        error_setg(errp, "Unsupported cipher algorithm %d", alg);
+        error_setg(errp, "Unsupported cipher algorithm %s",
+                   QCryptoCipherAlgorithm_lookup[alg]);
        return NULL;
    }

@@ -192,6 +194,12 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
    }

    if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
+        if (ctx->blocksize != XTS_BLOCK_SIZE) {
+            error_setg(errp,
+                       "Cipher block size %zu must equal XTS block size %d",
+                       ctx->blocksize, XTS_BLOCK_SIZE);
+            goto error;
+        }
        ctx->iv = g_new0(uint8_t, ctx->blocksize);
    }

--- a/crypto/cipher-nettle.c
+++ b/crypto/cipher-nettle.c
@@ -227,7 +227,8 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
    case QCRYPTO_CIPHER_MODE_XTS:
        break;
    default:
-        error_setg(errp, "Unsupported cipher mode %d", mode);
+        error_setg(errp, "Unsupported cipher mode %s",
+                   QCryptoCipherMode_lookup[mode]);
        return NULL;
    }

@@ -357,7 +358,15 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
        break;

    default:
-        error_setg(errp, "Unsupported cipher algorithm %d", alg);
+        error_setg(errp, "Unsupported cipher algorithm %s",
+                   QCryptoCipherAlgorithm_lookup[alg]);
+        goto error;
+    }
+
+    if (mode == QCRYPTO_CIPHER_MODE_XTS &&
+        ctx->blocksize != XTS_BLOCK_SIZE) {
+        error_setg(errp, "Cipher block size %zu must equal XTS block size %d",
+                   ctx->blocksize, XTS_BLOCK_SIZE);
        goto error;
    }

@@ -422,8 +431,8 @@ int qcrypto_cipher_encrypt(QCryptoCipher *cipher,
        break;

    default:
-        error_setg(errp, "Unsupported cipher algorithm %d",
-                   cipher->alg);
+        error_setg(errp, "Unsupported cipher mode %s",
+                   QCryptoCipherMode_lookup[cipher->mode]);
        return -1;
    }
    return 0;
@@ -456,19 +465,14 @@ int qcrypto_cipher_decrypt(QCryptoCipher *cipher,
        break;

    case QCRYPTO_CIPHER_MODE_XTS:
-        if (ctx->blocksize != XTS_BLOCK_SIZE) {
-            error_setg(errp, "Block size must be %d not %zu",
-                       XTS_BLOCK_SIZE, ctx->blocksize);
-            return -1;
-        }
        xts_decrypt(ctx->ctx, ctx->ctx_tweak,
                    ctx->alg_encrypt_wrapper, ctx->alg_decrypt_wrapper,
                    ctx->iv, len, out, in);
        break;

    default:
-        error_setg(errp, "Unsupported cipher algorithm %d",
-                   cipher->alg);
+        error_setg(errp, "Unsupported cipher mode %s",
+                   QCryptoCipherMode_lookup[cipher->mode]);
        return -1;
    }
    return 0;
--- a/crypto/hash-gcrypt.c
+++ b/crypto/hash-gcrypt.c
@@ -0,0 +1,109 @@
+/*
+ * QEMU Crypto hash algorithms
+ *
+ * Copyright (c) 2016 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include <gcrypt.h>
+#include "qapi/error.h"
+#include "crypto/hash.h"
+
+
+static int qcrypto_hash_alg_map[QCRYPTO_HASH_ALG__MAX] = {
+    [QCRYPTO_HASH_ALG_MD5] = GCRY_MD_MD5,
+    [QCRYPTO_HASH_ALG_SHA1] = GCRY_MD_SHA1,
+    [QCRYPTO_HASH_ALG_SHA224] = GCRY_MD_SHA224,
+    [QCRYPTO_HASH_ALG_SHA256] = GCRY_MD_SHA256,
+    [QCRYPTO_HASH_ALG_SHA384] = GCRY_MD_SHA384,
+    [QCRYPTO_HASH_ALG_SHA512] = GCRY_MD_SHA512,
+    [QCRYPTO_HASH_ALG_RIPEMD160] = GCRY_MD_RMD160,
+};
+
+gboolean qcrypto_hash_supports(QCryptoHashAlgorithm alg)
+{
+    if (alg < G_N_ELEMENTS(qcrypto_hash_alg_map) &&
+        qcrypto_hash_alg_map[alg] != GCRY_MD_NONE) {
+        return true;
+    }
+    return false;
+}
+
+
+int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,
+                        const struct iovec *iov,
+                        size_t niov,
+                        uint8_t **result,
+                        size_t *resultlen,
+                        Error **errp)
+{
+    int i, ret;
+    gcry_md_hd_t md;
+    unsigned char *digest;
+
+    if (!qcrypto_hash_supports(alg)) {
+        error_setg(errp,
+                   "Unknown hash algorithm %d",
+                   alg);
+        return -1;
+    }
+
+    ret = gcry_md_open(&md, qcrypto_hash_alg_map[alg], 0);
+
+    if (ret < 0) {
+        error_setg(errp,
+                   "Unable to initialize hash algorithm: %s",
+                   gcry_strerror(ret));
+        return -1;
+    }
+
+    for (i = 0; i < niov; i++) {
+        gcry_md_write(md, iov[i].iov_base, iov[i].iov_len);
+    }
+
+    ret = gcry_md_get_algo_dlen(qcrypto_hash_alg_map[alg]);
+    if (ret <= 0) {
+        error_setg(errp,
+                   "Unable to get hash length: %s",
+                   gcry_strerror(ret));
+        goto error;
+    }
+    if (*resultlen == 0) {
+        *resultlen = ret;
+        *result = g_new0(uint8_t, *resultlen);
+    } else if (*resultlen != ret) {
+        error_setg(errp,
+                   "Result buffer size %zu is smaller than hash %d",
+                   *resultlen, ret);
+        goto error;
+    }
+
+    digest = gcry_md_read(md, 0);
+    if (!digest) {
+        error_setg(errp,
+                   "No digest produced");
+        goto error;
+    }
+    memcpy(*result, digest, *resultlen);
+
+    gcry_md_close(md);
+    return 0;
+
+ error:
+    gcry_md_close(md);
+    return -1;
+}
--- a/crypto/hash-glib.c
+++ b/crypto/hash-glib.c
@@ -0,0 +1,97 @@
+/*
+ * QEMU Crypto hash algorithms
+ *
+ * Copyright (c) 2016 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "crypto/hash.h"
+
+
+static int qcrypto_hash_alg_map[QCRYPTO_HASH_ALG__MAX] = {
+    [QCRYPTO_HASH_ALG_MD5] = G_CHECKSUM_MD5,
+    [QCRYPTO_HASH_ALG_SHA1] = G_CHECKSUM_SHA1,
+    [QCRYPTO_HASH_ALG_SHA224] = -1,
+    [QCRYPTO_HASH_ALG_SHA256] = G_CHECKSUM_SHA256,
+    [QCRYPTO_HASH_ALG_SHA384] = -1,
+#if GLIB_CHECK_VERSION(2, 36, 0)
+    [QCRYPTO_HASH_ALG_SHA512] = G_CHECKSUM_SHA512,
+#else
+    [QCRYPTO_HASH_ALG_SHA512] = -1,
+#endif
+    [QCRYPTO_HASH_ALG_RIPEMD160] = -1,
+};
+
+gboolean qcrypto_hash_supports(QCryptoHashAlgorithm alg)
+{
+    if (alg < G_N_ELEMENTS(qcrypto_hash_alg_map) &&
+        qcrypto_hash_alg_map[alg] != -1) {
+        return true;
+    }
+    return false;
+}
+
+
+int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,
+                        const struct iovec *iov,
+                        size_t niov,
+                        uint8_t **result,
+                        size_t *resultlen,
+                        Error **errp)
+{
+    int i, ret;
+    GChecksum *cs;
+
+    if (!qcrypto_hash_supports(alg)) {
+        error_setg(errp,
+                   "Unknown hash algorithm %d",
+                   alg);
+        return -1;
+    }
+
+    cs = g_checksum_new(qcrypto_hash_alg_map[alg]);
+
+    for (i = 0; i < niov; i++) {
+        g_checksum_update(cs, iov[i].iov_base, iov[i].iov_len);
+    }
+
+    ret = g_checksum_type_get_length(qcrypto_hash_alg_map[alg]);
+    if (ret < 0) {
+        error_setg(errp, "%s",
+                   "Unable to get hash length");
+        goto error;
+    }
+    if (*resultlen == 0) {
+        *resultlen = ret;
+        *result = g_new0(uint8_t, *resultlen);
+    } else if (*resultlen != ret) {
+        error_setg(errp,
+                   "Result buffer size %zu is smaller than hash %d",
+                   *resultlen, ret);
+        goto error;
+    }
+
+    g_checksum_get_digest(cs, *result, resultlen);
+
+    g_checksum_free(cs);
+    return 0;
+
+ error:
+    g_checksum_free(cs);
+    return -1;
+}
--- a/crypto/hash-nettle.c
+++ b/crypto/hash-nettle.c
@@ -0,0 +1,154 @@
+/*
+ * QEMU Crypto hash algorithms
+ *
+ * Copyright (c) 2016 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "crypto/hash.h"
+#include <nettle/md5.h>
+#include <nettle/sha.h>
+#include <nettle/ripemd160.h>
+
+typedef void (*qcrypto_nettle_init)(void *ctx);
+typedef void (*qcrypto_nettle_write)(void *ctx,
+                                     unsigned int len,
+                                     const uint8_t *buf);
+typedef void (*qcrypto_nettle_result)(void *ctx,
+                                      unsigned int len,
+                                      uint8_t *buf);
+
+union qcrypto_hash_ctx {
+    struct md5_ctx md5;
+    struct sha1_ctx sha1;
+    struct sha224_ctx sha224;
+    struct sha256_ctx sha256;
+    struct sha384_ctx sha384;
+    struct sha512_ctx sha512;
+    struct ripemd160_ctx ripemd160;
+};
+
+struct qcrypto_hash_alg {
+    qcrypto_nettle_init init;
+    qcrypto_nettle_write write;
+    qcrypto_nettle_result result;
+    size_t len;
+} qcrypto_hash_alg_map[] = {
+    [QCRYPTO_HASH_ALG_MD5] = {
+        .init = (qcrypto_nettle_init)md5_init,
+        .write = (qcrypto_nettle_write)md5_update,
+        .result = (qcrypto_nettle_result)md5_digest,
+        .len = MD5_DIGEST_SIZE,
+    },
+    [QCRYPTO_HASH_ALG_SHA1] = {
+        .init = (qcrypto_nettle_init)sha1_init,
+        .write = (qcrypto_nettle_write)sha1_update,
+        .result = (qcrypto_nettle_result)sha1_digest,
+        .len = SHA1_DIGEST_SIZE,
+    },
+    [QCRYPTO_HASH_ALG_SHA224] = {
+        .init = (qcrypto_nettle_init)sha224_init,
+        .write = (qcrypto_nettle_write)sha224_update,
+        .result = (qcrypto_nettle_result)sha224_digest,
+        .len = SHA224_DIGEST_SIZE,
+    },
+    [QCRYPTO_HASH_ALG_SHA256] = {
+        .init = (qcrypto_nettle_init)sha256_init,
+        .write = (qcrypto_nettle_write)sha256_update,
+        .result = (qcrypto_nettle_result)sha256_digest,
+        .len = SHA256_DIGEST_SIZE,
+    },
+    [QCRYPTO_HASH_ALG_SHA384] = {
+        .init = (qcrypto_nettle_init)sha384_init,
+        .write = (qcrypto_nettle_write)sha384_update,
+        .result = (qcrypto_nettle_result)sha384_digest,
+        .len = SHA384_DIGEST_SIZE,
+    },
+    [QCRYPTO_HASH_ALG_SHA512] = {
+        .init = (qcrypto_nettle_init)sha512_init,
+        .write = (qcrypto_nettle_write)sha512_update,
+        .result = (qcrypto_nettle_result)sha512_digest,
+        .len = SHA512_DIGEST_SIZE,
+    },
+    [QCRYPTO_HASH_ALG_RIPEMD160] = {
+        .init = (qcrypto_nettle_init)ripemd160_init,
+        .write = (qcrypto_nettle_write)ripemd160_update,
+        .result = (qcrypto_nettle_result)ripemd160_digest,
+        .len = RIPEMD160_DIGEST_SIZE,
+    },
+};
+
+gboolean qcrypto_hash_supports(QCryptoHashAlgorithm alg)
+{
+    if (alg < G_N_ELEMENTS(qcrypto_hash_alg_map) &&
+        qcrypto_hash_alg_map[alg].init != NULL) {
+        return true;
+    }
+    return false;
+}
+
+
+int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,
+                        const struct iovec *iov,
+                        size_t niov,
+                        uint8_t **result,
+                        size_t *resultlen,
+                        Error **errp)
+{
+    int i;
+    union qcrypto_hash_ctx ctx;
+
+    if (!qcrypto_hash_supports(alg)) {
+        error_setg(errp,
+                   "Unknown hash algorithm %d",
+                   alg);
+        return -1;
+    }
+
+    qcrypto_hash_alg_map[alg].init(&ctx);
+
+    for (i = 0; i < niov; i++) {
+        /* Some versions of nettle have functions
+         * declared with 'int' instead of 'size_t'
+         * so to be safe avoid writing more than
+         * UINT_MAX bytes at a time
+         */
+        size_t len = iov[i].iov_len;
+        uint8_t *base = iov[i].iov_base;
+        while (len) {
+            size_t shortlen = MIN(len, UINT_MAX);
+            qcrypto_hash_alg_map[alg].write(&ctx, len, base);
+            len -= shortlen;
+            base += len;
+        }
+    }
+
+    if (*resultlen == 0) {
+        *resultlen = qcrypto_hash_alg_map[alg].len;
+        *result = g_new0(uint8_t, *resultlen);
+    } else if (*resultlen != qcrypto_hash_alg_map[alg].len) {
+        error_setg(errp,
+                   "Result buffer size %zu is smaller than hash %zu",
+                   *resultlen, qcrypto_hash_alg_map[alg].len);
+        return -1;
+    }
+
+    qcrypto_hash_alg_map[alg].result(&ctx, *resultlen, *result);
+
+    return 0;
+}
--- a/crypto/hash.c
+++ b/crypto/hash.c
@@ -22,16 +22,14 @@
 #include "qapi/error.h"
 #include "crypto/hash.h"

-#ifdef CONFIG_GNUTLS_HASH
-#include <gnutls/gnutls.h>
-#include <gnutls/crypto.h>
-#endif
-
-
 static size_t qcrypto_hash_alg_size[QCRYPTO_HASH_ALG__MAX] = {
    [QCRYPTO_HASH_ALG_MD5] = 16,
    [QCRYPTO_HASH_ALG_SHA1] = 20,
+    [QCRYPTO_HASH_ALG_SHA224] = 28,
    [QCRYPTO_HASH_ALG_SHA256] = 32,
+    [QCRYPTO_HASH_ALG_SHA384] = 48,
+    [QCRYPTO_HASH_ALG_SHA512] = 64,
+    [QCRYPTO_HASH_ALG_RIPEMD160] = 20,
 };

 size_t qcrypto_hash_digest_len(QCryptoHashAlgorithm alg)
@@ -41,105 +39,6 @@ size_t qcrypto_hash_digest_len(QCryptoHashAlgorithm alg)
 }


-#ifdef CONFIG_GNUTLS_HASH
-static int qcrypto_hash_alg_map[QCRYPTO_HASH_ALG__MAX] = {
-    [QCRYPTO_HASH_ALG_MD5] = GNUTLS_DIG_MD5,
-    [QCRYPTO_HASH_ALG_SHA1] = GNUTLS_DIG_SHA1,
-    [QCRYPTO_HASH_ALG_SHA256] = GNUTLS_DIG_SHA256,
-};
-
-gboolean qcrypto_hash_supports(QCryptoHashAlgorithm alg)
-{
-    if (alg < G_N_ELEMENTS(qcrypto_hash_alg_map)) {
-        return true;
-    }
-    return false;
-}
-
-
-int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,
-                        const struct iovec *iov,
-                        size_t niov,
-                        uint8_t **result,
-                        size_t *resultlen,
-                        Error **errp)
-{
-    int i, ret;
-    gnutls_hash_hd_t dig;
-
-    if (alg >= G_N_ELEMENTS(qcrypto_hash_alg_map)) {
-        error_setg(errp,
-                   "Unknown hash algorithm %d",
-                   alg);
-        return -1;
-    }
-
-    ret = gnutls_hash_init(&dig, qcrypto_hash_alg_map[alg]);
-
-    if (ret < 0) {
-        error_setg(errp,
-                   "Unable to initialize hash algorithm: %s",
-                   gnutls_strerror(ret));
-        return -1;
-    }
-
-    for (i = 0; i < niov; i++) {
-        ret = gnutls_hash(dig, iov[i].iov_base, iov[i].iov_len);
-        if (ret < 0) {
-            error_setg(errp,
-                       "Unable process hash data: %s",
-                       gnutls_strerror(ret));
-            goto error;
-        }
-    }
-
-    ret = gnutls_hash_get_len(qcrypto_hash_alg_map[alg]);
-    if (ret <= 0) {
-        error_setg(errp,
-                   "Unable to get hash length: %s",
-                   gnutls_strerror(ret));
-        goto error;
-    }
-    if (*resultlen == 0) {
-        *resultlen = ret;
-        *result = g_new0(uint8_t, *resultlen);
-    } else if (*resultlen != ret) {
-        error_setg(errp,
-                   "Result buffer size %zu is smaller than hash %d",
-                   *resultlen, ret);
-        goto error;
-    }
-
-    gnutls_hash_deinit(dig, *result);
-    return 0;
-
- error:
-    gnutls_hash_deinit(dig, NULL);
-    return -1;
-}
-
-#else /* ! CONFIG_GNUTLS_HASH */
-
-gboolean qcrypto_hash_supports(QCryptoHashAlgorithm alg G_GNUC_UNUSED)
-{
-    return false;
-}
-
-int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,
-                        const struct iovec *iov G_GNUC_UNUSED,
-                        size_t niov G_GNUC_UNUSED,
-                        uint8_t **result G_GNUC_UNUSED,
-                        size_t *resultlen G_GNUC_UNUSED,
-                        Error **errp)
-{
-    error_setg(errp,
-               "Hash algorithm %d not supported without GNUTLS",
-               alg);
-    return -1;
-}
-
-#endif /* ! CONFIG_GNUTLS_HASH */
-
 int qcrypto_hash_bytes(QCryptoHashAlgorithm alg,
                       const char *buf,
                       size_t len,
--- a/crypto/init.c
+++ b/crypto/init.c
@@ -59,8 +59,7 @@

 #if (defined(CONFIG_GCRYPT) &&                  \
     (!defined(CONFIG_GNUTLS) ||                \
-      !defined(GNUTLS_VERSION_NUMBER) ||       \
-      (GNUTLS_VERSION_NUMBER < 0x020c00)) &&    \
+     (LIBGNUTLS_VERSION_NUMBER < 0x020c00)) &&    \
     (!defined(GCRYPT_VERSION_NUMBER) ||        \
      (GCRYPT_VERSION_NUMBER < 0x010600)))
 #define QCRYPTO_INIT_GCRYPT_THREADS
--- a/crypto/ivgenpriv.h
+++ b/crypto/ivgenpriv.h
@@ -18,8 +18,8 @@
 *
 */

-#ifndef QCRYPTO_IVGEN_PRIV_H__
-#define QCRYPTO_IVGEN_PRIV_H__
+#ifndef QCRYPTO_IVGENPRIV_H
+#define QCRYPTO_IVGENPRIV_H

 #include "crypto/ivgen.h"

@@ -46,4 +46,4 @@ struct QCryptoIVGen {
 };


-#endif /* QCRYPTO_IVGEN_PRIV_H__ */
+#endif /* QCRYPTO_IVGENPRIV_H */
--- a/crypto/pbkdf-gcrypt.c
+++ b/crypto/pbkdf-gcrypt.c
@@ -19,9 +19,9 @@
 */

 #include "qemu/osdep.h"
+#include <gcrypt.h>
 #include "qapi/error.h"
 #include "crypto/pbkdf.h"
-#include "gcrypt.h"

 bool qcrypto_pbkdf2_supports(QCryptoHashAlgorithm hash)
 {
--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .6.50
 .7.50