ui: egl-headless requires dmabuf support

Reported-by: Thomas Huth <thuth@redhat.com> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com> Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> Message-id: 20170517122744.3541-1-kraxel@redhat.com
Merge remote-tracking branch 'dgilbert/tags/pull-hmp-20170517' into staging
2017-05-19 10:46:00 +02:00 · 2017-05-18 13:36:15 +01:00 · 2017-05-18 10:05:52 +01:00 · 2017-05-18 10:01:08 +01:00 · 2017-05-18 00:35:15 +03:00 · 2017-05-18 00:35:15 +03:00
691 changed files with 22798 additions and 8759 deletions
--- a/.gitmodules
+++ b/.gitmodules
@@ -34,3 +34,6 @@
 [submodule "roms/skiboot"]
 	path = roms/skiboot
 	url = git://git.qemu.org/skiboot.git
+[submodule "roms/QemuMacDrivers"]
+	path = roms/QemuMacDrivers
+	url = git://git.qemu.org/QemuMacDrivers.git
--- a/.travis.yml
+++ b/.travis.yml
@@ -86,9 +86,6 @@ matrix:
    - env: CONFIG="--enable-trace-backends=ust"
           TEST_CMD=""
      compiler: gcc
-    - env: CONFIG="--with-coroutine=gthread"
-           TEST_CMD=""
-      compiler: gcc
    - env: CONFIG=""
      os: osx
      compiler: clang
@@ -191,7 +188,7 @@ matrix:
      compiler: none
      env:
        - COMPILER_NAME=gcc CXX=g++-5 CC=gcc-5
-        - CONFIG="--cc=gcc-5 --cxx=g++-5 --disable-pie --disable-linux-user --with-coroutine=gthread"
+        - CONFIG="--cc=gcc-5 --cxx=g++-5 --disable-pie --disable-linux-user"
        - TEST_CMD=""
      before_script:
        - ./configure ${CONFIG} --extra-cflags="-g3 -O0 -fsanitize=thread -fuse-ld=gold" || cat config.log
--- a/44
+++ b/44
@@ -12,6 +12,8 @@ consult qemu-devel and not any specific individual privately.
 Descriptions of section entries:

 	M: Mail patches to: FullName <address@domain>
+	R: Designated reviewer: FullName <address@domain>
+	   These reviewers should be CCed on patches.
 	L: Mailing list that is relevant to this area
 	W: Web-page with status/info
 	Q: Patchwork web based patch tracking system site
@@ -196,8 +198,8 @@ F: hw/nios2/
 F: disas/nios2.c

 OpenRISC
-M: Jia Liu <proljc@gmail.com>
-S: Maintained
+M: Stafford Horne <shorne@gmail.com>
+S: Odd Fixes
 F: target/openrisc/
 F: hw/openrisc/
 F: tests/tcg/openrisc/
@@ -327,6 +329,7 @@ L: xen-devel@lists.xenproject.org
 S: Supported
 F: xen-*
 F: */xen*
+F: hw/9pfs/xen-9p-backend.c
 F: hw/char/xen_console.c
 F: hw/display/xenfb.c
 F: hw/net/xen_nic.c
@@ -351,6 +354,12 @@ L: qemu-devel@nongnu.org
 S: Maintained
 F: *posix*

+NETBSD
+L: qemu-devel@nongnu.org
+M: Kamil Rytarowski <kamil@netbsd.org>
+S: Maintained
+K: (?i)NetBSD
+
 W32, W64
 L: qemu-devel@nongnu.org
 M: Stefan Weil <sw@weilnetz.de>
@@ -645,7 +654,6 @@ F: hw/ppc/ppc440_bamboo.c

 e500
 M: Alexander Graf <agraf@suse.de>
-M: Scott Wood <scottwood@freescale.com>
 L: qemu-ppc@nongnu.org
 S: Supported
 F: hw/ppc/e500.[hc]
@@ -656,7 +664,6 @@ F: pc-bios/u-boot.e500

 mpc8544ds
 M: Alexander Graf <agraf@suse.de>
-M: Scott Wood <scottwood@freescale.com>
 L: qemu-ppc@nongnu.org
 S: Supported
 F: hw/ppc/mpc8544ds.c
@@ -933,7 +940,6 @@ F: include/hw/ppc/ppc4xx.h

 ppce500
 M: Alexander Graf <agraf@suse.de>
-M: Scott Wood <scottwood@freescale.com>
 L: qemu-ppc@nongnu.org
 S: Supported
 F: hw/ppc/e500*
@@ -1170,6 +1176,7 @@ F: include/block/
 F: qemu-img*
 F: qemu-io*
 F: tests/qemu-iotests/
+F: util/qemu-progress.c
 T: git git://repo.or.cz/qemu/kevin.git block

 Block I/O path
@@ -1177,8 +1184,8 @@ M: Stefan Hajnoczi <stefanha@redhat.com>
 M: Fam Zheng <famz@redhat.com>
 L: qemu-block@nongnu.org
 S: Supported
-F: async.c
-F: aio-*.c
+F: util/async.c
+F: util/aio-*.c
 F: block/io.c
 F: migration/block*
 F: include/block/aio.h
@@ -1307,8 +1314,8 @@ Main loop
 M: Paolo Bonzini <pbonzini@redhat.com>
 S: Maintained
 F: cpus.c
-F: main-loop.c
-F: qemu-timer.c
+F: util/main-loop.c
+F: util/qemu-timer.c
 F: vl.c

 Human Monitor (HMP)
@@ -1395,6 +1402,7 @@ S: Supported
 F: qobject/
 F: include/qapi/qmp/
 X: include/qapi/qmp/dispatch.h
+F: scripts/coccinelle/qobject.cocci
 F: tests/check-qdict.c
 F: tests/check-qfloat.c
 F: tests/check-qint.c
@@ -1486,6 +1494,7 @@ S: Maintained
 F: crypto/
 F: include/crypto/
 F: tests/test-crypto-*
+F: qemu.sasl

 Coroutines
 M: Stefan Hajnoczi <stefanha@redhat.com>
@@ -1548,6 +1557,18 @@ F: net/colo*
 F: net/filter-rewriter.c
 F: net/filter-mirror.c

+Record/replay
+M: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
+R: Paolo Bonzini <pbonzini@redhat.com>
+W: http://wiki.qemu.org/Features/record-replay
+S: Supported
+F: replay/*
+F: block/blkreplay.c
+F: net/filter-replay.c
+F: include/sysemu/replay.h
+F: docs/replay.txt
+F: stubs/replay.c
+
 Usermode Emulation
 ------------------
 Overall
@@ -1564,6 +1585,7 @@ F: default-configs/*-bsd-user.mak

 Linux user
 M: Riku Voipio <riku.voipio@iki.fi>
+R: Laurent Vivier <laurent@vivier.eu>
 S: Maintained
 F: linux-user/
 F: default-configs/*-linux-user.mak
@@ -1817,8 +1839,8 @@ S: Supported
 F: tests/image-fuzzer/

 Replication
-M: Wen Congyang <wency@cn.fujitsu.com>
-M: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
+M: Wen Congyang <wencongyang2@huawei.com>
+M: Xie Changlong <xiechanglong.d@gmail.com>
 S: Supported
 F: replication*
 F: block/replication.c
--- a/9
+++ b/9
@@ -346,7 +346,7 @@ dtc/%:
 	mkdir -p $@

 $(SUBDIR_RULES): libqemuutil.a libqemustub.a $(common-obj-y) $(chardev-obj-y) \
-	$(qom-obj-y) $(crypto-aes-obj-$(CONFIG_USER_ONLY)) $(trace-obj-y)
+	$(qom-obj-y) $(crypto-aes-obj-$(CONFIG_USER_ONLY))

 ROMSUBDIR_RULES=$(patsubst %,romsubdir-%, $(ROMS))
 # Only keep -O and -g cflags
@@ -366,11 +366,11 @@ Makefile: $(version-obj-y)
 # Build libraries

 libqemustub.a: $(stub-obj-y)
-libqemuutil.a: $(util-obj-y)
+libqemuutil.a: $(util-obj-y) $(trace-obj-y)

 ######################################################################

-COMMON_LDADDS = $(trace-obj-y) libqemuutil.a libqemustub.a
+COMMON_LDADDS = libqemuutil.a libqemustub.a

 qemu-img.o: qemu-img-cmds.h

@@ -552,7 +552,8 @@ multiboot.bin linuxboot.bin linuxboot_dma.bin kvmvapic.bin \
 s390-ccw.img \
 spapr-rtas.bin slof.bin skiboot.lid \
 palcode-clipper \
-u-boot.e500
+u-boot.e500 \
+qemu_vga.ndrv
 else
 BLOBS=
 endif
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -49,7 +49,6 @@ common-obj-$(CONFIG_POSIX) += os-posix.o
 common-obj-$(CONFIG_LINUX) += fsdev/

 common-obj-y += migration/
-common-obj-y += page_cache.o #aio.o

 common-obj-$(CONFIG_SPICE) += spice-qemu-char.o

--- a/Makefile.target
+++ b/Makefile.target
@@ -149,12 +149,6 @@ obj-y += dump.o
 obj-y += migration/ram.o migration/savevm.o
 LIBS := $(libs_softmmu) $(LIBS)

-# xen support
-obj-$(CONFIG_XEN) += xen-common.o
-obj-$(CONFIG_XEN_I386) += xen-hvm.o xen-mapcache.o
-obj-$(call lnot,$(CONFIG_XEN)) += xen-common-stub.o
-obj-$(call lnot,$(CONFIG_XEN_I386)) += xen-hvm-stub.o
-
 # Hardware support
 ifeq ($(TARGET_NAME), sparc64)
 obj-y += hw/sparc64/
@@ -188,8 +182,7 @@ dummy := $(call unnest-vars,.., \
               qom-obj-y \
               io-obj-y \
               common-obj-y \
-               common-obj-m \
-               trace-obj-y)
+               common-obj-m)
 target-obj-y := $(target-obj-y-save)
 all-obj-y += $(common-obj-y)
 all-obj-y += $(target-obj-y)
@@ -201,7 +194,7 @@ all-obj-$(CONFIG_SOFTMMU) += $(io-obj-y)

 $(QEMU_PROG_BUILD): config-devices.mak

-COMMON_LDADDS = $(trace-obj-y) ../libqemuutil.a ../libqemustub.a
+COMMON_LDADDS = ../libqemuutil.a ../libqemustub.a

 # build either PROG or PROGW
 $(QEMU_PROG_BUILD): $(all-obj-y) $(COMMON_LDADDS)
--- a/2
+++ b/2
@@ -1 +1 @@
-2.9.0
+2.9.50
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -2028,6 +2028,8 @@ void AUD_del_capture (CaptureVoiceOut *cap, void *cb_opaque)
                    sw = sw1;
                }
                QLIST_REMOVE (cap, entries);
+                g_free (cap->hw.mix_buf);
+                g_free (cap->buf);
                g_free (cap);
            }
            return;
--- a/audio/wavcapture.c
+++ b/audio/wavcapture.c
@@ -88,6 +88,7 @@ static void wav_capture_destroy (void *opaque)
    WAVState *wav = opaque;

    AUD_del_capture (wav->cap, wav);
+    g_free (wav);
 }

 static void wav_capture_info (void *opaque)
--- a/backends/hostmem-file.c
+++ b/backends/hostmem-file.c
@@ -51,7 +51,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
 #ifndef CONFIG_LINUX
    error_setg(errp, "-mem-path not supported on this host");
 #else
-    if (!memory_region_size(&backend->mr)) {
+    if (!host_memory_backend_mr_inited(backend)) {
        gchar *path;
        backend->force_prealloc = mem_prealloc;
        path = object_get_canonical_path(OBJECT(backend));
@@ -76,7 +76,7 @@ static void set_mem_path(Object *o, const char *str, Error **errp)
    HostMemoryBackend *backend = MEMORY_BACKEND(o);
    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);

-    if (memory_region_size(&backend->mr)) {
+    if (host_memory_backend_mr_inited(backend)) {
        error_setg(errp, "cannot change property value");
        return;
    }
@@ -96,7 +96,7 @@ static void file_memory_backend_set_share(Object *o, bool value, Error **errp)
    HostMemoryBackend *backend = MEMORY_BACKEND(o);
    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);

-    if (memory_region_size(&backend->mr)) {
+    if (host_memory_backend_mr_inited(backend)) {
        error_setg(errp, "cannot change property value");
        return;
    }
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -45,7 +45,7 @@ host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
    Error *local_err = NULL;
    uint64_t value;

-    if (memory_region_size(&backend->mr)) {
+    if (host_memory_backend_mr_inited(backend)) {
        error_setg(&local_err, "cannot change property value");
        goto out;
    }
@@ -146,7 +146,7 @@ static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
 {
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

-    if (!memory_region_size(&backend->mr)) {
+    if (!host_memory_backend_mr_inited(backend)) {
        backend->merge = value;
        return;
    }
@@ -172,7 +172,7 @@ static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
 {
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

-    if (!memory_region_size(&backend->mr)) {
+    if (!host_memory_backend_mr_inited(backend)) {
        backend->dump = value;
        return;
    }
@@ -208,7 +208,7 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value,
        }
    }

-    if (!memory_region_size(&backend->mr)) {
+    if (!host_memory_backend_mr_inited(backend)) {
        backend->prealloc = value;
        return;
    }
@@ -237,10 +237,19 @@ static void host_memory_backend_init(Object *obj)
    backend->prealloc = mem_prealloc;
 }

+bool host_memory_backend_mr_inited(HostMemoryBackend *backend)
+{
+    /*
+     * NOTE: We forbid zero-length memory backend, so here zero means
+     * "we haven't inited the backend memory region yet".
+     */
+    return memory_region_size(&backend->mr) != 0;
+}
+
 MemoryRegion *
 host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp)
 {
-    return memory_region_size(&backend->mr) ? &backend->mr : NULL;
+    return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL;
 }

 void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
--- a/block.c
+++ b/block.c
@@ -192,6 +192,52 @@ void path_combine(char *dest, int dest_size,
    }
 }

+/* Returns whether the image file is opened as read-only. Note that this can
+ * return false and writing to the image file is still not possible because the
+ * image is inactivated. */
+bool bdrv_is_read_only(BlockDriverState *bs)
+{
+    return bs->read_only;
+}
+
+/* Returns whether the image file can be written to right now */
+bool bdrv_is_writable(BlockDriverState *bs)
+{
+    return !bdrv_is_read_only(bs) && !(bs->open_flags & BDRV_O_INACTIVE);
+}
+
+int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, Error **errp)
+{
+    /* Do not set read_only if copy_on_read is enabled */
+    if (bs->copy_on_read && read_only) {
+        error_setg(errp, "Can't set node '%s' to r/o with copy-on-read enabled",
+                   bdrv_get_device_or_node_name(bs));
+        return -EINVAL;
+    }
+
+    /* Do not clear read_only if it is prohibited */
+    if (!read_only && !(bs->open_flags & BDRV_O_ALLOW_RDWR)) {
+        error_setg(errp, "Node '%s' is read only",
+                   bdrv_get_device_or_node_name(bs));
+        return -EPERM;
+    }
+
+    return 0;
+}
+
+int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp)
+{
+    int ret = 0;
+
+    ret = bdrv_can_set_read_only(bs, read_only, errp);
+    if (ret < 0) {
+        return ret;
+    }
+
+    bs->read_only = read_only;
+    return 0;
+}
+
 void bdrv_get_full_backing_filename_from_filename(const char *backed,
                                                  const char *backing,
                                                  char *dest, size_t sz,
@@ -725,6 +771,13 @@ static void bdrv_child_cb_drained_end(BdrvChild *child)
    bdrv_drained_end(bs);
 }

+static int bdrv_child_cb_inactivate(BdrvChild *child)
+{
+    BlockDriverState *bs = child->opaque;
+    assert(bs->open_flags & BDRV_O_INACTIVE);
+    return 0;
+}
+
 /*
 * Returns the options and flags that a temporary snapshot should get, based on
 * the originally requested flags (the originally requested image will have
@@ -763,6 +816,7 @@ static void bdrv_inherited_options(int *child_flags, QDict *child_options,
     * the parent. */
    qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
    qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
+    qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE);

    /* Inherit the read-only option from the parent if it's not set */
    qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY);
@@ -784,6 +838,7 @@ const BdrvChildRole child_file = {
    .inherit_options = bdrv_inherited_options,
    .drained_begin   = bdrv_child_cb_drained_begin,
    .drained_end     = bdrv_child_cb_drained_end,
+    .inactivate      = bdrv_child_cb_inactivate,
 };

 /*
@@ -805,6 +860,7 @@ const BdrvChildRole child_format = {
    .inherit_options = bdrv_inherited_fmt_options,
    .drained_begin   = bdrv_child_cb_drained_begin,
    .drained_end     = bdrv_child_cb_drained_end,
+    .inactivate      = bdrv_child_cb_inactivate,
 };

 static void bdrv_backing_attach(BdrvChild *c)
@@ -871,6 +927,7 @@ static void bdrv_backing_options(int *child_flags, QDict *child_options,
     * which is only applied on the top level (BlockBackend) */
    qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
    qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
+    qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE);

    /* backing files always opened read-only */
    qdict_set_default_str(child_options, BDRV_OPT_READ_ONLY, "on");
@@ -889,6 +946,7 @@ const BdrvChildRole child_backing = {
    .inherit_options = bdrv_backing_options,
    .drained_begin   = bdrv_child_cb_drained_begin,
    .drained_end     = bdrv_child_cb_drained_end,
+    .inactivate      = bdrv_child_cb_inactivate,
 };

 static int bdrv_open_flags(BlockDriverState *bs, int flags)
@@ -937,16 +995,14 @@ static void update_flags_from_options(int *flags, QemuOpts *opts)
 static void update_options_from_flags(QDict *options, int flags)
 {
    if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
-        qdict_put(options, BDRV_OPT_CACHE_DIRECT,
-                  qbool_from_bool(flags & BDRV_O_NOCACHE));
+        qdict_put_bool(options, BDRV_OPT_CACHE_DIRECT, flags & BDRV_O_NOCACHE);
    }
    if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
-        qdict_put(options, BDRV_OPT_CACHE_NO_FLUSH,
-                  qbool_from_bool(flags & BDRV_O_NO_FLUSH));
+        qdict_put_bool(options, BDRV_OPT_CACHE_NO_FLUSH,
+                       flags & BDRV_O_NO_FLUSH);
    }
    if (!qdict_haskey(options, BDRV_OPT_READ_ONLY)) {
-        qdict_put(options, BDRV_OPT_READ_ONLY,
-                  qbool_from_bool(!(flags & BDRV_O_RDWR)));
+        qdict_put_bool(options, BDRV_OPT_READ_ONLY, !(flags & BDRV_O_RDWR));
    }
 }

@@ -1115,6 +1171,11 @@ QemuOptsList bdrv_runtime_opts = {
            .type = QEMU_OPT_STRING,
            .help = "discard operation (ignore/off, unmap/on)",
        },
+        {
+            .name = BDRV_OPT_FORCE_SHARE,
+            .type = QEMU_OPT_BOOL,
+            .help = "always accept other writers (default: off)",
+        },
        { /* end of list */ }
    },
 };
@@ -1154,6 +1215,16 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
    drv = bdrv_find_format(driver_name);
    assert(drv != NULL);

+    bs->force_share = qemu_opt_get_bool(opts, BDRV_OPT_FORCE_SHARE, false);
+
+    if (bs->force_share && (bs->open_flags & BDRV_O_RDWR)) {
+        error_setg(errp,
+                   BDRV_OPT_FORCE_SHARE
+                   "=on can only be used with read-only images");
+        ret = -EINVAL;
+        goto fail_opts;
+    }
+
    if (file != NULL) {
        filename = blk_bs(file)->filename;
    } else {
@@ -1167,7 +1238,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
        filename = qdict_get_try_str(options, "filename");
    }

-    if (drv->bdrv_needs_filename && !filename) {
+    if (drv->bdrv_needs_filename && (!filename || !filename[0])) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        ret = -EINVAL;
@@ -1362,7 +1433,7 @@ static int bdrv_fill_options(QDict **options, const char *filename,
    /* Fetch the file name from the options QDict if necessary */
    if (protocol && filename) {
        if (!qdict_haskey(*options, "filename")) {
-            qdict_put(*options, "filename", qstring_from_str(filename));
+            qdict_put_str(*options, "filename", filename);
            parse_filename = true;
        } else {
            error_setg(errp, "Can't specify 'file' and 'filename' options at "
@@ -1383,7 +1454,7 @@ static int bdrv_fill_options(QDict **options, const char *filename,
            }

            drvname = drv->format_name;
-            qdict_put(*options, "driver", qstring_from_str(drvname));
+            qdict_put_str(*options, "driver", drvname);
        } else {
            error_setg(errp, "Must specify either driver or file");
            return -EINVAL;
@@ -1413,6 +1484,22 @@ static int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
 static void bdrv_child_abort_perm_update(BdrvChild *c);
 static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared);

+static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs,
+                            BdrvChild *c,
+                            const BdrvChildRole *role,
+                            uint64_t parent_perm, uint64_t parent_shared,
+                            uint64_t *nperm, uint64_t *nshared)
+{
+    if (bs->drv && bs->drv->bdrv_child_perm) {
+        bs->drv->bdrv_child_perm(bs, c, role,
+                                 parent_perm, parent_shared,
+                                 nperm, nshared);
+    }
+    if (child_bs && child_bs->force_share) {
+        *nshared = BLK_PERM_ALL;
+    }
+}
+
 /*
 * Check whether permissions on this node can be changed in a way that
 * @cumulative_perms and @cumulative_shared_perms are the new cumulative
@@ -1432,7 +1519,7 @@ static int bdrv_check_perm(BlockDriverState *bs, uint64_t cumulative_perms,

    /* Write permissions never work with read-only images */
    if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
-        bdrv_is_read_only(bs))
+        !bdrv_is_writable(bs))
    {
        error_setg(errp, "Block node is read-only");
        return -EPERM;
@@ -1457,9 +1544,9 @@ static int bdrv_check_perm(BlockDriverState *bs, uint64_t cumulative_perms,
    /* Check all children */
    QLIST_FOREACH(c, &bs->children, next) {
        uint64_t cur_perm, cur_shared;
-        drv->bdrv_child_perm(bs, c, c->role,
-                             cumulative_perms, cumulative_shared_perms,
-                             &cur_perm, &cur_shared);
+        bdrv_child_perm(bs, c->bs, c, c->role,
+                        cumulative_perms, cumulative_shared_perms,
+                        &cur_perm, &cur_shared);
        ret = bdrv_child_check_perm(c, cur_perm, cur_shared, ignore_children,
                                    errp);
        if (ret < 0) {
@@ -1519,9 +1606,9 @@ static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms,
    /* Update all children */
    QLIST_FOREACH(c, &bs->children, next) {
        uint64_t cur_perm, cur_shared;
-        drv->bdrv_child_perm(bs, c, c->role,
-                             cumulative_perms, cumulative_shared_perms,
-                             &cur_perm, &cur_shared);
+        bdrv_child_perm(bs, c->bs, c, c->role,
+                        cumulative_perms, cumulative_shared_perms,
+                        &cur_perm, &cur_shared);
        bdrv_child_set_perm(c, cur_perm, cur_shared);
    }
 }
@@ -1551,7 +1638,7 @@ static char *bdrv_child_user_desc(BdrvChild *c)
    return g_strdup("another user");
 }

-static char *bdrv_perm_names(uint64_t perm)
+char *bdrv_perm_names(uint64_t perm)
 {
    struct perm_name {
        uint64_t perm;
@@ -1717,7 +1804,7 @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
        bdrv_filter_default_perms(bs, c, role, perm, shared, &perm, &shared);

        /* Format drivers may touch metadata even if the guest doesn't write */
-        if (!bdrv_is_read_only(bs)) {
+        if (bdrv_is_writable(bs)) {
            perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
        }

@@ -1743,6 +1830,10 @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
                  BLK_PERM_WRITE_UNCHANGED;
    }

+    if (bs->open_flags & BDRV_O_INACTIVE) {
+        shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
+    }
+
    *nperm = perm;
    *nshared = shared;
 }
@@ -1856,8 +1947,8 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,

    assert(parent_bs->drv);
    assert(bdrv_get_aio_context(parent_bs) == bdrv_get_aio_context(child_bs));
-    parent_bs->drv->bdrv_child_perm(parent_bs, NULL, child_role,
-                                    perm, shared_perm, &perm, &shared_perm);
+    bdrv_child_perm(parent_bs, child_bs, NULL, child_role,
+                    perm, shared_perm, &perm, &shared_perm);

    child = bdrv_root_attach_child(child_bs, child_name, child_role,
                                   perm, shared_perm, parent_bs, errp);
@@ -2038,7 +2129,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
    }

    if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
-        qdict_put(options, "driver", qstring_from_str(bs->backing_format));
+        qdict_put_str(options, "driver", bs->backing_format);
    }

    backing_hd = bdrv_open_inherit(*backing_filename ? backing_filename : NULL,
@@ -2160,7 +2251,7 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
    char *tmp_filename = g_malloc0(PATH_MAX + 1);
    int64_t total_size;
    QemuOpts *opts = NULL;
-    BlockDriverState *bs_snapshot;
+    BlockDriverState *bs_snapshot = NULL;
    Error *local_err = NULL;
    int ret;

@@ -2193,38 +2284,32 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
    }

    /* Prepare options QDict for the temporary file */
-    qdict_put(snapshot_options, "file.driver",
-              qstring_from_str("file"));
-    qdict_put(snapshot_options, "file.filename",
-              qstring_from_str(tmp_filename));
-    qdict_put(snapshot_options, "driver",
-              qstring_from_str("qcow2"));
+    qdict_put_str(snapshot_options, "file.driver", "file");
+    qdict_put_str(snapshot_options, "file.filename", tmp_filename);
+    qdict_put_str(snapshot_options, "driver", "qcow2");

    bs_snapshot = bdrv_open(NULL, NULL, snapshot_options, flags, errp);
    snapshot_options = NULL;
    if (!bs_snapshot) {
-        ret = -EINVAL;
        goto out;
    }

-    /* bdrv_append() consumes a strong reference to bs_snapshot (i.e. it will
-     * call bdrv_unref() on it), so in order to be able to return one, we have
-     * to increase bs_snapshot's refcount here */
+    /* bdrv_append() consumes a strong reference to bs_snapshot
+     * (i.e. it will call bdrv_unref() on it) even on error, so in
+     * order to be able to return one, we have to increase
+     * bs_snapshot's refcount here */
    bdrv_ref(bs_snapshot);
    bdrv_append(bs_snapshot, bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
-        ret = -EINVAL;
+        bs_snapshot = NULL;
        goto out;
    }

-    g_free(tmp_filename);
-    return bs_snapshot;
-
 out:
    QDECREF(snapshot_options);
    g_free(tmp_filename);
-    return NULL;
+    return bs_snapshot;
 }

 /*
@@ -2373,8 +2458,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
                goto fail;
            }

-            qdict_put(options, "file",
-                      qstring_from_str(bdrv_get_node_name(file_bs)));
+            qdict_put_str(options, "file", bdrv_get_node_name(file_bs));
        }
    }

@@ -2396,8 +2480,8 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
         * sure to update both bs->options (which has the full effective
         * options for bs) and options (which has file.* already removed).
         */
-        qdict_put(bs->options, "driver", qstring_from_str(drv->format_name));
-        qdict_put(options, "driver", qstring_from_str(drv->format_name));
+        qdict_put_str(bs->options, "driver", drv->format_name);
+        qdict_put_str(options, "driver", drv->format_name);
    } else if (!drv) {
        error_setg(errp, "Must specify either driver or file");
        goto fail;
@@ -2752,6 +2836,7 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
    BlockDriver *drv;
    QemuOpts *opts;
    const char *value;
+    bool read_only;

    assert(reopen_state != NULL);
    assert(reopen_state->bs->drv != NULL);
@@ -2772,20 +2857,21 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
     * that they are checked at the end of this function. */
    value = qemu_opt_get(opts, "node-name");
    if (value) {
-        qdict_put(reopen_state->options, "node-name", qstring_from_str(value));
+        qdict_put_str(reopen_state->options, "node-name", value);
    }

    value = qemu_opt_get(opts, "driver");
    if (value) {
-        qdict_put(reopen_state->options, "driver", qstring_from_str(value));
+        qdict_put_str(reopen_state->options, "driver", value);
    }

-    /* if we are to stay read-only, do not allow permission change
-     * to r/w */
-    if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
-        reopen_state->flags & BDRV_O_RDWR) {
-        error_setg(errp, "Node '%s' is read only",
-                   bdrv_get_device_or_node_name(reopen_state->bs));
+    /* If we are to stay read-only, do not allow permission change
+     * to r/w. Attempting to set to r/w may fail if either BDRV_O_ALLOW_RDWR is
+     * not set, or if the BDS still has copy_on_read enabled */
+    read_only = !(reopen_state->flags & BDRV_O_RDWR);
+    ret = bdrv_can_set_read_only(reopen_state->bs, read_only, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
        goto error;
    }

@@ -3268,26 +3354,30 @@ exit:
 /**
 * Truncate file to 'offset' bytes (needed only for file protocols)
 */
-int bdrv_truncate(BdrvChild *child, int64_t offset)
+int bdrv_truncate(BdrvChild *child, int64_t offset, Error **errp)
 {
    BlockDriverState *bs = child->bs;
    BlockDriver *drv = bs->drv;
    int ret;

-    /* FIXME: Some format block drivers use this function instead of implicitly
-     *        growing their file by writing beyond its end.
-     *        See bdrv_aligned_pwritev() for an explanation why we currently
-     *        cannot assert this permission in that case. */
-    // assert(child->perm & BLK_PERM_RESIZE);
+    assert(child->perm & BLK_PERM_RESIZE);

-    if (!drv)
+    if (!drv) {
+        error_setg(errp, "No medium inserted");
        return -ENOMEDIUM;
-    if (!drv->bdrv_truncate)
+    }
+    if (!drv->bdrv_truncate) {
+        error_setg(errp, "Image format driver does not support resize");
        return -ENOTSUP;
-    if (bs->read_only)
+    }
+    if (bs->read_only) {
+        error_setg(errp, "Image is read-only");
        return -EACCES;
+    }

-    ret = drv->bdrv_truncate(bs, offset);
+    assert(!(bs->open_flags & BDRV_O_INACTIVE));
+
+    ret = drv->bdrv_truncate(bs, offset, errp);
    if (ret == 0) {
        ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
        bdrv_dirty_bitmap_truncate(bs);
@@ -3355,11 +3445,6 @@ void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
    *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
 }

-bool bdrv_is_read_only(BlockDriverState *bs)
-{
-    return bs->read_only;
-}
-
 bool bdrv_is_sg(BlockDriverState *bs)
 {
    return bs->sg;
@@ -3887,7 +3972,8 @@ void bdrv_init_with_whitelist(void)

 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
 {
-    BdrvChild *child;
+    BdrvChild *child, *parent;
+    uint64_t perm, shared_perm;
    Error *local_err = NULL;
    int ret;

@@ -3923,6 +4009,26 @@ void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        return;
    }
+
+    /* Update permissions, they may differ for inactive nodes */
+    bdrv_get_cumulative_perm(bs, &perm, &shared_perm);
+    ret = bdrv_check_perm(bs, perm, shared_perm, NULL, &local_err);
+    if (ret < 0) {
+        bs->open_flags |= BDRV_O_INACTIVE;
+        error_propagate(errp, local_err);
+        return;
+    }
+    bdrv_set_perm(bs, perm, shared_perm);
+
+    QLIST_FOREACH(parent, &bs->parents, next_parent) {
+        if (parent->role->activate) {
+            parent->role->activate(parent, &local_err);
+            if (local_err) {
+                error_propagate(errp, local_err);
+                return;
+            }
+        }
+    }
 }

 void bdrv_invalidate_cache_all(Error **errp)
@@ -3947,7 +4053,7 @@ void bdrv_invalidate_cache_all(Error **errp)
 static int bdrv_inactivate_recurse(BlockDriverState *bs,
                                   bool setting_flag)
 {
-    BdrvChild *child;
+    BdrvChild *child, *parent;
    int ret;

    if (!setting_flag && bs->drv->bdrv_inactivate) {
@@ -3957,6 +4063,27 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs,
        }
    }

+    if (setting_flag) {
+        uint64_t perm, shared_perm;
+
+        bs->open_flags |= BDRV_O_INACTIVE;
+
+        QLIST_FOREACH(parent, &bs->parents, next_parent) {
+            if (parent->role->inactivate) {
+                ret = parent->role->inactivate(parent);
+                if (ret < 0) {
+                    bs->open_flags &= ~BDRV_O_INACTIVE;
+                    return ret;
+                }
+            }
+        }
+
+        /* Update permissions, they may differ for inactive nodes */
+        bdrv_get_cumulative_perm(bs, &perm, &shared_perm);
+        bdrv_check_perm(bs, perm, shared_perm, NULL, &error_abort);
+        bdrv_set_perm(bs, perm, shared_perm);
+    }
+
    QLIST_FOREACH(child, &bs->children, next) {
        ret = bdrv_inactivate_recurse(child->bs, setting_flag);
        if (ret < 0) {
@@ -3964,9 +4091,6 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs,
        }
    }

-    if (setting_flag) {
-        bs->open_flags |= BDRV_O_INACTIVE;
-    }
    return 0;
 }

@@ -4161,8 +4285,8 @@ bool bdrv_op_blocker_is_empty(BlockDriverState *bs)

 void bdrv_img_create(const char *filename, const char *fmt,
                     const char *base_filename, const char *base_fmt,
-                     char *options, uint64_t img_size, int flags,
-                     Error **errp, bool quiet)
+                     char *options, uint64_t img_size, int flags, bool quiet,
+                     Error **errp)
 {
    QemuOptsList *create_opts = NULL;
    QemuOpts *opts = NULL;
@@ -4268,8 +4392,7 @@ void bdrv_img_create(const char *filename, const char *fmt,

            if (backing_fmt) {
                backing_options = qdict_new();
-                qdict_put(backing_options, "driver",
-                          qstring_from_str(backing_fmt));
+                qdict_put_str(backing_options, "driver", backing_fmt);
            }

            bs = bdrv_open(full_backing, NULL, backing_options, back_flags,
@@ -4674,11 +4797,9 @@ void bdrv_refresh_filename(BlockDriverState *bs)
         * contain a representation of the filename, therefore the following
         * suffices without querying the (exact_)filename of this BDS. */
        if (bs->file->bs->full_open_options) {
-            qdict_put_obj(opts, "driver",
-                          QOBJECT(qstring_from_str(drv->format_name)));
+            qdict_put_str(opts, "driver", drv->format_name);
            QINCREF(bs->file->bs->full_open_options);
-            qdict_put_obj(opts, "file",
-                          QOBJECT(bs->file->bs->full_open_options));
+            qdict_put(opts, "file", bs->file->bs->full_open_options);

            bs->full_open_options = opts;
        } else {
@@ -4694,8 +4815,7 @@ void bdrv_refresh_filename(BlockDriverState *bs)

        opts = qdict_new();
        append_open_options(opts, bs);
-        qdict_put_obj(opts, "driver",
-                      QOBJECT(qstring_from_str(drv->format_name)));
+        qdict_put_str(opts, "driver", drv->format_name);

        if (bs->exact_filename[0]) {
            /* This may not work for all block protocol drivers (some may
@@ -4705,8 +4825,7 @@ void bdrv_refresh_filename(BlockDriverState *bs)
             * needs some special format of the options QDict, it needs to
             * implement the driver-specific bdrv_refresh_filename() function.
             */
-            qdict_put_obj(opts, "filename",
-                          QOBJECT(qstring_from_str(bs->exact_filename)));
+            qdict_put_str(opts, "filename", bs->exact_filename);
        }

        bs->full_open_options = opts;
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -19,6 +19,7 @@ block-obj-$(CONFIG_LIBNFS) += nfs.o
 block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
+block-obj-$(CONFIG_VXHS) += vxhs.o
 block-obj-$(CONFIG_LIBSSH2) += ssh.o
 block-obj-y += accounting.o dirty-bitmap.o
 block-obj-y += write-threshold.o
@@ -38,6 +39,7 @@ rbd.o-cflags       := $(RBD_CFLAGS)
 rbd.o-libs         := $(RBD_LIBS)
 gluster.o-cflags   := $(GLUSTERFS_CFLAGS)
 gluster.o-libs     := $(GLUSTERFS_LIBS)
+vxhs.o-libs        := $(VXHS_LIBS)
 ssh.o-cflags       := $(LIBSSH2_CFLAGS)
 ssh.o-libs         := $(LIBSSH2_LIBS)
 block-obj-$(if $(CONFIG_BZIP2),m,n) += dmg-bz2.o
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -1,6 +1,7 @@
 /*
 * Block protocol for I/O error injection
 *
+ * Copyright (C) 2016-2017 Red Hat, Inc.
 * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -37,7 +38,12 @@
 typedef struct BDRVBlkdebugState {
    int state;
    int new_state;
-    int align;
+    uint64_t align;
+    uint64_t max_transfer;
+    uint64_t opt_write_zero;
+    uint64_t max_write_zero;
+    uint64_t opt_discard;
+    uint64_t max_discard;

    /* For blkdebug_refresh_filename() */
    char *config_file;
@@ -301,7 +307,7 @@ static void blkdebug_parse_filename(const char *filename, QDict *options,
    if (!strstart(filename, "blkdebug:", &filename)) {
        /* There was no prefix; therefore, all options have to be already
           present in the QDict (except for the filename) */
-        qdict_put(options, "x-image", qstring_from_str(filename));
+        qdict_put_str(options, "x-image", filename);
        return;
    }

@@ -320,7 +326,7 @@ static void blkdebug_parse_filename(const char *filename, QDict *options,

    /* TODO Allow multi-level nesting and set file.filename here */
    filename = c + 1;
-    qdict_put(options, "x-image", qstring_from_str(filename));
+    qdict_put_str(options, "x-image", filename);
 }

 static QemuOptsList runtime_opts = {
@@ -342,6 +348,31 @@ static QemuOptsList runtime_opts = {
            .type = QEMU_OPT_SIZE,
            .help = "Required alignment in bytes",
        },
+        {
+            .name = "max-transfer",
+            .type = QEMU_OPT_SIZE,
+            .help = "Maximum transfer size in bytes",
+        },
+        {
+            .name = "opt-write-zero",
+            .type = QEMU_OPT_SIZE,
+            .help = "Optimum write zero alignment in bytes",
+        },
+        {
+            .name = "max-write-zero",
+            .type = QEMU_OPT_SIZE,
+            .help = "Maximum write zero size in bytes",
+        },
+        {
+            .name = "opt-discard",
+            .type = QEMU_OPT_SIZE,
+            .help = "Optimum discard alignment in bytes",
+        },
+        {
+            .name = "max-discard",
+            .type = QEMU_OPT_SIZE,
+            .help = "Maximum discard size in bytes",
+        },
        { /* end of list */ }
    },
 };
@@ -352,8 +383,8 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
    BDRVBlkdebugState *s = bs->opaque;
    QemuOpts *opts;
    Error *local_err = NULL;
-    uint64_t align;
    int ret;
+    uint64_t align;

    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -382,21 +413,69 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
        goto out;
    }

-    /* Set request alignment */
-    align = qemu_opt_get_size(opts, "align", 0);
-    if (align < INT_MAX && is_power_of_2(align)) {
-        s->align = align;
-    } else if (align) {
-        error_setg(errp, "Invalid alignment");
-        ret = -EINVAL;
-        goto fail_unref;
+    bs->supported_write_flags = BDRV_REQ_FUA &
+        bs->file->bs->supported_write_flags;
+    bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
+        bs->file->bs->supported_zero_flags;
+    ret = -EINVAL;
+
+    /* Set alignment overrides */
+    s->align = qemu_opt_get_size(opts, "align", 0);
+    if (s->align && (s->align >= INT_MAX || !is_power_of_2(s->align))) {
+        error_setg(errp, "Cannot meet constraints with align %" PRIu64,
+                   s->align);
+        goto out;
+    }
+    align = MAX(s->align, bs->file->bs->bl.request_alignment);
+
+    s->max_transfer = qemu_opt_get_size(opts, "max-transfer", 0);
+    if (s->max_transfer &&
+        (s->max_transfer >= INT_MAX ||
+         !QEMU_IS_ALIGNED(s->max_transfer, align))) {
+        error_setg(errp, "Cannot meet constraints with max-transfer %" PRIu64,
+                   s->max_transfer);
+        goto out;
+    }
+
+    s->opt_write_zero = qemu_opt_get_size(opts, "opt-write-zero", 0);
+    if (s->opt_write_zero &&
+        (s->opt_write_zero >= INT_MAX ||
+         !QEMU_IS_ALIGNED(s->opt_write_zero, align))) {
+        error_setg(errp, "Cannot meet constraints with opt-write-zero %" PRIu64,
+                   s->opt_write_zero);
+        goto out;
+    }
+
+    s->max_write_zero = qemu_opt_get_size(opts, "max-write-zero", 0);
+    if (s->max_write_zero &&
+        (s->max_write_zero >= INT_MAX ||
+         !QEMU_IS_ALIGNED(s->max_write_zero,
+                          MAX(s->opt_write_zero, align)))) {
+        error_setg(errp, "Cannot meet constraints with max-write-zero %" PRIu64,
+                   s->max_write_zero);
+        goto out;
+    }
+
+    s->opt_discard = qemu_opt_get_size(opts, "opt-discard", 0);
+    if (s->opt_discard &&
+        (s->opt_discard >= INT_MAX ||
+         !QEMU_IS_ALIGNED(s->opt_discard, align))) {
+        error_setg(errp, "Cannot meet constraints with opt-discard %" PRIu64,
+                   s->opt_discard);
+        goto out;
+    }
+
+    s->max_discard = qemu_opt_get_size(opts, "max-discard", 0);
+    if (s->max_discard &&
+        (s->max_discard >= INT_MAX ||
+         !QEMU_IS_ALIGNED(s->max_discard,
+                          MAX(s->opt_discard, align)))) {
+        error_setg(errp, "Cannot meet constraints with max-discard %" PRIu64,
+                   s->max_discard);
+        goto out;
    }

    ret = 0;
-    goto out;
-
-fail_unref:
-    bdrv_unref_child(bs, bs->file);
 out:
    if (ret < 0) {
        g_free(s->config_file);
@@ -405,11 +484,30 @@ out:
    return ret;
 }

-static int inject_error(BlockDriverState *bs, BlkdebugRule *rule)
+static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes)
 {
    BDRVBlkdebugState *s = bs->opaque;
-    int error = rule->options.inject.error;
-    bool immediately = rule->options.inject.immediately;
+    BlkdebugRule *rule = NULL;
+    int error;
+    bool immediately;
+
+    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
+        uint64_t inject_offset = rule->options.inject.offset;
+
+        if (inject_offset == -1 ||
+            (bytes && inject_offset >= offset &&
+             inject_offset < offset + bytes))
+        {
+            break;
+        }
+    }
+
+    if (!rule || !rule->options.inject.error) {
+        return 0;
+    }
+
+    immediately = rule->options.inject.immediately;
+    error = rule->options.inject.error;

    if (rule->options.inject.once) {
        QSIMPLEQ_REMOVE(&s->active_rules, rule, BlkdebugRule, active_next);
@@ -428,21 +526,18 @@ static int coroutine_fn
 blkdebug_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
                   QEMUIOVector *qiov, int flags)
 {
-    BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugRule *rule = NULL;
+    int err;

-    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
-        uint64_t inject_offset = rule->options.inject.offset;
-
-        if (inject_offset == -1 ||
-            (inject_offset >= offset && inject_offset < offset + bytes))
-        {
-            break;
-        }
+    /* Sanity check block layer guarantees */
+    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
+    assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
+    if (bs->bl.max_transfer) {
+        assert(bytes <= bs->bl.max_transfer);
    }

-    if (rule && rule->options.inject.error) {
-        return inject_error(bs, rule);
+    err = rule_check(bs, offset, bytes);
+    if (err) {
+        return err;
    }

    return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
@@ -452,21 +547,18 @@ static int coroutine_fn
 blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
                    QEMUIOVector *qiov, int flags)
 {
-    BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugRule *rule = NULL;
+    int err;

-    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
-        uint64_t inject_offset = rule->options.inject.offset;
-
-        if (inject_offset == -1 ||
-            (inject_offset >= offset && inject_offset < offset + bytes))
-        {
-            break;
-        }
+    /* Sanity check block layer guarantees */
+    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
+    assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
+    if (bs->bl.max_transfer) {
+        assert(bytes <= bs->bl.max_transfer);
    }

-    if (rule && rule->options.inject.error) {
-        return inject_error(bs, rule);
+    err = rule_check(bs, offset, bytes);
+    if (err) {
+        return err;
    }

    return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
@@ -474,22 +566,81 @@ blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,

 static int blkdebug_co_flush(BlockDriverState *bs)
 {
-    BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugRule *rule = NULL;
+    int err = rule_check(bs, 0, 0);

-    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
-        if (rule->options.inject.offset == -1) {
-            break;
-        }
-    }
-
-    if (rule && rule->options.inject.error) {
-        return inject_error(bs, rule);
+    if (err) {
+        return err;
    }

    return bdrv_co_flush(bs->file->bs);
 }

+static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs,
+                                                  int64_t offset, int count,
+                                                  BdrvRequestFlags flags)
+{
+    uint32_t align = MAX(bs->bl.request_alignment,
+                         bs->bl.pwrite_zeroes_alignment);
+    int err;
+
+    /* Only pass through requests that are larger than requested
+     * preferred alignment (so that we test the fallback to writes on
+     * unaligned portions), and check that the block layer never hands
+     * us anything unaligned that crosses an alignment boundary.  */
+    if (count < align) {
+        assert(QEMU_IS_ALIGNED(offset, align) ||
+               QEMU_IS_ALIGNED(offset + count, align) ||
+               DIV_ROUND_UP(offset, align) ==
+               DIV_ROUND_UP(offset + count, align));
+        return -ENOTSUP;
+    }
+    assert(QEMU_IS_ALIGNED(offset, align));
+    assert(QEMU_IS_ALIGNED(count, align));
+    if (bs->bl.max_pwrite_zeroes) {
+        assert(count <= bs->bl.max_pwrite_zeroes);
+    }
+
+    err = rule_check(bs, offset, count);
+    if (err) {
+        return err;
+    }
+
+    return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
+}
+
+static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs,
+                                             int64_t offset, int count)
+{
+    uint32_t align = bs->bl.pdiscard_alignment;
+    int err;
+
+    /* Only pass through requests that are larger than requested
+     * minimum alignment, and ensure that unaligned requests do not
+     * cross optimum discard boundaries. */
+    if (count < bs->bl.request_alignment) {
+        assert(QEMU_IS_ALIGNED(offset, align) ||
+               QEMU_IS_ALIGNED(offset + count, align) ||
+               DIV_ROUND_UP(offset, align) ==
+               DIV_ROUND_UP(offset + count, align));
+        return -ENOTSUP;
+    }
+    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
+    assert(QEMU_IS_ALIGNED(count, bs->bl.request_alignment));
+    if (align && count >= align) {
+        assert(QEMU_IS_ALIGNED(offset, align));
+        assert(QEMU_IS_ALIGNED(count, align));
+    }
+    if (bs->bl.max_pdiscard) {
+        assert(count <= bs->bl.max_pdiscard);
+    }
+
+    err = rule_check(bs, offset, count);
+    if (err) {
+        return err;
+    }
+
+    return bdrv_co_pdiscard(bs->file->bs, offset, count);
+}

 static void blkdebug_close(BlockDriverState *bs)
 {
@@ -661,9 +812,9 @@ static int64_t blkdebug_getlength(BlockDriverState *bs)
    return bdrv_getlength(bs->file->bs);
 }

-static int blkdebug_truncate(BlockDriverState *bs, int64_t offset)
+static int blkdebug_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
 {
-    return bdrv_truncate(bs->file, offset);
+    return bdrv_truncate(bs->file, offset, errp);
 }

 static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
@@ -695,10 +846,10 @@ static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
    }

    opts = qdict_new();
-    qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("blkdebug")));
+    qdict_put_str(opts, "driver", "blkdebug");

    QINCREF(bs->file->bs->full_open_options);
-    qdict_put_obj(opts, "image", QOBJECT(bs->file->bs->full_open_options));
+    qdict_put(opts, "image", bs->file->bs->full_open_options);

    for (e = qdict_first(options); e; e = qdict_next(options, e)) {
        if (strcmp(qdict_entry_key(e), "x-image")) {
@@ -717,6 +868,21 @@ static void blkdebug_refresh_limits(BlockDriverState *bs, Error **errp)
    if (s->align) {
        bs->bl.request_alignment = s->align;
    }
+    if (s->max_transfer) {
+        bs->bl.max_transfer = s->max_transfer;
+    }
+    if (s->opt_write_zero) {
+        bs->bl.pwrite_zeroes_alignment = s->opt_write_zero;
+    }
+    if (s->max_write_zero) {
+        bs->bl.max_pwrite_zeroes = s->max_write_zero;
+    }
+    if (s->opt_discard) {
+        bs->bl.pdiscard_alignment = s->opt_discard;
+    }
+    if (s->max_discard) {
+        bs->bl.max_pdiscard = s->max_discard;
+    }
 }

 static int blkdebug_reopen_prepare(BDRVReopenState *reopen_state,
@@ -744,6 +910,8 @@ static BlockDriver bdrv_blkdebug = {
    .bdrv_co_preadv         = blkdebug_co_preadv,
    .bdrv_co_pwritev        = blkdebug_co_pwritev,
    .bdrv_co_flush_to_disk  = blkdebug_co_flush,
+    .bdrv_co_pwrite_zeroes  = blkdebug_co_pwrite_zeroes,
+    .bdrv_co_pdiscard       = blkdebug_co_pdiscard,

    .bdrv_debug_event           = blkdebug_debug_event,
    .bdrv_debug_breakpoint      = blkdebug_debug_breakpoint,
--- a/block/blkreplay.c
+++ b/block/blkreplay.c
@@ -37,9 +37,6 @@ static int blkreplay_open(BlockDriverState *bs, QDict *options, int flags,

    ret = 0;
 fail:
-    if (ret < 0) {
-        bdrv_unref_child(bs, bs->file);
-    }
    return ret;
 }

--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -67,7 +67,7 @@ static void blkverify_parse_filename(const char *filename, QDict *options,
    if (!strstart(filename, "blkverify:", &filename)) {
        /* There was no prefix; therefore, all options have to be already
           present in the QDict (except for the filename) */
-        qdict_put(options, "x-image", qstring_from_str(filename));
+        qdict_put_str(options, "x-image", filename);
        return;
    }

@@ -84,7 +84,7 @@ static void blkverify_parse_filename(const char *filename, QDict *options,

    /* TODO Allow multi-level nesting and set file.filename here */
    filename = c + 1;
-    qdict_put(options, "x-image", qstring_from_str(filename));
+    qdict_put_str(options, "x-image", filename);
 }

 static QemuOptsList runtime_opts = {
@@ -142,9 +142,6 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags,

    ret = 0;
 fail:
-    if (ret < 0) {
-        bdrv_unref_child(bs, bs->file);
-    }
    qemu_opts_del(opts);
    return ret;
 }
@@ -291,13 +288,12 @@ static void blkverify_refresh_filename(BlockDriverState *bs, QDict *options)
        && s->test_file->bs->full_open_options)
    {
        QDict *opts = qdict_new();
-        qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("blkverify")));
+        qdict_put_str(opts, "driver", "blkverify");

        QINCREF(bs->file->bs->full_open_options);
-        qdict_put_obj(opts, "raw", QOBJECT(bs->file->bs->full_open_options));
+        qdict_put(opts, "raw", bs->file->bs->full_open_options);
        QINCREF(s->test_file->bs->full_open_options);
-        qdict_put_obj(opts, "test",
-                      QOBJECT(s->test_file->bs->full_open_options));
+        qdict_put(opts, "test", s->test_file->bs->full_open_options);

        bs->full_open_options = opts;
    }
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -130,6 +130,56 @@ static const char *blk_root_get_name(BdrvChild *child)
    return blk_name(child->opaque);
 }

+/*
+ * Notifies the user of the BlockBackend that migration has completed. qdev
+ * devices can tighten their permissions in response (specifically revoke
+ * shared write permissions that we needed for storage migration).
+ *
+ * If an error is returned, the VM cannot be allowed to be resumed.
+ */
+static void blk_root_activate(BdrvChild *child, Error **errp)
+{
+    BlockBackend *blk = child->opaque;
+    Error *local_err = NULL;
+
+    if (!blk->disable_perm) {
+        return;
+    }
+
+    blk->disable_perm = false;
+
+    blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        blk->disable_perm = true;
+        return;
+    }
+}
+
+static int blk_root_inactivate(BdrvChild *child)
+{
+    BlockBackend *blk = child->opaque;
+
+    if (blk->disable_perm) {
+        return 0;
+    }
+
+    /* Only inactivate BlockBackends for guest devices (which are inactive at
+     * this point because the VM is stopped) and unattached monitor-owned
+     * BlockBackends. If there is still any other user like a block job, then
+     * we simply can't inactivate the image. */
+    if (!blk->dev && !blk->name[0]) {
+        return -EPERM;
+    }
+
+    blk->disable_perm = true;
+    if (blk->root) {
+        bdrv_child_try_set_perm(blk->root, 0, BLK_PERM_ALL, &error_abort);
+    }
+
+    return 0;
+}
+
 static const BdrvChildRole child_root = {
    .inherit_options    = blk_root_inherit_options,

@@ -140,6 +190,9 @@ static const BdrvChildRole child_root = {

    .drained_begin      = blk_root_drained_begin,
    .drained_end        = blk_root_drained_end,
+
+    .activate           = blk_root_activate,
+    .inactivate         = blk_root_inactivate,
 };

 /*
@@ -420,7 +473,7 @@ void monitor_remove_blk(BlockBackend *blk)
 * Return @blk's name, a non-null string.
 * Returns an empty string iff @blk is not referenced by the monitor.
 */
-const char *blk_name(BlockBackend *blk)
+const char *blk_name(const BlockBackend *blk)
 {
    return blk->name ?: "";
 }
@@ -601,34 +654,6 @@ void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
    *shared_perm = blk->shared_perm;
 }

-/*
- * Notifies the user of all BlockBackends that migration has completed. qdev
- * devices can tighten their permissions in response (specifically revoke
- * shared write permissions that we needed for storage migration).
- *
- * If an error is returned, the VM cannot be allowed to be resumed.
- */
-void blk_resume_after_migration(Error **errp)
-{
-    BlockBackend *blk;
-    Error *local_err = NULL;
-
-    for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) {
-        if (!blk->disable_perm) {
-            continue;
-        }
-
-        blk->disable_perm = false;
-
-        blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            blk->disable_perm = true;
-            return;
-        }
-    }
-}
-
 static int blk_do_attach_dev(BlockBackend *blk, void *dev)
 {
    if (blk->dev) {
@@ -1746,13 +1771,14 @@ int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
                   BDRV_REQ_WRITE_COMPRESSED);
 }

-int blk_truncate(BlockBackend *blk, int64_t offset)
+int blk_truncate(BlockBackend *blk, int64_t offset, Error **errp)
 {
    if (!blk_is_available(blk)) {
+        error_setg(errp, "No medium inserted");
        return -ENOMEDIUM;
    }

-    return bdrv_truncate(blk->root, offset);
+    return bdrv_truncate(blk->root, offset, errp);
 }

 static void blk_pdiscard_entry(void *opaque)
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -110,7 +110,10 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
        return -EINVAL;
    }

-    bs->read_only = true; /* no write support yet */
+    ret = bdrv_set_read_only(bs, true, errp); /* no write support yet */
+    if (ret < 0) {
+        return ret;
+    }

    ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs));
    if (ret < 0) {
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -72,7 +72,10 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
        return -EINVAL;
    }

-    bs->read_only = true;
+    ret = bdrv_set_read_only(bs, true, errp);
+    if (ret < 0) {
+        return ret;
+    }

    /* read header */
    ret = bdrv_pread(bs->file, 128, &s->block_size, 4);
--- a/block/commit.c
+++ b/block/commit.c
@@ -151,7 +151,7 @@ static void coroutine_fn commit_run(void *opaque)
    }

    if (base_len < s->common.len) {
-        ret = blk_truncate(s->base, s->common.len);
+        ret = blk_truncate(s->base, s->common.len, NULL);
        if (ret) {
            goto out;
        }
@@ -511,8 +511,9 @@ int bdrv_commit(BlockDriverState *bs)
     * grow the backing file image if possible.  If not possible,
     * we must return an error */
    if (length > backing_length) {
-        ret = blk_truncate(backing, length);
+        ret = blk_truncate(backing, length, &local_err);
        if (ret < 0) {
+            error_report_err(local_err);
            goto ro_cleanup;
        }
    }
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -59,8 +59,8 @@ static ssize_t block_crypto_read_func(QCryptoBlock *block,
                                      size_t offset,
                                      uint8_t *buf,
                                      size_t buflen,
-                                      Error **errp,
-                                      void *opaque)
+                                      void *opaque,
+                                      Error **errp)
 {
    BlockDriverState *bs = opaque;
    ssize_t ret;
@@ -86,8 +86,8 @@ static ssize_t block_crypto_write_func(QCryptoBlock *block,
                                       size_t offset,
                                       const uint8_t *buf,
                                       size_t buflen,
-                                       Error **errp,
-                                       void *opaque)
+                                       void *opaque,
+                                       Error **errp)
 {
    struct BlockCryptoCreateData *data = opaque;
    ssize_t ret;
@@ -103,8 +103,8 @@ static ssize_t block_crypto_write_func(QCryptoBlock *block,

 static ssize_t block_crypto_init_func(QCryptoBlock *block,
                                      size_t headerlen,
-                                      Error **errp,
-                                      void *opaque)
+                                      void *opaque,
+                                      Error **errp)
 {
    struct BlockCryptoCreateData *data = opaque;
    int ret;
@@ -381,7 +381,8 @@ static int block_crypto_create_generic(QCryptoBlockFormat format,
    return ret;
 }

-static int block_crypto_truncate(BlockDriverState *bs, int64_t offset)
+static int block_crypto_truncate(BlockDriverState *bs, int64_t offset,
+                                 Error **errp)
 {
    BlockCrypto *crypto = bs->opaque;
    size_t payload_offset =
@@ -389,7 +390,7 @@ static int block_crypto_truncate(BlockDriverState *bs, int64_t offset)

    offset += payload_offset;

-    return bdrv_truncate(bs->file, offset);
+    return bdrv_truncate(bs->file, offset, errp);
 }

 static void block_crypto_close(BlockDriverState *bs)
--- a/block/curl.c
+++ b/block/curl.c
@@ -76,15 +76,12 @@ static CURLMcode __curl_multi_socket_action(CURLM *multi_handle,
 #define CURL_TIMEOUT_DEFAULT 5
 #define CURL_TIMEOUT_MAX 10000

-#define FIND_RET_NONE   0
-#define FIND_RET_OK     1
-#define FIND_RET_WAIT   2
-
 #define CURL_BLOCK_OPT_URL       "url"
 #define CURL_BLOCK_OPT_READAHEAD "readahead"
 #define CURL_BLOCK_OPT_SSLVERIFY "sslverify"
 #define CURL_BLOCK_OPT_TIMEOUT "timeout"
 #define CURL_BLOCK_OPT_COOKIE    "cookie"
+#define CURL_BLOCK_OPT_COOKIE_SECRET "cookie-secret"
 #define CURL_BLOCK_OPT_USERNAME "username"
 #define CURL_BLOCK_OPT_PASSWORD_SECRET "password-secret"
 #define CURL_BLOCK_OPT_PROXY_USERNAME "proxy-username"
@@ -93,14 +90,17 @@ static CURLMcode __curl_multi_socket_action(CURLM *multi_handle,
 struct BDRVCURLState;

 typedef struct CURLAIOCB {
-    BlockAIOCB common;
+    Coroutine *co;
    QEMUIOVector *qiov;

-    int64_t sector_num;
-    int nb_sectors;
+    uint64_t offset;
+    uint64_t bytes;
+    int ret;

    size_t start;
    size_t end;
+
+    QSIMPLEQ_ENTRY(CURLAIOCB) next;
 } CURLAIOCB;

 typedef struct CURLSocket {
@@ -115,7 +115,7 @@ typedef struct CURLState
    CURL *curl;
    QLIST_HEAD(, CURLSocket) sockets;
    char *orig_buf;
-    size_t buf_start;
+    uint64_t buf_start;
    size_t buf_off;
    size_t buf_len;
    char range[128];
@@ -126,7 +126,7 @@ typedef struct CURLState
 typedef struct BDRVCURLState {
    CURLM *multi;
    QEMUTimer timer;
-    size_t len;
+    uint64_t len;
    CURLState states[CURL_NUM_STATES];
    char *url;
    size_t readahead_size;
@@ -136,6 +136,7 @@ typedef struct BDRVCURLState {
    bool accept_range;
    AioContext *aio_context;
    QemuMutex mutex;
+    QSIMPLEQ_HEAD(, CURLAIOCB) free_state_waitq;
    char *username;
    char *password;
    char *proxyusername;
@@ -147,6 +148,7 @@ static void curl_multi_do(void *arg);
 static void curl_multi_read(void *arg);

 #ifdef NEED_CURL_TIMER_CALLBACK
+/* Called from curl_multi_do_locked, with s->mutex held.  */
 static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque)
 {
    BDRVCURLState *s = opaque;
@@ -163,6 +165,7 @@ static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque)
 }
 #endif

+/* Called from curl_multi_do_locked, with s->mutex held.  */
 static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
                        void *userp, void *sp)
 {
@@ -212,6 +215,7 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
    return 0;
 }

+/* Called from curl_multi_do_locked, with s->mutex held.  */
 static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
 {
    BDRVCURLState *s = opaque;
@@ -226,6 +230,7 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
    return realsize;
 }

+/* Called from curl_multi_do_locked, with s->mutex held.  */
 static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
 {
    CURLState *s = ((CURLState*)opaque);
@@ -253,7 +258,7 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
            continue;

        if ((s->buf_off >= acb->end)) {
-            size_t request_length = acb->nb_sectors * BDRV_SECTOR_SIZE;
+            size_t request_length = acb->bytes;

            qemu_iovec_from_buf(acb->qiov, 0, s->orig_buf + acb->start,
                                acb->end - acb->start);
@@ -264,9 +269,11 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
                                  request_length - offset);
            }

-            acb->common.cb(acb->common.opaque, 0);
-            qemu_aio_unref(acb);
+            acb->ret = 0;
            s->acb[i] = NULL;
+            qemu_mutex_unlock(&s->s->mutex);
+            aio_co_wake(acb->co);
+            qemu_mutex_lock(&s->s->mutex);
        }
    }

@@ -275,18 +282,19 @@ read_end:
    return size * nmemb;
 }

-static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
-                         CURLAIOCB *acb)
+/* Called with s->mutex held.  */
+static bool curl_find_buf(BDRVCURLState *s, uint64_t start, uint64_t len,
+                          CURLAIOCB *acb)
 {
    int i;
-    size_t end = start + len;
-    size_t clamped_end = MIN(end, s->len);
-    size_t clamped_len = clamped_end - start;
+    uint64_t end = start + len;
+    uint64_t clamped_end = MIN(end, s->len);
+    uint64_t clamped_len = clamped_end - start;

    for (i=0; i<CURL_NUM_STATES; i++) {
        CURLState *state = &s->states[i];
-        size_t buf_end = (state->buf_start + state->buf_off);
-        size_t buf_fend = (state->buf_start + state->buf_len);
+        uint64_t buf_end = (state->buf_start + state->buf_off);
+        uint64_t buf_fend = (state->buf_start + state->buf_len);

        if (!state->orig_buf)
            continue;
@@ -305,9 +313,8 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
            if (clamped_len < len) {
                qemu_iovec_memset(acb->qiov, clamped_len, 0, len - clamped_len);
            }
-            acb->common.cb(acb->common.opaque, 0);
-
-            return FIND_RET_OK;
+            acb->ret = 0;
+            return true;
        }

        // Wait for unfinished chunks
@@ -325,13 +332,13 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
            for (j=0; j<CURL_NUM_ACB; j++) {
                if (!state->acb[j]) {
                    state->acb[j] = acb;
-                    return FIND_RET_WAIT;
+                    return true;
                }
            }
        }
    }

-    return FIND_RET_NONE;
+    return false;
 }

 /* Called with s->mutex held.  */
@@ -376,11 +383,11 @@ static void curl_multi_check_completion(BDRVCURLState *s)
                        continue;
                    }

-                    qemu_mutex_unlock(&s->mutex);
-                    acb->common.cb(acb->common.opaque, -EIO);
-                    qemu_mutex_lock(&s->mutex);
-                    qemu_aio_unref(acb);
+                    acb->ret = -EIO;
                    state->acb[i] = NULL;
+                    qemu_mutex_unlock(&s->mutex);
+                    aio_co_wake(acb->co);
+                    qemu_mutex_lock(&s->mutex);
                }
            }

@@ -449,32 +456,28 @@ static void curl_multi_timeout_do(void *arg)
 #endif
 }

-static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s)
+/* Called with s->mutex held.  */
+static CURLState *curl_find_state(BDRVCURLState *s)
 {
    CURLState *state = NULL;
-    int i, j;
-
-    do {
-        for (i=0; i<CURL_NUM_STATES; i++) {
-            for (j=0; j<CURL_NUM_ACB; j++)
-                if (s->states[i].acb[j])
-                    continue;
-            if (s->states[i].in_use)
-                continue;
+    int i;

+    for (i = 0; i < CURL_NUM_STATES; i++) {
+        if (!s->states[i].in_use) {
            state = &s->states[i];
            state->in_use = 1;
            break;
        }
-        if (!state) {
-            aio_poll(bdrv_get_aio_context(bs), true);
-        }
-    } while(!state);
+    }
+    return state;
+}

+static int curl_init_state(BDRVCURLState *s, CURLState *state)
+{
    if (!state->curl) {
        state->curl = curl_easy_init();
        if (!state->curl) {
-            return NULL;
+            return -EIO;
        }
        curl_easy_setopt(state->curl, CURLOPT_URL, s->url);
        curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYPEER,
@@ -527,11 +530,18 @@ static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s)
    QLIST_INIT(&state->sockets);
    state->s = s;

-    return state;
+    return 0;
 }

+/* Called with s->mutex held.  */
 static void curl_clean_state(CURLState *s)
 {
+    CURLAIOCB *next;
+    int j;
+    for (j = 0; j < CURL_NUM_ACB; j++) {
+        assert(!s->acb[j]);
+    }
+
    if (s->s->multi)
        curl_multi_remove_handle(s->s->multi, s->curl);

@@ -543,12 +553,20 @@ static void curl_clean_state(CURLState *s)
    }

    s->in_use = 0;
+
+    next = QSIMPLEQ_FIRST(&s->s->free_state_waitq);
+    if (next) {
+        QSIMPLEQ_REMOVE_HEAD(&s->s->free_state_waitq, next);
+        qemu_mutex_unlock(&s->s->mutex);
+        aio_co_wake(next->co);
+        qemu_mutex_lock(&s->s->mutex);
+    }
 }

 static void curl_parse_filename(const char *filename, QDict *options,
                                Error **errp)
 {
-    qdict_put(options, CURL_BLOCK_OPT_URL, qstring_from_str(filename));
+    qdict_put_str(options, CURL_BLOCK_OPT_URL, filename);
 }

 static void curl_detach_aio_context(BlockDriverState *bs)
@@ -556,6 +574,7 @@ static void curl_detach_aio_context(BlockDriverState *bs)
    BDRVCURLState *s = bs->opaque;
    int i;

+    qemu_mutex_lock(&s->mutex);
    for (i = 0; i < CURL_NUM_STATES; i++) {
        if (s->states[i].in_use) {
            curl_clean_state(&s->states[i]);
@@ -571,6 +590,7 @@ static void curl_detach_aio_context(BlockDriverState *bs)
        curl_multi_cleanup(s->multi);
        s->multi = NULL;
    }
+    qemu_mutex_unlock(&s->mutex);

    timer_del(&s->timer);
 }
@@ -623,6 +643,11 @@ static QemuOptsList runtime_opts = {
            .type = QEMU_OPT_STRING,
            .help = "Pass the cookie or list of cookies with each request"
        },
+        {
+            .name = CURL_BLOCK_OPT_COOKIE_SECRET,
+            .type = QEMU_OPT_STRING,
+            .help = "ID of secret used as cookie passed with each request"
+        },
        {
            .name = CURL_BLOCK_OPT_USERNAME,
            .type = QEMU_OPT_STRING,
@@ -657,6 +682,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
    Error *local_err = NULL;
    const char *file;
    const char *cookie;
+    const char *cookie_secret;
    double d;
    const char *secretid;
    const char *protocol_delimiter;
@@ -668,6 +694,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
        return -EROFS;
    }

+    qemu_mutex_init(&s->mutex);
    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
@@ -693,7 +720,22 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
    s->sslverify = qemu_opt_get_bool(opts, CURL_BLOCK_OPT_SSLVERIFY, true);

    cookie = qemu_opt_get(opts, CURL_BLOCK_OPT_COOKIE);
-    s->cookie = g_strdup(cookie);
+    cookie_secret = qemu_opt_get(opts, CURL_BLOCK_OPT_COOKIE_SECRET);
+
+    if (cookie && cookie_secret) {
+        error_setg(errp,
+                   "curl driver cannot handle both cookie and cookie secret");
+        goto out_noclean;
+    }
+
+    if (cookie_secret) {
+        s->cookie = qcrypto_secret_lookup_as_utf8(cookie_secret, errp);
+        if (!s->cookie) {
+            goto out_noclean;
+        }
+    } else {
+        s->cookie = g_strdup(cookie);
+    }

    file = qemu_opt_get(opts, CURL_BLOCK_OPT_URL);
    if (file == NULL) {
@@ -736,14 +778,22 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
    }

    DPRINTF("CURL: Opening %s\n", file);
+    QSIMPLEQ_INIT(&s->free_state_waitq);
    s->aio_context = bdrv_get_aio_context(bs);
    s->url = g_strdup(file);
-    state = curl_init_state(bs, s);
-    if (!state)
+    qemu_mutex_lock(&s->mutex);
+    state = curl_find_state(s);
+    qemu_mutex_unlock(&s->mutex);
+    if (!state) {
        goto out_noclean;
+    }

    // Get file size

+    if (curl_init_state(s, state) < 0) {
+        goto out;
+    }
+
    s->accept_range = false;
    curl_easy_setopt(state->curl, CURLOPT_NOBODY, 1);
    curl_easy_setopt(state->curl, CURLOPT_HEADERFUNCTION,
@@ -771,7 +821,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
    }
 #endif

-    s->len = (size_t)d;
+    s->len = d;

    if ((!strncasecmp(s->url, "http://", strlen("http://"))
        || !strncasecmp(s->url, "https://", strlen("https://")))
@@ -780,13 +830,14 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
                "Server does not support 'range' (byte ranges).");
        goto out;
    }
-    DPRINTF("CURL: Size = %zd\n", s->len);
+    DPRINTF("CURL: Size = %" PRIu64 "\n", s->len);

+    qemu_mutex_lock(&s->mutex);
    curl_clean_state(state);
+    qemu_mutex_unlock(&s->mutex);
    curl_easy_cleanup(state->curl);
    state->curl = NULL;

-    qemu_mutex_init(&s->mutex);
    curl_attach_aio_context(bs, bdrv_get_aio_context(bs));

    qemu_opts_del(opts);
@@ -797,53 +848,51 @@ out:
    curl_easy_cleanup(state->curl);
    state->curl = NULL;
 out_noclean:
+    qemu_mutex_destroy(&s->mutex);
    g_free(s->cookie);
    g_free(s->url);
    qemu_opts_del(opts);
    return -EINVAL;
 }

-static const AIOCBInfo curl_aiocb_info = {
-    .aiocb_size         = sizeof(CURLAIOCB),
-};
-
-
-static void curl_readv_bh_cb(void *p)
+static void curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb)
 {
    CURLState *state;
    int running;
-    int ret = -EINPROGRESS;

-    CURLAIOCB *acb = p;
-    BlockDriverState *bs = acb->common.bs;
    BDRVCURLState *s = bs->opaque;

-    size_t start = acb->sector_num * BDRV_SECTOR_SIZE;
-    size_t end;
+    uint64_t start = acb->offset;
+    uint64_t end;

    qemu_mutex_lock(&s->mutex);

    // In case we have the requested data already (e.g. read-ahead),
    // we can just call the callback and be done.
-    switch (curl_find_buf(s, start, acb->nb_sectors * BDRV_SECTOR_SIZE, acb)) {
-        case FIND_RET_OK:
-            qemu_aio_unref(acb);
-            // fall through
-        case FIND_RET_WAIT:
-            goto out;
-        default:
-            break;
+    if (curl_find_buf(s, start, acb->bytes, acb)) {
+        goto out;
    }

    // No cache found, so let's start a new request
-    state = curl_init_state(acb->common.bs, s);
-    if (!state) {
-        ret = -EIO;
+    for (;;) {
+        state = curl_find_state(s);
+        if (state) {
+            break;
+        }
+        QSIMPLEQ_INSERT_TAIL(&s->free_state_waitq, acb, next);
+        qemu_mutex_unlock(&s->mutex);
+        qemu_coroutine_yield();
+        qemu_mutex_lock(&s->mutex);
+    }
+
+    if (curl_init_state(s, state) < 0) {
+        curl_clean_state(state);
+        acb->ret = -EIO;
        goto out;
    }

    acb->start = 0;
-    acb->end = MIN(acb->nb_sectors * BDRV_SECTOR_SIZE, s->len - start);
+    acb->end = MIN(acb->bytes, s->len - start);

    state->buf_off = 0;
    g_free(state->orig_buf);
@@ -853,14 +902,14 @@ static void curl_readv_bh_cb(void *p)
    state->orig_buf = g_try_malloc(state->buf_len);
    if (state->buf_len && state->orig_buf == NULL) {
        curl_clean_state(state);
-        ret = -ENOMEM;
+        acb->ret = -ENOMEM;
        goto out;
    }
    state->acb[0] = acb;

-    snprintf(state->range, 127, "%zd-%zd", start, end);
-    DPRINTF("CURL (AIO): Reading %llu at %zd (%s)\n",
-            (acb->nb_sectors * BDRV_SECTOR_SIZE), start, state->range);
+    snprintf(state->range, 127, "%" PRIu64 "-%" PRIu64, start, end);
+    DPRINTF("CURL (AIO): Reading %" PRIu64 " at %" PRIu64 " (%s)\n",
+            acb->bytes, start, state->range);
    curl_easy_setopt(state->curl, CURLOPT_RANGE, state->range);

    curl_multi_add_handle(s->multi, state->curl);
@@ -870,26 +919,24 @@ static void curl_readv_bh_cb(void *p)

 out:
    qemu_mutex_unlock(&s->mutex);
-    if (ret != -EINPROGRESS) {
-        acb->common.cb(acb->common.opaque, ret);
-        qemu_aio_unref(acb);
-    }
 }

-static BlockAIOCB *curl_aio_readv(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque)
+static int coroutine_fn curl_co_preadv(BlockDriverState *bs,
+        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
-    CURLAIOCB *acb;
+    CURLAIOCB acb = {
+        .co = qemu_coroutine_self(),
+        .ret = -EINPROGRESS,
+        .qiov = qiov,
+        .offset = offset,
+        .bytes = bytes
+    };

-    acb = qemu_aio_get(&curl_aiocb_info, bs, cb, opaque);
-
-    acb->qiov = qiov;
-    acb->sector_num = sector_num;
-    acb->nb_sectors = nb_sectors;
-
-    aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), curl_readv_bh_cb, acb);
-    return &acb->common;
+    curl_setup_preadv(bs, &acb);
+    while (acb.ret == -EINPROGRESS) {
+        qemu_coroutine_yield();
+    }
+    return acb.ret;
 }

 static void curl_close(BlockDriverState *bs)
@@ -920,7 +967,7 @@ static BlockDriver bdrv_http = {
    .bdrv_close                 = curl_close,
    .bdrv_getlength             = curl_getlength,

-    .bdrv_aio_readv             = curl_aio_readv,
+    .bdrv_co_preadv             = curl_co_preadv,

    .bdrv_detach_aio_context    = curl_detach_aio_context,
    .bdrv_attach_aio_context    = curl_attach_aio_context,
@@ -936,7 +983,7 @@ static BlockDriver bdrv_https = {
    .bdrv_close                 = curl_close,
    .bdrv_getlength             = curl_getlength,

-    .bdrv_aio_readv             = curl_aio_readv,
+    .bdrv_co_preadv             = curl_co_preadv,

    .bdrv_detach_aio_context    = curl_detach_aio_context,
    .bdrv_attach_aio_context    = curl_attach_aio_context,
@@ -952,7 +999,7 @@ static BlockDriver bdrv_ftp = {
    .bdrv_close                 = curl_close,
    .bdrv_getlength             = curl_getlength,

-    .bdrv_aio_readv             = curl_aio_readv,
+    .bdrv_co_preadv             = curl_co_preadv,

    .bdrv_detach_aio_context    = curl_detach_aio_context,
    .bdrv_attach_aio_context    = curl_attach_aio_context,
@@ -968,7 +1015,7 @@ static BlockDriver bdrv_ftps = {
    .bdrv_close                 = curl_close,
    .bdrv_getlength             = curl_getlength,

-    .bdrv_aio_readv             = curl_aio_readv,
+    .bdrv_co_preadv             = curl_co_preadv,

    .bdrv_detach_aio_context    = curl_detach_aio_context,
    .bdrv_attach_aio_context    = curl_attach_aio_context,
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -419,8 +419,12 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
        return -EINVAL;
    }

+    ret = bdrv_set_read_only(bs, true, errp);
+    if (ret < 0) {
+        return ret;
+    }
+
    block_module_load_one("dmg-bz2");
-    bs->read_only = true;

    s->n_chunks = 0;
    s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL;
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -25,8 +25,6 @@
 #include "qapi/error.h"
 #include "qemu/cutils.h"
 #include "qemu/error-report.h"
-#include "qemu/timer.h"
-#include "qemu/log.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "trace.h"
@@ -131,12 +129,23 @@ do { \

 #define MAX_BLOCKSIZE	4096

+/* Posix file locking bytes. Libvirt takes byte 0, we start from higher bytes,
+ * leaving a few more bytes for its future use. */
+#define RAW_LOCK_PERM_BASE             100
+#define RAW_LOCK_SHARED_BASE           200
+
 typedef struct BDRVRawState {
    int fd;
+    int lock_fd;
+    bool use_lock;
    int type;
    int open_flags;
    size_t buf_align;

+    /* The current permissions. */
+    uint64_t perm;
+    uint64_t shared_perm;
+
 #ifdef CONFIG_XFS
    bool is_xfs:1;
 #endif
@@ -377,7 +386,7 @@ static void raw_parse_filename(const char *filename, QDict *options,
     * function call can be ignored. */
    strstart(filename, "file:", &filename);

-    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
+    qdict_put_str(options, "filename", filename);
 }

 static QemuOptsList raw_runtime_opts = {
@@ -394,6 +403,11 @@ static QemuOptsList raw_runtime_opts = {
            .type = QEMU_OPT_STRING,
            .help = "host AIO implementation (threads, native)",
        },
+        {
+            .name = "locking",
+            .type = QEMU_OPT_STRING,
+            .help = "file locking mode (on/off/auto, default: auto)",
+        },
        { /* end of list */ }
    },
 };
@@ -408,6 +422,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
    BlockdevAioOptions aio, aio_default;
    int fd, ret;
    struct stat st;
+    OnOffAuto locking;

    opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -437,6 +452,37 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
    }
    s->use_linux_aio = (aio == BLOCKDEV_AIO_OPTIONS_NATIVE);

+    locking = qapi_enum_parse(OnOffAuto_lookup, qemu_opt_get(opts, "locking"),
+                              ON_OFF_AUTO__MAX, ON_OFF_AUTO_AUTO, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
+        goto fail;
+    }
+    switch (locking) {
+    case ON_OFF_AUTO_ON:
+        s->use_lock = true;
+#ifndef F_OFD_SETLK
+        fprintf(stderr,
+                "File lock requested but OFD locking syscall is unavailable, "
+                "falling back to POSIX file locks.\n"
+                "Due to the implementation, locks can be lost unexpectedly.\n");
+#endif
+        break;
+    case ON_OFF_AUTO_OFF:
+        s->use_lock = false;
+        break;
+    case ON_OFF_AUTO_AUTO:
+#ifdef F_OFD_SETLK
+        s->use_lock = true;
+#else
+        s->use_lock = false;
+#endif
+        break;
+    default:
+        abort();
+    }
+
    s->open_flags = open_flags;
    raw_parse_flags(bdrv_flags, &s->open_flags);

@@ -452,6 +498,21 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
    }
    s->fd = fd;

+    s->lock_fd = -1;
+    if (s->use_lock) {
+        fd = qemu_open(filename, s->open_flags);
+        if (fd < 0) {
+            ret = -errno;
+            error_setg_errno(errp, errno, "Could not open '%s' for locking",
+                             filename);
+            qemu_close(s->fd);
+            goto fail;
+        }
+        s->lock_fd = fd;
+    }
+    s->perm = 0;
+    s->shared_perm = BLK_PERM_ALL;
+
 #ifdef CONFIG_LINUX_AIO
     /* Currently Linux does AIO only for files opened with O_DIRECT */
    if (s->use_linux_aio && !(s->open_flags & O_DIRECT)) {
@@ -539,6 +600,161 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
    return raw_open_common(bs, options, flags, 0, errp);
 }

+typedef enum {
+    RAW_PL_PREPARE,
+    RAW_PL_COMMIT,
+    RAW_PL_ABORT,
+} RawPermLockOp;
+
+#define PERM_FOREACH(i) \
+    for ((i) = 0; (1ULL << (i)) <= BLK_PERM_ALL; i++)
+
+/* Lock bytes indicated by @perm_lock_bits and @shared_perm_lock_bits in the
+ * file; if @unlock == true, also unlock the unneeded bytes.
+ * @shared_perm_lock_bits is the mask of all permissions that are NOT shared.
+ */
+static int raw_apply_lock_bytes(BDRVRawState *s,
+                                uint64_t perm_lock_bits,
+                                uint64_t shared_perm_lock_bits,
+                                bool unlock, Error **errp)
+{
+    int ret;
+    int i;
+
+    PERM_FOREACH(i) {
+        int off = RAW_LOCK_PERM_BASE + i;
+        if (perm_lock_bits & (1ULL << i)) {
+            ret = qemu_lock_fd(s->lock_fd, off, 1, false);
+            if (ret) {
+                error_setg(errp, "Failed to lock byte %d", off);
+                return ret;
+            }
+        } else if (unlock) {
+            ret = qemu_unlock_fd(s->lock_fd, off, 1);
+            if (ret) {
+                error_setg(errp, "Failed to unlock byte %d", off);
+                return ret;
+            }
+        }
+    }
+    PERM_FOREACH(i) {
+        int off = RAW_LOCK_SHARED_BASE + i;
+        if (shared_perm_lock_bits & (1ULL << i)) {
+            ret = qemu_lock_fd(s->lock_fd, off, 1, false);
+            if (ret) {
+                error_setg(errp, "Failed to lock byte %d", off);
+                return ret;
+            }
+        } else if (unlock) {
+            ret = qemu_unlock_fd(s->lock_fd, off, 1);
+            if (ret) {
+                error_setg(errp, "Failed to unlock byte %d", off);
+                return ret;
+            }
+        }
+    }
+    return 0;
+}
+
+/* Check "unshared" bytes implied by @perm and ~@shared_perm in the file. */
+static int raw_check_lock_bytes(BDRVRawState *s,
+                                uint64_t perm, uint64_t shared_perm,
+                                Error **errp)
+{
+    int ret;
+    int i;
+
+    PERM_FOREACH(i) {
+        int off = RAW_LOCK_SHARED_BASE + i;
+        uint64_t p = 1ULL << i;
+        if (perm & p) {
+            ret = qemu_lock_fd_test(s->lock_fd, off, 1, true);
+            if (ret) {
+                char *perm_name = bdrv_perm_names(p);
+                error_setg(errp,
+                           "Failed to get \"%s\" lock",
+                           perm_name);
+                g_free(perm_name);
+                error_append_hint(errp,
+                                  "Is another process using the image?\n");
+                return ret;
+            }
+        }
+    }
+    PERM_FOREACH(i) {
+        int off = RAW_LOCK_PERM_BASE + i;
+        uint64_t p = 1ULL << i;
+        if (!(shared_perm & p)) {
+            ret = qemu_lock_fd_test(s->lock_fd, off, 1, true);
+            if (ret) {
+                char *perm_name = bdrv_perm_names(p);
+                error_setg(errp,
+                           "Failed to get shared \"%s\" lock",
+                           perm_name);
+                g_free(perm_name);
+                error_append_hint(errp,
+                                  "Is another process using the image?\n");
+                return ret;
+            }
+        }
+    }
+    return 0;
+}
+
+static int raw_handle_perm_lock(BlockDriverState *bs,
+                                RawPermLockOp op,
+                                uint64_t new_perm, uint64_t new_shared,
+                                Error **errp)
+{
+    BDRVRawState *s = bs->opaque;
+    int ret = 0;
+    Error *local_err = NULL;
+
+    if (!s->use_lock) {
+        return 0;
+    }
+
+    if (bdrv_get_flags(bs) & BDRV_O_INACTIVE) {
+        return 0;
+    }
+
+    assert(s->lock_fd > 0);
+
+    switch (op) {
+    case RAW_PL_PREPARE:
+        ret = raw_apply_lock_bytes(s, s->perm | new_perm,
+                                   ~s->shared_perm | ~new_shared,
+                                   false, errp);
+        if (!ret) {
+            ret = raw_check_lock_bytes(s, new_perm, new_shared, errp);
+            if (!ret) {
+                return 0;
+            }
+        }
+        op = RAW_PL_ABORT;
+        /* fall through to unlock bytes. */
+    case RAW_PL_ABORT:
+        raw_apply_lock_bytes(s, s->perm, ~s->shared_perm, true, &local_err);
+        if (local_err) {
+            /* Theoretically the above call only unlocks bytes and it cannot
+             * fail. Something weird happened, report it.
+             */
+            error_report_err(local_err);
+        }
+        break;
+    case RAW_PL_COMMIT:
+        raw_apply_lock_bytes(s, new_perm, ~new_shared, true, &local_err);
+        if (local_err) {
+            /* Theoretically the above call only unlocks bytes and it cannot
+             * fail. Something weird happened, report it.
+             */
+            error_report_err(local_err);
+        }
+        break;
+    }
+    return ret;
+}
+
 static int raw_reopen_prepare(BDRVReopenState *state,
                              BlockReopenQueue *queue, Error **errp)
 {
@@ -1407,26 +1623,37 @@ static void raw_close(BlockDriverState *bs)
        qemu_close(s->fd);
        s->fd = -1;
    }
+    if (s->lock_fd >= 0) {
+        qemu_close(s->lock_fd);
+        s->lock_fd = -1;
+    }
 }

-static int raw_truncate(BlockDriverState *bs, int64_t offset)
+static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
 {
    BDRVRawState *s = bs->opaque;
    struct stat st;
+    int ret;

    if (fstat(s->fd, &st)) {
-        return -errno;
+        ret = -errno;
+        error_setg_errno(errp, -ret, "Failed to fstat() the file");
+        return ret;
    }

    if (S_ISREG(st.st_mode)) {
        if (ftruncate(s->fd, offset) < 0) {
-            return -errno;
+            ret = -errno;
+            error_setg_errno(errp, -ret, "Failed to resize the file");
+            return ret;
        }
    } else if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
-       if (offset > raw_getlength(bs)) {
-           return -EINVAL;
-       }
+        if (offset > raw_getlength(bs)) {
+            error_setg(errp, "Cannot grow device files");
+            return -EINVAL;
+        }
    } else {
+        error_setg(errp, "Resizing this file is not supported");
        return -ENOTSUP;
    }

@@ -1944,6 +2171,25 @@ static QemuOptsList raw_create_opts = {
    }
 };

+static int raw_check_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared,
+                          Error **errp)
+{
+    return raw_handle_perm_lock(bs, RAW_PL_PREPARE, perm, shared, errp);
+}
+
+static void raw_set_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared)
+{
+    BDRVRawState *s = bs->opaque;
+    raw_handle_perm_lock(bs, RAW_PL_COMMIT, perm, shared, NULL);
+    s->perm = perm;
+    s->shared_perm = shared;
+}
+
+static void raw_abort_perm_update(BlockDriverState *bs)
+{
+    raw_handle_perm_lock(bs, RAW_PL_ABORT, 0, 0, NULL);
+}
+
 BlockDriver bdrv_file = {
    .format_name = "file",
    .protocol_name = "file",
@@ -1974,7 +2220,9 @@ BlockDriver bdrv_file = {
    .bdrv_get_info = raw_get_info,
    .bdrv_get_allocated_file_size
                        = raw_get_allocated_file_size,
-
+    .bdrv_check_perm = raw_check_perm,
+    .bdrv_set_perm   = raw_set_perm,
+    .bdrv_abort_perm_update = raw_abort_perm_update,
    .create_opts = &raw_create_opts,
 };

@@ -2150,7 +2398,7 @@ static void hdev_parse_filename(const char *filename, QDict *options,
    /* The prefix is optional, just as for "file". */
    strstart(filename, "host_device:", &filename);

-    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
+    qdict_put_str(options, "filename", filename);
 }

 static bool hdev_is_sg(BlockDriverState *bs)
@@ -2239,7 +2487,7 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
            goto hdev_open_Mac_error;
        }

-        qdict_put(options, "filename", qstring_from_str(bsd_path));
+        qdict_put_str(options, "filename", bsd_path);

 hdev_open_Mac_error:
        g_free(mediaType);
@@ -2433,6 +2681,9 @@ static BlockDriver bdrv_host_device = {
    .bdrv_get_info = raw_get_info,
    .bdrv_get_allocated_file_size
                        = raw_get_allocated_file_size,
+    .bdrv_check_perm = raw_check_perm,
+    .bdrv_set_perm   = raw_set_perm,
+    .bdrv_abort_perm_update = raw_abort_perm_update,
    .bdrv_probe_blocksizes = hdev_probe_blocksizes,
    .bdrv_probe_geometry = hdev_probe_geometry,

@@ -2449,7 +2700,7 @@ static void cdrom_parse_filename(const char *filename, QDict *options,
    /* The prefix is optional, just as for "file". */
    strstart(filename, "host_cdrom:", &filename);

-    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
+    qdict_put_str(options, "filename", filename);
 }
 #endif

--- a/block/file-win32.c
+++ b/block/file-win32.c
@@ -24,7 +24,6 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu/cutils.h"
-#include "qemu/timer.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "block/raw-aio.h"
@@ -282,7 +281,7 @@ static void raw_parse_filename(const char *filename, QDict *options,
     * function call can be ignored. */
    strstart(filename, "file:", &filename);

-    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
+    qdict_put_str(options, "filename", filename);
 }

 static QemuOptsList raw_runtime_opts = {
@@ -345,6 +344,12 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
        goto fail;
    }

+    if (qdict_get_try_bool(options, "locking", false)) {
+        error_setg(errp, "locking=on is not supported on Windows");
+        ret = -EINVAL;
+        goto fail;
+    }
+
    filename = qemu_opt_get(opts, "filename");

    use_aio = get_aio_option(opts, flags, &local_err);
@@ -461,7 +466,7 @@ static void raw_close(BlockDriverState *bs)
    }
 }

-static int raw_truncate(BlockDriverState *bs, int64_t offset)
+static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
 {
    BDRVRawState *s = bs->opaque;
    LONG low, high;
@@ -476,11 +481,11 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset)
     */
    dwPtrLow = SetFilePointer(s->hfile, low, &high, FILE_BEGIN);
    if (dwPtrLow == INVALID_SET_FILE_POINTER && GetLastError() != NO_ERROR) {
-        fprintf(stderr, "SetFilePointer error: %lu\n", GetLastError());
+        error_setg_win32(errp, GetLastError(), "SetFilePointer error");
        return -EIO;
    }
    if (SetEndOfFile(s->hfile) == 0) {
-        fprintf(stderr, "SetEndOfFile error: %lu\n", GetLastError());
+        error_setg_win32(errp, GetLastError(), "SetEndOfFile error");
        return -EIO;
    }
    return 0;
@@ -669,7 +674,7 @@ static void hdev_parse_filename(const char *filename, QDict *options,
    /* The prefix is optional, just as for "file". */
    strstart(filename, "host_device:", &filename);

-    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
+    qdict_put_str(options, "filename", filename);
 }

 static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -321,7 +321,7 @@ static int parse_volume_options(BlockdevOptionsGluster *gconf, char *path)
 static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf,
                                  const char *filename)
 {
-    SocketAddressFlat *gsconf;
+    SocketAddress *gsconf;
    URI *uri;
    QueryParams *qp = NULL;
    bool is_unix = false;
@@ -332,19 +332,19 @@ static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf,
        return -EINVAL;
    }

-    gconf->server = g_new0(SocketAddressFlatList, 1);
-    gconf->server->value = gsconf = g_new0(SocketAddressFlat, 1);
+    gconf->server = g_new0(SocketAddressList, 1);
+    gconf->server->value = gsconf = g_new0(SocketAddress, 1);

    /* transport */
    if (!uri->scheme || !strcmp(uri->scheme, "gluster")) {
-        gsconf->type = SOCKET_ADDRESS_FLAT_TYPE_INET;
+        gsconf->type = SOCKET_ADDRESS_TYPE_INET;
    } else if (!strcmp(uri->scheme, "gluster+tcp")) {
-        gsconf->type = SOCKET_ADDRESS_FLAT_TYPE_INET;
+        gsconf->type = SOCKET_ADDRESS_TYPE_INET;
    } else if (!strcmp(uri->scheme, "gluster+unix")) {
-        gsconf->type = SOCKET_ADDRESS_FLAT_TYPE_UNIX;
+        gsconf->type = SOCKET_ADDRESS_TYPE_UNIX;
        is_unix = true;
    } else if (!strcmp(uri->scheme, "gluster+rdma")) {
-        gsconf->type = SOCKET_ADDRESS_FLAT_TYPE_INET;
+        gsconf->type = SOCKET_ADDRESS_TYPE_INET;
        error_report("Warning: rdma feature is not supported, falling "
                     "back to tcp");
    } else {
@@ -396,7 +396,7 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
    struct glfs *glfs;
    int ret;
    int old_errno;
-    SocketAddressFlatList *server;
+    SocketAddressList *server;
    unsigned long long port;

    glfs = glfs_find_preopened(gconf->volume);
@@ -413,11 +413,11 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,

    for (server = gconf->server; server; server = server->next) {
        switch (server->value->type) {
-        case SOCKET_ADDRESS_FLAT_TYPE_UNIX:
+        case SOCKET_ADDRESS_TYPE_UNIX:
            ret = glfs_set_volfile_server(glfs, "unix",
                                   server->value->u.q_unix.path, 0);
            break;
-        case SOCKET_ADDRESS_FLAT_TYPE_INET:
+        case SOCKET_ADDRESS_TYPE_INET:
            if (parse_uint_full(server->value->u.inet.port, &port, 10) < 0 ||
                port > 65535) {
                error_setg(errp, "'%s' is not a valid port number",
@@ -429,8 +429,8 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
                                   server->value->u.inet.host,
                                   (int)port);
            break;
-        case SOCKET_ADDRESS_FLAT_TYPE_VSOCK:
-        case SOCKET_ADDRESS_FLAT_TYPE_FD:
+        case SOCKET_ADDRESS_TYPE_VSOCK:
+        case SOCKET_ADDRESS_TYPE_FD:
        default:
            abort();
        }
@@ -450,7 +450,7 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
        error_setg(errp, "Gluster connection for volume %s, path %s failed"
                         " to connect", gconf->volume, gconf->path);
        for (server = gconf->server; server; server = server->next) {
-            if (server->value->type  == SOCKET_ADDRESS_FLAT_TYPE_UNIX) {
+            if (server->value->type  == SOCKET_ADDRESS_TYPE_UNIX) {
                error_append_hint(errp, "hint: failed on socket %s ",
                                  server->value->u.q_unix.path);
            } else {
@@ -487,8 +487,8 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
                                  QDict *options, Error **errp)
 {
    QemuOpts *opts;
-    SocketAddressFlat *gsconf = NULL;
-    SocketAddressFlatList *curr = NULL;
+    SocketAddress *gsconf = NULL;
+    SocketAddressList *curr = NULL;
    QDict *backing_options = NULL;
    Error *local_err = NULL;
    char *str = NULL;
@@ -542,14 +542,14 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
            goto out;

        }
-        gsconf = g_new0(SocketAddressFlat, 1);
+        gsconf = g_new0(SocketAddress, 1);
        if (!strcmp(ptr, "tcp")) {
            ptr = "inet";       /* accept legacy "tcp" */
        }
-        type = qapi_enum_parse(SocketAddressFlatType_lookup, ptr,
-                               SOCKET_ADDRESS_FLAT_TYPE__MAX, -1, NULL);
-        if (type != SOCKET_ADDRESS_FLAT_TYPE_INET
-            && type != SOCKET_ADDRESS_FLAT_TYPE_UNIX) {
+        type = qapi_enum_parse(SocketAddressType_lookup, ptr,
+                               SOCKET_ADDRESS_TYPE__MAX, -1, NULL);
+        if (type != SOCKET_ADDRESS_TYPE_INET
+            && type != SOCKET_ADDRESS_TYPE_UNIX) {
            error_setg(&local_err,
                       "Parameter '%s' may be 'inet' or 'unix'",
                       GLUSTER_OPT_TYPE);
@@ -559,7 +559,7 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
        gsconf->type = type;
        qemu_opts_del(opts);

-        if (gsconf->type == SOCKET_ADDRESS_FLAT_TYPE_INET) {
+        if (gsconf->type == SOCKET_ADDRESS_TYPE_INET) {
            /* create opts info from runtime_inet_opts list */
            opts = qemu_opts_create(&runtime_inet_opts, NULL, 0, &error_abort);
            qemu_opts_absorb_qdict(opts, backing_options, &local_err);
@@ -628,11 +628,11 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
        }

        if (gconf->server == NULL) {
-            gconf->server = g_new0(SocketAddressFlatList, 1);
+            gconf->server = g_new0(SocketAddressList, 1);
            gconf->server->value = gsconf;
            curr = gconf->server;
        } else {
-            curr->next = g_new0(SocketAddressFlatList, 1);
+            curr->next = g_new0(SocketAddressList, 1);
            curr->next->value = gsconf;
            curr = curr->next;
        }
@@ -648,7 +648,7 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,

 out:
    error_propagate(errp, local_err);
-    qapi_free_SocketAddressFlat(gsconf);
+    qapi_free_SocketAddress(gsconf);
    qemu_opts_del(opts);
    g_free(str);
    QDECREF(backing_options);
@@ -1092,14 +1092,17 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
    return acb.ret;
 }

-static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset)
+static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset,
+                                 Error **errp)
 {
    int ret;
    BDRVGlusterState *s = bs->opaque;

    ret = glfs_ftruncate(s->fd, offset);
    if (ret < 0) {
-        return -errno;
+        ret = -errno;
+        error_setg_errno(errp, -ret, "Failed to truncate file");
+        return ret;
    }

    return 0;
--- a/block/io.c
+++ b/block/io.c
@@ -1362,16 +1362,8 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
    assert(!waited || !req->serialising);
    assert(req->overlap_offset <= offset);
    assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
-    /* FIXME: Block migration uses the BlockBackend of the guest device at a
-     *        point when it has not yet taken write permissions. This will be
-     *        fixed by a future patch, but for now we have to bypass this
-     *        assertion for block migration to work. */
-    // assert(child->perm & BLK_PERM_WRITE);
-    /* FIXME: Because of the above, we also cannot guarantee that all format
-     *        BDS take the BLK_PERM_RESIZE permission on their file BDS, since
-     *        they are not obligated to do so if they do not have any parent
-     *        that has taken the permission to write to them. */
-    // assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);
+    assert(child->perm & BLK_PERM_WRITE);
+    assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);

    ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);

@@ -1452,7 +1444,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
    int ret = 0;

    head_padding_bytes = offset & (align - 1);
-    tail_padding_bytes = align - ((offset + bytes) & (align - 1));
+    tail_padding_bytes = (align - (offset + bytes)) & (align - 1);


    assert(flags & BDRV_REQ_ZERO_WRITE);
@@ -1792,8 +1784,8 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,

    if (ret & BDRV_BLOCK_RAW) {
        assert(ret & BDRV_BLOCK_OFFSET_VALID);
-        ret = bdrv_get_block_status(*file, ret >> BDRV_SECTOR_BITS,
-                                    *pnum, pnum, file);
+        ret = bdrv_co_get_block_status(*file, ret >> BDRV_SECTOR_BITS,
+                                       *pnum, pnum, file);
        goto out;
    }

@@ -2308,7 +2300,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)

    bdrv_inc_in_flight(bs);

-    if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
+    if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
        bdrv_is_sg(bs)) {
        goto early_exit;
    }
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -2059,22 +2059,24 @@ static void iscsi_reopen_commit(BDRVReopenState *reopen_state)
    }
 }

-static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
+static int iscsi_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
 {
    IscsiLun *iscsilun = bs->opaque;
    Error *local_err = NULL;

    if (iscsilun->type != TYPE_DISK) {
+        error_setg(errp, "Cannot resize non-disk iSCSI devices");
        return -ENOTSUP;
    }

    iscsi_readcapacity_sync(iscsilun, &local_err);
    if (local_err != NULL) {
-        error_free(local_err);
+        error_propagate(errp, local_err);
        return -EIO;
    }

    if (offset > iscsi_getlength(bs)) {
+        error_setg(errp, "Cannot grow iSCSI devices");
        return -EINVAL;
    }

--- a/block/mirror.c
+++ b/block/mirror.c
@@ -724,7 +724,7 @@ static void coroutine_fn mirror_run(void *opaque)
        }

        if (s->bdev_length > base_length) {
-            ret = blk_truncate(s->target, s->bdev_length);
+            ret = blk_truncate(s->target, s->bdev_length, NULL);
            if (ret < 0) {
                goto immediate_exit;
            }
@@ -1112,10 +1112,11 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
                             BlockdevOnError on_target_error,
                             bool unmap,
                             BlockCompletionFunc *cb,
-                             void *opaque, Error **errp,
+                             void *opaque,
                             const BlockJobDriver *driver,
                             bool is_none_mode, BlockDriverState *base,
-                             bool auto_complete, const char *filter_node_name)
+                             bool auto_complete, const char *filter_node_name,
+                             Error **errp)
 {
    MirrorBlockJob *s;
    BlockDriverState *mirror_top_bs;
@@ -1280,17 +1281,17 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
    base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
    mirror_start_job(job_id, bs, BLOCK_JOB_DEFAULT, target, replaces,
                     speed, granularity, buf_size, backing_mode,
-                     on_source_error, on_target_error, unmap, NULL, NULL, errp,
+                     on_source_error, on_target_error, unmap, NULL, NULL,
                     &mirror_job_driver, is_none_mode, base, false,
-                     filter_node_name);
+                     filter_node_name, errp);
 }

 void commit_active_start(const char *job_id, BlockDriverState *bs,
                         BlockDriverState *base, int creation_flags,
                         int64_t speed, BlockdevOnError on_error,
                         const char *filter_node_name,
-                         BlockCompletionFunc *cb, void *opaque, Error **errp,
-                         bool auto_complete)
+                         BlockCompletionFunc *cb, void *opaque,
+                         bool auto_complete, Error **errp)
 {
    int orig_base_flags;
    Error *local_err = NULL;
@@ -1303,9 +1304,9 @@ void commit_active_start(const char *job_id, BlockDriverState *bs,

    mirror_start_job(job_id, bs, creation_flags, base, NULL, speed, 0, 0,
                     MIRROR_LEAVE_BACKING_CHAIN,
-                     on_error, on_error, true, cb, opaque, &local_err,
+                     on_error, on_error, true, cb, opaque,
                     &commit_active_job_driver, false, base, auto_complete,
-                     filter_node_name);
+                     filter_node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto error_restore_flags;
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -47,7 +47,7 @@ typedef struct BDRVNBDState {
    NBDClientSession client;

    /* For nbd_refresh_filename() */
-    SocketAddressFlat *saddr;
+    SocketAddress *saddr;
    char *export, *tlscredsid;
 } BDRVNBDState;

@@ -79,7 +79,7 @@ static int nbd_parse_uri(const char *filename, QDict *options)
    p = uri->path ? uri->path : "/";
    p += strspn(p, "/");
    if (p[0]) {
-        qdict_put(options, "export", qstring_from_str(p));
+        qdict_put_str(options, "export", p);
    }

    qp = query_params_parse(uri->query);
@@ -94,9 +94,8 @@ static int nbd_parse_uri(const char *filename, QDict *options)
            ret = -EINVAL;
            goto out;
        }
-        qdict_put(options, "server.type", qstring_from_str("unix"));
-        qdict_put(options, "server.path",
-                  qstring_from_str(qp->p[0].value));
+        qdict_put_str(options, "server.type", "unix");
+        qdict_put_str(options, "server.path", qp->p[0].value);
    } else {
        QString *host;
        char *port_str;
@@ -115,11 +114,11 @@ static int nbd_parse_uri(const char *filename, QDict *options)
            host = qstring_from_str(uri->server);
        }

-        qdict_put(options, "server.type", qstring_from_str("inet"));
+        qdict_put_str(options, "server.type", "inet");
        qdict_put(options, "server.host", host);

        port_str = g_strdup_printf("%d", uri->port ?: NBD_DEFAULT_PORT);
-        qdict_put(options, "server.port", qstring_from_str(port_str));
+        qdict_put_str(options, "server.port", port_str);
        g_free(port_str);
    }

@@ -181,7 +180,7 @@ static void nbd_parse_filename(const char *filename, QDict *options,
        export_name[0] = 0; /* truncate 'file' */
        export_name += strlen(EN_OPTSTR);

-        qdict_put(options, "export", qstring_from_str(export_name));
+        qdict_put_str(options, "export", export_name);
    }

    /* extract the host_spec - fail if it's not nbd:... */
@@ -196,19 +195,19 @@ static void nbd_parse_filename(const char *filename, QDict *options,

    /* are we a UNIX or TCP socket? */
    if (strstart(host_spec, "unix:", &unixpath)) {
-        qdict_put(options, "server.type", qstring_from_str("unix"));
-        qdict_put(options, "server.path", qstring_from_str(unixpath));
+        qdict_put_str(options, "server.type", "unix");
+        qdict_put_str(options, "server.path", unixpath);
    } else {
-        InetSocketAddress *addr = NULL;
+        InetSocketAddress *addr = g_new(InetSocketAddress, 1);

-        addr = inet_parse(host_spec, errp);
-        if (!addr) {
-            goto out;
+        if (inet_parse(addr, host_spec, errp)) {
+            goto out_inet;
        }

-        qdict_put(options, "server.type", qstring_from_str("inet"));
-        qdict_put(options, "server.host", qstring_from_str(addr->host));
-        qdict_put(options, "server.port", qstring_from_str(addr->port));
+        qdict_put_str(options, "server.type", "inet");
+        qdict_put_str(options, "server.host", addr->host);
+        qdict_put_str(options, "server.port", addr->port);
+    out_inet:
        qapi_free_InetSocketAddress(addr);
    }

@@ -247,22 +246,22 @@ static bool nbd_process_legacy_socket_options(QDict *output_options,
            return false;
        }

-        qdict_put(output_options, "server.type", qstring_from_str("unix"));
-        qdict_put(output_options, "server.path", qstring_from_str(path));
+        qdict_put_str(output_options, "server.type", "unix");
+        qdict_put_str(output_options, "server.path", path);
    } else if (host) {
-        qdict_put(output_options, "server.type", qstring_from_str("inet"));
-        qdict_put(output_options, "server.host", qstring_from_str(host));
-        qdict_put(output_options, "server.port",
-                  qstring_from_str(port ?: stringify(NBD_DEFAULT_PORT)));
+        qdict_put_str(output_options, "server.type", "inet");
+        qdict_put_str(output_options, "server.host", host);
+        qdict_put_str(output_options, "server.port",
+                      port ?: stringify(NBD_DEFAULT_PORT));
    }

    return true;
 }

-static SocketAddressFlat *nbd_config(BDRVNBDState *s, QDict *options,
-                                     Error **errp)
+static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options,
+                                 Error **errp)
 {
-    SocketAddressFlat *saddr = NULL;
+    SocketAddress *saddr = NULL;
    QDict *addr = NULL;
    QObject *crumpled_addr = NULL;
    Visitor *iv = NULL;
@@ -288,7 +287,7 @@ static SocketAddressFlat *nbd_config(BDRVNBDState *s, QDict *options,
     * visitor expects the former.
     */
    iv = qobject_input_visitor_new(crumpled_addr);
-    visit_type_SocketAddressFlat(iv, NULL, &saddr, &local_err);
+    visit_type_SocketAddress(iv, NULL, &saddr, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto done;
@@ -307,10 +306,9 @@ NBDClientSession *nbd_get_client_session(BlockDriverState *bs)
    return &s->client;
 }

-static QIOChannelSocket *nbd_establish_connection(SocketAddressFlat *saddr_flat,
+static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
                                                  Error **errp)
 {
-    SocketAddress *saddr = socket_address_crumple(saddr_flat);
    QIOChannelSocket *sioc;
    Error *local_err = NULL;

@@ -320,7 +318,6 @@ static QIOChannelSocket *nbd_establish_connection(SocketAddressFlat *saddr_flat,
    qio_channel_socket_connect_sync(sioc,
                                    saddr,
                                    &local_err);
-    qapi_free_SocketAddress(saddr);
    if (local_err) {
        object_unref(OBJECT(sioc));
        error_propagate(errp, local_err);
@@ -413,7 +410,7 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
        goto error;
    }

-    /* Translate @host, @port, and @path to a SocketAddressFlat */
+    /* Translate @host, @port, and @path to a SocketAddress */
    if (!nbd_process_legacy_socket_options(options, opts, errp)) {
        goto error;
    }
@@ -434,7 +431,7 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
        }

        /* TODO SOCKET_ADDRESS_KIND_FD where fd has AF_INET or AF_INET6 */
-        if (s->saddr->type != SOCKET_ADDRESS_FLAT_TYPE_INET) {
+        if (s->saddr->type != SOCKET_ADDRESS_TYPE_INET) {
            error_setg(errp, "TLS only supported over IP sockets");
            goto error;
        }
@@ -461,7 +458,7 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
        object_unref(OBJECT(tlscreds));
    }
    if (ret < 0) {
-        qapi_free_SocketAddressFlat(s->saddr);
+        qapi_free_SocketAddress(s->saddr);
        g_free(s->export);
        g_free(s->tlscredsid);
    }
@@ -487,7 +484,7 @@ static void nbd_close(BlockDriverState *bs)

    nbd_client_close(bs);

-    qapi_free_SocketAddressFlat(s->saddr);
+    qapi_free_SocketAddress(s->saddr);
    g_free(s->export);
    g_free(s->tlscredsid);
 }
@@ -518,17 +515,17 @@ static void nbd_refresh_filename(BlockDriverState *bs, QDict *options)
    Visitor *ov;
    const char *host = NULL, *port = NULL, *path = NULL;

-    if (s->saddr->type == SOCKET_ADDRESS_FLAT_TYPE_INET) {
+    if (s->saddr->type == SOCKET_ADDRESS_TYPE_INET) {
        const InetSocketAddress *inet = &s->saddr->u.inet;
        if (!inet->has_ipv4 && !inet->has_ipv6 && !inet->has_to) {
            host = inet->host;
            port = inet->port;
        }
-    } else if (s->saddr->type == SOCKET_ADDRESS_FLAT_TYPE_UNIX) {
+    } else if (s->saddr->type == SOCKET_ADDRESS_TYPE_UNIX) {
        path = s->saddr->u.q_unix.path;
    } /* else can't represent as pseudo-filename */

-    qdict_put(opts, "driver", qstring_from_str("nbd"));
+    qdict_put_str(opts, "driver", "nbd");

    if (path && s->export) {
        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
@@ -545,16 +542,16 @@ static void nbd_refresh_filename(BlockDriverState *bs, QDict *options)
    }

    ov = qobject_output_visitor_new(&saddr_qdict);
-    visit_type_SocketAddressFlat(ov, NULL, &s->saddr, &error_abort);
+    visit_type_SocketAddress(ov, NULL, &s->saddr, &error_abort);
    visit_complete(ov, &saddr_qdict);
    visit_free(ov);
    qdict_put_obj(opts, "server", saddr_qdict);

    if (s->export) {
-        qdict_put(opts, "export", qstring_from_str(s->export));
+        qdict_put_str(opts, "export", s->export);
    }
    if (s->tlscredsid) {
-        qdict_put(opts, "tls-creds", qstring_from_str(s->tlscredsid));
+        qdict_put_str(opts, "tls-creds", s->tlscredsid);
    }

    qdict_flatten(opts);
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -104,9 +104,9 @@ static int nfs_parse_uri(const char *filename, QDict *options, Error **errp)
        goto out;
    }

-    qdict_put(options, "server.host", qstring_from_str(uri->server));
-    qdict_put(options, "server.type", qstring_from_str("inet"));
-    qdict_put(options, "path", qstring_from_str(uri->path));
+    qdict_put_str(options, "server.host", uri->server);
+    qdict_put_str(options, "server.type", "inet");
+    qdict_put_str(options, "path", uri->path);

    for (i = 0; i < qp->n; i++) {
        unsigned long long val;
@@ -121,23 +121,17 @@ static int nfs_parse_uri(const char *filename, QDict *options, Error **errp)
            goto out;
        }
        if (!strcmp(qp->p[i].name, "uid")) {
-            qdict_put(options, "user",
-                      qstring_from_str(qp->p[i].value));
+            qdict_put_str(options, "user", qp->p[i].value);
        } else if (!strcmp(qp->p[i].name, "gid")) {
-            qdict_put(options, "group",
-                      qstring_from_str(qp->p[i].value));
+            qdict_put_str(options, "group", qp->p[i].value);
        } else if (!strcmp(qp->p[i].name, "tcp-syncnt")) {
-            qdict_put(options, "tcp-syn-count",
-                      qstring_from_str(qp->p[i].value));
+            qdict_put_str(options, "tcp-syn-count", qp->p[i].value);
        } else if (!strcmp(qp->p[i].name, "readahead")) {
-            qdict_put(options, "readahead-size",
-                      qstring_from_str(qp->p[i].value));
+            qdict_put_str(options, "readahead-size", qp->p[i].value);
        } else if (!strcmp(qp->p[i].name, "pagecache")) {
-            qdict_put(options, "page-cache-size",
-                      qstring_from_str(qp->p[i].value));
+            qdict_put_str(options, "page-cache-size", qp->p[i].value);
        } else if (!strcmp(qp->p[i].name, "debug")) {
-            qdict_put(options, "debug",
-                      qstring_from_str(qp->p[i].value));
+            qdict_put_str(options, "debug", qp->p[i].value);
        } else {
            error_setg(errp, "Unknown NFS parameter name: %s",
                       qp->p[i].name);
@@ -497,7 +491,7 @@ out:


 static int64_t nfs_client_open(NFSClient *client, QDict *options,
-                               int flags, Error **errp, int open_flags)
+                               int flags, int open_flags, Error **errp)
 {
    int ret = -EINVAL;
    QemuOpts *opts = NULL;
@@ -663,7 +657,7 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,

    ret = nfs_client_open(client, options,
                          (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
-                          errp, bs->open_flags);
+                          bs->open_flags, errp);
    if (ret < 0) {
        return ret;
    }
@@ -705,7 +699,7 @@ static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp)
        goto out;
    }

-    ret = nfs_client_open(client, options, O_CREAT, errp, 0);
+    ret = nfs_client_open(client, options, O_CREAT, 0, errp);
    if (ret < 0) {
        goto out;
    }
@@ -764,10 +758,18 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
    return (task.ret < 0 ? task.ret : st.st_blocks * 512);
 }

-static int nfs_file_truncate(BlockDriverState *bs, int64_t offset)
+static int nfs_file_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
 {
    NFSClient *client = bs->opaque;
-    return nfs_ftruncate(client->context, client->fh, offset);
+    int ret;
+
+    ret = nfs_ftruncate(client->context, client->fh, offset);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Failed to truncate file");
+        return ret;
+    }
+
+    return 0;
 }

 /* Note that this will not re-establish a connection with the NFS server
@@ -811,7 +813,7 @@ static void nfs_refresh_filename(BlockDriverState *bs, QDict *options)
    QObject *server_qdict;
    Visitor *ov;

-    qdict_put(opts, "driver", qstring_from_str("nfs"));
+    qdict_put_str(opts, "driver", "nfs");

    if (client->uid && !client->gid) {
        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
@@ -834,28 +836,25 @@ static void nfs_refresh_filename(BlockDriverState *bs, QDict *options)
    visit_type_NFSServer(ov, NULL, &client->server, &error_abort);
    visit_complete(ov, &server_qdict);
    qdict_put_obj(opts, "server", server_qdict);
-    qdict_put(opts, "path", qstring_from_str(client->path));
+    qdict_put_str(opts, "path", client->path);

    if (client->uid) {
-        qdict_put(opts, "user", qint_from_int(client->uid));
+        qdict_put_int(opts, "user", client->uid);
    }
    if (client->gid) {
-        qdict_put(opts, "group", qint_from_int(client->gid));
+        qdict_put_int(opts, "group", client->gid);
    }
    if (client->tcp_syncnt) {
-        qdict_put(opts, "tcp-syn-cnt",
-                  qint_from_int(client->tcp_syncnt));
+        qdict_put_int(opts, "tcp-syn-cnt", client->tcp_syncnt);
    }
    if (client->readahead) {
-        qdict_put(opts, "readahead-size",
-                  qint_from_int(client->readahead));
+        qdict_put_int(opts, "readahead-size", client->readahead);
    }
    if (client->pagecache) {
-        qdict_put(opts, "page-cache-size",
-                  qint_from_int(client->pagecache));
+        qdict_put_int(opts, "page-cache-size", client->pagecache);
    }
    if (client->debug) {
-        qdict_put(opts, "debug", qint_from_int(client->debug));
+        qdict_put_int(opts, "debug", client->debug);
    }

    visit_free(ov);
--- a/block/null.c
+++ b/block/null.c
@@ -232,7 +232,7 @@ static void null_refresh_filename(BlockDriverState *bs, QDict *opts)
                 bs->drv->format_name);
    }

-    qdict_put(opts, "driver", qstring_from_str(bs->drv->format_name));
+    qdict_put_str(opts, "driver", bs->drv->format_name);
    bs->full_open_options = opts;
 }

--- a/block/parallels.c
+++ b/block/parallels.c
@@ -223,7 +223,8 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
                                     space << BDRV_SECTOR_BITS, 0);
        } else {
            ret = bdrv_truncate(bs->file,
-                                (s->data_end + space) << BDRV_SECTOR_BITS);
+                                (s->data_end + space) << BDRV_SECTOR_BITS,
+                                NULL);
        }
        if (ret < 0) {
            return ret;
@@ -456,8 +457,10 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
                size - res->image_end_offset);
        res->leaks += count;
        if (fix & BDRV_FIX_LEAKS) {
-            ret = bdrv_truncate(bs->file, res->image_end_offset);
+            Error *local_err = NULL;
+            ret = bdrv_truncate(bs->file, res->image_end_offset, &local_err);
            if (ret < 0) {
+                error_report_err(local_err);
                res->check_errors++;
                return ret;
            }
@@ -504,7 +507,7 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)

    blk_set_allow_write_beyond_eof(file, true);

-    ret = blk_truncate(file, 0);
+    ret = blk_truncate(file, 0, errp);
    if (ret < 0) {
        goto exit;
    }
@@ -696,7 +699,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
    }

    if (!(flags & BDRV_O_RESIZE) || !bdrv_has_zero_init(bs->file->bs) ||
-            bdrv_truncate(bs->file, bdrv_getlength(bs->file->bs)) != 0) {
+            bdrv_truncate(bs->file, bdrv_getlength(bs->file->bs), NULL) != 0) {
        s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
    }

@@ -739,7 +742,7 @@ static void parallels_close(BlockDriverState *bs)
    }

    if (bs->open_flags & BDRV_O_RDWR) {
-        bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS);
+        bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS, NULL);
    }

    g_free(s->bat_dirty_bmap);
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -32,7 +32,7 @@
 #include <zlib.h>
 #include "qapi/qmp/qerror.h"
 #include "crypto/cipher.h"
-#include "migration/migration.h"
+#include "migration/blocker.h"

 /**************************************************************/
 /* QEMU COW block driver with compression and encryption support */
@@ -473,7 +473,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
                /* round to cluster size */
                cluster_offset = (cluster_offset + s->cluster_size - 1) &
                    ~(s->cluster_size - 1);
-                bdrv_truncate(bs->file, cluster_offset + s->cluster_size);
+                bdrv_truncate(bs->file, cluster_offset + s->cluster_size, NULL);
                /* if encrypted, we must initialize the cluster
                   content which won't be written */
                if (bs->encrypted &&
@@ -833,7 +833,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)

    blk_set_allow_write_beyond_eof(qcow_blk, true);

-    ret = blk_truncate(qcow_blk, 0);
+    ret = blk_truncate(qcow_blk, 0, errp);
    if (ret < 0) {
        goto exit;
    }
@@ -916,7 +916,7 @@ static int qcow_make_empty(BlockDriverState *bs)
    if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
            l1_length) < 0)
        return -1;
-    ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
+    ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length, NULL);
    if (ret < 0)
        return ret;

--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -309,14 +309,19 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size,
        uint64_t *l2_table, uint64_t stop_flags)
 {
    int i;
+    QCow2ClusterType first_cluster_type;
    uint64_t mask = stop_flags | L2E_OFFSET_MASK | QCOW_OFLAG_COMPRESSED;
    uint64_t first_entry = be64_to_cpu(l2_table[0]);
    uint64_t offset = first_entry & mask;

-    if (!offset)
+    if (!offset) {
        return 0;
+    }

-    assert(qcow2_get_cluster_type(first_entry) == QCOW2_CLUSTER_NORMAL);
+    /* must be allocated */
+    first_cluster_type = qcow2_get_cluster_type(first_entry);
+    assert(first_cluster_type == QCOW2_CLUSTER_NORMAL ||
+           first_cluster_type == QCOW2_CLUSTER_ZERO_ALLOC);

    for (i = 0; i < nb_clusters; i++) {
        uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask;
@@ -328,14 +333,21 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size,
 	return i;
 }

-static int count_contiguous_clusters_by_type(int nb_clusters,
-                                             uint64_t *l2_table,
-                                             int wanted_type)
+/*
+ * Checks how many consecutive unallocated clusters in a given L2
+ * table have the same cluster type.
+ */
+static int count_contiguous_clusters_unallocated(int nb_clusters,
+                                                 uint64_t *l2_table,
+                                                 QCow2ClusterType wanted_type)
 {
    int i;

+    assert(wanted_type == QCOW2_CLUSTER_ZERO_PLAIN ||
+           wanted_type == QCOW2_CLUSTER_UNALLOCATED);
    for (i = 0; i < nb_clusters; i++) {
-        int type = qcow2_get_cluster_type(be64_to_cpu(l2_table[i]));
+        uint64_t entry = be64_to_cpu(l2_table[i]);
+        QCow2ClusterType type = qcow2_get_cluster_type(entry);

        if (type != wanted_type) {
            break;
@@ -487,6 +499,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
    int l1_bits, c;
    unsigned int offset_in_cluster;
    uint64_t bytes_available, bytes_needed, nb_clusters;
+    QCow2ClusterType type;
    int ret;

    offset_in_cluster = offset_into_cluster(s, offset);
@@ -509,13 +522,13 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,

    l1_index = offset >> l1_bits;
    if (l1_index >= s->l1_size) {
-        ret = QCOW2_CLUSTER_UNALLOCATED;
+        type = QCOW2_CLUSTER_UNALLOCATED;
        goto out;
    }

    l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
    if (!l2_offset) {
-        ret = QCOW2_CLUSTER_UNALLOCATED;
+        type = QCOW2_CLUSTER_UNALLOCATED;
        goto out;
    }

@@ -544,38 +557,37 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
     * true */
    assert(nb_clusters <= INT_MAX);

-    ret = qcow2_get_cluster_type(*cluster_offset);
-    switch (ret) {
+    type = qcow2_get_cluster_type(*cluster_offset);
+    if (s->qcow_version < 3 && (type == QCOW2_CLUSTER_ZERO_PLAIN ||
+                                type == QCOW2_CLUSTER_ZERO_ALLOC)) {
+        qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry found"
+                                " in pre-v3 image (L2 offset: %#" PRIx64
+                                ", L2 index: %#x)", l2_offset, l2_index);
+        ret = -EIO;
+        goto fail;
+    }
+    switch (type) {
    case QCOW2_CLUSTER_COMPRESSED:
        /* Compressed clusters can only be processed one by one */
        c = 1;
        *cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK;
        break;
-    case QCOW2_CLUSTER_ZERO:
-        if (s->qcow_version < 3) {
-            qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry found"
-                                    " in pre-v3 image (L2 offset: %#" PRIx64
-                                    ", L2 index: %#x)", l2_offset, l2_index);
-            ret = -EIO;
-            goto fail;
-        }
-        c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index],
-                                              QCOW2_CLUSTER_ZERO);
-        *cluster_offset = 0;
-        break;
+    case QCOW2_CLUSTER_ZERO_PLAIN:
    case QCOW2_CLUSTER_UNALLOCATED:
        /* how many empty clusters ? */
-        c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index],
-                                              QCOW2_CLUSTER_UNALLOCATED);
+        c = count_contiguous_clusters_unallocated(nb_clusters,
+                                                  &l2_table[l2_index], type);
        *cluster_offset = 0;
        break;
+    case QCOW2_CLUSTER_ZERO_ALLOC:
    case QCOW2_CLUSTER_NORMAL:
        /* how many allocated clusters ? */
        c = count_contiguous_clusters(nb_clusters, s->cluster_size,
-                &l2_table[l2_index], QCOW_OFLAG_ZERO);
+                                      &l2_table[l2_index], QCOW_OFLAG_ZERO);
        *cluster_offset &= L2E_OFFSET_MASK;
        if (offset_into_cluster(s, *cluster_offset)) {
-            qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset %#"
+            qcow2_signal_corruption(bs, true, -1, -1,
+                                    "Cluster allocation offset %#"
                                    PRIx64 " unaligned (L2 offset: %#" PRIx64
                                    ", L2 index: %#x)", *cluster_offset,
                                    l2_offset, l2_index);
@@ -602,7 +614,7 @@ out:
    assert(bytes_available - offset_in_cluster <= UINT_MAX);
    *bytes = bytes_available - offset_in_cluster;

-    return ret;
+    return type;

 fail:
    qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
@@ -835,7 +847,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
     * Don't discard clusters that reach a refcount of 0 (e.g. compressed
     * clusters), the next write will reuse them anyway.
     */
-    if (j != 0) {
+    if (!m->keep_old_clusters && j != 0) {
        for (i = 0; i < j; i++) {
            qcow2_free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1,
                                    QCOW2_DISCARD_NEVER);
@@ -860,7 +872,7 @@ static int count_cow_clusters(BDRVQcow2State *s, int nb_clusters,

    for (i = 0; i < nb_clusters; i++) {
        uint64_t l2_entry = be64_to_cpu(l2_table[l2_index + i]);
-        int cluster_type = qcow2_get_cluster_type(l2_entry);
+        QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry);

        switch(cluster_type) {
        case QCOW2_CLUSTER_NORMAL:
@@ -870,7 +882,8 @@ static int count_cow_clusters(BDRVQcow2State *s, int nb_clusters,
            break;
        case QCOW2_CLUSTER_UNALLOCATED:
        case QCOW2_CLUSTER_COMPRESSED:
-        case QCOW2_CLUSTER_ZERO:
+        case QCOW2_CLUSTER_ZERO_PLAIN:
+        case QCOW2_CLUSTER_ZERO_ALLOC:
            break;
        default:
            abort();
@@ -1132,8 +1145,9 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
    uint64_t entry;
    uint64_t nb_clusters;
    int ret;
+    bool keep_old_clusters = false;

-    uint64_t alloc_cluster_offset;
+    uint64_t alloc_cluster_offset = 0;

    trace_qcow2_handle_alloc(qemu_coroutine_self(), guest_offset, *host_offset,
                             *bytes);
@@ -1170,31 +1184,54 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
     * wrong with our code. */
    assert(nb_clusters > 0);

+    if (qcow2_get_cluster_type(entry) == QCOW2_CLUSTER_ZERO_ALLOC &&
+        (entry & QCOW_OFLAG_COPIED) &&
+        (!*host_offset ||
+         start_of_cluster(s, *host_offset) == (entry & L2E_OFFSET_MASK)))
+    {
+        /* Try to reuse preallocated zero clusters; contiguous normal clusters
+         * would be fine, too, but count_cow_clusters() above has limited
+         * nb_clusters already to a range of COW clusters */
+        int preallocated_nb_clusters =
+            count_contiguous_clusters(nb_clusters, s->cluster_size,
+                                      &l2_table[l2_index], QCOW_OFLAG_COPIED);
+        assert(preallocated_nb_clusters > 0);
+
+        nb_clusters = preallocated_nb_clusters;
+        alloc_cluster_offset = entry & L2E_OFFSET_MASK;
+
+        /* We want to reuse these clusters, so qcow2_alloc_cluster_link_l2()
+         * should not free them. */
+        keep_old_clusters = true;
+    }
+
    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);

-    /* Allocate, if necessary at a given offset in the image file */
-    alloc_cluster_offset = start_of_cluster(s, *host_offset);
-    ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset,
-                                  &nb_clusters);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    /* Can't extend contiguous allocation */
-    if (nb_clusters == 0) {
-        *bytes = 0;
-        return 0;
-    }
-
-    /* !*host_offset would overwrite the image header and is reserved for "no
-     * host offset preferred". If 0 was a valid host offset, it'd trigger the
-     * following overlap check; do that now to avoid having an invalid value in
-     * *host_offset. */
    if (!alloc_cluster_offset) {
-        ret = qcow2_pre_write_overlap_check(bs, 0, alloc_cluster_offset,
-                                            nb_clusters * s->cluster_size);
-        assert(ret < 0);
-        goto fail;
+        /* Allocate, if necessary at a given offset in the image file */
+        alloc_cluster_offset = start_of_cluster(s, *host_offset);
+        ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset,
+                                      &nb_clusters);
+        if (ret < 0) {
+            goto fail;
+        }
+
+        /* Can't extend contiguous allocation */
+        if (nb_clusters == 0) {
+            *bytes = 0;
+            return 0;
+        }
+
+        /* !*host_offset would overwrite the image header and is reserved for
+         * "no host offset preferred". If 0 was a valid host offset, it'd
+         * trigger the following overlap check; do that now to avoid having an
+         * invalid value in *host_offset. */
+        if (!alloc_cluster_offset) {
+            ret = qcow2_pre_write_overlap_check(bs, 0, alloc_cluster_offset,
+                                                nb_clusters * s->cluster_size);
+            assert(ret < 0);
+            goto fail;
+        }
    }

    /*
@@ -1225,6 +1262,8 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
        .offset         = start_of_cluster(s, guest_offset),
        .nb_clusters    = nb_clusters,

+        .keep_old_clusters  = keep_old_clusters,
+
        .cow_start = {
            .offset     = 0,
            .nb_bytes   = offset_into_cluster(s, guest_offset),
@@ -1472,24 +1511,25 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
         * but rather fall through to the backing file.
         */
        switch (qcow2_get_cluster_type(old_l2_entry)) {
-            case QCOW2_CLUSTER_UNALLOCATED:
-                if (full_discard || !bs->backing) {
-                    continue;
-                }
-                break;
+        case QCOW2_CLUSTER_UNALLOCATED:
+            if (full_discard || !bs->backing) {
+                continue;
+            }
+            break;

-            case QCOW2_CLUSTER_ZERO:
-                if (!full_discard) {
-                    continue;
-                }
-                break;
+        case QCOW2_CLUSTER_ZERO_PLAIN:
+            if (!full_discard) {
+                continue;
+            }
+            break;

-            case QCOW2_CLUSTER_NORMAL:
-            case QCOW2_CLUSTER_COMPRESSED:
-                break;
+        case QCOW2_CLUSTER_ZERO_ALLOC:
+        case QCOW2_CLUSTER_NORMAL:
+        case QCOW2_CLUSTER_COMPRESSED:
+            break;

-            default:
-                abort();
+        default:
+            abort();
        }

        /* First remove L2 entries */
@@ -1509,35 +1549,36 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
    return nb_clusters;
 }

-int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
-    int nb_sectors, enum qcow2_discard_type type, bool full_discard)
+int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset,
+                          uint64_t bytes, enum qcow2_discard_type type,
+                          bool full_discard)
 {
    BDRVQcow2State *s = bs->opaque;
-    uint64_t end_offset;
+    uint64_t end_offset = offset + bytes;
    uint64_t nb_clusters;
+    int64_t cleared;
    int ret;

-    end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS);
-
-    /* The caller must cluster-align start; round end down except at EOF */
+    /* Caller must pass aligned values, except at image end */
    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
-    if (end_offset != bs->total_sectors * BDRV_SECTOR_SIZE) {
-        end_offset = start_of_cluster(s, end_offset);
-    }
+    assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) ||
+           end_offset == bs->total_sectors << BDRV_SECTOR_BITS);

-    nb_clusters = size_to_clusters(s, end_offset - offset);
+    nb_clusters = size_to_clusters(s, bytes);

    s->cache_discards = true;

    /* Each L2 table is handled by its own loop iteration */
    while (nb_clusters > 0) {
-        ret = discard_single_l2(bs, offset, nb_clusters, type, full_discard);
-        if (ret < 0) {
+        cleared = discard_single_l2(bs, offset, nb_clusters, type,
+                                    full_discard);
+        if (cleared < 0) {
+            ret = cleared;
            goto fail;
        }

-        nb_clusters -= ret;
-        offset += (ret * s->cluster_size);
+        nb_clusters -= cleared;
+        offset += (cleared * s->cluster_size);
    }

    ret = 0;
@@ -1561,6 +1602,7 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
    int l2_index;
    int ret;
    int i;
+    bool unmap = !!(flags & BDRV_REQ_MAY_UNMAP);

    ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
    if (ret < 0) {
@@ -1573,12 +1615,22 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,

    for (i = 0; i < nb_clusters; i++) {
        uint64_t old_offset;
+        QCow2ClusterType cluster_type;

        old_offset = be64_to_cpu(l2_table[l2_index + i]);

-        /* Update L2 entries */
+        /*
+         * Minimize L2 changes if the cluster already reads back as
+         * zeroes with correct allocation.
+         */
+        cluster_type = qcow2_get_cluster_type(old_offset);
+        if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN ||
+            (cluster_type == QCOW2_CLUSTER_ZERO_ALLOC && !unmap)) {
+            continue;
+        }
+
        qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
-        if (old_offset & QCOW_OFLAG_COMPRESSED || flags & BDRV_REQ_MAY_UNMAP) {
+        if (cluster_type == QCOW2_CLUSTER_COMPRESSED || unmap) {
            l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
            qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
        } else {
@@ -1591,31 +1643,39 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
    return nb_clusters;
 }

-int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors,
-                        int flags)
+int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset,
+                          uint64_t bytes, int flags)
 {
    BDRVQcow2State *s = bs->opaque;
+    uint64_t end_offset = offset + bytes;
    uint64_t nb_clusters;
+    int64_t cleared;
    int ret;

+    /* Caller must pass aligned values, except at image end */
+    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
+    assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) ||
+           end_offset == bs->total_sectors << BDRV_SECTOR_BITS);
+
    /* The zero flag is only supported by version 3 and newer */
    if (s->qcow_version < 3) {
        return -ENOTSUP;
    }

    /* Each L2 table is handled by its own loop iteration */
-    nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS);
+    nb_clusters = size_to_clusters(s, bytes);

    s->cache_discards = true;

    while (nb_clusters > 0) {
-        ret = zero_single_l2(bs, offset, nb_clusters, flags);
-        if (ret < 0) {
+        cleared = zero_single_l2(bs, offset, nb_clusters, flags);
+        if (cleared < 0) {
+            ret = cleared;
            goto fail;
        }

-        nb_clusters -= ret;
-        offset += (ret * s->cluster_size);
+        nb_clusters -= cleared;
+        offset += (cleared * s->cluster_size);
    }

    ret = 0;
@@ -1699,14 +1759,14 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
        for (j = 0; j < s->l2_size; j++) {
            uint64_t l2_entry = be64_to_cpu(l2_table[j]);
            int64_t offset = l2_entry & L2E_OFFSET_MASK;
-            int cluster_type = qcow2_get_cluster_type(l2_entry);
-            bool preallocated = offset != 0;
+            QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry);

-            if (cluster_type != QCOW2_CLUSTER_ZERO) {
+            if (cluster_type != QCOW2_CLUSTER_ZERO_PLAIN &&
+                cluster_type != QCOW2_CLUSTER_ZERO_ALLOC) {
                continue;
            }

-            if (!preallocated) {
+            if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
                if (!bs->backing) {
                    /* not backed; therefore we can simply deallocate the
                     * cluster */
@@ -1741,7 +1801,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                                        "%#" PRIx64 " unaligned (L2 offset: %#"
                                        PRIx64 ", L2 index: %#x)", offset,
                                        l2_offset, j);
-                if (!preallocated) {
+                if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
                    qcow2_free_clusters(bs, offset, s->cluster_size,
                                        QCOW2_DISCARD_ALWAYS);
                }
@@ -1751,7 +1811,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,

            ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size);
            if (ret < 0) {
-                if (!preallocated) {
+                if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
                    qcow2_free_clusters(bs, offset, s->cluster_size,
                                        QCOW2_DISCARD_ALWAYS);
                }
@@ -1760,7 +1820,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,

            ret = bdrv_pwrite_zeroes(bs->file, offset, s->cluster_size, 0);
            if (ret < 0) {
-                if (!preallocated) {
+                if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
                    qcow2_free_clusters(bs, offset, s->cluster_size,
                                        QCOW2_DISCARD_ALWAYS);
                }
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -1028,18 +1028,17 @@ void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
        }
        break;
    case QCOW2_CLUSTER_NORMAL:
-    case QCOW2_CLUSTER_ZERO:
-        if (l2_entry & L2E_OFFSET_MASK) {
-            if (offset_into_cluster(s, l2_entry & L2E_OFFSET_MASK)) {
-                qcow2_signal_corruption(bs, false, -1, -1,
-                                        "Cannot free unaligned cluster %#llx",
-                                        l2_entry & L2E_OFFSET_MASK);
-            } else {
-                qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK,
-                                    nb_clusters << s->cluster_bits, type);
-            }
+    case QCOW2_CLUSTER_ZERO_ALLOC:
+        if (offset_into_cluster(s, l2_entry & L2E_OFFSET_MASK)) {
+            qcow2_signal_corruption(bs, false, -1, -1,
+                                    "Cannot free unaligned cluster %#llx",
+                                    l2_entry & L2E_OFFSET_MASK);
+        } else {
+            qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK,
+                                nb_clusters << s->cluster_bits, type);
        }
        break;
+    case QCOW2_CLUSTER_ZERO_PLAIN:
    case QCOW2_CLUSTER_UNALLOCATED:
        break;
    default:
@@ -1059,9 +1058,9 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
    int64_t l1_table_offset, int l1_size, int addend)
 {
    BDRVQcow2State *s = bs->opaque;
-    uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, refcount;
+    uint64_t *l1_table, *l2_table, l2_offset, entry, l1_size2, refcount;
    bool l1_allocated = false;
-    int64_t old_offset, old_l2_offset;
+    int64_t old_entry, old_l2_offset;
    int i, j, l1_modified = 0, nb_csectors;
    int ret;

@@ -1089,15 +1088,16 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
            goto fail;
        }

-        for(i = 0;i < l1_size; i++)
+        for (i = 0; i < l1_size; i++) {
            be64_to_cpus(&l1_table[i]);
+        }
    } else {
        assert(l1_size == s->l1_size);
        l1_table = s->l1_table;
        l1_allocated = false;
    }

-    for(i = 0; i < l1_size; i++) {
+    for (i = 0; i < l1_size; i++) {
        l2_offset = l1_table[i];
        if (l2_offset) {
            old_l2_offset = l2_offset;
@@ -1117,81 +1117,79 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
                goto fail;
            }

-            for(j = 0; j < s->l2_size; j++) {
+            for (j = 0; j < s->l2_size; j++) {
                uint64_t cluster_index;
+                uint64_t offset;

-                offset = be64_to_cpu(l2_table[j]);
-                old_offset = offset;
-                offset &= ~QCOW_OFLAG_COPIED;
+                entry = be64_to_cpu(l2_table[j]);
+                old_entry = entry;
+                entry &= ~QCOW_OFLAG_COPIED;
+                offset = entry & L2E_OFFSET_MASK;

-                switch (qcow2_get_cluster_type(offset)) {
-                    case QCOW2_CLUSTER_COMPRESSED:
-                        nb_csectors = ((offset >> s->csize_shift) &
-                                       s->csize_mask) + 1;
-                        if (addend != 0) {
-                            ret = update_refcount(bs,
-                                (offset & s->cluster_offset_mask) & ~511,
+                switch (qcow2_get_cluster_type(entry)) {
+                case QCOW2_CLUSTER_COMPRESSED:
+                    nb_csectors = ((entry >> s->csize_shift) &
+                                   s->csize_mask) + 1;
+                    if (addend != 0) {
+                        ret = update_refcount(bs,
+                                (entry & s->cluster_offset_mask) & ~511,
                                nb_csectors * 512, abs(addend), addend < 0,
                                QCOW2_DISCARD_SNAPSHOT);
-                            if (ret < 0) {
-                                goto fail;
-                            }
-                        }
-                        /* compressed clusters are never modified */
-                        refcount = 2;
-                        break;
-
-                    case QCOW2_CLUSTER_NORMAL:
-                    case QCOW2_CLUSTER_ZERO:
-                        if (offset_into_cluster(s, offset & L2E_OFFSET_MASK)) {
-                            qcow2_signal_corruption(bs, true, -1, -1, "Data "
-                                                    "cluster offset %#llx "
-                                                    "unaligned (L2 offset: %#"
-                                                    PRIx64 ", L2 index: %#x)",
-                                                    offset & L2E_OFFSET_MASK,
-                                                    l2_offset, j);
-                            ret = -EIO;
-                            goto fail;
-                        }
-
-                        cluster_index = (offset & L2E_OFFSET_MASK) >> s->cluster_bits;
-                        if (!cluster_index) {
-                            /* unallocated */
-                            refcount = 0;
-                            break;
-                        }
-                        if (addend != 0) {
-                            ret = qcow2_update_cluster_refcount(bs,
-                                    cluster_index, abs(addend), addend < 0,
-                                    QCOW2_DISCARD_SNAPSHOT);
-                            if (ret < 0) {
-                                goto fail;
-                            }
-                        }
-
-                        ret = qcow2_get_refcount(bs, cluster_index, &refcount);
                        if (ret < 0) {
                            goto fail;
                        }
-                        break;
+                    }
+                    /* compressed clusters are never modified */
+                    refcount = 2;
+                    break;

-                    case QCOW2_CLUSTER_UNALLOCATED:
-                        refcount = 0;
-                        break;
+                case QCOW2_CLUSTER_NORMAL:
+                case QCOW2_CLUSTER_ZERO_ALLOC:
+                    if (offset_into_cluster(s, offset)) {
+                        qcow2_signal_corruption(bs, true, -1, -1, "Cluster "
+                                                "allocation offset %#" PRIx64
+                                                " unaligned (L2 offset: %#"
+                                                PRIx64 ", L2 index: %#x)",
+                                                offset, l2_offset, j);
+                        ret = -EIO;
+                        goto fail;
+                    }

-                    default:
-                        abort();
+                    cluster_index = offset >> s->cluster_bits;
+                    assert(cluster_index);
+                    if (addend != 0) {
+                        ret = qcow2_update_cluster_refcount(bs,
+                                    cluster_index, abs(addend), addend < 0,
+                                    QCOW2_DISCARD_SNAPSHOT);
+                        if (ret < 0) {
+                            goto fail;
+                        }
+                    }
+
+                    ret = qcow2_get_refcount(bs, cluster_index, &refcount);
+                    if (ret < 0) {
+                        goto fail;
+                    }
+                    break;
+
+                case QCOW2_CLUSTER_ZERO_PLAIN:
+                case QCOW2_CLUSTER_UNALLOCATED:
+                    refcount = 0;
+                    break;
+
+                default:
+                    abort();
                }

                if (refcount == 1) {
-                    offset |= QCOW_OFLAG_COPIED;
+                    entry |= QCOW_OFLAG_COPIED;
                }
-                if (offset != old_offset) {
+                if (entry != old_entry) {
                    if (addend > 0) {
                        qcow2_cache_set_dependency(bs, s->l2_table_cache,
                            s->refcount_block_cache);
                    }
-                    l2_table[j] = cpu_to_be64(offset);
+                    l2_table[j] = cpu_to_be64(entry);
                    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache,
                                                 l2_table);
                }
@@ -1441,12 +1439,7 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
            }
            break;

-        case QCOW2_CLUSTER_ZERO:
-            if ((l2_entry & L2E_OFFSET_MASK) == 0) {
-                break;
-            }
-            /* fall through */
-
+        case QCOW2_CLUSTER_ZERO_ALLOC:
        case QCOW2_CLUSTER_NORMAL:
        {
            uint64_t offset = l2_entry & L2E_OFFSET_MASK;
@@ -1476,6 +1469,7 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
            break;
        }

+        case QCOW2_CLUSTER_ZERO_PLAIN:
        case QCOW2_CLUSTER_UNALLOCATED:
            break;

@@ -1638,10 +1632,10 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
        for (j = 0; j < s->l2_size; j++) {
            uint64_t l2_entry = be64_to_cpu(l2_table[j]);
            uint64_t data_offset = l2_entry & L2E_OFFSET_MASK;
-            int cluster_type = qcow2_get_cluster_type(l2_entry);
+            QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry);

-            if ((cluster_type == QCOW2_CLUSTER_NORMAL) ||
-                ((cluster_type == QCOW2_CLUSTER_ZERO) && (data_offset != 0))) {
+            if (cluster_type == QCOW2_CLUSTER_NORMAL ||
+                cluster_type == QCOW2_CLUSTER_ZERO_ALLOC) {
                ret = qcow2_get_refcount(bs,
                                         data_offset >> s->cluster_bits,
                                         &refcount);
@@ -1728,14 +1722,17 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,

            if (fix & BDRV_FIX_ERRORS) {
                int64_t new_nb_clusters;
+                Error *local_err = NULL;

                if (offset > INT64_MAX - s->cluster_size) {
                    ret = -EINVAL;
                    goto resize_fail;
                }

-                ret = bdrv_truncate(bs->file, offset + s->cluster_size);
+                ret = bdrv_truncate(bs->file, offset + s->cluster_size,
+                                    &local_err);
                if (ret < 0) {
+                    error_report_err(local_err);
                    goto resize_fail;
                }
                size = bdrv_getlength(bs->file->bs);
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -440,10 +440,9 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)

    /* The VM state isn't needed any more in the active L1 table; in fact, it
     * hurts by causing expensive COW for the next snapshot. */
-    qcow2_discard_clusters(bs, qcow2_vm_state_offset(s),
-                           align_offset(sn->vm_state_size, s->cluster_size)
-                                >> BDRV_SECTOR_BITS,
-                           QCOW2_DISCARD_NEVER, false);
+    qcow2_cluster_discard(bs, qcow2_vm_state_offset(s),
+                          align_offset(sn->vm_state_size, s->cluster_size),
+                          QCOW2_DISCARD_NEVER, false);

 #ifdef DEBUG_ALLOC
    {
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1385,7 +1385,7 @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
        *file = bs->file->bs;
        status |= BDRV_BLOCK_OFFSET_VALID | cluster_offset;
    }
-    if (ret == QCOW2_CLUSTER_ZERO) {
+    if (ret == QCOW2_CLUSTER_ZERO_PLAIN || ret == QCOW2_CLUSTER_ZERO_ALLOC) {
        status |= BDRV_BLOCK_ZERO;
    } else if (ret != QCOW2_CLUSTER_UNALLOCATED) {
        status |= BDRV_BLOCK_DATA;
@@ -1482,7 +1482,8 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
            }
            break;

-        case QCOW2_CLUSTER_ZERO:
+        case QCOW2_CLUSTER_ZERO_PLAIN:
+        case QCOW2_CLUSTER_ZERO_ALLOC:
            qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
            break;

@@ -2139,7 +2140,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
         * too, as long as the bulk is allocated here). Therefore, using
         * floating point arithmetic is fine. */
        int64_t meta_size = 0;
-        uint64_t nreftablee, nrefblocke, nl1e, nl2e;
+        uint64_t nreftablee, nrefblocke, nl1e, nl2e, refblock_count;
        int64_t aligned_total_size = align_offset(total_size, cluster_size);
        int refblock_bits, refblock_size;
        /* refcount entry size in bytes */
@@ -2182,11 +2183,12 @@ static int qcow2_create2(const char *filename, int64_t total_size,
        nrefblocke = (aligned_total_size + meta_size + cluster_size)
                   / (cluster_size - rces - rces * sizeof(uint64_t)
                                                 / cluster_size);
-        meta_size += DIV_ROUND_UP(nrefblocke, refblock_size) * cluster_size;
+        refblock_count = DIV_ROUND_UP(nrefblocke, refblock_size);
+        meta_size += refblock_count * cluster_size;

        /* total size of refcount tables */
-        nreftablee = nrefblocke / refblock_size;
-        nreftablee = align_offset(nreftablee, cluster_size / sizeof(uint64_t));
+        nreftablee = align_offset(refblock_count,
+                                  cluster_size / sizeof(uint64_t));
        meta_size += nreftablee * sizeof(uint64_t);

        qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
@@ -2265,7 +2267,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
     * table)
     */
    options = qdict_new();
-    qdict_put(options, "driver", qstring_from_str("qcow2"));
+    qdict_put_str(options, "driver", "qcow2");
    blk = blk_new_open(filename, NULL, options,
                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH,
                       &local_err);
@@ -2294,9 +2296,9 @@ static int qcow2_create2(const char *filename, int64_t total_size,
    }

    /* Okay, now that we have a valid image, let's give it the right size */
-    ret = blk_truncate(blk, total_size);
+    ret = blk_truncate(blk, total_size, errp);
    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Could not resize image");
+        error_prepend(errp, "Could not resize image: ");
        goto out;
    }

@@ -2327,7 +2329,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,

    /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */
    options = qdict_new();
-    qdict_put(options, "driver", qstring_from_str("qcow2"));
+    qdict_put_str(options, "driver", "qcow2");
    blk = blk_new_open(filename, NULL, options,
                       BDRV_O_RDWR | BDRV_O_NO_BACKING, &local_err);
    if (blk == NULL) {
@@ -2449,6 +2451,10 @@ static bool is_zero_sectors(BlockDriverState *bs, int64_t start,
    BlockDriverState *file;
    int64_t res;

+    if (start + count > bs->total_sectors) {
+        count = bs->total_sectors - start;
+    }
+
    if (!count) {
        return true;
    }
@@ -2467,6 +2473,9 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
    uint32_t tail = (offset + count) % s->cluster_size;

    trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, count);
+    if (offset + count == bs->total_sectors * BDRV_SECTOR_SIZE) {
+        tail = 0;
+    }

    if (head || tail) {
        int64_t cl_start = (offset - head) >> BDRV_SECTOR_BITS;
@@ -2490,7 +2499,9 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
        count = s->cluster_size;
        nr = s->cluster_size;
        ret = qcow2_get_cluster_offset(bs, offset, &nr, &off);
-        if (ret != QCOW2_CLUSTER_UNALLOCATED && ret != QCOW2_CLUSTER_ZERO) {
+        if (ret != QCOW2_CLUSTER_UNALLOCATED &&
+            ret != QCOW2_CLUSTER_ZERO_PLAIN &&
+            ret != QCOW2_CLUSTER_ZERO_ALLOC) {
            qemu_co_mutex_unlock(&s->lock);
            return -ENOTSUP;
        }
@@ -2501,7 +2512,7 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
    trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, count);

    /* Whatever is left can use real zero clusters */
-    ret = qcow2_zero_clusters(bs, offset, count >> BDRV_SECTOR_BITS, flags);
+    ret = qcow2_cluster_zeroize(bs, offset, count, flags);
    qemu_co_mutex_unlock(&s->lock);

    return ret;
@@ -2515,42 +2526,48 @@ static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,

    if (!QEMU_IS_ALIGNED(offset | count, s->cluster_size)) {
        assert(count < s->cluster_size);
-        return -ENOTSUP;
+        /* Ignore partial clusters, except for the special case of the
+         * complete partial cluster at the end of an unaligned file */
+        if (!QEMU_IS_ALIGNED(offset, s->cluster_size) ||
+            offset + count != bs->total_sectors * BDRV_SECTOR_SIZE) {
+            return -ENOTSUP;
+        }
    }

    qemu_co_mutex_lock(&s->lock);
-    ret = qcow2_discard_clusters(bs, offset, count >> BDRV_SECTOR_BITS,
-                                 QCOW2_DISCARD_REQUEST, false);
+    ret = qcow2_cluster_discard(bs, offset, count, QCOW2_DISCARD_REQUEST,
+                                false);
    qemu_co_mutex_unlock(&s->lock);
    return ret;
 }

-static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
+static int qcow2_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
 {
    BDRVQcow2State *s = bs->opaque;
    int64_t new_l1_size;
    int ret;

    if (offset & 511) {
-        error_report("The new size must be a multiple of 512");
+        error_setg(errp, "The new size must be a multiple of 512");
        return -EINVAL;
    }

    /* cannot proceed if image has snapshots */
    if (s->nb_snapshots) {
-        error_report("Can't resize an image which has snapshots");
+        error_setg(errp, "Can't resize an image which has snapshots");
        return -ENOTSUP;
    }

    /* shrinking is currently not supported */
    if (offset < bs->total_sectors * 512) {
-        error_report("qcow2 doesn't support shrinking images yet");
+        error_setg(errp, "qcow2 doesn't support shrinking images yet");
        return -ENOTSUP;
    }

    new_l1_size = size_to_l1(s, offset);
    ret = qcow2_grow_l1_table(bs, new_l1_size, true);
    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Failed to grow the L1 table");
        return ret;
    }

@@ -2559,6 +2576,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
                           &offset, sizeof(uint64_t));
    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Failed to update the image size");
        return ret;
    }

@@ -2584,7 +2602,7 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
        /* align end of file to a sector boundary to ease reading with
           sector based I/Os */
        cluster_offset = bdrv_getlength(bs->file->bs);
-        return bdrv_truncate(bs->file, cluster_offset);
+        return bdrv_truncate(bs->file, cluster_offset, NULL);
    }

    buf = qemu_blockalign(bs, s->cluster_size);
@@ -2674,6 +2692,7 @@ fail:
 static int make_completely_empty(BlockDriverState *bs)
 {
    BDRVQcow2State *s = bs->opaque;
+    Error *local_err = NULL;
    int ret, l1_clusters;
    int64_t offset;
    uint64_t *new_reftable = NULL;
@@ -2798,8 +2817,10 @@ static int make_completely_empty(BlockDriverState *bs)
        goto fail;
    }

-    ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size);
+    ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size,
+                        &local_err);
    if (ret < 0) {
+        error_report_err(local_err);
        goto fail;
    }

@@ -2822,9 +2843,8 @@ fail:
 static int qcow2_make_empty(BlockDriverState *bs)
 {
    BDRVQcow2State *s = bs->opaque;
-    uint64_t start_sector;
-    int sector_step = (QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size) /
-                       BDRV_SECTOR_SIZE);
+    uint64_t offset, end_offset;
+    int step = QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size);
    int l1_clusters, ret = 0;

    l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
@@ -2841,18 +2861,15 @@ static int qcow2_make_empty(BlockDriverState *bs)

    /* This fallback code simply discards every active cluster; this is slow,
     * but works in all cases */
-    for (start_sector = 0; start_sector < bs->total_sectors;
-         start_sector += sector_step)
-    {
+    end_offset = bs->total_sectors * BDRV_SECTOR_SIZE;
+    for (offset = 0; offset < end_offset; offset += step) {
        /* As this function is generally used after committing an external
         * snapshot, QCOW2_DISCARD_SNAPSHOT seems appropriate. Also, the
         * default action for this kind of discard is to pass the discard,
         * which will ideally result in an actually smaller image file, as
         * is probably desired. */
-        ret = qcow2_discard_clusters(bs, start_sector * BDRV_SECTOR_SIZE,
-                                     MIN(sector_step,
-                                         bs->total_sectors - start_sector),
-                                     QCOW2_DISCARD_SNAPSHOT, true);
+        ret = qcow2_cluster_discard(bs, offset, MIN(step, end_offset - offset),
+                                    QCOW2_DISCARD_SNAPSHOT, true);
        if (ret < 0) {
            break;
        }
@@ -3273,9 +3290,10 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
            return ret;
        }

-        ret = blk_truncate(blk, new_size);
+        ret = blk_truncate(blk, new_size, &local_err);
        blk_unref(blk);
        if (ret < 0) {
+            error_report_err(local_err);
            return ret;
        }
    }
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -322,6 +322,9 @@ typedef struct QCowL2Meta
    /** Number of newly allocated clusters */
    int nb_clusters;

+    /** Do not free the old clusters */
+    bool keep_old_clusters;
+
    /**
     * Requests that overlap with this allocation and wait to be restarted
     * when the allocating request has completed.
@@ -346,12 +349,13 @@ typedef struct QCowL2Meta
    QLIST_ENTRY(QCowL2Meta) next_in_flight;
 } QCowL2Meta;

-enum {
+typedef enum QCow2ClusterType {
    QCOW2_CLUSTER_UNALLOCATED,
+    QCOW2_CLUSTER_ZERO_PLAIN,
+    QCOW2_CLUSTER_ZERO_ALLOC,
    QCOW2_CLUSTER_NORMAL,
    QCOW2_CLUSTER_COMPRESSED,
-    QCOW2_CLUSTER_ZERO
-};
+} QCow2ClusterType;

 typedef enum QCow2MetadataOverlap {
    QCOW2_OL_MAIN_HEADER_BITNR    = 0,
@@ -440,12 +444,15 @@ static inline uint64_t qcow2_max_refcount_clusters(BDRVQcow2State *s)
    return QCOW_MAX_REFTABLE_SIZE >> s->cluster_bits;
 }

-static inline int qcow2_get_cluster_type(uint64_t l2_entry)
+static inline QCow2ClusterType qcow2_get_cluster_type(uint64_t l2_entry)
 {
    if (l2_entry & QCOW_OFLAG_COMPRESSED) {
        return QCOW2_CLUSTER_COMPRESSED;
    } else if (l2_entry & QCOW_OFLAG_ZERO) {
-        return QCOW2_CLUSTER_ZERO;
+        if (l2_entry & L2E_OFFSET_MASK) {
+            return QCOW2_CLUSTER_ZERO_ALLOC;
+        }
+        return QCOW2_CLUSTER_ZERO_PLAIN;
    } else if (!(l2_entry & L2E_OFFSET_MASK)) {
        return QCOW2_CLUSTER_UNALLOCATED;
    } else {
@@ -544,10 +551,11 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
                                         int compressed_size);

 int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
-int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
-    int nb_sectors, enum qcow2_discard_type type, bool full_discard);
-int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors,
-                        int flags);
+int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset,
+                          uint64_t bytes, enum qcow2_discard_type type,
+                          bool full_discard);
+int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset,
+                          uint64_t bytes, int flags);

 int qcow2_expand_zero_clusters(BlockDriverState *bs,
                               BlockDriverAmendStatusCB *status_cb,
--- a/block/qed.c
+++ b/block/qed.c
@@ -635,7 +635,7 @@ static int qed_create(const char *filename, uint32_t cluster_size,
    blk_set_allow_write_beyond_eof(blk, true);

    /* File must start empty and grow, check truncate is supported */
-    ret = blk_truncate(blk, 0);
+    ret = blk_truncate(blk, 0, errp);
    if (ret < 0) {
        goto out;
    }
@@ -1518,7 +1518,7 @@ static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs,
    return cb.ret;
 }

-static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset)
+static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
 {
    BDRVQEDState *s = bs->opaque;
    uint64_t old_image_size;
@@ -1526,11 +1526,12 @@ static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset)

    if (!qed_is_image_size_valid(offset, s->header.cluster_size,
                                 s->header.table_size)) {
+        error_setg(errp, "Invalid image size specified");
        return -EINVAL;
    }

-    /* Shrinking is currently not supported */
    if ((uint64_t)offset < s->header.image_size) {
+        error_setg(errp, "Shrinking images is currently not supported");
        return -ENOTSUP;
    }

@@ -1539,6 +1540,7 @@ static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset)
    ret = qed_write_header_sync(s);
    if (ret < 0) {
        s->header.image_size = old_image_size;
+        error_setg_errno(errp, -ret, "Failed to update the image size");
    }
    return ret;
 }
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -1096,19 +1096,15 @@ static void quorum_refresh_filename(BlockDriverState *bs, QDict *options)
    children = qlist_new();
    for (i = 0; i < s->num_children; i++) {
        QINCREF(s->children[i]->bs->full_open_options);
-        qlist_append_obj(children,
-                         QOBJECT(s->children[i]->bs->full_open_options));
+        qlist_append(children, s->children[i]->bs->full_open_options);
    }

    opts = qdict_new();
-    qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("quorum")));
-    qdict_put_obj(opts, QUORUM_OPT_VOTE_THRESHOLD,
-                  QOBJECT(qint_from_int(s->threshold)));
-    qdict_put_obj(opts, QUORUM_OPT_BLKVERIFY,
-                  QOBJECT(qbool_from_bool(s->is_blkverify)));
-    qdict_put_obj(opts, QUORUM_OPT_REWRITE,
-                  QOBJECT(qbool_from_bool(s->rewrite_corrupted)));
-    qdict_put_obj(opts, "children", QOBJECT(children));
+    qdict_put_str(opts, "driver", "quorum");
+    qdict_put_int(opts, QUORUM_OPT_VOTE_THRESHOLD, s->threshold);
+    qdict_put_bool(opts, QUORUM_OPT_BLKVERIFY, s->is_blkverify);
+    qdict_put_bool(opts, QUORUM_OPT_REWRITE, s->rewrite_corrupted);
+    qdict_put(opts, "children", children);

    bs->full_open_options = opts;
 }
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -327,21 +327,23 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
    }
 }

-static int raw_truncate(BlockDriverState *bs, int64_t offset)
+static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
 {
    BDRVRawState *s = bs->opaque;

    if (s->has_size) {
+        error_setg(errp, "Cannot resize fixed-size raw disks");
        return -ENOTSUP;
    }

    if (INT64_MAX - offset < s->offset) {
+        error_setg(errp, "Disk size too large for the chosen offset");
        return -EINVAL;
    }

    s->size = offset;
    offset += s->offset;
-    return bdrv_truncate(bs->file, offset);
+    return bdrv_truncate(bs->file, offset, errp);
 }

 static int raw_media_changed(BlockDriverState *bs)
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -94,7 +94,7 @@ typedef struct BDRVRBDState {
    rados_t cluster;
    rados_ioctx_t io_ctx;
    rbd_image_t image;
-    char *name;
+    char *image_name;
    char *snap;
 } BDRVRBDState;

@@ -154,20 +154,20 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options,
        goto done;
    }
    qemu_rbd_unescape(found_str);
-    qdict_put(options, "pool", qstring_from_str(found_str));
+    qdict_put_str(options, "pool", found_str);

    if (strchr(p, '@')) {
        found_str = qemu_rbd_next_tok(p, '@', &p);
        qemu_rbd_unescape(found_str);
-        qdict_put(options, "image", qstring_from_str(found_str));
+        qdict_put_str(options, "image", found_str);

        found_str = qemu_rbd_next_tok(p, ':', &p);
        qemu_rbd_unescape(found_str);
-        qdict_put(options, "snapshot", qstring_from_str(found_str));
+        qdict_put_str(options, "snapshot", found_str);
    } else {
        found_str = qemu_rbd_next_tok(p, ':', &p);
        qemu_rbd_unescape(found_str);
-        qdict_put(options, "image", qstring_from_str(found_str));
+        qdict_put_str(options, "image", found_str);
    }
    if (!p) {
        goto done;
@@ -189,9 +189,9 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options,
        qemu_rbd_unescape(value);

        if (!strcmp(name, "conf")) {
-            qdict_put(options, "conf", qstring_from_str(value));
+            qdict_put_str(options, "conf", value);
        } else if (!strcmp(name, "id")) {
-            qdict_put(options, "user" , qstring_from_str(value));
+            qdict_put_str(options, "user", value);
        } else {
            /*
             * We pass these internally to qemu_rbd_set_keypairs(), so
@@ -204,8 +204,8 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options,
            if (!keypairs) {
                keypairs = qlist_new();
            }
-            qlist_append(keypairs, qstring_from_str(name));
-            qlist_append(keypairs, qstring_from_str(value));
+            qlist_append_str(keypairs, name);
+            qlist_append_str(keypairs, value);
        }
    }

@@ -350,7 +350,7 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
    int64_t bytes = 0;
    int64_t objsize;
    int obj_order = 0;
-    const char *pool, *name, *conf, *clientname, *keypairs;
+    const char *pool, *image_name, *conf, *user, *keypairs;
    const char *secretid;
    rados_t cluster;
    rados_ioctx_t io_ctx;
@@ -393,11 +393,11 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
     */
    pool       = qdict_get_try_str(options, "pool");
    conf       = qdict_get_try_str(options, "conf");
-    clientname = qdict_get_try_str(options, "user");
-    name       = qdict_get_try_str(options, "image");
+    user       = qdict_get_try_str(options, "user");
+    image_name = qdict_get_try_str(options, "image");
    keypairs   = qdict_get_try_str(options, "=keyvalue-pairs");

-    ret = rados_create(&cluster, clientname);
+    ret = rados_create(&cluster, user);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "error initializing");
        goto exit;
@@ -434,7 +434,7 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
        goto shutdown;
    }

-    ret = rbd_create(io_ctx, name, bytes, &obj_order);
+    ret = rbd_create(io_ctx, image_name, bytes, &obj_order);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "error rbd create");
    }
@@ -540,7 +540,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
                         Error **errp)
 {
    BDRVRBDState *s = bs->opaque;
-    const char *pool, *snap, *conf, *clientname, *name, *keypairs;
+    const char *pool, *snap, *conf, *user, *image_name, *keypairs;
    const char *secretid;
    QemuOpts *opts;
    Error *local_err = NULL;
@@ -567,24 +567,24 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
    pool           = qemu_opt_get(opts, "pool");
    conf           = qemu_opt_get(opts, "conf");
    snap           = qemu_opt_get(opts, "snapshot");
-    clientname     = qemu_opt_get(opts, "user");
-    name           = qemu_opt_get(opts, "image");
+    user           = qemu_opt_get(opts, "user");
+    image_name     = qemu_opt_get(opts, "image");
    keypairs       = qemu_opt_get(opts, "=keyvalue-pairs");

-    if (!pool || !name) {
+    if (!pool || !image_name) {
        error_setg(errp, "Parameters 'pool' and 'image' are required");
        r = -EINVAL;
        goto failed_opts;
    }

-    r = rados_create(&s->cluster, clientname);
+    r = rados_create(&s->cluster, user);
    if (r < 0) {
        error_setg_errno(errp, -r, "error initializing");
        goto failed_opts;
    }

    s->snap = g_strdup(snap);
-    s->name = g_strdup(name);
+    s->image_name = g_strdup(image_name);

    /* try default location when conf=NULL, but ignore failure */
    r = rados_conf_read_file(s->cluster, conf);
@@ -635,13 +635,23 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
        goto failed_shutdown;
    }

-    r = rbd_open(s->io_ctx, s->name, &s->image, s->snap);
+    /* rbd_open is always r/w */
+    r = rbd_open(s->io_ctx, s->image_name, &s->image, s->snap);
    if (r < 0) {
-        error_setg_errno(errp, -r, "error reading header from %s", s->name);
+        error_setg_errno(errp, -r, "error reading header from %s",
+                         s->image_name);
        goto failed_open;
    }

-    bs->read_only = (s->snap != NULL);
+    /* If we are using an rbd snapshot, we must be r/o, otherwise
+     * leave as-is */
+    if (s->snap != NULL) {
+        r = bdrv_set_read_only(bs, true, &local_err);
+        if (r < 0) {
+            error_propagate(errp, local_err);
+            goto failed_open;
+        }
+    }

    qemu_opts_del(opts);
    return 0;
@@ -651,13 +661,33 @@ failed_open:
 failed_shutdown:
    rados_shutdown(s->cluster);
    g_free(s->snap);
-    g_free(s->name);
+    g_free(s->image_name);
 failed_opts:
    qemu_opts_del(opts);
    g_free(mon_host);
    return r;
 }

+
+/* Since RBD is currently always opened R/W via the API,
+ * we just need to check if we are using a snapshot or not, in
+ * order to determine if we will allow it to be R/W */
+static int qemu_rbd_reopen_prepare(BDRVReopenState *state,
+                                   BlockReopenQueue *queue, Error **errp)
+{
+    BDRVRBDState *s = state->bs->opaque;
+    int ret = 0;
+
+    if (s->snap && state->flags & BDRV_O_RDWR) {
+        error_setg(errp,
+                   "Cannot change node '%s' to r/w when using RBD snapshot",
+                   bdrv_get_device_or_node_name(state->bs));
+        ret = -EINVAL;
+    }
+
+    return ret;
+}
+
 static void qemu_rbd_close(BlockDriverState *bs)
 {
    BDRVRBDState *s = bs->opaque;
@@ -665,7 +695,7 @@ static void qemu_rbd_close(BlockDriverState *bs)
    rbd_close(s->image);
    rados_ioctx_destroy(s->io_ctx);
    g_free(s->snap);
-    g_free(s->name);
+    g_free(s->image_name);
    rados_shutdown(s->cluster);
 }

@@ -886,13 +916,14 @@ static int64_t qemu_rbd_getlength(BlockDriverState *bs)
    return info.size;
 }

-static int qemu_rbd_truncate(BlockDriverState *bs, int64_t offset)
+static int qemu_rbd_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
 {
    BDRVRBDState *s = bs->opaque;
    int r;

    r = rbd_resize(s->image, offset);
    if (r < 0) {
+        error_setg_errno(errp, -r, "Failed to resize file");
        return r;
    }

@@ -1064,6 +1095,7 @@ static BlockDriver bdrv_rbd = {
    .bdrv_parse_filename    = qemu_rbd_parse_filename,
    .bdrv_file_open         = qemu_rbd_open,
    .bdrv_close             = qemu_rbd_close,
+    .bdrv_reopen_prepare    = qemu_rbd_reopen_prepare,
    .bdrv_create            = qemu_rbd_create,
    .bdrv_has_zero_init     = bdrv_has_zero_init_1,
    .bdrv_get_info          = qemu_rbd_getinfo,
--- a/block/replication.c
+++ b/block/replication.c
@@ -22,9 +22,17 @@
 #include "qapi/error.h"
 #include "replication.h"

+typedef enum {
+    BLOCK_REPLICATION_NONE,             /* block replication is not started */
+    BLOCK_REPLICATION_RUNNING,          /* block replication is running */
+    BLOCK_REPLICATION_FAILOVER,         /* failover is running in background */
+    BLOCK_REPLICATION_FAILOVER_FAILED,  /* failover failed */
+    BLOCK_REPLICATION_DONE,             /* block replication is done */
+} ReplicationStage;
+
 typedef struct BDRVReplicationState {
    ReplicationMode mode;
-    int replication_state;
+    ReplicationStage stage;
    BdrvChild *active_disk;
    BdrvChild *hidden_disk;
    BdrvChild *secondary_disk;
@@ -36,14 +44,6 @@ typedef struct BDRVReplicationState {
    int error;
 } BDRVReplicationState;

-enum {
-    BLOCK_REPLICATION_NONE,             /* block replication is not started */
-    BLOCK_REPLICATION_RUNNING,          /* block replication is running */
-    BLOCK_REPLICATION_FAILOVER,         /* failover is running in background */
-    BLOCK_REPLICATION_FAILOVER_FAILED,  /* failover failed */
-    BLOCK_REPLICATION_DONE,             /* block replication is done */
-};
-
 static void replication_start(ReplicationState *rs, ReplicationMode mode,
                              Error **errp);
 static void replication_do_checkpoint(ReplicationState *rs, Error **errp);
@@ -141,10 +141,10 @@ static void replication_close(BlockDriverState *bs)
 {
    BDRVReplicationState *s = bs->opaque;

-    if (s->replication_state == BLOCK_REPLICATION_RUNNING) {
+    if (s->stage == BLOCK_REPLICATION_RUNNING) {
        replication_stop(s->rs, false, NULL);
    }
-    if (s->replication_state == BLOCK_REPLICATION_FAILOVER) {
+    if (s->stage == BLOCK_REPLICATION_FAILOVER) {
        block_job_cancel_sync(s->active_disk->bs->job);
    }

@@ -174,7 +174,7 @@ static int64_t replication_getlength(BlockDriverState *bs)

 static int replication_get_io_status(BDRVReplicationState *s)
 {
-    switch (s->replication_state) {
+    switch (s->stage) {
    case BLOCK_REPLICATION_NONE:
        return -EIO;
    case BLOCK_REPLICATION_RUNNING:
@@ -403,7 +403,7 @@ static void backup_job_completed(void *opaque, int ret)
    BlockDriverState *bs = opaque;
    BDRVReplicationState *s = bs->opaque;

-    if (s->replication_state != BLOCK_REPLICATION_FAILOVER) {
+    if (s->stage != BLOCK_REPLICATION_FAILOVER) {
        /* The backup job is cancelled unexpectedly */
        s->error = -EIO;
    }
@@ -445,7 +445,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
    aio_context_acquire(aio_context);
    s = bs->opaque;

-    if (s->replication_state != BLOCK_REPLICATION_NONE) {
+    if (s->stage != BLOCK_REPLICATION_NONE) {
        error_setg(errp, "Block replication is running or done");
        aio_context_release(aio_context);
        return;
@@ -545,7 +545,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
        abort();
    }

-    s->replication_state = BLOCK_REPLICATION_RUNNING;
+    s->stage = BLOCK_REPLICATION_RUNNING;

    if (s->mode == REPLICATION_MODE_SECONDARY) {
        secondary_do_checkpoint(s, errp);
@@ -581,7 +581,7 @@ static void replication_get_error(ReplicationState *rs, Error **errp)
    aio_context_acquire(aio_context);
    s = bs->opaque;

-    if (s->replication_state != BLOCK_REPLICATION_RUNNING) {
+    if (s->stage != BLOCK_REPLICATION_RUNNING) {
        error_setg(errp, "Block replication is not running");
        aio_context_release(aio_context);
        return;
@@ -601,7 +601,7 @@ static void replication_done(void *opaque, int ret)
    BDRVReplicationState *s = bs->opaque;

    if (ret == 0) {
-        s->replication_state = BLOCK_REPLICATION_DONE;
+        s->stage = BLOCK_REPLICATION_DONE;

        /* refresh top bs's filename */
        bdrv_refresh_filename(bs);
@@ -610,7 +610,7 @@ static void replication_done(void *opaque, int ret)
        s->hidden_disk = NULL;
        s->error = 0;
    } else {
-        s->replication_state = BLOCK_REPLICATION_FAILOVER_FAILED;
+        s->stage = BLOCK_REPLICATION_FAILOVER_FAILED;
        s->error = -EIO;
    }
 }
@@ -625,7 +625,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
    aio_context_acquire(aio_context);
    s = bs->opaque;

-    if (s->replication_state != BLOCK_REPLICATION_RUNNING) {
+    if (s->stage != BLOCK_REPLICATION_RUNNING) {
        error_setg(errp, "Block replication is not running");
        aio_context_release(aio_context);
        return;
@@ -633,7 +633,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)

    switch (s->mode) {
    case REPLICATION_MODE_PRIMARY:
-        s->replication_state = BLOCK_REPLICATION_DONE;
+        s->stage = BLOCK_REPLICATION_DONE;
        s->error = 0;
        break;
    case REPLICATION_MODE_SECONDARY:
@@ -648,15 +648,15 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)

        if (!failover) {
            secondary_do_checkpoint(s, errp);
-            s->replication_state = BLOCK_REPLICATION_DONE;
+            s->stage = BLOCK_REPLICATION_DONE;
            aio_context_release(aio_context);
            return;
        }

-        s->replication_state = BLOCK_REPLICATION_FAILOVER;
+        s->stage = BLOCK_REPLICATION_FAILOVER;
        commit_active_start(NULL, s->active_disk->bs, s->secondary_disk->bs,
                            BLOCK_JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT,
-                            NULL, replication_done, bs, errp, true);
+                            NULL, replication_done, bs, true, errp);
        break;
    default:
        aio_context_release(aio_context);
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -536,14 +536,12 @@ static SocketAddress *sd_socket_address(const char *path,
    SocketAddress *addr = g_new0(SocketAddress, 1);

    if (path) {
-        addr->type = SOCKET_ADDRESS_KIND_UNIX;
-        addr->u.q_unix.data = g_new0(UnixSocketAddress, 1);
-        addr->u.q_unix.data->path = g_strdup(path);
+        addr->type = SOCKET_ADDRESS_TYPE_UNIX;
+        addr->u.q_unix.path = g_strdup(path);
    } else {
-        addr->type = SOCKET_ADDRESS_KIND_INET;
-        addr->u.inet.data = g_new0(InetSocketAddress, 1);
-        addr->u.inet.data->host = g_strdup(host ?: SD_DEFAULT_ADDR);
-        addr->u.inet.data->port = g_strdup(port ?: stringify(SD_DEFAULT_PORT));
+        addr->type = SOCKET_ADDRESS_TYPE_INET;
+        addr->u.inet.host = g_strdup(host ?: SD_DEFAULT_ADDR);
+        addr->u.inet.port = g_strdup(port ?: stringify(SD_DEFAULT_PORT));
    }

    return addr;
@@ -554,7 +552,6 @@ static SocketAddress *sd_server_config(QDict *options, Error **errp)
    QDict *server = NULL;
    QObject *crumpled_server = NULL;
    Visitor *iv = NULL;
-    SocketAddressFlat *saddr_flat = NULL;
    SocketAddress *saddr = NULL;
    Error *local_err = NULL;

@@ -574,16 +571,13 @@ static SocketAddress *sd_server_config(QDict *options, Error **errp)
     * visitor expects the former.
     */
    iv = qobject_input_visitor_new(crumpled_server);
-    visit_type_SocketAddressFlat(iv, NULL, &saddr_flat, &local_err);
+    visit_type_SocketAddress(iv, NULL, &saddr, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto done;
    }

-    saddr = socket_address_crumple(saddr_flat);
-
 done:
-    qapi_free_SocketAddressFlat(saddr_flat);
    visit_free(iv);
    qobject_decref(crumpled_server);
    QDECREF(server);
@@ -595,9 +589,9 @@ static int connect_to_sdog(BDRVSheepdogState *s, Error **errp)
 {
    int fd;

-    fd = socket_connect(s->addr, errp, NULL, NULL);
+    fd = socket_connect(s->addr, NULL, NULL, errp);

-    if (s->addr->type == SOCKET_ADDRESS_KIND_INET && fd >= 0) {
+    if (s->addr->type == SOCKET_ADDRESS_TYPE_INET && fd >= 0) {
        int ret = socket_set_nodelay(fd);
        if (ret < 0) {
            error_report("%s", strerror(errno));
@@ -2159,9 +2153,8 @@ static int64_t sd_getlength(BlockDriverState *bs)
    return s->inode.vdi_size;
 }

-static int sd_truncate(BlockDriverState *bs, int64_t offset)
+static int sd_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
 {
-    Error *local_err = NULL;
    BDRVSheepdogState *s = bs->opaque;
    int ret, fd;
    unsigned int datalen;
@@ -2169,16 +2162,15 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset)

    max_vdi_size = (UINT64_C(1) << s->inode.block_size_shift) * MAX_DATA_OBJS;
    if (offset < s->inode.vdi_size) {
-        error_report("shrinking is not supported");
+        error_setg(errp, "shrinking is not supported");
        return -EINVAL;
    } else if (offset > max_vdi_size) {
-        error_report("too big image size");
+        error_setg(errp, "too big image size");
        return -EINVAL;
    }

-    fd = connect_to_sdog(s, &local_err);
+    fd = connect_to_sdog(s, errp);
    if (fd < 0) {
-        error_report_err(local_err);
        return fd;
    }

@@ -2191,7 +2183,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset)
    close(fd);

    if (ret < 0) {
-        error_report("failed to update an inode.");
+        error_setg_errno(errp, -ret, "failed to update an inode");
    }

    return ret;
@@ -2456,7 +2448,7 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
    BDRVSheepdogState *s = bs->opaque;

    if (offset > s->inode.vdi_size) {
-        ret = sd_truncate(bs, offset);
+        ret = sd_truncate(bs, offset, NULL);
        if (ret < 0) {
            return ret;
        }
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -200,7 +200,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs,

        qdict_extract_subqdict(options, &file_options, "file.");
        QDECREF(file_options);
-        qdict_put(options, "file", qstring_from_str(bdrv_get_node_name(file)));
+        qdict_put_str(options, "file", bdrv_get_node_name(file));

        drv->bdrv_close(bs);
        bdrv_unref_child(bs, bs->file);
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -227,24 +227,23 @@ static int parse_uri(const char *filename, QDict *options, Error **errp)
    }

    if(uri->user && strcmp(uri->user, "") != 0) {
-        qdict_put(options, "user", qstring_from_str(uri->user));
+        qdict_put_str(options, "user", uri->user);
    }

-    qdict_put(options, "server.host", qstring_from_str(uri->server));
+    qdict_put_str(options, "server.host", uri->server);

    port_str = g_strdup_printf("%d", uri->port ?: 22);
-    qdict_put(options, "server.port", qstring_from_str(port_str));
+    qdict_put_str(options, "server.port", port_str);
    g_free(port_str);

-    qdict_put(options, "path", qstring_from_str(uri->path));
+    qdict_put_str(options, "path", uri->path);

    /* Pick out any query parameters that we understand, and ignore
     * the rest.
     */
    for (i = 0; i < qp->n; ++i) {
        if (strcmp(qp->p[i].name, "host_key_check") == 0) {
-            qdict_put(options, "host_key_check",
-                      qstring_from_str(qp->p[i].value));
+            qdict_put_str(options, "host_key_check", qp->p[i].value);
        }
    }

@@ -574,9 +573,8 @@ static bool ssh_process_legacy_socket_options(QDict *output_opts,
    }

    if (host) {
-        qdict_put(output_opts, "server.host", qstring_from_str(host));
-        qdict_put(output_opts, "server.port",
-                  qstring_from_str(port ?: stringify(22)));
+        qdict_put_str(output_opts, "server.host", host);
+        qdict_put_str(output_opts, "server.port", port ?: stringify(22));
    }

    return true;
@@ -681,7 +679,7 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
    }

    /* Open the socket and connect. */
-    s->sock = inet_connect_saddr(s->inet, errp, NULL, NULL);
+    s->sock = inet_connect_saddr(s->inet, NULL, NULL, errp);
    if (s->sock < 0) {
        ret = -EIO;
        goto err;
--- a/block/trace-events
+++ b/block/trace-events
@@ -110,3 +110,20 @@ qed_aio_write_data(void *s, void *acb, int ret, uint64_t offset, size_t len) "s
 qed_aio_write_prefill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64
 qed_aio_write_postfill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64
 qed_aio_write_main(void *s, void *acb, int ret, uint64_t offset, size_t len) "s %p acb %p ret %d offset %"PRIu64" len %zu"
+
+# block/vxhs.c
+vxhs_iio_callback(int error) "ctx is NULL: error %d"
+vxhs_iio_callback_chnfail(int err, int error) "QNIO channel failed, no i/o %d, %d"
+vxhs_iio_callback_unknwn(int opcode, int err) "unexpected opcode %d, errno %d"
+vxhs_aio_rw_invalid(int req) "Invalid I/O request iodir %d"
+vxhs_aio_rw_ioerr(char *guid, int iodir, uint64_t size, uint64_t off, void *acb, int ret, int err) "IO ERROR (vDisk %s) FOR : Read/Write = %d size = %"PRIu64" offset = %"PRIu64" ACB = %p. Error = %d, errno = %d"
+vxhs_get_vdisk_stat_err(char *guid, int ret, int err) "vDisk (%s) stat ioctl failed, ret = %d, errno = %d"
+vxhs_get_vdisk_stat(char *vdisk_guid, uint64_t vdisk_size) "vDisk %s stat ioctl returned size %"PRIu64
+vxhs_complete_aio(void *acb, uint64_t ret) "aio failed acb %p ret %"PRIu64
+vxhs_parse_uri_filename(const char *filename) "URI passed via bdrv_parse_filename %s"
+vxhs_open_vdiskid(const char *vdisk_id) "Opening vdisk-id %s"
+vxhs_open_hostinfo(char *of_vsa_addr, int port) "Adding host %s:%d to BDRVVXHSState"
+vxhs_open_iio_open(const char *host) "Failed to connect to storage agent on host %s"
+vxhs_parse_uri_hostinfo(char *host, int port) "Host: IP %s, Port %d"
+vxhs_close(char *vdisk_guid) "Closing vdisk %s"
+vxhs_get_creds(const char *cacert, const char *client_key, const char *client_cert) "cacert %s, client_key %s, client_cert %s"
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -55,7 +55,7 @@
 #include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include "qemu/bswap.h"
-#include "migration/migration.h"
+#include "migration/blocker.h"
 #include "qemu/coroutine.h"
 #include "qemu/cutils.h"
 #include "qemu/uuid.h"
@@ -832,9 +832,9 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
    }

    if (image_type == VDI_TYPE_STATIC) {
-        ret = blk_truncate(blk, offset + blocks * block_size);
+        ret = blk_truncate(blk, offset + blocks * block_size, errp);
        if (ret < 0) {
-            error_setg(errp, "Failed to statically allocate %s", filename);
+            error_prepend(errp, "Failed to statically allocate %s", filename);
            goto exit;
        }
    }
--- a/block/vhdx-log.c
+++ b/block/vhdx-log.c
@@ -548,7 +548,7 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
            if (new_file_size % (1024*1024)) {
                /* round up to nearest 1MB boundary */
                new_file_size = ((new_file_size >> 20) + 1) << 20;
-                bdrv_truncate(bs->file, new_file_size);
+                bdrv_truncate(bs->file, new_file_size, NULL);
            }
        }
        qemu_vfree(desc_entries);
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -24,7 +24,7 @@
 #include "qemu/crc32c.h"
 #include "qemu/bswap.h"
 #include "block/vhdx.h"
-#include "migration/migration.h"
+#include "migration/blocker.h"
 #include "qemu/uuid.h"

 /* Options for VHDX creation */
@@ -1171,7 +1171,7 @@ static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
    /* per the spec, the address for a block is in units of 1MB */
    *new_offset = ROUND_UP(*new_offset, 1024 * 1024);

-    return bdrv_truncate(bs->file, *new_offset + s->block_size);
+    return bdrv_truncate(bs->file, *new_offset + s->block_size, NULL);
 }

 /*
@@ -1586,7 +1586,7 @@ exit:
 static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
                           uint64_t image_size, VHDXImageType type,
                           bool use_zero_blocks, uint64_t file_offset,
-                           uint32_t length)
+                           uint32_t length, Error **errp)
 {
    int ret = 0;
    uint64_t data_file_offset;
@@ -1607,16 +1607,17 @@ static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
    if (type == VHDX_TYPE_DYNAMIC) {
        /* All zeroes, so we can just extend the file - the end of the BAT
         * is the furthest thing we have written yet */
-        ret = blk_truncate(blk, data_file_offset);
+        ret = blk_truncate(blk, data_file_offset, errp);
        if (ret < 0) {
            goto exit;
        }
    } else if (type == VHDX_TYPE_FIXED) {
-        ret = blk_truncate(blk, data_file_offset + image_size);
+        ret = blk_truncate(blk, data_file_offset + image_size, errp);
        if (ret < 0) {
            goto exit;
        }
    } else {
+        error_setg(errp, "Unsupported image type");
        ret = -ENOTSUP;
        goto exit;
    }
@@ -1627,6 +1628,7 @@ static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
        /* for a fixed file, the default BAT entry is not zero */
        s->bat = g_try_malloc0(length);
        if (length && s->bat == NULL) {
+            error_setg(errp, "Failed to allocate memory for the BAT");
            ret = -ENOMEM;
            goto exit;
        }
@@ -1646,6 +1648,7 @@ static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
        }
        ret = blk_pwrite(blk, file_offset, s->bat, length, 0);
        if (ret < 0) {
+            error_setg_errno(errp, -ret, "Failed to write the BAT");
            goto exit;
        }
    }
@@ -1671,7 +1674,8 @@ static int vhdx_create_new_region_table(BlockBackend *blk,
                                        uint32_t log_size,
                                        bool use_zero_blocks,
                                        VHDXImageType type,
-                                        uint64_t *metadata_offset)
+                                        uint64_t *metadata_offset,
+                                        Error **errp)
 {
    int ret = 0;
    uint32_t offset = 0;
@@ -1740,7 +1744,7 @@ static int vhdx_create_new_region_table(BlockBackend *blk,
    /* The region table gives us the data we need to create the BAT,
     * so do that now */
    ret = vhdx_create_bat(blk, s, image_size, type, use_zero_blocks,
-                          bat_file_offset, bat_length);
+                          bat_file_offset, bat_length, errp);
    if (ret < 0) {
        goto exit;
    }
@@ -1749,12 +1753,14 @@ static int vhdx_create_new_region_table(BlockBackend *blk,
    ret = blk_pwrite(blk, VHDX_REGION_TABLE_OFFSET, buffer,
                     VHDX_HEADER_BLOCK_SIZE, 0);
    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Failed to write first region table");
        goto exit;
    }

    ret = blk_pwrite(blk, VHDX_REGION_TABLE2_OFFSET, buffer,
                     VHDX_HEADER_BLOCK_SIZE, 0);
    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Failed to write second region table");
        goto exit;
    }

@@ -1825,6 +1831,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
        ret = -ENOTSUP;
        goto exit;
    } else {
+        error_setg(errp, "Invalid subformat '%s'", type);
        ret = -EINVAL;
        goto exit;
    }
@@ -1879,12 +1886,14 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
    ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature),
                     0);
    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Failed to write file signature");
        goto delete_and_exit;
    }
    if (creator) {
        ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET + sizeof(signature),
                         creator, creator_items * sizeof(gunichar2), 0);
        if (ret < 0) {
+            error_setg_errno(errp, -ret, "Failed to write creator field");
            goto delete_and_exit;
        }
    }
@@ -1893,13 +1902,14 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
    /* Creates (B),(C) */
    ret = vhdx_create_new_headers(blk, image_size, log_size);
    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Failed to write image headers");
        goto delete_and_exit;
    }

    /* Creates (D),(E),(G) explicitly. (F) created as by-product */
    ret = vhdx_create_new_region_table(blk, image_size, block_size, 512,
                                       log_size, use_zero_blocks, image_type,
-                                       &metadata_offset);
+                                       &metadata_offset, errp);
    if (ret < 0) {
        goto delete_and_exit;
    }
@@ -1908,6 +1918,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
    ret = vhdx_create_new_metadata(blk, image_size, block_size, 512,
                                   metadata_offset, image_type);
    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Failed to initialize metadata");
        goto delete_and_exit;
    }

--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -31,7 +31,7 @@
 #include "qemu/error-report.h"
 #include "qemu/module.h"
 #include "qemu/bswap.h"
-#include "migration/migration.h"
+#include "migration/blocker.h"
 #include "qemu/cutils.h"
 #include <zlib.h>

@@ -1714,10 +1714,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
    blk_set_allow_write_beyond_eof(blk, true);

    if (flat) {
-        ret = blk_truncate(blk, filesize);
-        if (ret < 0) {
-            error_setg_errno(errp, -ret, "Could not truncate file");
-        }
+        ret = blk_truncate(blk, filesize, errp);
        goto exit;
    }
    magic = cpu_to_be32(VMDK4_MAGIC);
@@ -1780,9 +1777,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
        goto exit;
    }

-    ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9);
+    ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9, errp);
    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Could not truncate file");
        goto exit;
    }

@@ -2090,10 +2086,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
    /* bdrv_pwrite write padding zeros to align to sector, we don't need that
     * for description file */
    if (desc_offset == 0) {
-        ret = blk_truncate(new_blk, desc_len);
-        if (ret < 0) {
-            error_setg_errno(errp, -ret, "Could not truncate file");
-        }
+        ret = blk_truncate(new_blk, desc_len, errp);
    }
 exit:
    if (new_blk) {
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -28,7 +28,7 @@
 #include "block/block_int.h"
 #include "sysemu/block-backend.h"
 #include "qemu/module.h"
-#include "migration/migration.h"
+#include "migration/blocker.h"
 #include "qemu/bswap.h"
 #include "qemu/uuid.h"

@@ -851,20 +851,21 @@ static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
 }

 static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
-                             int64_t total_size)
+                             int64_t total_size, Error **errp)
 {
    int ret;

    /* Add footer to total size */
    total_size += HEADER_SIZE;

-    ret = blk_truncate(blk, total_size);
+    ret = blk_truncate(blk, total_size, errp);
    if (ret < 0) {
        return ret;
    }

    ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE, 0);
    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Unable to write VHD header");
        return ret;
    }

@@ -996,11 +997,11 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)

    if (disk_type == VHD_DYNAMIC) {
        ret = create_dynamic_disk(blk, buf, total_sectors);
+        if (ret < 0) {
+            error_setg(errp, "Unable to create or write VHD header");
+        }
    } else {
-        ret = create_fixed_disk(blk, buf, total_size);
-    }
-    if (ret < 0) {
-        error_setg(errp, "Unable to create or write VHD header");
+        ret = create_fixed_disk(blk, buf, total_size, errp);
    }

 out:
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -28,7 +28,7 @@
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "qemu/bswap.h"
-#include "migration/migration.h"
+#include "migration/blocker.h"
 #include "qapi/qmp/qint.h"
 #include "qapi/qmp/qbool.h"
 #include "qapi/qmp/qstring.h"
@@ -1057,10 +1057,10 @@ static void vvfat_parse_filename(const char *filename, QDict *options,
    }

    /* Fill in the options QDict */
-    qdict_put(options, "dir", qstring_from_str(filename));
-    qdict_put(options, "fat-type", qint_from_int(fat_type));
-    qdict_put(options, "floppy", qbool_from_bool(floppy));
-    qdict_put(options, "rw", qbool_from_bool(rw));
+    qdict_put_str(options, "dir", filename);
+    qdict_put_int(options, "fat-type", fat_type);
+    qdict_put_bool(options, "floppy", floppy);
+    qdict_put_bool(options, "rw", rw);
 }

 static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
@@ -1156,8 +1156,6 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,

    s->current_cluster=0xffffffff;

-    /* read only is the default for safety */
-    bs->read_only = true;
    s->qcow = NULL;
    s->qcow_filename = NULL;
    s->fat2 = NULL;
@@ -1169,11 +1167,24 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
    s->sector_count = cyls * heads * secs - (s->first_sectors_number - 1);

    if (qemu_opt_get_bool(opts, "rw", false)) {
-        ret = enable_write_target(bs, errp);
-        if (ret < 0) {
+        if (!bdrv_is_read_only(bs)) {
+            ret = enable_write_target(bs, errp);
+            if (ret < 0) {
+                goto fail;
+            }
+        } else {
+            ret = -EPERM;
+            error_setg(errp,
+                       "Unable to set VVFAT to 'rw' when drive is read-only");
+            goto fail;
+        }
+    } else  {
+        /* read only is the default for safety */
+        ret = bdrv_set_read_only(bs, true, &local_err);
+        if (ret < 0) {
+            error_propagate(errp, local_err);
            goto fail;
        }
-        bs->read_only = false;
    }

    bs->total_sectors = cyls * heads * secs;
@@ -3040,7 +3051,7 @@ static int enable_write_target(BlockDriverState *bs, Error **errp)
    }

    options = qdict_new();
-    qdict_put(options, "write-target.driver", qstring_from_str("qcow"));
+    qdict_put_str(options, "write-target.driver", "qcow");
    s->qcow = bdrv_open_child(s->qcow_filename, options, "write-target", bs,
                              &child_vvfat_qcow, false, errp);
    QDECREF(options);
--- a/block/vxhs.c
+++ b/block/vxhs.c
@@ -0,0 +1,575 @@
+/*
+ * QEMU Block driver for Veritas HyperScale (VxHS)
+ *
+ * Copyright (c) 2017 Veritas Technologies LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include <qnio/qnio_api.h>
+#include <sys/param.h>
+#include "block/block_int.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
+#include "trace.h"
+#include "qemu/uri.h"
+#include "qapi/error.h"
+#include "qemu/uuid.h"
+#include "crypto/tlscredsx509.h"
+
+#define VXHS_OPT_FILENAME           "filename"
+#define VXHS_OPT_VDISK_ID           "vdisk-id"
+#define VXHS_OPT_SERVER             "server"
+#define VXHS_OPT_HOST               "host"
+#define VXHS_OPT_PORT               "port"
+
+/* Only accessed under QEMU global mutex */
+static uint32_t vxhs_ref;
+
+typedef enum {
+    VDISK_AIO_READ,
+    VDISK_AIO_WRITE,
+} VDISKAIOCmd;
+
+/*
+ * HyperScale AIO callbacks structure
+ */
+typedef struct VXHSAIOCB {
+    BlockAIOCB common;
+    int err;
+} VXHSAIOCB;
+
+typedef struct VXHSvDiskHostsInfo {
+    void *dev_handle; /* Device handle */
+    char *host; /* Host name or IP */
+    int port; /* Host's port number */
+} VXHSvDiskHostsInfo;
+
+/*
+ * Structure per vDisk maintained for state
+ */
+typedef struct BDRVVXHSState {
+    VXHSvDiskHostsInfo vdisk_hostinfo; /* Per host info */
+    char *vdisk_guid;
+    char *tlscredsid; /* tlscredsid */
+} BDRVVXHSState;
+
+static void vxhs_complete_aio_bh(void *opaque)
+{
+    VXHSAIOCB *acb = opaque;
+    BlockCompletionFunc *cb = acb->common.cb;
+    void *cb_opaque = acb->common.opaque;
+    int ret = 0;
+
+    if (acb->err != 0) {
+        trace_vxhs_complete_aio(acb, acb->err);
+        ret = (-EIO);
+    }
+
+    qemu_aio_unref(acb);
+    cb(cb_opaque, ret);
+}
+
+/*
+ * Called from a libqnio thread
+ */
+static void vxhs_iio_callback(void *ctx, uint32_t opcode, uint32_t error)
+{
+    VXHSAIOCB *acb = NULL;
+
+    switch (opcode) {
+    case IRP_READ_REQUEST:
+    case IRP_WRITE_REQUEST:
+
+        /*
+         * ctx is VXHSAIOCB*
+         * ctx is NULL if error is QNIOERROR_CHANNEL_HUP
+         */
+        if (ctx) {
+            acb = ctx;
+        } else {
+            trace_vxhs_iio_callback(error);
+            goto out;
+        }
+
+        if (error) {
+            if (!acb->err) {
+                acb->err = error;
+            }
+            trace_vxhs_iio_callback(error);
+        }
+
+        aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs),
+                                vxhs_complete_aio_bh, acb);
+        break;
+
+    default:
+        if (error == QNIOERROR_HUP) {
+            /*
+             * Channel failed, spontaneous notification,
+             * not in response to I/O
+             */
+            trace_vxhs_iio_callback_chnfail(error, errno);
+        } else {
+            trace_vxhs_iio_callback_unknwn(opcode, error);
+        }
+        break;
+    }
+out:
+    return;
+}
+
+static QemuOptsList runtime_opts = {
+    .name = "vxhs",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+    .desc = {
+        {
+            .name = VXHS_OPT_FILENAME,
+            .type = QEMU_OPT_STRING,
+            .help = "URI to the Veritas HyperScale image",
+        },
+        {
+            .name = VXHS_OPT_VDISK_ID,
+            .type = QEMU_OPT_STRING,
+            .help = "UUID of the VxHS vdisk",
+        },
+        {
+            .name = "tls-creds",
+            .type = QEMU_OPT_STRING,
+            .help = "ID of the TLS/SSL credentials to use",
+        },
+        { /* end of list */ }
+    },
+};
+
+static QemuOptsList runtime_tcp_opts = {
+    .name = "vxhs_tcp",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_tcp_opts.head),
+    .desc = {
+        {
+            .name = VXHS_OPT_HOST,
+            .type = QEMU_OPT_STRING,
+            .help = "host address (ipv4 addresses)",
+        },
+        {
+            .name = VXHS_OPT_PORT,
+            .type = QEMU_OPT_NUMBER,
+            .help = "port number on which VxHSD is listening (default 9999)",
+            .def_value_str = "9999"
+        },
+        { /* end of list */ }
+    },
+};
+
+/*
+ * Parse incoming URI and populate *options with the host
+ * and device information
+ */
+static int vxhs_parse_uri(const char *filename, QDict *options)
+{
+    URI *uri = NULL;
+    char *port;
+    int ret = 0;
+
+    trace_vxhs_parse_uri_filename(filename);
+    uri = uri_parse(filename);
+    if (!uri || !uri->server || !uri->path) {
+        uri_free(uri);
+        return -EINVAL;
+    }
+
+    qdict_put_str(options, VXHS_OPT_SERVER ".host", uri->server);
+
+    if (uri->port) {
+        port = g_strdup_printf("%d", uri->port);
+        qdict_put_str(options, VXHS_OPT_SERVER ".port", port);
+        g_free(port);
+    }
+
+    qdict_put_str(options, "vdisk-id", uri->path);
+
+    trace_vxhs_parse_uri_hostinfo(uri->server, uri->port);
+    uri_free(uri);
+
+    return ret;
+}
+
+static void vxhs_parse_filename(const char *filename, QDict *options,
+                                Error **errp)
+{
+    if (qdict_haskey(options, "vdisk-id") || qdict_haskey(options, "server")) {
+        error_setg(errp, "vdisk-id/server and a file name may not be specified "
+                         "at the same time");
+        return;
+    }
+
+    if (strstr(filename, "://")) {
+        int ret = vxhs_parse_uri(filename, options);
+        if (ret < 0) {
+            error_setg(errp, "Invalid URI. URI should be of the form "
+                       "  vxhs://<host_ip>:<port>/<vdisk-id>");
+        }
+    }
+}
+
+static int vxhs_init_and_ref(void)
+{
+    if (vxhs_ref++ == 0) {
+        if (iio_init(QNIO_VERSION, vxhs_iio_callback)) {
+            return -ENODEV;
+        }
+    }
+    return 0;
+}
+
+static void vxhs_unref(void)
+{
+    if (--vxhs_ref == 0) {
+        iio_fini();
+    }
+}
+
+static void vxhs_get_tls_creds(const char *id, char **cacert,
+                               char **key, char **cert, Error **errp)
+{
+    Object *obj;
+    QCryptoTLSCreds *creds;
+    QCryptoTLSCredsX509 *creds_x509;
+
+    obj = object_resolve_path_component(
+        object_get_objects_root(), id);
+
+    if (!obj) {
+        error_setg(errp, "No TLS credentials with id '%s'",
+                   id);
+        return;
+    }
+
+    creds_x509 = (QCryptoTLSCredsX509 *)
+        object_dynamic_cast(obj, TYPE_QCRYPTO_TLS_CREDS_X509);
+
+    if (!creds_x509) {
+        error_setg(errp, "Object with id '%s' is not TLS credentials",
+                   id);
+        return;
+    }
+
+    creds = &creds_x509->parent_obj;
+
+    if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) {
+        error_setg(errp,
+                   "Expecting TLS credentials with a client endpoint");
+        return;
+    }
+
+    /*
+     * Get the cacert, client_cert and client_key file names.
+     */
+    if (!creds->dir) {
+        error_setg(errp, "TLS object missing 'dir' property value");
+        return;
+    }
+
+    *cacert = g_strdup_printf("%s/%s", creds->dir,
+                              QCRYPTO_TLS_CREDS_X509_CA_CERT);
+    *cert = g_strdup_printf("%s/%s", creds->dir,
+                            QCRYPTO_TLS_CREDS_X509_CLIENT_CERT);
+    *key = g_strdup_printf("%s/%s", creds->dir,
+                           QCRYPTO_TLS_CREDS_X509_CLIENT_KEY);
+}
+
+static int vxhs_open(BlockDriverState *bs, QDict *options,
+                     int bdrv_flags, Error **errp)
+{
+    BDRVVXHSState *s = bs->opaque;
+    void *dev_handlep;
+    QDict *backing_options = NULL;
+    QemuOpts *opts = NULL;
+    QemuOpts *tcp_opts = NULL;
+    char *of_vsa_addr = NULL;
+    Error *local_err = NULL;
+    const char *vdisk_id_opt;
+    const char *server_host_opt;
+    int ret = 0;
+    char *cacert = NULL;
+    char *client_key = NULL;
+    char *client_cert = NULL;
+
+    ret = vxhs_init_and_ref();
+    if (ret < 0) {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    /* Create opts info from runtime_opts and runtime_tcp_opts list */
+    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+    tcp_opts = qemu_opts_create(&runtime_tcp_opts, NULL, 0, &error_abort);
+
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (local_err) {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    /* vdisk-id is the disk UUID */
+    vdisk_id_opt = qemu_opt_get(opts, VXHS_OPT_VDISK_ID);
+    if (!vdisk_id_opt) {
+        error_setg(&local_err, QERR_MISSING_PARAMETER, VXHS_OPT_VDISK_ID);
+        ret = -EINVAL;
+        goto out;
+    }
+
+    /* vdisk-id may contain a leading '/' */
+    if (strlen(vdisk_id_opt) > UUID_FMT_LEN + 1) {
+        error_setg(&local_err, "vdisk-id cannot be more than %d characters",
+                   UUID_FMT_LEN);
+        ret = -EINVAL;
+        goto out;
+    }
+
+    s->vdisk_guid = g_strdup(vdisk_id_opt);
+    trace_vxhs_open_vdiskid(vdisk_id_opt);
+
+    /* get the 'server.' arguments */
+    qdict_extract_subqdict(options, &backing_options, VXHS_OPT_SERVER".");
+
+    qemu_opts_absorb_qdict(tcp_opts, backing_options, &local_err);
+    if (local_err != NULL) {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    server_host_opt = qemu_opt_get(tcp_opts, VXHS_OPT_HOST);
+    if (!server_host_opt) {
+        error_setg(&local_err, QERR_MISSING_PARAMETER,
+                   VXHS_OPT_SERVER"."VXHS_OPT_HOST);
+        ret = -EINVAL;
+        goto out;
+    }
+
+    if (strlen(server_host_opt) > MAXHOSTNAMELEN) {
+        error_setg(&local_err, "server.host cannot be more than %d characters",
+                   MAXHOSTNAMELEN);
+        ret = -EINVAL;
+        goto out;
+    }
+
+    /* check if we got tls-creds via the --object argument */
+    s->tlscredsid = g_strdup(qemu_opt_get(opts, "tls-creds"));
+    if (s->tlscredsid) {
+        vxhs_get_tls_creds(s->tlscredsid, &cacert, &client_key,
+                           &client_cert, &local_err);
+        if (local_err != NULL) {
+            ret = -EINVAL;
+            goto out;
+        }
+        trace_vxhs_get_creds(cacert, client_key, client_cert);
+    }
+
+    s->vdisk_hostinfo.host = g_strdup(server_host_opt);
+    s->vdisk_hostinfo.port = g_ascii_strtoll(qemu_opt_get(tcp_opts,
+                                                          VXHS_OPT_PORT),
+                                                          NULL, 0);
+
+    trace_vxhs_open_hostinfo(s->vdisk_hostinfo.host,
+                             s->vdisk_hostinfo.port);
+
+    of_vsa_addr = g_strdup_printf("of://%s:%d",
+                                  s->vdisk_hostinfo.host,
+                                  s->vdisk_hostinfo.port);
+
+    /*
+     * Open qnio channel to storage agent if not opened before
+     */
+    dev_handlep = iio_open(of_vsa_addr, s->vdisk_guid, 0,
+                           cacert, client_key, client_cert);
+    if (dev_handlep == NULL) {
+        trace_vxhs_open_iio_open(of_vsa_addr);
+        ret = -ENODEV;
+        goto out;
+    }
+    s->vdisk_hostinfo.dev_handle = dev_handlep;
+
+out:
+    g_free(of_vsa_addr);
+    QDECREF(backing_options);
+    qemu_opts_del(tcp_opts);
+    qemu_opts_del(opts);
+    g_free(cacert);
+    g_free(client_key);
+    g_free(client_cert);
+
+    if (ret < 0) {
+        vxhs_unref();
+        error_propagate(errp, local_err);
+        g_free(s->vdisk_hostinfo.host);
+        g_free(s->vdisk_guid);
+        g_free(s->tlscredsid);
+        s->vdisk_guid = NULL;
+    }
+
+    return ret;
+}
+
+static const AIOCBInfo vxhs_aiocb_info = {
+    .aiocb_size = sizeof(VXHSAIOCB)
+};
+
+/*
+ * This allocates QEMU-VXHS callback for each IO
+ * and is passed to QNIO. When QNIO completes the work,
+ * it will be passed back through the callback.
+ */
+static BlockAIOCB *vxhs_aio_rw(BlockDriverState *bs, int64_t sector_num,
+                               QEMUIOVector *qiov, int nb_sectors,
+                               BlockCompletionFunc *cb, void *opaque,
+                               VDISKAIOCmd iodir)
+{
+    VXHSAIOCB *acb = NULL;
+    BDRVVXHSState *s = bs->opaque;
+    size_t size;
+    uint64_t offset;
+    int iio_flags = 0;
+    int ret = 0;
+    void *dev_handle = s->vdisk_hostinfo.dev_handle;
+
+    offset = sector_num * BDRV_SECTOR_SIZE;
+    size = nb_sectors * BDRV_SECTOR_SIZE;
+    acb = qemu_aio_get(&vxhs_aiocb_info, bs, cb, opaque);
+
+    /*
+     * Initialize VXHSAIOCB.
+     */
+    acb->err = 0;
+
+    iio_flags = IIO_FLAG_ASYNC;
+
+    switch (iodir) {
+    case VDISK_AIO_WRITE:
+            ret = iio_writev(dev_handle, acb, qiov->iov, qiov->niov,
+                             offset, (uint64_t)size, iio_flags);
+            break;
+    case VDISK_AIO_READ:
+            ret = iio_readv(dev_handle, acb, qiov->iov, qiov->niov,
+                            offset, (uint64_t)size, iio_flags);
+            break;
+    default:
+            trace_vxhs_aio_rw_invalid(iodir);
+            goto errout;
+    }
+
+    if (ret != 0) {
+        trace_vxhs_aio_rw_ioerr(s->vdisk_guid, iodir, size, offset,
+                                acb, ret, errno);
+        goto errout;
+    }
+    return &acb->common;
+
+errout:
+    qemu_aio_unref(acb);
+    return NULL;
+}
+
+static BlockAIOCB *vxhs_aio_readv(BlockDriverState *bs,
+                                   int64_t sector_num, QEMUIOVector *qiov,
+                                   int nb_sectors,
+                                   BlockCompletionFunc *cb, void *opaque)
+{
+    return vxhs_aio_rw(bs, sector_num, qiov, nb_sectors, cb,
+                       opaque, VDISK_AIO_READ);
+}
+
+static BlockAIOCB *vxhs_aio_writev(BlockDriverState *bs,
+                                   int64_t sector_num, QEMUIOVector *qiov,
+                                   int nb_sectors,
+                                   BlockCompletionFunc *cb, void *opaque)
+{
+    return vxhs_aio_rw(bs, sector_num, qiov, nb_sectors,
+                       cb, opaque, VDISK_AIO_WRITE);
+}
+
+static void vxhs_close(BlockDriverState *bs)
+{
+    BDRVVXHSState *s = bs->opaque;
+
+    trace_vxhs_close(s->vdisk_guid);
+
+    g_free(s->vdisk_guid);
+    s->vdisk_guid = NULL;
+
+    /*
+     * Close vDisk device
+     */
+    if (s->vdisk_hostinfo.dev_handle) {
+        iio_close(s->vdisk_hostinfo.dev_handle);
+        s->vdisk_hostinfo.dev_handle = NULL;
+    }
+
+    vxhs_unref();
+
+    /*
+     * Free the dynamically allocated host string etc
+     */
+    g_free(s->vdisk_hostinfo.host);
+    g_free(s->tlscredsid);
+    s->tlscredsid = NULL;
+    s->vdisk_hostinfo.host = NULL;
+    s->vdisk_hostinfo.port = 0;
+}
+
+static int64_t vxhs_get_vdisk_stat(BDRVVXHSState *s)
+{
+    int64_t vdisk_size = -1;
+    int ret = 0;
+    void *dev_handle = s->vdisk_hostinfo.dev_handle;
+
+    ret = iio_ioctl(dev_handle, IOR_VDISK_STAT, &vdisk_size, 0);
+    if (ret < 0) {
+        trace_vxhs_get_vdisk_stat_err(s->vdisk_guid, ret, errno);
+        return -EIO;
+    }
+
+    trace_vxhs_get_vdisk_stat(s->vdisk_guid, vdisk_size);
+    return vdisk_size;
+}
+
+/*
+ * Returns the size of vDisk in bytes. This is required
+ * by QEMU block upper block layer so that it is visible
+ * to guest.
+ */
+static int64_t vxhs_getlength(BlockDriverState *bs)
+{
+    BDRVVXHSState *s = bs->opaque;
+    int64_t vdisk_size;
+
+    vdisk_size = vxhs_get_vdisk_stat(s);
+    if (vdisk_size < 0) {
+        return -EIO;
+    }
+
+    return vdisk_size;
+}
+
+static BlockDriver bdrv_vxhs = {
+    .format_name                  = "vxhs",
+    .protocol_name                = "vxhs",
+    .instance_size                = sizeof(BDRVVXHSState),
+    .bdrv_file_open               = vxhs_open,
+    .bdrv_parse_filename          = vxhs_parse_filename,
+    .bdrv_close                   = vxhs_close,
+    .bdrv_getlength               = vxhs_getlength,
+    .bdrv_aio_readv               = vxhs_aio_readv,
+    .bdrv_aio_writev              = vxhs_aio_writev,
+};
+
+static void bdrv_vxhs_init(void)
+{
+    bdrv_register(&bdrv_vxhs);
+}
+
+block_init(bdrv_vxhs_init);
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -99,9 +99,8 @@ static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, Error **errp)
 }


-void qmp_nbd_server_start(SocketAddress *addr,
-                          bool has_tls_creds, const char *tls_creds,
-                          Error **errp)
+void nbd_server_start(SocketAddress *addr, const char *tls_creds,
+                      Error **errp)
 {
    if (nbd_server) {
        error_setg(errp, "NBD server already running");
@@ -118,14 +117,14 @@ void qmp_nbd_server_start(SocketAddress *addr,
        goto error;
    }

-    if (has_tls_creds) {
+    if (tls_creds) {
        nbd_server->tlscreds = nbd_get_tls_creds(tls_creds, errp);
        if (!nbd_server->tlscreds) {
            goto error;
        }

-        /* TODO SOCKET_ADDRESS_KIND_FD where fd has AF_INET or AF_INET6 */
-        if (addr->type != SOCKET_ADDRESS_KIND_INET) {
+        /* TODO SOCKET_ADDRESS_TYPE_FD where fd has AF_INET or AF_INET6 */
+        if (addr->type != SOCKET_ADDRESS_TYPE_INET) {
            error_setg(errp, "TLS is only supported with IPv4/IPv6");
            goto error;
        }
@@ -145,6 +144,16 @@ void qmp_nbd_server_start(SocketAddress *addr,
    nbd_server = NULL;
 }

+void qmp_nbd_server_start(SocketAddressLegacy *addr,
+                          bool has_tls_creds, const char *tls_creds,
+                          Error **errp)
+{
+    SocketAddress *addr_flat = socket_address_flatten(addr);
+
+    nbd_server_start(addr_flat, tls_creds, errp);
+    qapi_free_SocketAddress(addr_flat);
+}
+
 void qmp_nbd_server_add(const char *device, bool has_writable, bool writable,
                        Error **errp)
 {
--- a/blockdev.c
+++ b/blockdev.c
@@ -527,7 +527,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
            error_setg(errp, "Cannot specify both 'driver' and 'format'");
            goto early_err;
        }
-        qdict_put(bs_opts, "driver", qstring_from_str(buf));
+        qdict_put_str(bs_opts, "driver", buf);
    }

    on_write_error = BLOCKDEV_ON_ERROR_ENOSPC;
@@ -903,10 +903,8 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
        copy_on_read = false;
    }

-    qdict_put(bs_opts, BDRV_OPT_READ_ONLY,
-              qstring_from_str(read_only ? "on" : "off"));
-    qdict_put(bs_opts, "copy-on-read",
-              qstring_from_str(copy_on_read ? "on" :"off"));
+    qdict_put_str(bs_opts, BDRV_OPT_READ_ONLY, read_only ? "on" : "off");
+    qdict_put_str(bs_opts, "copy-on-read", copy_on_read ? "on" : "off");

    /* Controller type */
    value = qemu_opt_get(legacy_opts, "if");
@@ -1030,7 +1028,7 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
            new_id = g_strdup_printf("%s%s%i", if_name[type],
                                     mediastr, unit_id);
        }
-        qdict_put(bs_opts, "id", qstring_from_str(new_id));
+        qdict_put_str(bs_opts, "id", new_id);
        g_free(new_id);
    }

@@ -1067,7 +1065,7 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
            error_report("werror is not supported by this bus type");
            goto fail;
        }
-        qdict_put(bs_opts, "werror", qstring_from_str(werror));
+        qdict_put_str(bs_opts, "werror", werror);
    }

    rerror = qemu_opt_get(legacy_opts, "rerror");
@@ -1077,7 +1075,7 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
            error_report("rerror is not supported by this bus type");
            goto fail;
        }
-        qdict_put(bs_opts, "rerror", qstring_from_str(rerror));
+        qdict_put_str(bs_opts, "rerror", rerror);
    }

    /* Actual block device init: Functionality shared with blockdev-add */
@@ -1728,7 +1726,7 @@ static void external_snapshot_prepare(BlkActionState *common,
            bdrv_img_create(new_image_file, format,
                            state->old_bs->filename,
                            state->old_bs->drv->format_name,
-                            NULL, size, flags, &local_err, false);
+                            NULL, size, flags, false, &local_err);
            if (local_err) {
                error_propagate(errp, local_err);
                return;
@@ -1737,10 +1735,9 @@ static void external_snapshot_prepare(BlkActionState *common,

        options = qdict_new();
        if (s->has_snapshot_node_name) {
-            qdict_put(options, "node-name",
-                      qstring_from_str(snapshot_node_name));
+            qdict_put_str(options, "node-name", snapshot_node_name);
        }
-        qdict_put(options, "driver", qstring_from_str(format));
+        qdict_put_str(options, "driver", format);

        flags |= BDRV_O_NO_BACKING;
    }
@@ -2579,11 +2576,10 @@ void qmp_blockdev_change_medium(bool has_device, const char *device,

    options = qdict_new();
    detect_zeroes = blk_get_detect_zeroes_from_root_state(blk);
-    qdict_put(options, "detect-zeroes",
-              qstring_from_str(detect_zeroes ? "on" : "off"));
+    qdict_put_str(options, "detect-zeroes", detect_zeroes ? "on" : "off");

    if (has_format) {
-        qdict_put(options, "driver", qstring_from_str(format));
+        qdict_put_str(options, "driver", format);
    }

    medium_bs = bdrv_open(filename, NULL, options, bdrv_flags, errp);
@@ -2927,29 +2923,9 @@ void qmp_block_resize(bool has_device, const char *device,
        goto out;
    }

-    /* complete all in-flight operations before resizing the device */
-    bdrv_drain_all();
-
-    ret = blk_truncate(blk, size);
-    switch (ret) {
-    case 0:
-        break;
-    case -ENOMEDIUM:
-        error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
-        break;
-    case -ENOTSUP:
-        error_setg(errp, QERR_UNSUPPORTED);
-        break;
-    case -EACCES:
-        error_setg(errp, "Device '%s' is read only", device);
-        break;
-    case -EBUSY:
-        error_setg(errp, QERR_DEVICE_IN_USE, device);
-        break;
-    default:
-        error_setg_errno(errp, -ret, "Could not resize");
-        break;
-    }
+    bdrv_drained_begin(bs);
+    ret = blk_truncate(blk, size, errp);
+    bdrv_drained_end(bs);

 out:
    blk_unref(blk);
@@ -3142,7 +3118,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
        }
        commit_active_start(has_job_id ? job_id : NULL, bs, base_bs,
                            BLOCK_JOB_DEFAULT, speed, on_error,
-                            filter_node_name, NULL, NULL, &local_err, false);
+                            filter_node_name, NULL, NULL, false, &local_err);
    } else {
        BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs);
        if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
@@ -3174,6 +3150,7 @@ static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn,
    Error *local_err = NULL;
    int flags;
    int64_t size;
+    bool set_backing_hd = false;

    if (!backup->has_speed) {
        backup->speed = 0;
@@ -3224,6 +3201,8 @@ static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn,
    }
    if (backup->sync == MIRROR_SYNC_MODE_NONE) {
        source = bs;
+        flags |= BDRV_O_NO_BACKING;
+        set_backing_hd = true;
    }

    size = bdrv_getlength(bs);
@@ -3237,10 +3216,10 @@ static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn,
        if (source) {
            bdrv_img_create(backup->target, backup->format, source->filename,
                            source->drv->format_name, NULL,
-                            size, flags, &local_err, false);
+                            size, flags, false, &local_err);
        } else {
            bdrv_img_create(backup->target, backup->format, NULL, NULL, NULL,
-                            size, flags, &local_err, false);
+                            size, flags, false, &local_err);
        }
    }

@@ -3250,8 +3229,10 @@ static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn,
    }

    if (backup->format) {
-        options = qdict_new();
-        qdict_put(options, "driver", qstring_from_str(backup->format));
+        if (!options) {
+            options = qdict_new();
+        }
+        qdict_put_str(options, "driver", backup->format);
    }

    target_bs = bdrv_open(backup->target, NULL, options, flags, errp);
@@ -3261,6 +3242,14 @@ static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn,

    bdrv_set_aio_context(target_bs, aio_context);

+    if (set_backing_hd) {
+        bdrv_set_backing_hd(target_bs, source, &local_err);
+        if (local_err) {
+            bdrv_unref(target_bs);
+            goto out;
+        }
+    }
+
    if (backup->has_bitmap) {
        bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap);
        if (!bmap) {
@@ -3531,7 +3520,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
        /* create new image w/o backing file */
        assert(format);
        bdrv_img_create(arg->target, format,
-                        NULL, NULL, NULL, size, flags, &local_err, false);
+                        NULL, NULL, NULL, size, flags, false, &local_err);
    } else {
        switch (arg->mode) {
        case NEW_IMAGE_MODE_EXISTING:
@@ -3541,7 +3530,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
            bdrv_img_create(arg->target, format,
                            source->filename,
                            source->drv->format_name,
-                            NULL, size, flags, &local_err, false);
+                            NULL, size, flags, false, &local_err);
            break;
        default:
            abort();
@@ -3555,10 +3544,10 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)

    options = qdict_new();
    if (arg->has_node_name) {
-        qdict_put(options, "node-name", qstring_from_str(arg->node_name));
+        qdict_put_str(options, "node-name", arg->node_name);
    }
    if (format) {
-        qdict_put(options, "driver", qstring_from_str(format));
+        qdict_put_str(options, "driver", format);
    }

    /* Mirroring takes care of copy-on-write using the source's backing
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -744,10 +744,7 @@ int main(int argc, char **argv)
    qemu_init_cpu_list();
    module_call_init(MODULE_INIT_QOM);

-    if ((envlist = envlist_create()) == NULL) {
-        (void) fprintf(stderr, "Unable to allocate envlist\n");
-        exit(1);
-    }
+    envlist = envlist_create();

    /* add current environment into the list */
    for (wrk = environ; *wrk != NULL; wrk++) {
@@ -785,10 +782,7 @@ int main(int argc, char **argv)
                usage();
        } else if (!strcmp(r, "ignore-environment")) {
            envlist_free(envlist);
-            if ((envlist = envlist_create()) == NULL) {
-                (void) fprintf(stderr, "Unable to allocate envlist\n");
-                exit(1);
-            }
+            envlist = envlist_create();
        } else if (!strcmp(r, "U")) {
            r = argv[optind++];
            if (envlist_unsetenv(envlist, r) != 0)
@@ -956,10 +950,10 @@ int main(int argc, char **argv)
    }

    for (wrk = target_environ; *wrk; wrk++) {
-        free(*wrk);
+        g_free(*wrk);
    }

-    free(target_environ);
+    g_free(target_environ);

    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        qemu_log("guest_base  0x%lx\n", guest_base);
--- a/chardev/char-fd.c
+++ b/chardev/char-fd.c
@@ -58,7 +58,7 @@ static gboolean fd_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
    ret = qio_channel_read(
        chan, (gchar *)buf, len, NULL);
    if (ret == 0) {
-        remove_fd_in_watch(chr, NULL);
+        remove_fd_in_watch(chr);
        qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
        return FALSE;
    }
@@ -89,9 +89,9 @@ static void fd_chr_update_read_handler(Chardev *chr,
 {
    FDChardev *s = FD_CHARDEV(chr);

-    remove_fd_in_watch(chr, NULL);
+    remove_fd_in_watch(chr);
    if (s->ioc_in) {
-        chr->fd_in_tag = io_add_watch_poll(chr, s->ioc_in,
+        chr->gsource = io_add_watch_poll(chr, s->ioc_in,
                                           fd_chr_read_poll,
                                           fd_chr_read, chr,
                                           context);
@@ -103,7 +103,7 @@ static void char_fd_finalize(Object *obj)
    Chardev *chr = CHARDEV(obj);
    FDChardev *s = FD_CHARDEV(obj);

-    remove_fd_in_watch(chr, NULL);
+    remove_fd_in_watch(chr);
    if (s->ioc_in) {
        object_unref(OBJECT(s->ioc_in));
    }
--- a/chardev/char-io.c
+++ b/chardev/char-io.c
@@ -98,7 +98,7 @@ static GSourceFuncs io_watch_poll_funcs = {
    .finalize = io_watch_poll_finalize,
 };

-guint io_add_watch_poll(Chardev *chr,
+GSource *io_add_watch_poll(Chardev *chr,
                        QIOChannel *ioc,
                        IOCanReadHandler *fd_can_read,
                        QIOChannelFunc fd_read,
@@ -106,7 +106,6 @@ guint io_add_watch_poll(Chardev *chr,
                        GMainContext *context)
 {
    IOWatchPoll *iwp;
-    int tag;
    char *name;

    iwp = (IOWatchPoll *) g_source_new(&io_watch_poll_funcs,
@@ -122,21 +121,15 @@ guint io_add_watch_poll(Chardev *chr,
    g_source_set_name((GSource *)iwp, name);
    g_free(name);

-    tag = g_source_attach(&iwp->parent, context);
+    g_source_attach(&iwp->parent, context);
    g_source_unref(&iwp->parent);
-    return tag;
+    return (GSource *)iwp;
 }

-static void io_remove_watch_poll(guint tag, GMainContext *context)
+static void io_remove_watch_poll(GSource *source)
 {
-    GSource *source;
    IOWatchPoll *iwp;

-    g_return_if_fail(tag > 0);
-
-    source = g_main_context_find_source_by_id(context, tag);
-    g_return_if_fail(source != NULL);
-
    iwp = io_watch_poll_from_source(source);
    if (iwp->src) {
        g_source_destroy(iwp->src);
@@ -146,11 +139,11 @@ static void io_remove_watch_poll(guint tag, GMainContext *context)
    g_source_destroy(&iwp->parent);
 }

-void remove_fd_in_watch(Chardev *chr, GMainContext *context)
+void remove_fd_in_watch(Chardev *chr)
 {
-    if (chr->fd_in_tag) {
-        io_remove_watch_poll(chr->fd_in_tag, context);
-        chr->fd_in_tag = 0;
+    if (chr->gsource) {
+        io_remove_watch_poll(chr->gsource);
+        chr->gsource = NULL;
    }
 }

--- a/chardev/char-io.h
+++ b/chardev/char-io.h
@@ -29,14 +29,14 @@
 #include "sysemu/char.h"

 /* Can only be used for read */
-guint io_add_watch_poll(Chardev *chr,
+GSource *io_add_watch_poll(Chardev *chr,
                        QIOChannel *ioc,
                        IOCanReadHandler *fd_can_read,
                        QIOChannelFunc fd_read,
                        gpointer user_data,
                        GMainContext *context);

-void remove_fd_in_watch(Chardev *chr, GMainContext *context);
+void remove_fd_in_watch(Chardev *chr);

 int io_channel_send(QIOChannel *ioc, const void *buf, size_t len);

--- a/chardev/char-mux.c
+++ b/chardev/char-mux.c
@@ -114,7 +114,7 @@ static void mux_print_help(Chardev *chr)
    }
 }

-void mux_chr_send_event(MuxChardev *d, int mux_nr, int event)
+static void mux_chr_send_event(MuxChardev *d, int mux_nr, int event)
 {
    CharBackend *be = d->backends[mux_nr];

@@ -222,9 +222,9 @@ static void mux_chr_read(void *opaque, const uint8_t *buf, int size)

 bool muxes_realized;

-static void mux_chr_event(void *opaque, int event)
+void mux_chr_send_all_event(Chardev *chr, int event)
 {
-    MuxChardev *d = MUX_CHARDEV(opaque);
+    MuxChardev *d = MUX_CHARDEV(chr);
    int i;

    if (!muxes_realized) {
@@ -237,6 +237,11 @@ static void mux_chr_event(void *opaque, int event)
    }
 }

+static void mux_chr_event(void *opaque, int event)
+{
+    mux_chr_send_all_event(CHARDEV(opaque), event);
+}
+
 static GSource *mux_chr_add_watch(Chardev *s, GIOCondition cond)
 {
    MuxChardev *d = MUX_CHARDEV(s);
--- a/chardev/char-mux.h
+++ b/chardev/char-mux.h
@@ -58,6 +58,6 @@ typedef struct MuxChardev {

 void mux_chr_set_handlers(Chardev *chr, GMainContext *context);
 void mux_set_focus(Chardev *chr, int focus);
-void mux_chr_send_event(MuxChardev *d, int mux_nr, int event);
+void mux_chr_send_all_event(Chardev *chr, int event);

 #endif /* CHAR_MUX_H */
--- a/chardev/char-pty.c
+++ b/chardev/char-pty.c
@@ -185,7 +185,7 @@ static gboolean qemu_chr_be_generic_open_func(gpointer opaque)
    PtyChardev *s = PTY_CHARDEV(opaque);

    s->open_tag = 0;
-    qemu_chr_be_generic_open(chr);
+    qemu_chr_be_event(chr, CHR_EVENT_OPENED);
    return FALSE;
 }

@@ -199,7 +199,7 @@ static void pty_chr_state(Chardev *chr, int connected)
            g_source_remove(s->open_tag);
            s->open_tag = 0;
        }
-        remove_fd_in_watch(chr, NULL);
+        remove_fd_in_watch(chr);
        s->connected = 0;
        /* (re-)connect poll interval for idle guests: once per second.
         * We check more frequently in case the guests sends data to
@@ -215,8 +215,8 @@ static void pty_chr_state(Chardev *chr, int connected)
            s->connected = 1;
            s->open_tag = g_idle_add(qemu_chr_be_generic_open_func, chr);
        }
-        if (!chr->fd_in_tag) {
-            chr->fd_in_tag = io_add_watch_poll(chr, s->ioc,
+        if (!chr->gsource) {
+            chr->gsource = io_add_watch_poll(chr, s->ioc,
                                               pty_chr_read_poll,
                                               pty_chr_read,
                                               chr, NULL);
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -55,6 +55,7 @@ typedef struct {
    SocketAddress *addr;
    bool is_listen;
    bool is_telnet;
+    bool is_tn3270;

    guint reconnect_timer;
    int64_t reconnect_time;
@@ -141,19 +142,25 @@ static int tcp_chr_read_poll(void *opaque)
    return s->max_size;
 }

-#define IAC 255
-#define IAC_BREAK 243
 static void tcp_chr_process_IAC_bytes(Chardev *chr,
                                      SocketChardev *s,
                                      uint8_t *buf, int *size)
 {
-    /* Handle any telnet client's basic IAC options to satisfy char by
-     * char mode with no echo.  All IAC options will be removed from
-     * the buf and the do_telnetopt variable will be used to track the
-     * state of the width of the IAC information.
+    /* Handle any telnet or tn3270 client's basic IAC options.
+     * For telnet options, it satisfies char by char mode with no echo.
+     * For tn3270 options, it satisfies binary mode with EOR.
+     * All IAC options will be removed from the buf and the do_opt
+     * pointer will be used to track the state of the width of the
+     * IAC information.
     *
-     * IAC commands come in sets of 3 bytes with the exception of the
-     * "IAC BREAK" command and the double IAC.
+     * RFC854: "All TELNET commands consist of at least a two byte sequence.
+     * The commands dealing with option negotiation are three byte sequences,
+     * the third byte being the code for the option referenced."
+     * "IAC BREAK", "IAC IP", "IAC NOP" and the double IAC are two bytes.
+     * "IAC SB", "IAC SE" and "IAC EOR" are saved to split up data boundary
+     * for tn3270.
+     * NOP, Break and Interrupt Process(IP) might be encountered during a TN3270
+     * session, and NOP and IP need to be done later.
     */

    int i;
@@ -174,6 +181,18 @@ static void tcp_chr_process_IAC_bytes(Chardev *chr,
                    /* Handle IAC break commands by sending a serial break */
                    qemu_chr_be_event(chr, CHR_EVENT_BREAK);
                    s->do_telnetopt++;
+                } else if (s->is_tn3270 && ((unsigned char)buf[i] == IAC_EOR
+                           || (unsigned char)buf[i] == IAC_SB
+                           || (unsigned char)buf[i] == IAC_SE)
+                           && s->do_telnetopt == 2) {
+                    buf[j++] = IAC;
+                    buf[j++] = buf[i];
+                    s->do_telnetopt++;
+                } else if (s->is_tn3270 && ((unsigned char)buf[i] == IAC_IP
+                           || (unsigned char)buf[i] == IAC_NOP)
+                           && s->do_telnetopt == 2) {
+                    /* TODO: IP and NOP need to be implemented later. */
+                    s->do_telnetopt++;
                }
                s->do_telnetopt++;
            }
@@ -327,7 +346,7 @@ static void tcp_chr_free_connection(Chardev *chr)
    }

    tcp_set_msgfds(chr, NULL, 0);
-    remove_fd_in_watch(chr, NULL);
+    remove_fd_in_watch(chr);
    object_unref(OBJECT(s->sioc));
    s->sioc = NULL;
    object_unref(OBJECT(s->ioc));
@@ -341,31 +360,40 @@ static char *SocketAddress_to_str(const char *prefix, SocketAddress *addr,
                                  bool is_listen, bool is_telnet)
 {
    switch (addr->type) {
-    case SOCKET_ADDRESS_KIND_INET:
+    case SOCKET_ADDRESS_TYPE_INET:
        return g_strdup_printf("%s%s:%s:%s%s", prefix,
                               is_telnet ? "telnet" : "tcp",
-                               addr->u.inet.data->host,
-                               addr->u.inet.data->port,
+                               addr->u.inet.host,
+                               addr->u.inet.port,
                               is_listen ? ",server" : "");
        break;
-    case SOCKET_ADDRESS_KIND_UNIX:
+    case SOCKET_ADDRESS_TYPE_UNIX:
        return g_strdup_printf("%sunix:%s%s", prefix,
-                               addr->u.q_unix.data->path,
+                               addr->u.q_unix.path,
                               is_listen ? ",server" : "");
        break;
-    case SOCKET_ADDRESS_KIND_FD:
-        return g_strdup_printf("%sfd:%s%s", prefix, addr->u.fd.data->str,
+    case SOCKET_ADDRESS_TYPE_FD:
+        return g_strdup_printf("%sfd:%s%s", prefix, addr->u.fd.str,
                               is_listen ? ",server" : "");
        break;
-    case SOCKET_ADDRESS_KIND_VSOCK:
+    case SOCKET_ADDRESS_TYPE_VSOCK:
        return g_strdup_printf("%svsock:%s:%s", prefix,
-                               addr->u.vsock.data->cid,
-                               addr->u.vsock.data->port);
+                               addr->u.vsock.cid,
+                               addr->u.vsock.port);
    default:
        abort();
    }
 }

+static void update_disconnected_filename(SocketChardev *s)
+{
+    Chardev *chr = CHARDEV(s);
+
+    g_free(chr->filename);
+    chr->filename = SocketAddress_to_str("disconnected:", s->addr,
+                                         s->is_listen, s->is_telnet);
+}
+
 static void tcp_chr_disconnect(Chardev *chr)
 {
    SocketChardev *s = SOCKET_CHARDEV(chr);
@@ -380,8 +408,7 @@ static void tcp_chr_disconnect(Chardev *chr)
        s->listen_tag = qio_channel_add_watch(
            QIO_CHANNEL(s->listen_ioc), G_IO_IN, tcp_chr_accept, chr, NULL);
    }
-    chr->filename = SocketAddress_to_str("disconnected:", s->addr,
-                                         s->is_listen, s->is_telnet);
+    update_disconnected_filename(s);
    qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
    if (s->reconnect_time) {
        qemu_chr_socket_restart_timer(chr);
@@ -484,12 +511,12 @@ static void tcp_chr_connect(void *opaque)

    s->connected = 1;
    if (s->ioc) {
-        chr->fd_in_tag = io_add_watch_poll(chr, s->ioc,
+        chr->gsource = io_add_watch_poll(chr, s->ioc,
                                           tcp_chr_read_poll,
                                           tcp_chr_read,
                                           chr, NULL);
    }
-    qemu_chr_be_generic_open(chr);
+    qemu_chr_be_event(chr, CHR_EVENT_OPENED);
 }

 static void tcp_chr_update_read_handler(Chardev *chr,
@@ -501,9 +528,9 @@ static void tcp_chr_update_read_handler(Chardev *chr,
        return;
    }

-    remove_fd_in_watch(chr, NULL);
+    remove_fd_in_watch(chr);
    if (s->ioc) {
-        chr->fd_in_tag = io_add_watch_poll(chr, s->ioc,
+        chr->gsource = io_add_watch_poll(chr, s->ioc,
                                           tcp_chr_read_poll,
                                           tcp_chr_read, chr,
                                           context);
@@ -512,7 +539,7 @@ static void tcp_chr_update_read_handler(Chardev *chr,

 typedef struct {
    Chardev *chr;
-    char buf[12];
+    char buf[21];
    size_t buflen;
 } TCPChardevTelnetInit;

@@ -550,9 +577,6 @@ static void tcp_chr_telnet_init(Chardev *chr)
    TCPChardevTelnetInit *init = g_new0(TCPChardevTelnetInit, 1);
    size_t n = 0;

-    init->chr = chr;
-    init->buflen = 12;
-
 #define IACSET(x, a, b, c)                      \
    do {                                        \
        x[n++] = a;                             \
@@ -560,12 +584,26 @@ static void tcp_chr_telnet_init(Chardev *chr)
        x[n++] = c;                             \
    } while (0)

-    /* Prep the telnet negotion to put telnet in binary,
-     * no echo, single char mode */
-    IACSET(init->buf, 0xff, 0xfb, 0x01);  /* IAC WILL ECHO */
-    IACSET(init->buf, 0xff, 0xfb, 0x03);  /* IAC WILL Suppress go ahead */
-    IACSET(init->buf, 0xff, 0xfb, 0x00);  /* IAC WILL Binary */
-    IACSET(init->buf, 0xff, 0xfd, 0x00);  /* IAC DO Binary */
+    init->chr = chr;
+    if (!s->is_tn3270) {
+        init->buflen = 12;
+        /* Prep the telnet negotion to put telnet in binary,
+         * no echo, single char mode */
+        IACSET(init->buf, 0xff, 0xfb, 0x01);  /* IAC WILL ECHO */
+        IACSET(init->buf, 0xff, 0xfb, 0x03);  /* IAC WILL Suppress go ahead */
+        IACSET(init->buf, 0xff, 0xfb, 0x00);  /* IAC WILL Binary */
+        IACSET(init->buf, 0xff, 0xfd, 0x00);  /* IAC DO Binary */
+    } else {
+        init->buflen = 21;
+        /* Prep the TN3270 negotion based on RFC1576 */
+        IACSET(init->buf, 0xff, 0xfd, 0x19);  /* IAC DO EOR */
+        IACSET(init->buf, 0xff, 0xfb, 0x19);  /* IAC WILL EOR */
+        IACSET(init->buf, 0xff, 0xfd, 0x00);  /* IAC DO BINARY */
+        IACSET(init->buf, 0xff, 0xfb, 0x00);  /* IAC WILL BINARY */
+        IACSET(init->buf, 0xff, 0xfd, 0x18);  /* IAC DO TERMINAL TYPE */
+        IACSET(init->buf, 0xff, 0xfa, 0x18);  /* IAC SB TERMINAL TYPE */
+        IACSET(init->buf, 0x01, 0xff, 0xf0);  /* SEND IAC SE */
+    }

 #undef IACSET

@@ -585,7 +623,8 @@ static void tcp_chr_tls_handshake(QIOTask *task,
    if (qio_task_propagate_error(task, NULL)) {
        tcp_chr_disconnect(chr);
    } else {
-        if (s->do_telnetopt) {
+        /* tn3270 does not support TLS yet */
+        if (s->do_telnetopt && !s->is_tn3270) {
            tcp_chr_telnet_init(chr);
        } else {
            tcp_chr_connect(chr);
@@ -609,7 +648,7 @@ static void tcp_chr_tls_init(Chardev *chr)
    } else {
        tioc = qio_channel_tls_new_client(
            s->ioc, s->tls_creds,
-            s->addr->u.inet.data->host,
+            s->addr->u.inet.host,
            &err);
    }
    if (tioc == NULL) {
@@ -820,16 +859,18 @@ static void qmp_chardev_open_socket(Chardev *chr,
 {
    SocketChardev *s = SOCKET_CHARDEV(chr);
    ChardevSocket *sock = backend->u.socket.data;
-    SocketAddress *addr = sock->addr;
    bool do_nodelay     = sock->has_nodelay ? sock->nodelay : false;
    bool is_listen      = sock->has_server  ? sock->server  : true;
    bool is_telnet      = sock->has_telnet  ? sock->telnet  : false;
+    bool is_tn3270      = sock->has_tn3270  ? sock->tn3270  : false;
    bool is_waitconnect = sock->has_wait    ? sock->wait    : false;
    int64_t reconnect   = sock->has_reconnect ? sock->reconnect : 0;
    QIOChannelSocket *sioc = NULL;
+    SocketAddress *addr;

    s->is_listen = is_listen;
    s->is_telnet = is_telnet;
+    s->is_tn3270 = is_tn3270;
    s->do_nodelay = do_nodelay;
    if (sock->tls_creds) {
        Object *creds;
@@ -864,22 +905,21 @@ static void qmp_chardev_open_socket(Chardev *chr,
        }
    }

-    s->addr = QAPI_CLONE(SocketAddress, sock->addr);
+    s->addr = addr = socket_address_flatten(sock->addr);

    qemu_chr_set_feature(chr, QEMU_CHAR_FEATURE_RECONNECTABLE);
    /* TODO SOCKET_ADDRESS_FD where fd has AF_UNIX */
-    if (addr->type == SOCKET_ADDRESS_KIND_UNIX) {
+    if (addr->type == SOCKET_ADDRESS_TYPE_UNIX) {
        qemu_chr_set_feature(chr, QEMU_CHAR_FEATURE_FD_PASS);
    }

    /* be isn't opened until we get a connection */
    *be_opened = false;

-    chr->filename = SocketAddress_to_str("disconnected:",
-                                         addr, is_listen, is_telnet);
+    update_disconnected_filename(s);

    if (is_listen) {
-        if (is_telnet) {
+        if (is_telnet || is_tn3270) {
            s->do_telnetopt = 1;
        }
    } else if (reconnect > 0) {
@@ -904,6 +944,11 @@ static void qmp_chardev_open_socket(Chardev *chr,
            if (qio_channel_socket_listen_sync(sioc, s->addr, errp) < 0) {
                goto error;
            }
+
+            qapi_free_SocketAddress(s->addr);
+            s->addr = socket_local_address(sioc->fd, errp);
+            update_disconnected_filename(s);
+
            s->listen_ioc = sioc;
            if (is_waitconnect &&
                qemu_chr_wait_connected(chr, errp) < 0) {
@@ -933,13 +978,14 @@ static void qemu_chr_parse_socket(QemuOpts *opts, ChardevBackend *backend,
    bool is_listen      = qemu_opt_get_bool(opts, "server", false);
    bool is_waitconnect = is_listen && qemu_opt_get_bool(opts, "wait", true);
    bool is_telnet      = qemu_opt_get_bool(opts, "telnet", false);
+    bool is_tn3270      = qemu_opt_get_bool(opts, "tn3270", false);
    bool do_nodelay     = !qemu_opt_get_bool(opts, "delay", true);
    int64_t reconnect   = qemu_opt_get_number(opts, "reconnect", 0);
    const char *path = qemu_opt_get(opts, "path");
    const char *host = qemu_opt_get(opts, "host");
    const char *port = qemu_opt_get(opts, "port");
    const char *tls_creds = qemu_opt_get(opts, "tls-creds");
-    SocketAddress *addr;
+    SocketAddressLegacy *addr;
    ChardevSocket *sock;

    backend->type = CHARDEV_BACKEND_KIND_SOCKET;
@@ -968,20 +1014,22 @@ static void qemu_chr_parse_socket(QemuOpts *opts, ChardevBackend *backend,
    sock->server = is_listen;
    sock->has_telnet = true;
    sock->telnet = is_telnet;
+    sock->has_tn3270 = true;
+    sock->tn3270 = is_tn3270;
    sock->has_wait = true;
    sock->wait = is_waitconnect;
    sock->has_reconnect = true;
    sock->reconnect = reconnect;
    sock->tls_creds = g_strdup(tls_creds);

-    addr = g_new0(SocketAddress, 1);
+    addr = g_new0(SocketAddressLegacy, 1);
    if (path) {
        UnixSocketAddress *q_unix;
-        addr->type = SOCKET_ADDRESS_KIND_UNIX;
+        addr->type = SOCKET_ADDRESS_LEGACY_KIND_UNIX;
        q_unix = addr->u.q_unix.data = g_new0(UnixSocketAddress, 1);
        q_unix->path = g_strdup(path);
    } else {
-        addr->type = SOCKET_ADDRESS_KIND_INET;
+        addr->type = SOCKET_ADDRESS_LEGACY_KIND_INET;
        addr->u.inet.data = g_new(InetSocketAddress, 1);
        *addr->u.inet.data = (InetSocketAddress) {
            .host = g_strdup(host),
@@ -997,6 +1045,23 @@ static void qemu_chr_parse_socket(QemuOpts *opts, ChardevBackend *backend,
    sock->addr = addr;
 }

+static void
+char_socket_get_addr(Object *obj, Visitor *v, const char *name,
+                     void *opaque, Error **errp)
+{
+    SocketChardev *s = SOCKET_CHARDEV(obj);
+
+    visit_type_SocketAddress(v, name, &s->addr, errp);
+}
+
+static bool
+char_socket_get_connected(Object *obj, Error **errp)
+{
+    SocketChardev *s = SOCKET_CHARDEV(obj);
+
+    return s->connected;
+}
+
 static void char_socket_class_init(ObjectClass *oc, void *data)
 {
    ChardevClass *cc = CHARDEV_CLASS(oc);
@@ -1012,6 +1077,13 @@ static void char_socket_class_init(ObjectClass *oc, void *data)
    cc->chr_add_client = tcp_chr_add_client;
    cc->chr_add_watch = tcp_chr_add_watch;
    cc->chr_update_read_handler = tcp_chr_update_read_handler;
+
+    object_class_property_add(oc, "addr", "SocketAddress",
+                              char_socket_get_addr, NULL,
+                              NULL, NULL, &error_abort);
+
+    object_class_property_add_bool(oc, "connected", char_socket_get_connected,
+                                   NULL, &error_abort);
 }

 static const TypeInfo char_socket_type_info = {
--- a/chardev/char-udp.c
+++ b/chardev/char-udp.c
@@ -51,6 +51,18 @@ static int udp_chr_write(Chardev *chr, const uint8_t *buf, int len)
        s->ioc, (const char *)buf, len, NULL);
 }

+static void udp_chr_flush_buffer(UdpChardev *s)
+{
+    Chardev *chr = CHARDEV(s);
+
+    while (s->max_size > 0 && s->bufptr < s->bufcnt) {
+        int n = MIN(s->max_size, s->bufcnt - s->bufptr);
+        qemu_chr_be_write(chr, &s->buf[s->bufptr], n);
+        s->bufptr += n;
+        s->max_size = qemu_chr_be_can_write(chr);
+    }
+}
+
 static int udp_chr_read_poll(void *opaque)
 {
    Chardev *chr = CHARDEV(opaque);
@@ -61,11 +73,8 @@ static int udp_chr_read_poll(void *opaque)
    /* If there were any stray characters in the queue process them
     * first
     */
-    while (s->max_size > 0 && s->bufptr < s->bufcnt) {
-        qemu_chr_be_write(chr, &s->buf[s->bufptr], 1);
-        s->bufptr++;
-        s->max_size = qemu_chr_be_can_write(chr);
-    }
+    udp_chr_flush_buffer(s);
+
    return s->max_size;
 }

@@ -81,17 +90,12 @@ static gboolean udp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
    ret = qio_channel_read(
        s->ioc, (char *)s->buf, sizeof(s->buf), NULL);
    if (ret <= 0) {
-        remove_fd_in_watch(chr, NULL);
+        remove_fd_in_watch(chr);
        return FALSE;
    }
    s->bufcnt = ret;
-
    s->bufptr = 0;
-    while (s->max_size > 0 && s->bufptr < s->bufcnt) {
-        qemu_chr_be_write(chr, &s->buf[s->bufptr], 1);
-        s->bufptr++;
-        s->max_size = qemu_chr_be_can_write(chr);
-    }
+    udp_chr_flush_buffer(s);

    return TRUE;
 }
@@ -101,9 +105,9 @@ static void udp_chr_update_read_handler(Chardev *chr,
 {
    UdpChardev *s = UDP_CHARDEV(chr);

-    remove_fd_in_watch(chr, NULL);
+    remove_fd_in_watch(chr);
    if (s->ioc) {
-        chr->fd_in_tag = io_add_watch_poll(chr, s->ioc,
+        chr->gsource = io_add_watch_poll(chr, s->ioc,
                                           udp_chr_read_poll,
                                           udp_chr_read, chr,
                                           context);
@@ -115,7 +119,7 @@ static void char_udp_finalize(Object *obj)
    Chardev *chr = CHARDEV(obj);
    UdpChardev *s = UDP_CHARDEV(obj);

-    remove_fd_in_watch(chr, NULL);
+    remove_fd_in_watch(chr);
    if (s->ioc) {
        object_unref(OBJECT(s->ioc));
    }
@@ -130,7 +134,7 @@ static void qemu_chr_parse_udp(QemuOpts *opts, ChardevBackend *backend,
    const char *localaddr = qemu_opt_get(opts, "localaddr");
    const char *localport = qemu_opt_get(opts, "localport");
    bool has_local = false;
-    SocketAddress *addr;
+    SocketAddressLegacy *addr;
    ChardevUdp *udp;

    backend->type = CHARDEV_BACKEND_KIND_UDP;
@@ -155,8 +159,8 @@ static void qemu_chr_parse_udp(QemuOpts *opts, ChardevBackend *backend,
    udp = backend->u.udp.data = g_new0(ChardevUdp, 1);
    qemu_chr_parse_common(opts, qapi_ChardevUdp_base(udp));

-    addr = g_new0(SocketAddress, 1);
-    addr->type = SOCKET_ADDRESS_KIND_INET;
+    addr = g_new0(SocketAddressLegacy, 1);
+    addr->type = SOCKET_ADDRESS_LEGACY_KIND_INET;
    addr->u.inet.data = g_new(InetSocketAddress, 1);
    *addr->u.inet.data = (InetSocketAddress) {
        .host = g_strdup(host),
@@ -170,8 +174,8 @@ static void qemu_chr_parse_udp(QemuOpts *opts, ChardevBackend *backend,

    if (has_local) {
        udp->has_local = true;
-        addr = g_new0(SocketAddress, 1);
-        addr->type = SOCKET_ADDRESS_KIND_INET;
+        addr = g_new0(SocketAddressLegacy, 1);
+        addr->type = SOCKET_ADDRESS_LEGACY_KIND_INET;
        addr->u.inet.data = g_new(InetSocketAddress, 1);
        *addr->u.inet.data = (InetSocketAddress) {
            .host = g_strdup(localaddr),
@@ -187,13 +191,17 @@ static void qmp_chardev_open_udp(Chardev *chr,
                                 Error **errp)
 {
    ChardevUdp *udp = backend->u.udp.data;
+    SocketAddress *local_addr = socket_address_flatten(udp->local);
+    SocketAddress *remote_addr = socket_address_flatten(udp->remote);
    QIOChannelSocket *sioc = qio_channel_socket_new();
    char *name;
    UdpChardev *s = UDP_CHARDEV(chr);
+    int ret;

-    if (qio_channel_socket_dgram_sync(sioc,
-                                      udp->local, udp->remote,
-                                      errp) < 0) {
+    ret = qio_channel_socket_dgram_sync(sioc, local_addr, remote_addr, errp);
+    qapi_free_SocketAddress(local_addr);
+    qapi_free_SocketAddress(remote_addr);
+    if (ret < 0) {
        object_unref(OBJECT(sioc));
        return;
    }
--- a/chardev/char.c
+++ b/chardev/char.c
@@ -42,8 +42,10 @@
 /***********************************************************/
 /* character device */

-static QTAILQ_HEAD(ChardevHead, Chardev) chardevs =
-    QTAILQ_HEAD_INITIALIZER(chardevs);
+static Object *get_chardevs_root(void)
+{
+    return container_get(object_get_root(), "/chardevs");
+}

 void qemu_chr_be_event(Chardev *s, int event)
 {
@@ -66,12 +68,6 @@ void qemu_chr_be_event(Chardev *s, int event)
    be->chr_event(be->opaque, event);
 }

-void qemu_chr_be_generic_open(Chardev *s)
-{
-    qemu_chr_be_event(s, CHR_EVENT_OPENED);
-}
-
-
 /* Not reporting errors from writing to logfile, as logs are
 * defined to be "best effort" only */
 static void qemu_chr_fe_write_log(Chardev *s,
@@ -453,26 +449,24 @@ static const TypeInfo char_type_info = {
 * mux will receive CHR_EVENT_OPENED notifications for the BE
 * immediately.
 */
+static int open_muxes(Object *child, void *opaque)
+{
+    if (CHARDEV_IS_MUX(child)) {
+        /* send OPENED to all already-attached FEs */
+        mux_chr_send_all_event(CHARDEV(child), CHR_EVENT_OPENED);
+        /* mark mux as OPENED so any new FEs will immediately receive
+         * OPENED event
+         */
+        qemu_chr_be_event(CHARDEV(child), CHR_EVENT_OPENED);
+    }
+
+    return 0;
+}
+
 static void muxes_realize_done(Notifier *notifier, void *unused)
 {
-    Chardev *chr;
-
-    QTAILQ_FOREACH(chr, &chardevs, next) {
-        if (CHARDEV_IS_MUX(chr)) {
-            MuxChardev *d = MUX_CHARDEV(chr);
-            int i;
-
-            /* send OPENED to all already-attached FEs */
-            for (i = 0; i < d->mux_cnt; i++) {
-                mux_chr_send_event(d, i, CHR_EVENT_OPENED);
-            }
-            /* mark mux as OPENED so any new FEs will immediately receive
-             * OPENED event
-             */
-            qemu_chr_be_generic_open(chr);
-        }
-    }
    muxes_realized = true;
+    object_child_foreach(get_chardevs_root(), open_muxes, NULL);
 }

 static Notifier muxes_realize_notify = {
@@ -560,7 +554,7 @@ void qemu_chr_fe_set_handlers(CharBackend *b,
    cc = CHARDEV_GET_CLASS(s);
    if (!opaque && !fd_can_read && !fd_read && !fd_event) {
        fe_open = 0;
-        remove_fd_in_watch(s, context);
+        remove_fd_in_watch(s);
    } else {
        fe_open = 1;
    }
@@ -581,7 +575,7 @@ void qemu_chr_fe_set_handlers(CharBackend *b,
        /* We're connecting to an already opened device, so let's make sure we
           also get the open event */
        if (s->be_open) {
-            qemu_chr_be_generic_open(s);
+            qemu_chr_be_event(s, CHR_EVENT_OPENED);
        }
    }

@@ -696,7 +690,8 @@ QemuOpts *qemu_chr_parse_compat(const char *label, const char *filename)
        return opts;
    }
    if (strstart(filename, "tcp:", &p) ||
-        strstart(filename, "telnet:", &p)) {
+        strstart(filename, "telnet:", &p) ||
+        strstart(filename, "tn3270:", &p)) {
        if (sscanf(p, "%64[^:]:%32[^,]%n", host, port, &pos) < 2) {
            host[0] = 0;
            if (sscanf(p, ":%32[^,]%n", port, &pos) < 1)
@@ -712,8 +707,11 @@ QemuOpts *qemu_chr_parse_compat(const char *label, const char *filename)
                goto fail;
            }
        }
-        if (strstart(filename, "telnet:", &p))
+        if (strstart(filename, "telnet:", &p)) {
            qemu_opt_set(opts, "telnet", "on", &error_abort);
+        } else if (strstart(filename, "tn3270:", &p)) {
+            qemu_opt_set(opts, "tn3270", "on", &error_abort);
+        }
        return opts;
    }
    if (strstart(filename, "udp:", &p)) {
@@ -770,7 +768,7 @@ void qemu_chr_parse_common(QemuOpts *opts, ChardevCommon *backend)
    const char *logfile = qemu_opt_get(opts, "logfile");

    backend->has_logfile = logfile != NULL;
-    backend->logfile = logfile ? g_strdup(logfile) : NULL;
+    backend->logfile = g_strdup(logfile);

    backend->has_logappend = true;
    backend->logappend = qemu_opt_get_bool(opts, "logappend", false);
@@ -805,26 +803,6 @@ static const ChardevClass *char_get_class(const char *driver, Error **errp)
    return cc;
 }

-static Chardev *qemu_chardev_add(const char *id, const char *typename,
-                                 ChardevBackend *backend, Error **errp)
-{
-    Chardev *chr;
-
-    chr = qemu_chr_find(id);
-    if (chr) {
-        error_setg(errp, "Chardev '%s' already exists", id);
-        return NULL;
-    }
-
-    chr = qemu_chardev_new(id, typename, backend, errp);
-    if (!chr) {
-        return NULL;
-    }
-
-    QTAILQ_INSERT_TAIL(&chardevs, chr, next);
-    return chr;
-}
-
 static const struct ChardevAlias {
    const char *typename;
    const char *alias;
@@ -941,9 +919,10 @@ Chardev *qemu_chr_new_from_opts(QemuOpts *opts,
        backend->u.null.data = ccom; /* Any ChardevCommon member would work */
    }

-    chr = qemu_chardev_add(bid ? bid : id,
+    chr = qemu_chardev_new(bid ? bid : id,
                           object_class_get_name(OBJECT_CLASS(cc)),
                           backend, errp);
+
    if (chr == NULL) {
        goto out;
    }
@@ -955,9 +934,9 @@ Chardev *qemu_chr_new_from_opts(QemuOpts *opts,
        backend->type = CHARDEV_BACKEND_KIND_MUX;
        backend->u.mux.data = g_new0(ChardevMux, 1);
        backend->u.mux.data->chardev = g_strdup(bid);
-        mux = qemu_chardev_add(id, TYPE_CHARDEV_MUX, backend, errp);
+        mux = qemu_chardev_new(id, TYPE_CHARDEV_MUX, backend, errp);
        if (mux == NULL) {
-            qemu_chr_delete(chr);
+            object_unparent(OBJECT(chr));
            chr = NULL;
            goto out;
        }
@@ -1071,27 +1050,29 @@ void qemu_chr_fe_disconnect(CharBackend *be)
    }
 }

-void qemu_chr_delete(Chardev *chr)
+static int qmp_query_chardev_foreach(Object *obj, void *data)
 {
-    QTAILQ_REMOVE(&chardevs, chr, next);
-    object_unref(OBJECT(chr));
+    Chardev *chr = CHARDEV(obj);
+    ChardevInfoList **list = data;
+    ChardevInfoList *info = g_malloc0(sizeof(*info));
+
+    info->value = g_malloc0(sizeof(*info->value));
+    info->value->label = g_strdup(chr->label);
+    info->value->filename = g_strdup(chr->filename);
+    info->value->frontend_open = chr->be && chr->be->fe_open;
+
+    info->next = *list;
+    *list = info;
+
+    return 0;
 }

 ChardevInfoList *qmp_query_chardev(Error **errp)
 {
    ChardevInfoList *chr_list = NULL;
-    Chardev *chr;

-    QTAILQ_FOREACH(chr, &chardevs, next) {
-        ChardevInfoList *info = g_malloc0(sizeof(*info));
-        info->value = g_malloc0(sizeof(*info->value));
-        info->value->label = g_strdup(chr->label);
-        info->value->filename = g_strdup(chr->filename);
-        info->value->frontend_open = chr->be && chr->be->fe_open;
-
-        info->next = chr_list;
-        chr_list = info;
-    }
+    object_child_foreach(get_chardevs_root(),
+                         qmp_query_chardev_foreach, &chr_list);

    return chr_list;
 }
@@ -1119,14 +1100,9 @@ ChardevBackendInfoList *qmp_query_chardev_backends(Error **errp)

 Chardev *qemu_chr_find(const char *name)
 {
-    Chardev *chr;
+    Object *obj = object_resolve_path_component(get_chardevs_root(), name);

-    QTAILQ_FOREACH(chr, &chardevs, next) {
-        if (strcmp(chr->label, name) != 0)
-            continue;
-        return chr;
-    }
-    return NULL;
+    return obj ? CHARDEV(obj) : NULL;
 }

 QemuOptsList qemu_chardev_opts = {
@@ -1176,6 +1152,9 @@ QemuOptsList qemu_chardev_opts = {
        },{
            .name = "telnet",
            .type = QEMU_OPT_BOOL,
+        },{
+            .name = "tn3270",
+            .type = QEMU_OPT_BOOL,
        },{
            .name = "tls-creds",
            .type = QEMU_OPT_STRING,
@@ -1236,22 +1215,23 @@ void qemu_chr_set_feature(Chardev *chr,
 }

 Chardev *qemu_chardev_new(const char *id, const char *typename,
-                          ChardevBackend *backend, Error **errp)
+                          ChardevBackend *backend,
+                          Error **errp)
 {
+    Object *obj;
    Chardev *chr = NULL;
    Error *local_err = NULL;
    bool be_opened = true;

    assert(g_str_has_prefix(typename, "chardev-"));

-    chr = CHARDEV(object_new(typename));
+    obj = object_new(typename);
+    chr = CHARDEV(obj);
    chr->label = g_strdup(id);

    qemu_char_open(chr, backend, &be_opened, &local_err);
    if (local_err) {
-        error_propagate(errp, local_err);
-        object_unref(OBJECT(chr));
-        return NULL;
+        goto end;
    }

    if (!chr->filename) {
@@ -1261,6 +1241,21 @@ Chardev *qemu_chardev_new(const char *id, const char *typename,
        qemu_chr_be_event(chr, CHR_EVENT_OPENED);
    }

+    if (id) {
+        object_property_add_child(get_chardevs_root(), id, obj, &local_err);
+        if (local_err) {
+            goto end;
+        }
+        object_unref(obj);
+    }
+
+end:
+    if (local_err) {
+        error_propagate(errp, local_err);
+        object_unref(obj);
+        return NULL;
+    }
+
    return chr;
 }

@@ -1276,7 +1271,7 @@ ChardevReturn *qmp_chardev_add(const char *id, ChardevBackend *backend,
        return NULL;
    }

-    chr = qemu_chardev_add(id, object_class_get_name(OBJECT_CLASS(cc)),
+    chr = qemu_chardev_new(id, object_class_get_name(OBJECT_CLASS(cc)),
                           backend, errp);
    if (!chr) {
        return NULL;
@@ -1309,16 +1304,12 @@ void qmp_chardev_remove(const char *id, Error **errp)
            "Chardev '%s' cannot be unplugged in record/replay mode", id);
        return;
    }
-    qemu_chr_delete(chr);
+    object_unparent(OBJECT(chr));
 }

 void qemu_chr_cleanup(void)
 {
-    Chardev *chr, *tmp;
-
-    QTAILQ_FOREACH_SAFE(chr, &chardevs, next, tmp) {
-        qemu_chr_delete(chr);
-    }
+    object_unparent(get_chardevs_root());
 }

 static void register_types(void)
--- a/278
+++ b/278
@@ -320,6 +320,7 @@ numa=""
 tcmalloc="no"
 jemalloc="no"
 replication="yes"
+vxhs=""

 supported_cpu="no"
 supported_os="no"
@@ -610,6 +611,7 @@ NetBSD)
  audio_possible_drivers="oss sdl"
  oss_lib="-lossaudio"
  HOST_VARIANT_DIR="netbsd"
+  supported_os="yes"
 ;;
 OpenBSD)
  bsd="yes"
@@ -742,7 +744,7 @@ if test "$mingw32" = "yes" ; then
  sysconfdir="\${prefix}"
  local_statedir=
  confsuffix=""
-  libs_qga="-lws2_32 -lwinmm -lpowrprof -liphlpapi -lnetapi32 $libs_qga"
+  libs_qga="-lws2_32 -lwinmm -lpowrprof -lwtsapi32 -liphlpapi -lnetapi32 $libs_qga"
 fi

 werror=""
@@ -1183,6 +1185,10 @@ for opt do
  ;;
  --enable-replication) replication="yes"
  ;;
+  --disable-vxhs) vxhs="no"
+  ;;
+  --enable-vxhs) vxhs="yes"
+  ;;
  *)
      echo "ERROR: unknown option $opt"
      echo "Try '$0 --help' for more information"
@@ -1191,21 +1197,6 @@ for opt do
  esac
 done

-if ! has $python; then
-  error_exit "Python not found. Use --python=/path/to/python"
-fi
-
-# Note that if the Python conditional here evaluates True we will exit
-# with status 1 which is a shell 'false' value.
-if ! $python -c 'import sys; sys.exit(sys.version_info < (2,6) or sys.version_info >= (3,))'; then
-  error_exit "Cannot use '$python', Python 2.6 or later is required." \
-      "Note that Python 3 or later is not yet supported." \
-      "Use --python=/path/to/python to specify a supported Python."
-fi
-
-# Suppress writing compiled files
-python="$python -B"
-
 case "$cpu" in
    ppc)
           CPU_CFLAGS="-m32"
@@ -1280,6 +1271,9 @@ for config in $mak_wilds; do
    default_target_list="${default_target_list} $(basename "$config" .mak)"
 done

+# Enumerate public trace backends for --help output
+trace_backend_list=$(echo $(grep -le '^PUBLIC = True$' "$source_path"/scripts/tracetool/backend/*.py | sed -e 's/^.*\/\(.*\)\.py$/\1/'))
+
 if test x"$show_help" = x"yes" ; then
 cat << EOF

@@ -1333,7 +1327,7 @@ Advanced options (experts only):
                           set block driver read-only whitelist
                           (affects only QEMU, not qemu-img)
  --enable-trace-backends=B Set trace backend
-                           Available backends: $($python $source_path/scripts/tracetool.py --list-backends)
+                           Available backends: $trace_backend_list
  --with-trace-file=NAME   Full PATH,NAME of file to store traces
                           Default:trace-<pid>
  --disable-slirp          disable SLIRP userspace network connectivity
@@ -1341,7 +1335,7 @@ Advanced options (experts only):
  --oss-lib                path to OSS library
  --cpu=CPU                Build for host CPU [$cpu]
  --with-coroutine=BACKEND coroutine backend. Supported options:
-                           gthread, ucontext, sigaltstack, windows
+                           ucontext, sigaltstack, windows
  --enable-gcov            enable test coverage analysis with gcov
  --gcov=GCOV              use specified gcov [$gcov_tool]
  --disable-blobs          disable installing provided firmware blobs
@@ -1427,12 +1421,28 @@ disabled with --disable-FEATURE, default is enabled if available:
  xfsctl          xfsctl support
  qom-cast-debug  cast debugging support
  tools           build qemu-io, qemu-nbd and qemu-image tools
+  vxhs            Veritas HyperScale vDisk backend support

 NOTE: The object files are built at the place where configure is launched
 EOF
 exit 0
 fi

+if ! has $python; then
+  error_exit "Python not found. Use --python=/path/to/python"
+fi
+
+# Note that if the Python conditional here evaluates True we will exit
+# with status 1 which is a shell 'false' value.
+if ! $python -c 'import sys; sys.exit(sys.version_info < (2,6) or sys.version_info >= (3,))'; then
+  error_exit "Cannot use '$python', Python 2.6 or later is required." \
+      "Note that Python 3 or later is not yet supported." \
+      "Use --python=/path/to/python to specify a supported Python."
+fi
+
+# Suppress writing compiled files
+python="$python -B"
+
 # Now we have handled --enable-tcg-interpreter and know we're not just
 # printing the help message, bail out if the host CPU isn't supported.
 if test "$ARCH" = "unknown"; then
@@ -1988,30 +1998,65 @@ fi
 # xen probe

 if test "$xen" != "no" ; then
-  xen_libs="-lxenstore -lxenctrl -lxenguest"
-  xen_stable_libs="-lxenforeignmemory -lxengnttab -lxenevtchn"
+  # Check whether Xen library path is specified via --extra-ldflags to avoid
+  # overriding this setting with pkg-config output. If not, try pkg-config
+  # to obtain all needed flags.

-  # First we test whether Xen headers and libraries are available.
-  # If no, we are done and there is no Xen support.
-  # If yes, more tests are run to detect the Xen version.
+  if ! echo $EXTRA_LDFLAGS | grep tools/libxc > /dev/null && \
+     $pkg_config --exists xencontrol ; then
+    xen_ctrl_version="$(printf '%d%02d%02d' \
+      $($pkg_config --modversion xencontrol | sed 's/\./ /g') )"
+    xen=yes
+    xen_pc="xencontrol xenstore xenguest xenforeignmemory xengnttab"
+    xen_pc="$xen_pc xenevtchn xendevicemodel"
+    QEMU_CFLAGS="$QEMU_CFLAGS $($pkg_config --cflags $xen_pc)"
+    libs_softmmu="$($pkg_config --libs $xen_pc) $libs_softmmu"
+    LDFLAGS="$($pkg_config --libs $xen_pc) $LDFLAGS"
+  else

-  # Xen (any)
-  cat > $TMPC <<EOF
+    xen_libs="-lxenstore -lxenctrl -lxenguest"
+    xen_stable_libs="-lxenforeignmemory -lxengnttab -lxenevtchn"
+
+    # First we test whether Xen headers and libraries are available.
+    # If no, we are done and there is no Xen support.
+    # If yes, more tests are run to detect the Xen version.
+
+    # Xen (any)
+    cat > $TMPC <<EOF
 #include <xenctrl.h>
 int main(void) {
  return 0;
 }
 EOF
-  if ! compile_prog "" "$xen_libs" ; then
-    # Xen not found
-    if test "$xen" = "yes" ; then
-      feature_not_found "xen" "Install xen devel"
-    fi
-    xen=no
+    if ! compile_prog "" "$xen_libs" ; then
+      # Xen not found
+      if test "$xen" = "yes" ; then
+        feature_not_found "xen" "Install xen devel"
+      fi
+      xen=no

-  # Xen unstable
-  elif
-      cat > $TMPC <<EOF &&
+    # Xen unstable
+    elif
+        cat > $TMPC <<EOF &&
+#undef XC_WANT_COMPAT_DEVICEMODEL_API
+#define __XEN_TOOLS__
+#include <xendevicemodel.h>
+int main(void) {
+  xendevicemodel_handle *xd;
+
+  xd = xendevicemodel_open(0, 0);
+  xendevicemodel_close(xd);
+
+  return 0;
+}
+EOF
+        compile_prog "" "$xen_libs -lxendevicemodel $xen_stable_libs"
+      then
+      xen_stable_libs="-lxendevicemodel $xen_stable_libs"
+      xen_ctrl_version=40900
+      xen=yes
+    elif
+        cat > $TMPC <<EOF &&
 /*
 * If we have stable libs the we don't want the libxc compat
 * layers, regardless of what CFLAGS we may have been given.
@@ -2061,12 +2106,12 @@ int main(void) {
  return 0;
 }
 EOF
-      compile_prog "" "$xen_libs $xen_stable_libs"
-    then
-    xen_ctrl_version=480
-    xen=yes
-  elif
-      cat > $TMPC <<EOF &&
+        compile_prog "" "$xen_libs $xen_stable_libs"
+      then
+      xen_ctrl_version=40800
+      xen=yes
+    elif
+        cat > $TMPC <<EOF &&
 /*
 * If we have stable libs the we don't want the libxc compat
 * layers, regardless of what CFLAGS we may have been given.
@@ -2112,12 +2157,12 @@ int main(void) {
  return 0;
 }
 EOF
-      compile_prog "" "$xen_libs $xen_stable_libs"
-    then
-    xen_ctrl_version=471
-    xen=yes
-  elif
-      cat > $TMPC <<EOF &&
+        compile_prog "" "$xen_libs $xen_stable_libs"
+      then
+      xen_ctrl_version=40701
+      xen=yes
+    elif
+        cat > $TMPC <<EOF &&
 #include <xenctrl.h>
 #include <stdint.h>
 int main(void) {
@@ -2127,14 +2172,14 @@ int main(void) {
  return 0;
 }
 EOF
-      compile_prog "" "$xen_libs"
-    then
-    xen_ctrl_version=470
-    xen=yes
+        compile_prog "" "$xen_libs"
+      then
+      xen_ctrl_version=40700
+      xen=yes

-  # Xen 4.6
-  elif
-      cat > $TMPC <<EOF &&
+    # Xen 4.6
+    elif
+        cat > $TMPC <<EOF &&
 #include <xenctrl.h>
 #include <xenstore.h>
 #include <stdint.h>
@@ -2155,14 +2200,14 @@ int main(void) {
  return 0;
 }
 EOF
-      compile_prog "" "$xen_libs"
-    then
-    xen_ctrl_version=460
-    xen=yes
+        compile_prog "" "$xen_libs"
+      then
+      xen_ctrl_version=40600
+      xen=yes

-  # Xen 4.5
-  elif
-      cat > $TMPC <<EOF &&
+    # Xen 4.5
+    elif
+        cat > $TMPC <<EOF &&
 #include <xenctrl.h>
 #include <xenstore.h>
 #include <stdint.h>
@@ -2182,13 +2227,13 @@ int main(void) {
  return 0;
 }
 EOF
-      compile_prog "" "$xen_libs"
-    then
-    xen_ctrl_version=450
-    xen=yes
+        compile_prog "" "$xen_libs"
+      then
+      xen_ctrl_version=40500
+      xen=yes

-  elif
-      cat > $TMPC <<EOF &&
+    elif
+        cat > $TMPC <<EOF &&
 #include <xenctrl.h>
 #include <xenstore.h>
 #include <stdint.h>
@@ -2207,24 +2252,25 @@ int main(void) {
  return 0;
 }
 EOF
-      compile_prog "" "$xen_libs"
-    then
-    xen_ctrl_version=420
-    xen=yes
+        compile_prog "" "$xen_libs"
+      then
+      xen_ctrl_version=40200
+      xen=yes

-  else
-    if test "$xen" = "yes" ; then
-      feature_not_found "xen (unsupported version)" \
-                        "Install a supported xen (xen 4.2 or newer)"
+    else
+      if test "$xen" = "yes" ; then
+        feature_not_found "xen (unsupported version)" \
+                          "Install a supported xen (xen 4.2 or newer)"
+      fi
+      xen=no
    fi
-    xen=no
-  fi

-  if test "$xen" = yes; then
-    if test $xen_ctrl_version -ge 471  ; then
-      libs_softmmu="$xen_stable_libs $libs_softmmu"
+    if test "$xen" = yes; then
+      if test $xen_ctrl_version -ge 40701  ; then
+        libs_softmmu="$xen_stable_libs $libs_softmmu"
+      fi
+      libs_softmmu="$xen_libs $libs_softmmu"
    fi
-    libs_softmmu="$xen_libs $libs_softmmu"
  fi
 fi

@@ -4373,10 +4419,8 @@ fi
 # check and set a backend for coroutine

 # We prefer ucontext, but it's not always possible. The fallback
-# is sigcontext. gthread is not selectable except explicitly, because
-# it is not functional enough to run QEMU proper. (It is occasionally
-# useful for debugging purposes.)  On Windows the only valid backend
-# is the Windows-specific one.
+# is sigcontext. On Windows the only valid backend is the Windows
+# specific one.

 ucontext_works=no
 if test "$darwin" != "yes"; then
@@ -4415,7 +4459,7 @@ else
      feature_not_found "ucontext"
    fi
    ;;
-  gthread|sigaltstack)
+  sigaltstack)
    if test "$mingw32" = "yes"; then
      error_exit "only the 'windows' coroutine backend is valid for Windows"
    fi
@@ -4427,14 +4471,7 @@ else
 fi

 if test "$coroutine_pool" = ""; then
-  if test "$coroutine" = "gthread"; then
-    coroutine_pool=no
-  else
-    coroutine_pool=yes
-  fi
-fi
-if test "$coroutine" = "gthread" -a "$coroutine_pool" = "yes"; then
-  error_exit "'gthread' coroutine backend does not support pool (use --disable-coroutine-pool)"
+  coroutine_pool=yes
 fi

 if test "$debug_stack_usage" = "yes"; then
@@ -4780,6 +4817,47 @@ if compile_prog "" "" ; then
    have_sysmacros=yes
 fi

+##########################################
+# Veritas HyperScale block driver VxHS
+# Check if libvxhs is installed
+
+if test "$vxhs" != "no" ; then
+  cat > $TMPC <<EOF
+#include <stdint.h>
+#include <qnio/qnio_api.h>
+
+void *vxhs_callback;
+
+int main(void) {
+    iio_init(QNIO_VERSION, vxhs_callback);
+    return 0;
+}
+EOF
+  vxhs_libs="-lvxhs -lssl"
+  if compile_prog "" "$vxhs_libs" ; then
+    vxhs=yes
+  else
+    if test "$vxhs" = "yes" ; then
+      feature_not_found "vxhs block device" "Install libvxhs See github"
+    fi
+    vxhs=no
+  fi
+fi
+
+##########################################
+# check for _Static_assert()
+
+have_static_assert=no
+cat > $TMPC << EOF
+_Static_assert(1, "success");
+int main(void) {
+    return 0;
+}
+EOF
+if compile_prog "" "" ; then
+    have_static_assert=yes
+fi
+
 ##########################################
 # End of CC checks
 # After here, no more $cc or $ld runs
@@ -5146,6 +5224,7 @@ echo "tcmalloc support  $tcmalloc"
 echo "jemalloc support  $jemalloc"
 echo "avx2 optimization $avx2_opt"
 echo "replication support $replication"
+echo "VxHS block device $vxhs"

 if test "$sdl_too_old" = "yes"; then
 echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -5775,6 +5854,10 @@ if test "$have_sysmacros" = "yes" ; then
  echo "CONFIG_SYSMACROS=y" >> $config_host_mak
 fi

+if test "$have_static_assert" = "yes" ; then
+  echo "CONFIG_STATIC_ASSERT=y" >> $config_host_mak
+fi
+
 # Hold two types of flag:
 #   CONFIG_THREAD_SETNAME_BYTHREAD  - we've got a way of setting the name on
 #                                     a thread we have a handle to
@@ -5785,6 +5868,11 @@ if test "$pthread_setname_np" = "yes" ; then
  echo "CONFIG_PTHREAD_SETNAME_NP=y" >> $config_host_mak
 fi

+if test "$vxhs" = "yes" ; then
+  echo "CONFIG_VXHS=y" >> $config_host_mak
+  echo "VXHS_LIBS=$vxhs_libs" >> $config_host_mak
+fi
+
 if test "$tcg_interpreter" = "yes"; then
  QEMU_INCLUDES="-I\$(SRC_PATH)/tcg/tci $QEMU_INCLUDES"
 elif test "$ARCH" = "sparc64" ; then
@@ -5945,9 +6033,11 @@ TARGET_ABI_DIR=""

 case "$target_name" in
  i386)
+    gdb_xml_files="i386-32bit-core.xml"
  ;;
  x86_64)
    TARGET_BASE_ARCH=i386
+    gdb_xml_files="i386-64bit-core.xml"
  ;;
  alpha)
    mttcg="yes"
@@ -6012,12 +6102,14 @@ case "$target_name" in
  ppc64)
    TARGET_BASE_ARCH=ppc
    TARGET_ABI_DIR=ppc
+    mttcg=yes
    gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml power-spe.xml power-vsx.xml"
  ;;
  ppc64le)
    TARGET_ARCH=ppc64
    TARGET_BASE_ARCH=ppc
    TARGET_ABI_DIR=ppc
+    mttcg=yes
    gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml power-spe.xml power-vsx.xml"
  ;;
  ppc64abi32)
--- a/contrib/libvhost-user/libvhost-user.c
+++ b/contrib/libvhost-user/libvhost-user.c
@@ -81,7 +81,7 @@ vu_panic(VuDev *dev, const char *msg, ...)
    va_list ap;

    va_start(ap, msg);
-    (void)vasprintf(&buf, msg, ap);
+    buf = g_strdup_vprintf(msg, ap);
    va_end(ap);

    dev->broken = true;
@@ -1031,6 +1031,11 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
    idx = vq->last_avail_idx;

    total_bufs = in_total = out_total = 0;
+    if (unlikely(dev->broken) ||
+        unlikely(!vq->vring.avail)) {
+        goto done;
+    }
+
    while ((rc = virtqueue_num_heads(dev, vq, idx)) > 0) {
        unsigned int max, num_bufs, indirect = 0;
        struct vring_desc *desc;
@@ -1121,11 +1126,16 @@ vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes,

 /* Fetch avail_idx from VQ memory only when we really need to know if
 * guest has added some buffers. */
-int
+bool
 vu_queue_empty(VuDev *dev, VuVirtq *vq)
 {
+    if (unlikely(dev->broken) ||
+        unlikely(!vq->vring.avail)) {
+        return true;
+    }
+
    if (vq->shadow_avail_idx != vq->last_avail_idx) {
-        return 0;
+        return false;
    }

    return vring_avail_idx(vq) == vq->last_avail_idx;
@@ -1174,7 +1184,8 @@ vring_notify(VuDev *dev, VuVirtq *vq)
 void
 vu_queue_notify(VuDev *dev, VuVirtq *vq)
 {
-    if (unlikely(dev->broken)) {
+    if (unlikely(dev->broken) ||
+        unlikely(!vq->vring.avail)) {
        return;
    }

@@ -1291,7 +1302,8 @@ vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz)
    struct vring_desc *desc;
    int rc;

-    if (unlikely(dev->broken)) {
+    if (unlikely(dev->broken) ||
+        unlikely(!vq->vring.avail)) {
        return NULL;
    }

@@ -1445,7 +1457,8 @@ vu_queue_fill(VuDev *dev, VuVirtq *vq,
 {
    struct vring_used_elem uelem;

-    if (unlikely(dev->broken)) {
+    if (unlikely(dev->broken) ||
+        unlikely(!vq->vring.avail)) {
        return;
    }

@@ -1474,7 +1487,8 @@ vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int count)
 {
    uint16_t old, new;

-    if (unlikely(dev->broken)) {
+    if (unlikely(dev->broken) ||
+        unlikely(!vq->vring.avail)) {
        return;
    }

--- a/contrib/libvhost-user/libvhost-user.h
+++ b/contrib/libvhost-user/libvhost-user.h
@@ -327,13 +327,13 @@ void vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable);
 bool vu_queue_enabled(VuDev *dev, VuVirtq *vq);

 /**
- * vu_queue_enabled:
+ * vu_queue_empty:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 *
- * Returns: whether the queue is empty.
+ * Returns: true if the queue is empty or not ready.
 */
-int vu_queue_empty(VuDev *dev, VuVirtq *vq);
+bool vu_queue_empty(VuDev *dev, VuVirtq *vq);

 /**
 * vu_queue_notify:
--- a/cpus.c
+++ b/cpus.c
@@ -50,6 +50,7 @@
 #include "qapi-event.h"
 #include "hw/nmi.h"
 #include "sysemu/replay.h"
+#include "hw/boards.h"

 #ifdef CONFIG_LINUX

@@ -1483,6 +1484,12 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
                /* Ignore everything else? */
                break;
            }
+        } else if (cpu->unplug) {
+            qemu_tcg_destroy_vcpu(cpu);
+            cpu->created = false;
+            qemu_cond_signal(&qemu_cpu_cond);
+            qemu_mutex_unlock_iothread();
+            return NULL;
        }

        atomic_mb_set(&cpu->exit_request, 0);
@@ -1859,6 +1866,8 @@ void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)

 CpuInfoList *qmp_query_cpus(Error **errp)
 {
+    MachineState *ms = MACHINE(qdev_get_machine());
+    MachineClass *mc = MACHINE_GET_CLASS(ms);
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

@@ -1909,6 +1918,13 @@ CpuInfoList *qmp_query_cpus(Error **errp)
 #else
        info->value->arch = CPU_INFO_ARCH_OTHER;
 #endif
+        info->value->has_props = !!mc->cpu_index_to_instance_props;
+        if (info->value->has_props) {
+            CpuInstanceProperties *props;
+            props = g_malloc0(sizeof(*props));
+            *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
+            info->value->props = props;
+        }

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
--- a/cputlb.c
+++ b/cputlb.c
@@ -930,7 +930,13 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
        tlb_addr = tlbe->addr_write;
    }

-    /* Notice an IO access, or a notdirty page.  */
+    /* Check notdirty */
+    if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
+        tlb_set_dirty(ENV_GET_CPU(env), addr);
+        tlb_addr = tlb_addr & ~TLB_NOTDIRTY;
+    }
+
+    /* Notice an IO access  */
    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
        /* There's really nothing that can be done to
           support this apart from stop-the-world.  */
--- a/crypto/block-luks.c
+++ b/crypto/block-luks.c
@@ -475,8 +475,8 @@ qcrypto_block_luks_load_key(QCryptoBlock *block,
    rv = readfunc(block,
                  slot->key_offset * QCRYPTO_BLOCK_LUKS_SECTOR_SIZE,
                  splitkey, splitkeylen,
-                  errp,
-                  opaque);
+                  opaque,
+                  errp);
    if (rv < 0) {
        goto cleanup;
    }
@@ -679,8 +679,8 @@ qcrypto_block_luks_open(QCryptoBlock *block,
    rv = readfunc(block, 0,
                  (uint8_t *)&luks->header,
                  sizeof(luks->header),
-                  errp,
-                  opaque);
+                  opaque,
+                  errp);
    if (rv < 0) {
        ret = rv;
        goto fail;
@@ -1246,7 +1246,7 @@ qcrypto_block_luks_create(QCryptoBlock *block,
        QCRYPTO_BLOCK_LUKS_SECTOR_SIZE;

    /* Reserve header space to match payload offset */
-    initfunc(block, block->payload_offset, &local_err, opaque);
+    initfunc(block, block->payload_offset, opaque, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto error;
@@ -1271,8 +1271,8 @@ qcrypto_block_luks_create(QCryptoBlock *block,
    writefunc(block, 0,
              (const uint8_t *)&luks->header,
              sizeof(luks->header),
-              &local_err,
-              opaque);
+              opaque,
+              &local_err);

    /* Delay checking local_err until we've byte-swapped */

@@ -1301,8 +1301,8 @@ qcrypto_block_luks_create(QCryptoBlock *block,
                  luks->header.key_slots[0].key_offset *
                  QCRYPTO_BLOCK_LUKS_SECTOR_SIZE,
                  splitkey, splitkeylen,
-                  errp,
-                  opaque) != splitkeylen) {
+                  opaque,
+                  errp) != splitkeylen) {
        goto error;
    }

--- a/crypto/init.c
+++ b/crypto/init.c
@@ -32,6 +32,8 @@
 #include <gcrypt.h>
 #endif

+#include "crypto/random.h"
+
 /* #define DEBUG_GNUTLS */

 /*
@@ -146,5 +148,9 @@ int qcrypto_init(Error **errp)
    gcry_control(GCRYCTL_INITIALIZATION_FINISHED, 0);
 #endif

+    if (qcrypto_random_init(errp) < 0) {
+        return -1;
+    }
+
    return 0;
 }
--- a/crypto/random-gcrypt.c
+++ b/crypto/random-gcrypt.c
@@ -31,3 +31,5 @@ int qcrypto_random_bytes(uint8_t *buf,
    gcry_randomize(buf, buflen, GCRY_STRONG_RANDOM);
    return 0;
 }
+
+int qcrypto_random_init(Error **errp G_GNUC_UNUSED) { return 0; }
--- a/crypto/random-gnutls.c
+++ b/crypto/random-gnutls.c
@@ -41,3 +41,6 @@ int qcrypto_random_bytes(uint8_t *buf,

    return 0;
 }
+
+
+int qcrypto_random_init(Error **errp G_GNUC_UNUSED) { return 0; }
--- a/crypto/random-platform.c
+++ b/crypto/random-platform.c
@@ -22,14 +22,16 @@

 #include "crypto/random.h"

-int qcrypto_random_bytes(uint8_t *buf G_GNUC_UNUSED,
-                         size_t buflen G_GNUC_UNUSED,
-                         Error **errp)
-{
-    int fd;
-    int ret = -1;
-    int got;
+#ifdef _WIN32
+#include <wincrypt.h>
+static HCRYPTPROV hCryptProv;
+#else
+static int fd; /* a file handle to either /dev/urandom or /dev/random */
+#endif

+int qcrypto_random_init(Error **errp)
+{
+#ifndef _WIN32
    /* TBD perhaps also add support for BSD getentropy / Linux
     * getrandom syscalls directly */
    fd = open("/dev/urandom", O_RDONLY);
@@ -41,6 +43,25 @@ int qcrypto_random_bytes(uint8_t *buf G_GNUC_UNUSED,
        error_setg(errp, "No /dev/urandom or /dev/random found");
        return -1;
    }
+#else
+    if (!CryptAcquireContext(&hCryptProv, NULL, NULL, PROV_RSA_FULL,
+                             CRYPT_SILENT | CRYPT_VERIFYCONTEXT)) {
+        error_setg_win32(errp, GetLastError(),
+                         "Unable to create cryptographic provider");
+        return -1;
+    }
+#endif
+
+    return 0;
+}
+
+int qcrypto_random_bytes(uint8_t *buf G_GNUC_UNUSED,
+                         size_t buflen G_GNUC_UNUSED,
+                         Error **errp)
+{
+#ifndef _WIN32
+    int ret = -1;
+    int got;

    while (buflen > 0) {
        got = read(fd, buf, buflen);
@@ -59,6 +80,14 @@ int qcrypto_random_bytes(uint8_t *buf G_GNUC_UNUSED,

    ret = 0;
 cleanup:
-    close(fd);
    return ret;
+#else
+    if (!CryptGenRandom(hCryptProv, buflen, buf)) {
+        error_setg_win32(errp, GetLastError(),
+                         "Unable to read random bytes");
+        return -1;
+    }
+
+    return 0;
+#endif
 }
--- a/default-configs/arm-softmmu.mak
+++ b/default-configs/arm-softmmu.mak
@@ -29,6 +29,7 @@ CONFIG_LAN9118=y
 CONFIG_SMC91C111=y
 CONFIG_ALLWINNER_EMAC=y
 CONFIG_IMX_FEC=y
+CONFIG_FTGMAC100=y
 CONFIG_DS1338=y
 CONFIG_PFLASH_CFI01=y
 CONFIG_PFLASH_CFI02=y
--- a/default-configs/i386-softmmu.mak
+++ b/default-configs/i386-softmmu.mak
@@ -39,7 +39,6 @@ CONFIG_TPM_TIS=$(CONFIG_TPM)
 CONFIG_MC146818RTC=y
 CONFIG_PCI_PIIX=y
 CONFIG_WDT_IB700=y
-CONFIG_XEN_I386=$(CONFIG_XEN)
 CONFIG_ISA_DEBUG=y
 CONFIG_ISA_TESTDEV=y
 CONFIG_VMPORT=y
--- a/default-configs/ppc-softmmu.mak
+++ b/default-configs/ppc-softmmu.mak
@@ -45,6 +45,7 @@ CONFIG_OPENPIC_KVM=$(and $(CONFIG_E500),$(CONFIG_KVM))
 CONFIG_PLATFORM_BUS=y
 CONFIG_ETSEC=y
 CONFIG_LIBDECNUMBER=y
+CONFIG_SM501=y
 # For PReP
 CONFIG_SERIAL_ISA=y
 CONFIG_MC146818RTC=y
--- a/default-configs/ppc64-softmmu.mak
+++ b/default-configs/ppc64-softmmu.mak
@@ -6,6 +6,10 @@ include usb.mak
 CONFIG_VIRTIO_VGA=y
 CONFIG_ESCC=y
 CONFIG_M48T59=y
+CONFIG_IPMI=y
+CONFIG_IPMI_LOCAL=y
+CONFIG_IPMI_EXTERN=y
+CONFIG_ISA_IPMI_BT=y
 CONFIG_SERIAL=y
 CONFIG_PARALLEL=y
 CONFIG_I8254=y
@@ -47,6 +51,7 @@ CONFIG_OPENPIC_KVM=$(and $(CONFIG_E500),$(CONFIG_KVM))
 CONFIG_PLATFORM_BUS=y
 CONFIG_ETSEC=y
 CONFIG_LIBDECNUMBER=y
+CONFIG_SM501=y
 # For pSeries
 CONFIG_XICS=$(CONFIG_PSERIES)
 CONFIG_XICS_SPAPR=$(CONFIG_PSERIES)
--- a/default-configs/ppcemb-softmmu.mak
+++ b/default-configs/ppcemb-softmmu.mak
@@ -15,3 +15,4 @@ CONFIG_I8259=y
 CONFIG_XILINX=y
 CONFIG_XILINX_ETHLITE=y
 CONFIG_LIBDECNUMBER=y
+CONFIG_SM501=y
--- a/default-configs/s390x-softmmu.mak
+++ b/default-configs/s390x-softmmu.mak
@@ -2,6 +2,7 @@ CONFIG_PCI=y
 CONFIG_VIRTIO_PCI=y
 CONFIG_VIRTIO=y
 CONFIG_SCLPCONSOLE=y
+CONFIG_TERMINAL3270=y
 CONFIG_S390_FLIC=y
 CONFIG_S390_FLIC_KVM=$(CONFIG_KVM)
 CONFIG_WDT_DIAG288=y
--- a/default-configs/x86_64-softmmu.mak
+++ b/default-configs/x86_64-softmmu.mak
@@ -39,7 +39,6 @@ CONFIG_TPM_TIS=$(CONFIG_TPM)
 CONFIG_MC146818RTC=y
 CONFIG_PCI_PIIX=y
 CONFIG_WDT_IB700=y
-CONFIG_XEN_I386=$(CONFIG_XEN)
 CONFIG_ISA_DEBUG=y
 CONFIG_ISA_TESTDEV=y
 CONFIG_VMPORT=y
--- a/device_tree.c
+++ b/device_tree.c
@@ -148,6 +148,7 @@ static void read_fstree(void *fdt, const char *dirname)
    d = opendir(dirname);
    if (!d) {
        error_setg(&error_fatal, "%s cannot open %s", __func__, dirname);
+        return;
    }

    while ((de = readdir(d)) != NULL) {
--- a/docs/qdev-device-use.txt
+++ b/docs/qdev-device-use.txt
@@ -182,15 +182,13 @@ The appropriate DEVNAME depends on the machine type.  For type "pc":

  This lets you control I/O ports and IRQs.

-* -usbdevice serial:vendorid=VID,productid=PRID becomes
-  -device usb-serial,vendorid=VID,productid=PRID
+* -usbdevice serial::chardev becomes -device usb-serial,chardev=dev.

 * -usbdevice braille doesn't support LEGACY-CHARDEV syntax.  It always
  uses "braille".  With -device, this useful default is gone, so you
  have to use something like

-  -device usb-braille,chardev=braille,vendorid=VID,productid=PRID
-  -chardev braille,id=braille
+  -device usb-braille,chardev=braille -chardev braille,id=braille

 * -virtioconsole becomes
  -device virtio-serial-pci,class=C,vectors=V,ioeventfd=IOEVENTFD,max_ports=N
--- a/dump.c
+++ b/dump.c
@@ -77,7 +77,13 @@ static int dump_cleanup(DumpState *s)
    memory_mapping_list_free(&s->list);
    close(s->fd);
    if (s->resume) {
+        if (s->detached) {
+            qemu_mutex_lock_iothread();
+        }
        vm_start();
+        if (s->detached) {
+            qemu_mutex_unlock_iothread();
+        }
    }

    return 0;
@@ -1804,6 +1810,7 @@ void qmp_dump_guest_memory(bool paging, const char *file,

    if (detach_p) {
        /* detached dump */
+        s->detached = true;
        qemu_thread_create(&s->dump_thread, "dump_thread", dump_thread,
                           s, QEMU_THREAD_DETACHED);
    } else {
--- a/exec.c
+++ b/exec.c
@@ -71,6 +71,8 @@
 #include "qemu/mmap-alloc.h"
 #endif

+#include "monitor/monitor.h"
+
 //#define DEBUG_SUBPAGE

 #if !defined(CONFIG_USER_ONLY)
@@ -223,6 +225,12 @@ struct CPUAddressSpace {
    MemoryListener tcg_as_listener;
 };

+struct DirtyBitmapSnapshot {
+    ram_addr_t start;
+    ram_addr_t end;
+    unsigned long dirty[];
+};
+
 #endif

 #if !defined(CONFIG_USER_ONLY)
@@ -457,42 +465,12 @@ address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *x
 }

 /* Called from RCU critical section */
-IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr,
-                                            bool is_write)
-{
-    IOMMUTLBEntry iotlb = {0};
-    MemoryRegionSection *section;
-    MemoryRegion *mr;
-
-    for (;;) {
-        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
-        section = address_space_lookup_region(d, addr, false);
-        addr = addr - section->offset_within_address_space
-               + section->offset_within_region;
-        mr = section->mr;
-
-        if (!mr->iommu_ops) {
-            break;
-        }
-
-        iotlb = mr->iommu_ops->translate(mr, addr, is_write);
-        if (!(iotlb.perm & (1 << is_write))) {
-            iotlb.target_as = NULL;
-            break;
-        }
-
-        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
-                | (addr & iotlb.addr_mask));
-        as = iotlb.target_as;
-    }
-
-    return iotlb;
-}
-
-/* Called from RCU critical section */
-MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
-                                      hwaddr *xlat, hwaddr *plen,
-                                      bool is_write)
+static MemoryRegionSection address_space_do_translate(AddressSpace *as,
+                                                      hwaddr addr,
+                                                      hwaddr *xlat,
+                                                      hwaddr *plen,
+                                                      bool is_write,
+                                                      bool is_mmio)
 {
    IOMMUTLBEntry iotlb;
    MemoryRegionSection *section;
@@ -500,7 +478,7 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,

    for (;;) {
        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
-        section = address_space_translate_internal(d, addr, &addr, plen, true);
+        section = address_space_translate_internal(d, addr, &addr, plen, is_mmio);
        mr = section->mr;

        if (!mr->iommu_ops) {
@@ -512,19 +490,84 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
                | (addr & iotlb.addr_mask));
        *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
        if (!(iotlb.perm & (1 << is_write))) {
-            mr = &io_mem_unassigned;
-            break;
+            goto translate_fail;
        }

        as = iotlb.target_as;
    }

+    *xlat = addr;
+
+    return *section;
+
+translate_fail:
+    return (MemoryRegionSection) { .mr = &io_mem_unassigned };
+}
+
+/* Called from RCU critical section */
+IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr,
+                                            bool is_write)
+{
+    MemoryRegionSection section;
+    hwaddr xlat, plen;
+
+    /* Try to get maximum page mask during translation. */
+    plen = (hwaddr)-1;
+
+    /* This can never be MMIO. */
+    section = address_space_do_translate(as, addr, &xlat, &plen,
+                                         is_write, false);
+
+    /* Illegal translation */
+    if (section.mr == &io_mem_unassigned) {
+        goto iotlb_fail;
+    }
+
+    /* Convert memory region offset into address space offset */
+    xlat += section.offset_within_address_space -
+        section.offset_within_region;
+
+    if (plen == (hwaddr)-1) {
+        /*
+         * We use default page size here. Logically it only happens
+         * for identity mappings.
+         */
+        plen = TARGET_PAGE_SIZE;
+    }
+
+    /* Convert to address mask */
+    plen -= 1;
+
+    return (IOMMUTLBEntry) {
+        .target_as = section.address_space,
+        .iova = addr & ~plen,
+        .translated_addr = xlat & ~plen,
+        .addr_mask = plen,
+        /* IOTLBs are for DMAs, and DMA only allows on RAMs. */
+        .perm = IOMMU_RW,
+    };
+
+iotlb_fail:
+    return (IOMMUTLBEntry) {0};
+}
+
+/* Called from RCU critical section */
+MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
+                                      hwaddr *xlat, hwaddr *plen,
+                                      bool is_write)
+{
+    MemoryRegion *mr;
+    MemoryRegionSection section;
+
+    /* This can be MMIO, so setup MMIO bit. */
+    section = address_space_do_translate(as, addr, xlat, plen, is_write, true);
+    mr = section.mr;
+
    if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
        *plen = MIN(page, *plen);
    }

-    *xlat = addr;
    return mr;
 }

@@ -972,7 +1015,7 @@ static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
    if (block && addr - block->offset < block->max_length) {
        return block;
    }
-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+    RAMBLOCK_FOREACH(block) {
        if (addr - block->offset < block->max_length) {
            goto found;
        }
@@ -1061,6 +1104,75 @@ bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
    return dirty;
 }

+DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
+     (ram_addr_t start, ram_addr_t length, unsigned client)
+{
+    DirtyMemoryBlocks *blocks;
+    unsigned long align = 1UL << (TARGET_PAGE_BITS + BITS_PER_LEVEL);
+    ram_addr_t first = QEMU_ALIGN_DOWN(start, align);
+    ram_addr_t last  = QEMU_ALIGN_UP(start + length, align);
+    DirtyBitmapSnapshot *snap;
+    unsigned long page, end, dest;
+
+    snap = g_malloc0(sizeof(*snap) +
+                     ((last - first) >> (TARGET_PAGE_BITS + 3)));
+    snap->start = first;
+    snap->end   = last;
+
+    page = first >> TARGET_PAGE_BITS;
+    end  = last  >> TARGET_PAGE_BITS;
+    dest = 0;
+
+    rcu_read_lock();
+
+    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
+
+    while (page < end) {
+        unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
+        unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
+        unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
+
+        assert(QEMU_IS_ALIGNED(offset, (1 << BITS_PER_LEVEL)));
+        assert(QEMU_IS_ALIGNED(num,    (1 << BITS_PER_LEVEL)));
+        offset >>= BITS_PER_LEVEL;
+
+        bitmap_copy_and_clear_atomic(snap->dirty + dest,
+                                     blocks->blocks[idx] + offset,
+                                     num);
+        page += num;
+        dest += num >> BITS_PER_LEVEL;
+    }
+
+    rcu_read_unlock();
+
+    if (tcg_enabled()) {
+        tlb_reset_dirty_range_all(start, length);
+    }
+
+    return snap;
+}
+
+bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
+                                            ram_addr_t start,
+                                            ram_addr_t length)
+{
+    unsigned long page, end;
+
+    assert(start >= snap->start);
+    assert(start + length <= snap->end);
+
+    end = TARGET_PAGE_ALIGN(start + length - snap->start) >> TARGET_PAGE_BITS;
+    page = (start - snap->start) >> TARGET_PAGE_BITS;
+
+    while (page < end) {
+        if (test_bit(page, snap->dirty)) {
+            return true;
+        }
+        page++;
+    }
+    return false;
+}
+
 /* Called from RCU critical section */
 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
                                       MemoryRegionSection *section,
@@ -1258,6 +1370,26 @@ void qemu_mutex_unlock_ramlist(void)
    qemu_mutex_unlock(&ram_list.mutex);
 }

+void ram_block_dump(Monitor *mon)
+{
+    RAMBlock *block;
+    char *psize;
+
+    rcu_read_lock();
+    monitor_printf(mon, "%24s %8s  %18s %18s %18s\n",
+                   "Block Name", "PSize", "Offset", "Used", "Total");
+    RAMBLOCK_FOREACH(block) {
+        psize = size_to_str(block->page_size);
+        monitor_printf(mon, "%24s %8s  0x%016" PRIx64 " 0x%016" PRIx64
+                       " 0x%016" PRIx64 "\n", block->idstr, psize,
+                       (uint64_t)block->offset,
+                       (uint64_t)block->used_length,
+                       (uint64_t)block->max_length);
+        g_free(psize);
+    }
+    rcu_read_unlock();
+}
+
 #ifdef __linux__
 /*
 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
@@ -1503,12 +1635,12 @@ static ram_addr_t find_ram_offset(ram_addr_t size)
        return 0;
    }

-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+    RAMBLOCK_FOREACH(block) {
        ram_addr_t end, next = RAM_ADDR_MAX;

        end = block->offset + block->max_length;

-        QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
+        RAMBLOCK_FOREACH(next_block) {
            if (next_block->offset >= end) {
                next = MIN(next, next_block->offset);
            }
@@ -1528,17 +1660,17 @@ static ram_addr_t find_ram_offset(ram_addr_t size)
    return offset;
 }

-ram_addr_t last_ram_offset(void)
+unsigned long last_ram_page(void)
 {
    RAMBlock *block;
    ram_addr_t last = 0;

    rcu_read_lock();
-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+    RAMBLOCK_FOREACH(block) {
        last = MAX(last, block->offset + block->max_length);
    }
    rcu_read_unlock();
-    return last;
+    return last >> TARGET_PAGE_BITS;
 }

 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
@@ -1584,7 +1716,7 @@ void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);

    rcu_read_lock();
-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+    RAMBLOCK_FOREACH(block) {
        if (block != new_block &&
            !strcmp(block->idstr, new_block->idstr)) {
            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
@@ -1618,7 +1750,7 @@ size_t qemu_ram_pagesize_largest(void)
    RAMBlock *block;
    size_t largest = 0;

-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+    RAMBLOCK_FOREACH(block) {
        largest = MAX(largest, qemu_ram_pagesize(block));
    }

@@ -1727,7 +1859,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
    ram_addr_t old_ram_size, new_ram_size;
    Error *err = NULL;

-    old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
+    old_ram_size = last_ram_page();

    qemu_mutex_lock_ramlist();
    new_block->offset = find_ram_offset(new_block->max_length);
@@ -1758,14 +1890,13 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
    new_ram_size = MAX(old_ram_size,
              (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
    if (new_ram_size > old_ram_size) {
-        migration_bitmap_extend(old_ram_size, new_ram_size);
        dirty_memory_extend(old_ram_size, new_ram_size);
    }
    /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
     * QLIST (which has an RCU-friendly variant) does not have insertion at
     * tail, so save the last element in last_block.
     */
-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+    RAMBLOCK_FOREACH(block) {
        last_block = block;
        if (block->max_length < new_block->max_length) {
            break;
@@ -1947,7 +2078,7 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
    int flags;
    void *area, *vaddr;

-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+    RAMBLOCK_FOREACH(block) {
        offset = addr - block->offset;
        if (offset < block->max_length) {
            vaddr = ramblock_ptr(block, offset);
@@ -2010,10 +2141,10 @@ void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
         * In that case just map until the end of the page.
         */
        if (block->offset == 0) {
-            return xen_map_cache(addr, 0, 0);
+            return xen_map_cache(addr, 0, 0, false);
        }

-        block->host = xen_map_cache(block->offset, block->max_length, 1);
+        block->host = xen_map_cache(block->offset, block->max_length, 1, false);
    }
    return ramblock_ptr(block, addr);
 }
@@ -2043,10 +2174,10 @@ static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
         * In that case just map the requested area.
         */
        if (block->offset == 0) {
-            return xen_map_cache(addr, *size, 1);
+            return xen_map_cache(addr, *size, 1, true);
        }

-        block->host = xen_map_cache(block->offset, block->max_length, 1);
+        block->host = xen_map_cache(block->offset, block->max_length, 1, true);
    }

    return ramblock_ptr(block, addr);
@@ -2093,7 +2224,7 @@ RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
        goto found;
    }

-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+    RAMBLOCK_FOREACH(block) {
        /* This case append when the block is not mapped. */
        if (block->host == NULL) {
            continue;
@@ -2126,7 +2257,7 @@ RAMBlock *qemu_ram_block_by_name(const char *name)
 {
    RAMBlock *block;

-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+    RAMBLOCK_FOREACH(block) {
        if (!strcmp(name, block->idstr)) {
            return block;
        }
@@ -3307,9 +3438,9 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
 * Allows code that needs to deal with migration bitmaps etc to still be built
 * target independent.
 */
-size_t qemu_target_page_bits(void)
+size_t qemu_target_page_size(void)
 {
-    return TARGET_PAGE_BITS;
+    return TARGET_PAGE_SIZE;
 }

 #endif
@@ -3350,7 +3481,7 @@ int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
    int ret = 0;

    rcu_read_lock();
-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+    RAMBLOCK_FOREACH(block) {
        ret = func(block->idstr, block->host, block->offset,
                   block->used_length, opaque);
        if (ret) {
--- a/gdb-xml/i386-32bit-core.xml
+++ b/gdb-xml/i386-32bit-core.xml
@@ -0,0 +1,65 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2010-2015 Free Software Foundation, Inc.
+
+     Copying and distribution of this file, with or without modification,
+     are permitted in any medium without royalty provided the copyright
+     notice and this notice are preserved.  -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.i386.core">
+  <flags id="i386_eflags" size="4">
+    <field name="CF" start="0" end="0"/>
+    <field name="" start="1" end="1"/>
+    <field name="PF" start="2" end="2"/>
+    <field name="AF" start="4" end="4"/>
+    <field name="ZF" start="6" end="6"/>
+    <field name="SF" start="7" end="7"/>
+    <field name="TF" start="8" end="8"/>
+    <field name="IF" start="9" end="9"/>
+    <field name="DF" start="10" end="10"/>
+    <field name="OF" start="11" end="11"/>
+    <field name="NT" start="14" end="14"/>
+    <field name="RF" start="16" end="16"/>
+    <field name="VM" start="17" end="17"/>
+    <field name="AC" start="18" end="18"/>
+    <field name="VIF" start="19" end="19"/>
+    <field name="VIP" start="20" end="20"/>
+    <field name="ID" start="21" end="21"/>
+  </flags>
+
+  <reg name="eax" bitsize="32" type="int32"/>
+  <reg name="ecx" bitsize="32" type="int32"/>
+  <reg name="edx" bitsize="32" type="int32"/>
+  <reg name="ebx" bitsize="32" type="int32"/>
+  <reg name="esp" bitsize="32" type="data_ptr"/>
+  <reg name="ebp" bitsize="32" type="data_ptr"/>
+  <reg name="esi" bitsize="32" type="int32"/>
+  <reg name="edi" bitsize="32" type="int32"/>
+
+  <reg name="eip" bitsize="32" type="code_ptr"/>
+  <reg name="eflags" bitsize="32" type="i386_eflags"/>
+  <reg name="cs" bitsize="32" type="int32"/>
+  <reg name="ss" bitsize="32" type="int32"/>
+  <reg name="ds" bitsize="32" type="int32"/>
+  <reg name="es" bitsize="32" type="int32"/>
+  <reg name="fs" bitsize="32" type="int32"/>
+  <reg name="gs" bitsize="32" type="int32"/>
+
+  <reg name="st0" bitsize="80" type="i387_ext"/>
+  <reg name="st1" bitsize="80" type="i387_ext"/>
+  <reg name="st2" bitsize="80" type="i387_ext"/>
+  <reg name="st3" bitsize="80" type="i387_ext"/>
+  <reg name="st4" bitsize="80" type="i387_ext"/>
+  <reg name="st5" bitsize="80" type="i387_ext"/>
+  <reg name="st6" bitsize="80" type="i387_ext"/>
+  <reg name="st7" bitsize="80" type="i387_ext"/>
+
+  <reg name="fctrl" bitsize="32" type="int" group="float"/>
+  <reg name="fstat" bitsize="32" type="int" group="float"/>
+  <reg name="ftag" bitsize="32" type="int" group="float"/>
+  <reg name="fiseg" bitsize="32" type="int" group="float"/>
+  <reg name="fioff" bitsize="32" type="int" group="float"/>
+  <reg name="foseg" bitsize="32" type="int" group="float"/>
+  <reg name="fooff" bitsize="32" type="int" group="float"/>
+  <reg name="fop" bitsize="32" type="int" group="float"/>
+</feature>
--- a/gdb-xml/i386-64bit-core.xml
+++ b/gdb-xml/i386-64bit-core.xml
@@ -0,0 +1,73 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2010-2015 Free Software Foundation, Inc.
+
+     Copying and distribution of this file, with or without modification,
+     are permitted in any medium without royalty provided the copyright
+     notice and this notice are preserved.  -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.i386.core">
+  <flags id="i386_eflags" size="4">
+    <field name="CF" start="0" end="0"/>
+    <field name="" start="1" end="1"/>
+    <field name="PF" start="2" end="2"/>
+    <field name="AF" start="4" end="4"/>
+    <field name="ZF" start="6" end="6"/>
+    <field name="SF" start="7" end="7"/>
+    <field name="TF" start="8" end="8"/>
+    <field name="IF" start="9" end="9"/>
+    <field name="DF" start="10" end="10"/>
+    <field name="OF" start="11" end="11"/>
+    <field name="NT" start="14" end="14"/>
+    <field name="RF" start="16" end="16"/>
+    <field name="VM" start="17" end="17"/>
+    <field name="AC" start="18" end="18"/>
+    <field name="VIF" start="19" end="19"/>
+    <field name="VIP" start="20" end="20"/>
+    <field name="ID" start="21" end="21"/>
+  </flags>
+
+  <reg name="rax" bitsize="64" type="int64"/>
+  <reg name="rbx" bitsize="64" type="int64"/>
+  <reg name="rcx" bitsize="64" type="int64"/>
+  <reg name="rdx" bitsize="64" type="int64"/>
+  <reg name="rsi" bitsize="64" type="int64"/>
+  <reg name="rdi" bitsize="64" type="int64"/>
+  <reg name="rbp" bitsize="64" type="data_ptr"/>
+  <reg name="rsp" bitsize="64" type="data_ptr"/>
+  <reg name="r8" bitsize="64" type="int64"/>
+  <reg name="r9" bitsize="64" type="int64"/>
+  <reg name="r10" bitsize="64" type="int64"/>
+  <reg name="r11" bitsize="64" type="int64"/>
+  <reg name="r12" bitsize="64" type="int64"/>
+  <reg name="r13" bitsize="64" type="int64"/>
+  <reg name="r14" bitsize="64" type="int64"/>
+  <reg name="r15" bitsize="64" type="int64"/>
+
+  <reg name="rip" bitsize="64" type="code_ptr"/>
+  <reg name="eflags" bitsize="32" type="i386_eflags"/>
+  <reg name="cs" bitsize="32" type="int32"/>
+  <reg name="ss" bitsize="32" type="int32"/>
+  <reg name="ds" bitsize="32" type="int32"/>
+  <reg name="es" bitsize="32" type="int32"/>
+  <reg name="fs" bitsize="32" type="int32"/>
+  <reg name="gs" bitsize="32" type="int32"/>
+
+  <reg name="st0" bitsize="80" type="i387_ext"/>
+  <reg name="st1" bitsize="80" type="i387_ext"/>
+  <reg name="st2" bitsize="80" type="i387_ext"/>
+  <reg name="st3" bitsize="80" type="i387_ext"/>
+  <reg name="st4" bitsize="80" type="i387_ext"/>
+  <reg name="st5" bitsize="80" type="i387_ext"/>
+  <reg name="st6" bitsize="80" type="i387_ext"/>
+  <reg name="st7" bitsize="80" type="i387_ext"/>
+
+  <reg name="fctrl" bitsize="32" type="int" group="float"/>
+  <reg name="fstat" bitsize="32" type="int" group="float"/>
+  <reg name="ftag" bitsize="32" type="int" group="float"/>
+  <reg name="fiseg" bitsize="32" type="int" group="float"/>
+  <reg name="fioff" bitsize="32" type="int" group="float"/>
+  <reg name="foseg" bitsize="32" type="int" group="float"/>
+  <reg name="fooff" bitsize="32" type="int" group="float"/>
+  <reg name="fop" bitsize="32" type="int" group="float"/>
+</feature>
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -286,6 +286,8 @@ enum RSState {
    RS_INACTIVE,
    RS_IDLE,
    RS_GETLINE,
+    RS_GETLINE_ESC,
+    RS_GETLINE_RLE,
    RS_CHKSUM1,
    RS_CHKSUM2,
 };
@@ -296,7 +298,8 @@ typedef struct GDBState {
    enum RSState state; /* parsing state */
    char line_buf[MAX_PACKET_LENGTH];
    int line_buf_index;
-    int line_csum;
+    int line_sum; /* running checksum */
+    int line_csum; /* checksum at the end of the packet */
    uint8_t last_packet[MAX_PACKET_LENGTH + 4];
    int last_packet_len;
    int signal;
@@ -1508,7 +1511,6 @@ void gdb_do_syscall(gdb_syscall_complete_cb cb, const char *fmt, ...)

 static void gdb_read_byte(GDBState *s, int ch)
 {
-    int i, csum;
    uint8_t reply;

 #ifndef CONFIG_USER_ONLY
@@ -1542,35 +1544,123 @@ static void gdb_read_byte(GDBState *s, int ch)
        switch(s->state) {
        case RS_IDLE:
            if (ch == '$') {
+                /* start of command packet */
                s->line_buf_index = 0;
+                s->line_sum = 0;
                s->state = RS_GETLINE;
+            } else {
+#ifdef DEBUG_GDB
+                printf("gdbstub received garbage between packets: 0x%x\n", ch);
+#endif
            }
            break;
        case RS_GETLINE:
-            if (ch == '#') {
-            s->state = RS_CHKSUM1;
+            if (ch == '}') {
+                /* start escape sequence */
+                s->state = RS_GETLINE_ESC;
+                s->line_sum += ch;
+            } else if (ch == '*') {
+                /* start run length encoding sequence */
+                s->state = RS_GETLINE_RLE;
+                s->line_sum += ch;
+            } else if (ch == '#') {
+                /* end of command, start of checksum*/
+                s->state = RS_CHKSUM1;
            } else if (s->line_buf_index >= sizeof(s->line_buf) - 1) {
+#ifdef DEBUG_GDB
+                printf("gdbstub command buffer overrun, dropping command\n");
+#endif
                s->state = RS_IDLE;
            } else {
-            s->line_buf[s->line_buf_index++] = ch;
+                /* unescaped command character */
+                s->line_buf[s->line_buf_index++] = ch;
+                s->line_sum += ch;
+            }
+            break;
+        case RS_GETLINE_ESC:
+            if (ch == '#') {
+                /* unexpected end of command in escape sequence */
+                s->state = RS_CHKSUM1;
+            } else if (s->line_buf_index >= sizeof(s->line_buf) - 1) {
+                /* command buffer overrun */
+#ifdef DEBUG_GDB
+                printf("gdbstub command buffer overrun, dropping command\n");
+#endif
+                s->state = RS_IDLE;
+            } else {
+                /* parse escaped character and leave escape state */
+                s->line_buf[s->line_buf_index++] = ch ^ 0x20;
+                s->line_sum += ch;
+                s->state = RS_GETLINE;
+            }
+            break;
+        case RS_GETLINE_RLE:
+            if (ch < ' ') {
+                /* invalid RLE count encoding */
+#ifdef DEBUG_GDB
+                printf("gdbstub got invalid RLE count: 0x%x\n", ch);
+#endif
+                s->state = RS_GETLINE;
+            } else {
+                /* decode repeat length */
+                int repeat = (unsigned char)ch - ' ' + 3;
+                if (s->line_buf_index + repeat >= sizeof(s->line_buf) - 1) {
+                    /* that many repeats would overrun the command buffer */
+#ifdef DEBUG_GDB
+                    printf("gdbstub command buffer overrun,"
+                           " dropping command\n");
+#endif
+                    s->state = RS_IDLE;
+                } else if (s->line_buf_index < 1) {
+                    /* got a repeat but we have nothing to repeat */
+#ifdef DEBUG_GDB
+                    printf("gdbstub got invalid RLE sequence\n");
+#endif
+                    s->state = RS_GETLINE;
+                } else {
+                    /* repeat the last character */
+                    memset(s->line_buf + s->line_buf_index,
+                           s->line_buf[s->line_buf_index - 1], repeat);
+                    s->line_buf_index += repeat;
+                    s->line_sum += ch;
+                    s->state = RS_GETLINE;
+                }
            }
            break;
        case RS_CHKSUM1:
+            /* get high hex digit of checksum */
+            if (!isxdigit(ch)) {
+#ifdef DEBUG_GDB
+                printf("gdbstub got invalid command checksum digit\n");
+#endif
+                s->state = RS_GETLINE;
+                break;
+            }
            s->line_buf[s->line_buf_index] = '\0';
            s->line_csum = fromhex(ch) << 4;
            s->state = RS_CHKSUM2;
            break;
        case RS_CHKSUM2:
-            s->line_csum |= fromhex(ch);
-            csum = 0;
-            for(i = 0; i < s->line_buf_index; i++) {
-                csum += s->line_buf[i];
+            /* get low hex digit of checksum */
+            if (!isxdigit(ch)) {
+#ifdef DEBUG_GDB
+                printf("gdbstub got invalid command checksum digit\n");
+#endif
+                s->state = RS_GETLINE;
+                break;
            }
-            if (s->line_csum != (csum & 0xff)) {
+            s->line_csum |= fromhex(ch);
+
+            if (s->line_csum != (s->line_sum & 0xff)) {
+                /* send NAK reply */
                reply = '-';
                put_buffer(s, &reply, 1);
+#ifdef DEBUG_GDB
+                printf("gdbstub got command packet with incorrect checksum\n");
+#endif
                s->state = RS_IDLE;
            } else {
+                /* send ACK reply */
                reply = '+';
                put_buffer(s, &reply, 1);
                s->state = gdb_handle_packet(s, s->line_buf);
@@ -1611,7 +1701,7 @@ void gdb_exit(CPUArchState *env, int code)

 #ifndef CONFIG_USER_ONLY
  qemu_chr_fe_deinit(&s->chr);
-  qemu_chr_delete(chr);
+  object_unparent(OBJECT(chr));
 #endif
 }

@@ -1912,7 +2002,7 @@ int gdbserver_start(const char *device)
        monitor_init(mon_chr, 0);
    } else {
        if (qemu_chr_fe_get_driver(&s->chr)) {
-            qemu_chr_delete(qemu_chr_fe_get_driver(&s->chr));
+            object_unparent(OBJECT(qemu_chr_fe_get_driver(&s->chr)));
        }
        mon_chr = s->mon_chr;
        memset(s, 0, sizeof(GDBState));
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -785,6 +785,20 @@ STEXI
@item info dump
@findex dump
 Display the latest dump status.
+ETEXI
+
+    {
+        .name       = "ramblock",
+        .args_type  = "",
+        .params     = "",
+        .help       = "Display system ramblock information",
+        .cmd        = hmp_info_ramblock,
+    },
+
+STEXI
+@item info ramblock
+@findex ramblock
+Dump all the ramblocks of the system.
 ETEXI

    {
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -523,6 +523,38 @@ Dump 80 16 bit values at the start of the video memory.
 0x000b8090: 0x0720 0x0720 0x0720 0x0720 0x0720 0x0720 0x0720 0x0720
@end smallexample
@end itemize
+ETEXI
+
+    {
+        .name       = "gpa2hva",
+        .args_type  = "addr:l",
+        .params     = "addr",
+        .help       = "print the host virtual address corresponding to a guest physical address",
+        .cmd        = hmp_gpa2hva,
+    },
+
+STEXI
+@item gpa2hva @var{addr}
+@findex gpa2hva
+Print the host virtual address at which the guest's physical address @var{addr}
+is mapped.
+ETEXI
+
+#ifdef CONFIG_LINUX
+    {
+        .name       = "gpa2hpa",
+        .args_type  = "addr:l",
+        .params     = "addr",
+        .help       = "print the host physical address corresponding to a guest physical address",
+        .cmd        = hmp_gpa2hpa,
+    },
+#endif
+
+STEXI
+@item gpa2hpa @var{addr}
+@findex gpa2hpa
+Print the host physical address at which the guest's physical address @var{addr}
+is mapped.
 ETEXI

    {
--- a/hmp.c
+++ b/hmp.c
@@ -19,6 +19,7 @@
 #include "net/eth.h"
 #include "sysemu/char.h"
 #include "sysemu/block-backend.h"
+#include "sysemu/sysemu.h"
 #include "qemu/config-file.h"
 #include "qemu/option.h"
 #include "qemu/timer.h"
@@ -33,10 +34,12 @@
 #include "qapi-visit.h"
 #include "qom/object_interfaces.h"
 #include "ui/console.h"
+#include "block/nbd.h"
 #include "block/qapi.h"
 #include "qemu-io.h"
 #include "qemu/cutils.h"
 #include "qemu/error-report.h"
+#include "exec/ramlist.h"
 #include "hw/intc/intc.h"

 #ifdef CONFIG_SPICE
@@ -215,6 +218,9 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
                       info->ram->normal_bytes >> 10);
        monitor_printf(mon, "dirty sync count: %" PRIu64 "\n",
                       info->ram->dirty_sync_count);
+        monitor_printf(mon, "page size: %" PRIu64 " kbytes\n",
+                       info->ram->page_size >> 10);
+
        if (info->ram->dirty_pages_rate) {
            monitor_printf(mon, "dirty pages rate: %" PRIu64 " pages\n",
                           info->ram->dirty_pages_rate);
@@ -265,13 +271,11 @@ void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict)
    caps = qmp_query_migrate_capabilities(NULL);

    if (caps) {
-        monitor_printf(mon, "capabilities: ");
        for (cap = caps; cap; cap = cap->next) {
-            monitor_printf(mon, "%s: %s ",
+            monitor_printf(mon, "%s: %s\n",
                           MigrationCapability_lookup[cap->value->capability],
                           cap->value->state ? "on" : "off");
        }
-        monitor_printf(mon, "\n");
    }

    qapi_free_MigrationCapabilityStatusList(caps);
@@ -284,46 +288,44 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
    params = qmp_query_migrate_parameters(NULL);

    if (params) {
-        monitor_printf(mon, "parameters:");
        assert(params->has_compress_level);
-        monitor_printf(mon, " %s: %" PRId64,
+        monitor_printf(mon, "%s: %" PRId64 "\n",
            MigrationParameter_lookup[MIGRATION_PARAMETER_COMPRESS_LEVEL],
            params->compress_level);
        assert(params->has_compress_threads);
-        monitor_printf(mon, " %s: %" PRId64,
+        monitor_printf(mon, "%s: %" PRId64 "\n",
            MigrationParameter_lookup[MIGRATION_PARAMETER_COMPRESS_THREADS],
            params->compress_threads);
        assert(params->has_decompress_threads);
-        monitor_printf(mon, " %s: %" PRId64,
+        monitor_printf(mon, "%s: %" PRId64 "\n",
            MigrationParameter_lookup[MIGRATION_PARAMETER_DECOMPRESS_THREADS],
            params->decompress_threads);
        assert(params->has_cpu_throttle_initial);
-        monitor_printf(mon, " %s: %" PRId64,
+        monitor_printf(mon, "%s: %" PRId64 "\n",
            MigrationParameter_lookup[MIGRATION_PARAMETER_CPU_THROTTLE_INITIAL],
            params->cpu_throttle_initial);
        assert(params->has_cpu_throttle_increment);
-        monitor_printf(mon, " %s: %" PRId64,
+        monitor_printf(mon, "%s: %" PRId64 "\n",
            MigrationParameter_lookup[MIGRATION_PARAMETER_CPU_THROTTLE_INCREMENT],
            params->cpu_throttle_increment);
-        monitor_printf(mon, " %s: '%s'",
+        monitor_printf(mon, "%s: '%s'\n",
            MigrationParameter_lookup[MIGRATION_PARAMETER_TLS_CREDS],
            params->has_tls_creds ? params->tls_creds : "");
-        monitor_printf(mon, " %s: '%s'",
+        monitor_printf(mon, "%s: '%s'\n",
            MigrationParameter_lookup[MIGRATION_PARAMETER_TLS_HOSTNAME],
            params->has_tls_hostname ? params->tls_hostname : "");
        assert(params->has_max_bandwidth);
-        monitor_printf(mon, " %s: %" PRId64 " bytes/second",
+        monitor_printf(mon, "%s: %" PRId64 " bytes/second\n",
            MigrationParameter_lookup[MIGRATION_PARAMETER_MAX_BANDWIDTH],
            params->max_bandwidth);
        assert(params->has_downtime_limit);
-        monitor_printf(mon, " %s: %" PRId64 " milliseconds",
+        monitor_printf(mon, "%s: %" PRId64 " milliseconds\n",
            MigrationParameter_lookup[MIGRATION_PARAMETER_DOWNTIME_LIMIT],
            params->downtime_limit);
        assert(params->has_x_checkpoint_delay);
-        monitor_printf(mon, " %s: %" PRId64,
+        monitor_printf(mon, "%s: %" PRId64 "\n",
            MigrationParameter_lookup[MIGRATION_PARAMETER_X_CHECKPOINT_DELAY],
            params->x_checkpoint_delay);
-        monitor_printf(mon, "\n");
    }

    qapi_free_MigrationParameters(params);
@@ -1269,6 +1271,184 @@ void hmp_snapshot_delete_blkdev_internal(Monitor *mon, const QDict *qdict)
    hmp_handle_error(mon, &err);
 }

+void hmp_loadvm(Monitor *mon, const QDict *qdict)
+{
+    int saved_vm_running  = runstate_is_running();
+    const char *name = qdict_get_str(qdict, "name");
+    Error *err = NULL;
+
+    vm_stop(RUN_STATE_RESTORE_VM);
+
+    if (load_vmstate(name, &err) == 0 && saved_vm_running) {
+        vm_start();
+    }
+    hmp_handle_error(mon, &err);
+}
+
+void hmp_savevm(Monitor *mon, const QDict *qdict)
+{
+    Error *err = NULL;
+
+    save_vmstate(qdict_get_try_str(qdict, "name"), &err);
+    hmp_handle_error(mon, &err);
+}
+
+void hmp_delvm(Monitor *mon, const QDict *qdict)
+{
+    BlockDriverState *bs;
+    Error *err;
+    const char *name = qdict_get_str(qdict, "name");
+
+    if (bdrv_all_delete_snapshot(name, &bs, &err) < 0) {
+        error_reportf_err(err,
+                          "Error while deleting snapshot on device '%s': ",
+                          bdrv_get_device_name(bs));
+    }
+}
+
+void hmp_info_snapshots(Monitor *mon, const QDict *qdict)
+{
+    BlockDriverState *bs, *bs1;
+    BdrvNextIterator it1;
+    QEMUSnapshotInfo *sn_tab, *sn;
+    bool no_snapshot = true;
+    int nb_sns, i;
+    int total;
+    int *global_snapshots;
+    AioContext *aio_context;
+
+    typedef struct SnapshotEntry {
+        QEMUSnapshotInfo sn;
+        QTAILQ_ENTRY(SnapshotEntry) next;
+    } SnapshotEntry;
+
+    typedef struct ImageEntry {
+        const char *imagename;
+        QTAILQ_ENTRY(ImageEntry) next;
+        QTAILQ_HEAD(, SnapshotEntry) snapshots;
+    } ImageEntry;
+
+    QTAILQ_HEAD(, ImageEntry) image_list =
+        QTAILQ_HEAD_INITIALIZER(image_list);
+
+    ImageEntry *image_entry, *next_ie;
+    SnapshotEntry *snapshot_entry;
+
+    bs = bdrv_all_find_vmstate_bs();
+    if (!bs) {
+        monitor_printf(mon, "No available block device supports snapshots\n");
+        return;
+    }
+    aio_context = bdrv_get_aio_context(bs);
+
+    aio_context_acquire(aio_context);
+    nb_sns = bdrv_snapshot_list(bs, &sn_tab);
+    aio_context_release(aio_context);
+
+    if (nb_sns < 0) {
+        monitor_printf(mon, "bdrv_snapshot_list: error %d\n", nb_sns);
+        return;
+    }
+
+    for (bs1 = bdrv_first(&it1); bs1; bs1 = bdrv_next(&it1)) {
+        int bs1_nb_sns = 0;
+        ImageEntry *ie;
+        SnapshotEntry *se;
+        AioContext *ctx = bdrv_get_aio_context(bs1);
+
+        aio_context_acquire(ctx);
+        if (bdrv_can_snapshot(bs1)) {
+            sn = NULL;
+            bs1_nb_sns = bdrv_snapshot_list(bs1, &sn);
+            if (bs1_nb_sns > 0) {
+                no_snapshot = false;
+                ie = g_new0(ImageEntry, 1);
+                ie->imagename = bdrv_get_device_name(bs1);
+                QTAILQ_INIT(&ie->snapshots);
+                QTAILQ_INSERT_TAIL(&image_list, ie, next);
+                for (i = 0; i < bs1_nb_sns; i++) {
+                    se = g_new0(SnapshotEntry, 1);
+                    se->sn = sn[i];
+                    QTAILQ_INSERT_TAIL(&ie->snapshots, se, next);
+                }
+            }
+            g_free(sn);
+        }
+        aio_context_release(ctx);
+    }
+
+    if (no_snapshot) {
+        monitor_printf(mon, "There is no snapshot available.\n");
+        return;
+    }
+
+    global_snapshots = g_new0(int, nb_sns);
+    total = 0;
+    for (i = 0; i < nb_sns; i++) {
+        SnapshotEntry *next_sn;
+        if (bdrv_all_find_snapshot(sn_tab[i].name, &bs1) == 0) {
+            global_snapshots[total] = i;
+            total++;
+            QTAILQ_FOREACH(image_entry, &image_list, next) {
+                QTAILQ_FOREACH_SAFE(snapshot_entry, &image_entry->snapshots,
+                                    next, next_sn) {
+                    if (!strcmp(sn_tab[i].name, snapshot_entry->sn.name)) {
+                        QTAILQ_REMOVE(&image_entry->snapshots, snapshot_entry,
+                                      next);
+                        g_free(snapshot_entry);
+                    }
+                }
+            }
+        }
+    }
+
+    monitor_printf(mon, "List of snapshots present on all disks:\n");
+
+    if (total > 0) {
+        bdrv_snapshot_dump((fprintf_function)monitor_printf, mon, NULL);
+        monitor_printf(mon, "\n");
+        for (i = 0; i < total; i++) {
+            sn = &sn_tab[global_snapshots[i]];
+            /* The ID is not guaranteed to be the same on all images, so
+             * overwrite it.
+             */
+            pstrcpy(sn->id_str, sizeof(sn->id_str), "--");
+            bdrv_snapshot_dump((fprintf_function)monitor_printf, mon, sn);
+            monitor_printf(mon, "\n");
+        }
+    } else {
+        monitor_printf(mon, "None\n");
+    }
+
+    QTAILQ_FOREACH(image_entry, &image_list, next) {
+        if (QTAILQ_EMPTY(&image_entry->snapshots)) {
+            continue;
+        }
+        monitor_printf(mon,
+                       "\nList of partial (non-loadable) snapshots on '%s':\n",
+                       image_entry->imagename);
+        bdrv_snapshot_dump((fprintf_function)monitor_printf, mon, NULL);
+        monitor_printf(mon, "\n");
+        QTAILQ_FOREACH(snapshot_entry, &image_entry->snapshots, next) {
+            bdrv_snapshot_dump((fprintf_function)monitor_printf, mon,
+                               &snapshot_entry->sn);
+            monitor_printf(mon, "\n");
+        }
+    }
+
+    QTAILQ_FOREACH_SAFE(image_entry, &image_list, next, next_ie) {
+        SnapshotEntry *next_sn;
+        QTAILQ_FOREACH_SAFE(snapshot_entry, &image_entry->snapshots, next,
+                            next_sn) {
+            g_free(snapshot_entry);
+        }
+        g_free(image_entry);
+    }
+    g_free(sn_tab);
+    g_free(global_snapshots);
+
+}
+
 void hmp_migrate_cancel(Monitor *mon, const QDict *qdict)
 {
    qmp_migrate_cancel(NULL);
@@ -1948,7 +2128,7 @@ void hmp_nbd_server_start(Monitor *mon, const QDict *qdict)
        goto exit;
    }

-    qmp_nbd_server_start(addr, false, NULL, &local_err);
+    nbd_server_start(addr, NULL, &local_err);
    qapi_free_SocketAddress(addr);
    if (local_err != NULL) {
        goto exit;
@@ -2564,6 +2744,11 @@ void hmp_info_dump(Monitor *mon, const QDict *qdict)
    qapi_free_DumpQueryResult(result);
 }

+void hmp_info_ramblock(Monitor *mon, const QDict *qdict)
+{
+    ram_block_dump(mon);
+}
+
 void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict)
 {
    Error *err = NULL;
--- a/hmp.h
+++ b/hmp.h
@@ -63,6 +63,10 @@ void hmp_snapshot_blkdev_internal(Monitor *mon, const QDict *qdict);
 void hmp_snapshot_delete_blkdev_internal(Monitor *mon, const QDict *qdict);
 void hmp_drive_mirror(Monitor *mon, const QDict *qdict);
 void hmp_drive_backup(Monitor *mon, const QDict *qdict);
+void hmp_loadvm(Monitor *mon, const QDict *qdict);
+void hmp_savevm(Monitor *mon, const QDict *qdict);
+void hmp_delvm(Monitor *mon, const QDict *qdict);
+void hmp_info_snapshots(Monitor *mon, const QDict *qdict);
 void hmp_migrate_cancel(Monitor *mon, const QDict *qdict);
 void hmp_migrate_incoming(Monitor *mon, const QDict *qdict);
 void hmp_migrate_set_downtime(Monitor *mon, const QDict *qdict);
@@ -136,6 +140,7 @@ void hmp_rocker_ports(Monitor *mon, const QDict *qdict);
 void hmp_rocker_of_dpa_flows(Monitor *mon, const QDict *qdict);
 void hmp_rocker_of_dpa_groups(Monitor *mon, const QDict *qdict);
 void hmp_info_dump(Monitor *mon, const QDict *qdict);
+void hmp_info_ramblock(Monitor *mon, const QDict *qdict);
 void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict);
 void hmp_info_vm_generation_id(Monitor *mon, const QDict *qdict);

--- a/hw/9pfs/9p-local.c
+++ b/hw/9pfs/9p-local.c
@@ -452,6 +452,11 @@ static off_t local_telldir(FsContext *ctx, V9fsFidOpenState *fs)
    return telldir(fs->dir.stream);
 }

+static bool local_is_mapped_file_metadata(FsContext *fs_ctx, const char *name)
+{
+    return !strcmp(name, VIRTFS_META_DIR);
+}
+
 static struct dirent *local_readdir(FsContext *ctx, V9fsFidOpenState *fs)
 {
    struct dirent *entry;
@@ -465,8 +470,8 @@ again:
    if (ctx->export_flags & V9FS_SM_MAPPED) {
        entry->d_type = DT_UNKNOWN;
    } else if (ctx->export_flags & V9FS_SM_MAPPED_FILE) {
-        if (!strcmp(entry->d_name, VIRTFS_META_DIR)) {
-            /* skp the meta data directory */
+        if (local_is_mapped_file_metadata(ctx, entry->d_name)) {
+            /* skip the meta data directory */
            goto again;
        }
        entry->d_type = DT_UNKNOWN;
@@ -559,6 +564,12 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath *dir_path,
    int err = -1;
    int dirfd;

+    if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE &&
+        local_is_mapped_file_metadata(fs_ctx, name)) {
+        errno = EINVAL;
+        return -1;
+    }
+
    dirfd = local_opendir_nofollow(fs_ctx, dir_path->data);
    if (dirfd == -1) {
        return -1;
@@ -605,6 +616,12 @@ static int local_mkdir(FsContext *fs_ctx, V9fsPath *dir_path,
    int err = -1;
    int dirfd;

+    if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE &&
+        local_is_mapped_file_metadata(fs_ctx, name)) {
+        errno = EINVAL;
+        return -1;
+    }
+
    dirfd = local_opendir_nofollow(fs_ctx, dir_path->data);
    if (dirfd == -1) {
        return -1;
@@ -694,6 +711,12 @@ static int local_open2(FsContext *fs_ctx, V9fsPath *dir_path, const char *name,
    int err = -1;
    int dirfd;

+    if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE &&
+        local_is_mapped_file_metadata(fs_ctx, name)) {
+        errno = EINVAL;
+        return -1;
+    }
+
    /*
     * Mark all the open to not follow symlinks
     */
@@ -752,6 +775,12 @@ static int local_symlink(FsContext *fs_ctx, const char *oldpath,
    int err = -1;
    int dirfd;

+    if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE &&
+        local_is_mapped_file_metadata(fs_ctx, name)) {
+        errno = EINVAL;
+        return -1;
+    }
+
    dirfd = local_opendir_nofollow(fs_ctx, dir_path->data);
    if (dirfd == -1) {
        return -1;
@@ -826,6 +855,12 @@ static int local_link(FsContext *ctx, V9fsPath *oldpath,
    int ret = -1;
    int odirfd, ndirfd;

+    if (ctx->export_flags & V9FS_SM_MAPPED_FILE &&
+        local_is_mapped_file_metadata(ctx, name)) {
+        errno = EINVAL;
+        return -1;
+    }
+
    odirfd = local_opendir_nofollow(ctx, odirpath);
    if (odirfd == -1) {
        goto out;
@@ -1096,6 +1131,12 @@ static int local_lremovexattr(FsContext *ctx, V9fsPath *fs_path,
 static int local_name_to_path(FsContext *ctx, V9fsPath *dir_path,
                              const char *name, V9fsPath *target)
 {
+    if (ctx->export_flags & V9FS_SM_MAPPED_FILE &&
+        local_is_mapped_file_metadata(ctx, name)) {
+        errno = EINVAL;
+        return -1;
+    }
+
    if (dir_path) {
        v9fs_path_sprintf(target, "%s/%s", dir_path->data, name);
    } else if (strcmp(name, "/")) {
@@ -1116,6 +1157,13 @@ static int local_renameat(FsContext *ctx, V9fsPath *olddir,
    int ret;
    int odirfd, ndirfd;

+    if (ctx->export_flags & V9FS_SM_MAPPED_FILE &&
+        (local_is_mapped_file_metadata(ctx, old_name) ||
+         local_is_mapped_file_metadata(ctx, new_name))) {
+        errno = EINVAL;
+        return -1;
+    }
+
    odirfd = local_opendir_nofollow(ctx, olddir->data);
    if (odirfd == -1) {
        return -1;
@@ -1206,6 +1254,12 @@ static int local_unlinkat(FsContext *ctx, V9fsPath *dir,
    int ret;
    int dirfd;

+    if (ctx->export_flags & V9FS_SM_MAPPED_FILE &&
+        local_is_mapped_file_metadata(ctx, name)) {
+        errno = EINVAL;
+        return -1;
+    }
+
    dirfd = local_opendir_nofollow(ctx, dir->data);
    if (dirfd == -1) {
        return -1;
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -23,7 +23,7 @@
 #include "9p-xattr.h"
 #include "coth.h"
 #include "trace.h"
-#include "migration/migration.h"
+#include "migration/blocker.h"

 int open_fd_hw;
 int total_open_fd;
--- a/hw/9pfs/9p.h
+++ b/hw/9pfs/9p.h
@@ -119,6 +119,12 @@ static inline char *rpath(FsContext *ctx, const char *path)
 typedef struct V9fsPDU V9fsPDU;
 struct V9fsState;

+typedef struct {
+    uint32_t size_le;
+    uint8_t id;
+    uint16_t tag_le;
+} QEMU_PACKED P9MsgHeader;
+
 struct V9fsPDU
 {
    uint32_t size;
--- a/hw/9pfs/Makefile.objs
+++ b/hw/9pfs/Makefile.objs
@@ -5,5 +5,6 @@ common-obj-y += coth.o cofs.o codir.o cofile.o
 common-obj-y += coxattr.o 9p-synth.o
 common-obj-$(CONFIG_OPEN_BY_HANDLE) +=  9p-handle.o
 common-obj-y += 9p-proxy.o
+common-obj-$(CONFIG_XEN) += xen-9p-backend.o

 obj-y += virtio-9p-device.o
--- a/hw/9pfs/virtio-9p-device.c
+++ b/hw/9pfs/virtio-9p-device.c
@@ -46,11 +46,7 @@ static void handle_9p_output(VirtIODevice *vdev, VirtQueue *vq)
    VirtQueueElement *elem;

    while ((pdu = pdu_alloc(s))) {
-        struct {
-            uint32_t size_le;
-            uint8_t id;
-            uint16_t tag_le;
-        } QEMU_PACKED out;
+        P9MsgHeader out;

        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .9.0
 .9.50