input: Add trace event for empty keyboard queue

When driving QEMU from the outside, we have basically no chance to determine how quickly the guest OS picks up key events, so we usually have to limit ourselves to very slow keyboard presses to make sure the guest always has enough chance to pick them up. This patch adds a trace events when the keyboarde queue is drained. An external driver can use that as hint that new keys can be pressed. Signed-off-by: Alexander Graf <agraf@suse.de> Message-id: 1490883775-94658-1-git-send-email-agraf@suse.de Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
input: don't queue delay if paused
2017-05-03 14:20:12 +02:00 · 2017-05-03 14:19:40 +02:00 · 2017-05-03 14:18:21 +02:00 · 2017-05-02 15:16:29 +01:00 · 2017-04-29 18:44:16 +02:00 · 2017-04-29 18:44:16 +02:00
361 changed files with 13391 additions and 4424 deletions
--- a/8
+++ b/8
@@ -327,6 +327,7 @@ L: xen-devel@lists.xenproject.org
 S: Supported
 F: xen-*
 F: */xen*
+F: hw/9pfs/xen-9p-backend.c
 F: hw/char/xen_console.c
 F: hw/display/xenfb.c
 F: hw/net/xen_nic.c
@@ -645,7 +646,6 @@ F: hw/ppc/ppc440_bamboo.c

 e500
 M: Alexander Graf <agraf@suse.de>
-M: Scott Wood <scottwood@freescale.com>
 L: qemu-ppc@nongnu.org
 S: Supported
 F: hw/ppc/e500.[hc]
@@ -656,7 +656,6 @@ F: pc-bios/u-boot.e500

 mpc8544ds
 M: Alexander Graf <agraf@suse.de>
-M: Scott Wood <scottwood@freescale.com>
 L: qemu-ppc@nongnu.org
 S: Supported
 F: hw/ppc/mpc8544ds.c
@@ -933,7 +932,6 @@ F: include/hw/ppc/ppc4xx.h

 ppce500
 M: Alexander Graf <agraf@suse.de>
-M: Scott Wood <scottwood@freescale.com>
 L: qemu-ppc@nongnu.org
 S: Supported
 F: hw/ppc/e500*
@@ -1817,8 +1815,8 @@ S: Supported
 F: tests/image-fuzzer/

 Replication
-M: Wen Congyang <wency@cn.fujitsu.com>
-M: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
+M: Wen Congyang <wencongyang2@huawei.com>
+M: Xie Changlong <xiechanglong.d@gmail.com>
 S: Supported
 F: replication*
 F: block/replication.c
--- a/6
+++ b/6
@@ -346,7 +346,7 @@ dtc/%:
 	mkdir -p $@

 $(SUBDIR_RULES): libqemuutil.a libqemustub.a $(common-obj-y) $(chardev-obj-y) \
-	$(qom-obj-y) $(crypto-aes-obj-$(CONFIG_USER_ONLY)) $(trace-obj-y)
+	$(qom-obj-y) $(crypto-aes-obj-$(CONFIG_USER_ONLY))

 ROMSUBDIR_RULES=$(patsubst %,romsubdir-%, $(ROMS))
 # Only keep -O and -g cflags
@@ -366,11 +366,11 @@ Makefile: $(version-obj-y)
 # Build libraries

 libqemustub.a: $(stub-obj-y)
-libqemuutil.a: $(util-obj-y)
+libqemuutil.a: $(util-obj-y) $(trace-obj-y)

 ######################################################################

-COMMON_LDADDS = $(trace-obj-y) libqemuutil.a libqemustub.a
+COMMON_LDADDS = libqemuutil.a libqemustub.a

 qemu-img.o: qemu-img-cmds.h

--- a/Makefile.target
+++ b/Makefile.target
@@ -149,12 +149,6 @@ obj-y += dump.o
 obj-y += migration/ram.o migration/savevm.o
 LIBS := $(libs_softmmu) $(LIBS)

-# xen support
-obj-$(CONFIG_XEN) += xen-common.o
-obj-$(CONFIG_XEN_I386) += xen-hvm.o xen-mapcache.o
-obj-$(call lnot,$(CONFIG_XEN)) += xen-common-stub.o
-obj-$(call lnot,$(CONFIG_XEN_I386)) += xen-hvm-stub.o
-
 # Hardware support
 ifeq ($(TARGET_NAME), sparc64)
 obj-y += hw/sparc64/
@@ -188,8 +182,7 @@ dummy := $(call unnest-vars,.., \
               qom-obj-y \
               io-obj-y \
               common-obj-y \
-               common-obj-m \
-               trace-obj-y)
+               common-obj-m)
 target-obj-y := $(target-obj-y-save)
 all-obj-y += $(common-obj-y)
 all-obj-y += $(target-obj-y)
@@ -201,7 +194,7 @@ all-obj-$(CONFIG_SOFTMMU) += $(io-obj-y)

 $(QEMU_PROG_BUILD): config-devices.mak

-COMMON_LDADDS = $(trace-obj-y) ../libqemuutil.a ../libqemustub.a
+COMMON_LDADDS = ../libqemuutil.a ../libqemustub.a

 # build either PROG or PROGW
 $(QEMU_PROG_BUILD): $(all-obj-y) $(COMMON_LDADDS)
--- a/2
+++ b/2
@@ -1 +1 @@
-2.8.92
+2.9.50
--- a/backends/hostmem-file.c
+++ b/backends/hostmem-file.c
@@ -51,7 +51,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
 #ifndef CONFIG_LINUX
    error_setg(errp, "-mem-path not supported on this host");
 #else
-    if (!memory_region_size(&backend->mr)) {
+    if (!host_memory_backend_mr_inited(backend)) {
        gchar *path;
        backend->force_prealloc = mem_prealloc;
        path = object_get_canonical_path(OBJECT(backend));
@@ -76,7 +76,7 @@ static void set_mem_path(Object *o, const char *str, Error **errp)
    HostMemoryBackend *backend = MEMORY_BACKEND(o);
    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);

-    if (memory_region_size(&backend->mr)) {
+    if (host_memory_backend_mr_inited(backend)) {
        error_setg(errp, "cannot change property value");
        return;
    }
@@ -96,7 +96,7 @@ static void file_memory_backend_set_share(Object *o, bool value, Error **errp)
    HostMemoryBackend *backend = MEMORY_BACKEND(o);
    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);

-    if (memory_region_size(&backend->mr)) {
+    if (host_memory_backend_mr_inited(backend)) {
        error_setg(errp, "cannot change property value");
        return;
    }
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -45,7 +45,7 @@ host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
    Error *local_err = NULL;
    uint64_t value;

-    if (memory_region_size(&backend->mr)) {
+    if (host_memory_backend_mr_inited(backend)) {
        error_setg(&local_err, "cannot change property value");
        goto out;
    }
@@ -146,7 +146,7 @@ static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
 {
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

-    if (!memory_region_size(&backend->mr)) {
+    if (!host_memory_backend_mr_inited(backend)) {
        backend->merge = value;
        return;
    }
@@ -172,7 +172,7 @@ static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
 {
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

-    if (!memory_region_size(&backend->mr)) {
+    if (!host_memory_backend_mr_inited(backend)) {
        backend->dump = value;
        return;
    }
@@ -208,7 +208,7 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value,
        }
    }

-    if (!memory_region_size(&backend->mr)) {
+    if (!host_memory_backend_mr_inited(backend)) {
        backend->prealloc = value;
        return;
    }
@@ -237,10 +237,19 @@ static void host_memory_backend_init(Object *obj)
    backend->prealloc = mem_prealloc;
 }

+bool host_memory_backend_mr_inited(HostMemoryBackend *backend)
+{
+    /*
+     * NOTE: We forbid zero-length memory backend, so here zero means
+     * "we haven't inited the backend memory region yet".
+     */
+    return memory_region_size(&backend->mr) != 0;
+}
+
 MemoryRegion *
 host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp)
 {
-    return memory_region_size(&backend->mr) ? &backend->mr : NULL;
+    return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL;
 }

 void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
--- a/block.c
+++ b/block.c
@@ -192,6 +192,43 @@ void path_combine(char *dest, int dest_size,
    }
 }

+bool bdrv_is_read_only(BlockDriverState *bs)
+{
+    return bs->read_only;
+}
+
+int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, Error **errp)
+{
+    /* Do not set read_only if copy_on_read is enabled */
+    if (bs->copy_on_read && read_only) {
+        error_setg(errp, "Can't set node '%s' to r/o with copy-on-read enabled",
+                   bdrv_get_device_or_node_name(bs));
+        return -EINVAL;
+    }
+
+    /* Do not clear read_only if it is prohibited */
+    if (!read_only && !(bs->open_flags & BDRV_O_ALLOW_RDWR)) {
+        error_setg(errp, "Node '%s' is read only",
+                   bdrv_get_device_or_node_name(bs));
+        return -EPERM;
+    }
+
+    return 0;
+}
+
+int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp)
+{
+    int ret = 0;
+
+    ret = bdrv_can_set_read_only(bs, read_only, errp);
+    if (ret < 0) {
+        return ret;
+    }
+
+    bs->read_only = read_only;
+    return 0;
+}
+
 void bdrv_get_full_backing_filename_from_filename(const char *backed,
                                                  const char *backing,
                                                  char *dest, size_t sz,
@@ -1157,6 +1194,13 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
    if (file != NULL) {
        filename = blk_bs(file)->filename;
    } else {
+        /*
+         * Caution: while qdict_get_try_str() is fine, getting
+         * non-string types would require more care.  When @options
+         * come from -blockdev or blockdev_add, its members are typed
+         * according to the QAPI schema, but when they come from
+         * -drive, they're all QString.
+         */
        filename = qdict_get_try_str(options, "filename");
    }

@@ -1324,6 +1368,13 @@ static int bdrv_fill_options(QDict **options, const char *filename,
    BlockDriver *drv = NULL;
    Error *local_err = NULL;

+    /*
+     * Caution: while qdict_get_try_str() is fine, getting non-string
+     * types would require more care.  When @options come from
+     * -blockdev or blockdev_add, its members are typed according to
+     * the QAPI schema, but when they come from -drive, they're all
+     * QString.
+     */
    drvname = qdict_get_try_str(*options, "driver");
    if (drvname) {
        drv = bdrv_find_format(drvname);
@@ -1358,6 +1409,7 @@ static int bdrv_fill_options(QDict **options, const char *filename,
    }

    /* Find the right block driver */
+    /* See cautionary note on accessing @options above */
    filename = qdict_get_try_str(*options, "filename");

    if (!drvname && protocol) {
@@ -1737,6 +1789,9 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
 {
    BlockDriverState *old_bs = child->bs;

+    if (old_bs && new_bs) {
+        assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
+    }
    if (old_bs) {
        if (old_bs->quiesce_counter && child->role->drained_end) {
            child->role->drained_end(child);
@@ -1837,6 +1892,7 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
    bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm);

    assert(parent_bs->drv);
+    assert(bdrv_get_aio_context(parent_bs) == bdrv_get_aio_context(child_bs));
    parent_bs->drv->bdrv_child_perm(parent_bs, NULL, child_role,
                                    perm, shared_perm, &perm, &shared_perm);

@@ -1987,6 +2043,13 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
    qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
    g_free(bdref_key_dot);

+    /*
+     * Caution: while qdict_get_try_str() is fine, getting non-string
+     * types would require more care.  When @parent_options come from
+     * -blockdev or blockdev_add, its members are typed according to
+     * the QAPI schema, but when they come from -drive, they're all
+     * QString.
+     */
    reference = qdict_get_try_str(parent_options, bdref_key);
    if (reference || qdict_haskey(options, "file.filename")) {
        backing_filename[0] = '\0';
@@ -2059,6 +2122,13 @@ bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key,
    qdict_extract_subqdict(options, &image_options, bdref_key_dot);
    g_free(bdref_key_dot);

+    /*
+     * Caution: while qdict_get_try_str() is fine, getting non-string
+     * types would require more care.  When @options come from
+     * -blockdev or blockdev_add, its members are typed according to
+     * the QAPI schema, but when they come from -drive, they're all
+     * QString.
+     */
    reference = qdict_get_try_str(options, bdref_key);
    if (!filename && !reference && !qdict_size(image_options)) {
        if (!allow_none) {
@@ -2274,9 +2344,13 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
        goto fail;
    }

-    /* Set the BDRV_O_RDWR and BDRV_O_ALLOW_RDWR flags.
-     * FIXME: we're parsing the QDict to avoid having to create a
-     * QemuOpts just for this, but neither option is optimal. */
+    /*
+     * Set the BDRV_O_RDWR and BDRV_O_ALLOW_RDWR flags.
+     * Caution: getting a boolean member of @options requires care.
+     * When @options come from -blockdev or blockdev_add, members are
+     * typed according to the QAPI schema, but when they come from
+     * -drive, they're all QString.
+     */
    if (g_strcmp0(qdict_get_try_str(options, BDRV_OPT_READ_ONLY), "on") &&
        !qdict_get_try_bool(options, BDRV_OPT_READ_ONLY, false)) {
        flags |= (BDRV_O_RDWR | BDRV_O_ALLOW_RDWR);
@@ -2298,6 +2372,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
    options = qdict_clone_shallow(options);

    /* Find the right image format driver */
+    /* See cautionary note on accessing @options above */
    drvname = qdict_get_try_str(options, "driver");
    if (drvname) {
        drv = bdrv_find_format(drvname);
@@ -2309,6 +2384,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,

    assert(drvname || !(flags & BDRV_O_PROTOCOL));

+    /* See cautionary note on accessing @options above */
    backing = qdict_get_try_str(options, "backing");
    if (backing && *backing == '\0') {
        flags |= BDRV_O_NO_BACKING;
@@ -2713,6 +2789,7 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
    BlockDriver *drv;
    QemuOpts *opts;
    const char *value;
+    bool read_only;

    assert(reopen_state != NULL);
    assert(reopen_state->bs->drv != NULL);
@@ -2741,12 +2818,13 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
        qdict_put(reopen_state->options, "driver", qstring_from_str(value));
    }

-    /* if we are to stay read-only, do not allow permission change
-     * to r/w */
-    if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
-        reopen_state->flags & BDRV_O_RDWR) {
-        error_setg(errp, "Node '%s' is read only",
-                   bdrv_get_device_or_node_name(reopen_state->bs));
+    /* If we are to stay read-only, do not allow permission change
+     * to r/w. Attempting to set to r/w may fail if either BDRV_O_ALLOW_RDWR is
+     * not set, or if the BDS still has copy_on_read enabled */
+    read_only = !(reopen_state->flags & BDRV_O_RDWR);
+    ret = bdrv_can_set_read_only(reopen_state->bs, read_only, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
        goto error;
    }

@@ -2787,6 +2865,13 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
        do {
            QString *new_obj = qobject_to_qstring(entry->value);
            const char *new = qstring_get_str(new_obj);
+            /*
+             * Caution: while qdict_get_try_str() is fine, getting
+             * non-string types would require more care.  When
+             * bs->options come from -blockdev or blockdev_add, its
+             * members are typed according to the QAPI schema, but
+             * when they come from -drive, they're all QString.
+             */
            const char *old = qdict_get_try_str(reopen_state->bs->options,
                                                entry->key);

@@ -3228,7 +3313,11 @@ int bdrv_truncate(BdrvChild *child, int64_t offset)
    BlockDriver *drv = bs->drv;
    int ret;

-    assert(child->perm & BLK_PERM_RESIZE);
+    /* FIXME: Some format block drivers use this function instead of implicitly
+     *        growing their file by writing beyond its end.
+     *        See bdrv_aligned_pwritev() for an explanation why we currently
+     *        cannot assert this permission in that case. */
+    // assert(child->perm & BLK_PERM_RESIZE);

    if (!drv)
        return -ENOMEDIUM;
@@ -3305,11 +3394,6 @@ void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
    *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
 }

-bool bdrv_is_read_only(BlockDriverState *bs)
-{
-    return bs->read_only;
-}
-
 bool bdrv_is_sg(BlockDriverState *bs)
 {
    return bs->sg;
@@ -4111,8 +4195,8 @@ bool bdrv_op_blocker_is_empty(BlockDriverState *bs)

 void bdrv_img_create(const char *filename, const char *fmt,
                     const char *base_filename, const char *base_fmt,
-                     char *options, uint64_t img_size, int flags,
-                     Error **errp, bool quiet)
+                     char *options, uint64_t img_size, int flags, bool quiet,
+                     Error **errp)
 {
    QemuOptsList *create_opts = NULL;
    QemuOpts *opts = NULL;
@@ -4278,6 +4362,11 @@ AioContext *bdrv_get_aio_context(BlockDriverState *bs)
    return bs->aio_context;
 }

+void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co)
+{
+    aio_co_enter(bdrv_get_aio_context(bs), co);
+}
+
 static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban)
 {
    QLIST_REMOVE(ban, list);
@@ -4350,11 +4439,12 @@ void bdrv_attach_aio_context(BlockDriverState *bs,

 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
 {
-    AioContext *ctx;
+    AioContext *ctx = bdrv_get_aio_context(bs);

+    aio_disable_external(ctx);
+    bdrv_parent_drained_begin(bs);
    bdrv_drain(bs); /* ensure there are no in-flight requests */

-    ctx = bdrv_get_aio_context(bs);
    while (aio_poll(ctx, false)) {
        /* wait for all bottom halves to execute */
    }
@@ -4366,6 +4456,8 @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
     */
    aio_context_acquire(new_context);
    bdrv_attach_aio_context(bs, new_context);
+    bdrv_parent_drained_end(bs);
+    aio_enable_external(ctx);
    aio_context_release(new_context);
 }

--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -19,6 +19,7 @@ block-obj-$(CONFIG_LIBNFS) += nfs.o
 block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
+block-obj-$(CONFIG_VXHS) += vxhs.o
 block-obj-$(CONFIG_LIBSSH2) += ssh.o
 block-obj-y += accounting.o dirty-bitmap.o
 block-obj-y += write-threshold.o
@@ -38,6 +39,7 @@ rbd.o-cflags       := $(RBD_CFLAGS)
 rbd.o-libs         := $(RBD_LIBS)
 gluster.o-cflags   := $(GLUSTERFS_CFLAGS)
 gluster.o-libs     := $(GLUSTERFS_LIBS)
+vxhs.o-libs        := $(VXHS_LIBS)
 ssh.o-cflags       := $(LIBSSH2_CFLAGS)
 ssh.o-libs         := $(LIBSSH2_LIBS)
 block-obj-$(if $(CONFIG_BZIP2),m,n) += dmg-bz2.o
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -61,6 +61,7 @@ struct BlockBackend {

    uint64_t perm;
    uint64_t shared_perm;
+    bool disable_perm;

    bool allow_write_beyond_eof;

@@ -230,6 +231,9 @@ static void blk_delete(BlockBackend *blk)
    assert(!blk->refcnt);
    assert(!blk->name);
    assert(!blk->dev);
+    if (blk->public.throttle_state) {
+        blk_io_limits_disable(blk);
+    }
    if (blk->root) {
        blk_remove_bs(blk);
    }
@@ -578,7 +582,7 @@ int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
 {
    int ret;

-    if (blk->root) {
+    if (blk->root && !blk->disable_perm) {
        ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp);
        if (ret < 0) {
            return ret;
@@ -597,15 +601,52 @@ void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
    *shared_perm = blk->shared_perm;
 }

+/*
+ * Notifies the user of all BlockBackends that migration has completed. qdev
+ * devices can tighten their permissions in response (specifically revoke
+ * shared write permissions that we needed for storage migration).
+ *
+ * If an error is returned, the VM cannot be allowed to be resumed.
+ */
+void blk_resume_after_migration(Error **errp)
+{
+    BlockBackend *blk;
+    Error *local_err = NULL;
+
+    for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) {
+        if (!blk->disable_perm) {
+            continue;
+        }
+
+        blk->disable_perm = false;
+
+        blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            blk->disable_perm = true;
+            return;
+        }
+    }
+}
+
 static int blk_do_attach_dev(BlockBackend *blk, void *dev)
 {
    if (blk->dev) {
        return -EBUSY;
    }
+
+    /* While migration is still incoming, we don't need to apply the
+     * permissions of guest device BlockBackends. We might still have a block
+     * job or NBD server writing to the image for storage migration. */
+    if (runstate_check(RUN_STATE_INMIGRATE)) {
+        blk->disable_perm = true;
+    }
+
    blk_ref(blk);
    blk->dev = dev;
    blk->legacy_dev = false;
    blk_iostatus_reset(blk);
+
    return 0;
 }

@@ -1007,7 +1048,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
        co_entry(&rwco);
    } else {
        Coroutine *co = qemu_coroutine_create(co_entry, &rwco);
-        qemu_coroutine_enter(co);
+        bdrv_coroutine_enter(blk_bs(blk), co);
        BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
    }

@@ -1114,7 +1155,7 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
    acb->has_returned = false;

    co = qemu_coroutine_create(co_entry, acb);
-    qemu_coroutine_enter(co);
+    bdrv_coroutine_enter(blk_bs(blk), co);

    acb->has_returned = true;
    if (acb->rwco.ret != NOT_DONE) {
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -110,7 +110,10 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
        return -EINVAL;
    }

-    bs->read_only = true; /* no write support yet */
+    ret = bdrv_set_read_only(bs, true, errp); /* no write support yet */
+    if (ret < 0) {
+        return ret;
+    }

    ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs));
    if (ret < 0) {
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -72,7 +72,10 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
        return -EINVAL;
    }

-    bs->read_only = true;
+    ret = bdrv_set_read_only(bs, true, errp);
+    if (ret < 0) {
+        return ret;
+    }

    /* read header */
    ret = bdrv_pread(bs->file, 128, &s->block_size, 4);
--- a/block/commit.c
+++ b/block/commit.c
@@ -335,6 +335,8 @@ void commit_start(const char *job_id, BlockDriverState *bs,
    if (commit_top_bs == NULL) {
        goto fail;
    }
+    commit_top_bs->total_sectors = top->total_sectors;
+    bdrv_set_aio_context(commit_top_bs, bdrv_get_aio_context(top));

    bdrv_set_backing_hd(commit_top_bs, top, &local_err);
    if (local_err) {
@@ -482,6 +484,7 @@ int bdrv_commit(BlockDriverState *bs)
        error_report_err(local_err);
        goto ro_cleanup;
    }
+    bdrv_set_aio_context(commit_top_bs, bdrv_get_aio_context(backing_file_bs));

    bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort);
    bdrv_set_backing_hd(bs, commit_top_bs, &error_abort);
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -56,11 +56,11 @@ static int block_crypto_probe_generic(QCryptoBlockFormat format,


 static ssize_t block_crypto_read_func(QCryptoBlock *block,
+                                      void *opaque,
                                      size_t offset,
                                      uint8_t *buf,
                                      size_t buflen,
-                                      Error **errp,
-                                      void *opaque)
+                                      Error **errp)
 {
    BlockDriverState *bs = opaque;
    ssize_t ret;
@@ -83,11 +83,11 @@ struct BlockCryptoCreateData {


 static ssize_t block_crypto_write_func(QCryptoBlock *block,
+                                       void *opaque,
                                       size_t offset,
                                       const uint8_t *buf,
                                       size_t buflen,
-                                       Error **errp,
-                                       void *opaque)
+                                       Error **errp)
 {
    struct BlockCryptoCreateData *data = opaque;
    ssize_t ret;
@@ -102,9 +102,9 @@ static ssize_t block_crypto_write_func(QCryptoBlock *block,


 static ssize_t block_crypto_init_func(QCryptoBlock *block,
+                                      void *opaque,
                                      size_t headerlen,
-                                      Error **errp,
-                                      void *opaque)
+                                      Error **errp)
 {
    struct BlockCryptoCreateData *data = opaque;
    int ret;
--- a/block/curl.c
+++ b/block/curl.c
@@ -659,6 +659,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
    const char *cookie;
    double d;
    const char *secretid;
+    const char *protocol_delimiter;

    static int inited = 0;

@@ -700,6 +701,15 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
        goto out_noclean;
    }

+    if (!strstart(file, bs->drv->protocol_name, &protocol_delimiter) ||
+        !strstart(protocol_delimiter, "://", NULL))
+    {
+        error_setg(errp, "%s curl driver cannot handle the URL '%s' (does not "
+                   "start with '%s://')", bs->drv->protocol_name, file,
+                   bs->drv->protocol_name);
+        goto out_noclean;
+    }
+
    s->username = g_strdup(qemu_opt_get(opts, CURL_BLOCK_OPT_USERNAME));
    secretid = qemu_opt_get(opts, CURL_BLOCK_OPT_PASSWORD_SECRET);

--- a/block/dmg.c
+++ b/block/dmg.c
@@ -419,8 +419,12 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
        return -EINVAL;
    }

+    ret = bdrv_set_read_only(bs, true, errp);
+    if (ret < 0) {
+        return ret;
+    }
+
    block_module_load_one("dmg-bz2");
-    bs->read_only = true;

    s->n_chunks = 0;
    s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL;
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -2193,6 +2193,12 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
    int ret;

 #if defined(__APPLE__) && defined(__MACH__)
+    /*
+     * Caution: while qdict_get_str() is fine, getting non-string types
+     * would require more care.  When @options come from -blockdev or
+     * blockdev_add, its members are typed according to the QAPI
+     * schema, but when they come from -drive, they're all QString.
+     */
    const char *filename = qdict_get_str(options, "filename");
    char bsd_path[MAXPATHLEN] = "";
    bool error_occurred = false;
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -412,10 +412,12 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
    glfs_set_preopened(gconf->volume, glfs);

    for (server = gconf->server; server; server = server->next) {
-        if (server->value->type  == SOCKET_ADDRESS_FLAT_TYPE_UNIX) {
+        switch (server->value->type) {
+        case SOCKET_ADDRESS_FLAT_TYPE_UNIX:
            ret = glfs_set_volfile_server(glfs, "unix",
                                   server->value->u.q_unix.path, 0);
-        } else {
+            break;
+        case SOCKET_ADDRESS_FLAT_TYPE_INET:
            if (parse_uint_full(server->value->u.inet.port, &port, 10) < 0 ||
                port > 65535) {
                error_setg(errp, "'%s' is not a valid port number",
@@ -426,6 +428,11 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
            ret = glfs_set_volfile_server(glfs, "tcp",
                                   server->value->u.inet.host,
                                   (int)port);
+            break;
+        case SOCKET_ADDRESS_FLAT_TYPE_VSOCK:
+        case SOCKET_ADDRESS_FLAT_TYPE_FD:
+        default:
+            abort();
        }

        if (ret < 0) {
@@ -487,7 +494,7 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
    char *str = NULL;
    const char *ptr;
    size_t num_servers;
-    int i;
+    int i, type;

    /* create opts info from runtime_json_opts list */
    opts = qemu_opts_create(&runtime_json_opts, NULL, 0, &error_abort);
@@ -539,16 +546,17 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
        if (!strcmp(ptr, "tcp")) {
            ptr = "inet";       /* accept legacy "tcp" */
        }
-        gsconf->type = qapi_enum_parse(SocketAddressFlatType_lookup, ptr,
-                                       SOCKET_ADDRESS_FLAT_TYPE__MAX, -1,
-                                       &local_err);
-        if (local_err) {
-            error_append_hint(&local_err,
-                              "Parameter '%s' may be 'inet' or 'unix'\n",
-                              GLUSTER_OPT_TYPE);
+        type = qapi_enum_parse(SocketAddressFlatType_lookup, ptr,
+                               SOCKET_ADDRESS_FLAT_TYPE__MAX, -1, NULL);
+        if (type != SOCKET_ADDRESS_FLAT_TYPE_INET
+            && type != SOCKET_ADDRESS_FLAT_TYPE_UNIX) {
+            error_setg(&local_err,
+                       "Parameter '%s' may be 'inet' or 'unix'",
+                       GLUSTER_OPT_TYPE);
            error_append_hint(&local_err, GERR_INDEX_HINT, i);
            goto out;
        }
+        gsconf->type = type;
        qemu_opts_del(opts);

        if (gsconf->type == SOCKET_ADDRESS_FLAT_TYPE_INET) {
--- a/block/io.c
+++ b/block/io.c
@@ -44,7 +44,7 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque);
 static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
    int64_t offset, int count, BdrvRequestFlags flags);

-static void bdrv_parent_drained_begin(BlockDriverState *bs)
+void bdrv_parent_drained_begin(BlockDriverState *bs)
 {
    BdrvChild *c;

@@ -55,7 +55,7 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs)
    }
 }

-static void bdrv_parent_drained_end(BlockDriverState *bs)
+void bdrv_parent_drained_end(BlockDriverState *bs)
 {
    BdrvChild *c;

@@ -158,7 +158,7 @@ bool bdrv_requests_pending(BlockDriverState *bs)

 static bool bdrv_drain_recurse(BlockDriverState *bs)
 {
-    BdrvChild *child;
+    BdrvChild *child, *tmp;
    bool waited;

    waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);
@@ -167,8 +167,25 @@ static bool bdrv_drain_recurse(BlockDriverState *bs)
        bs->drv->bdrv_drain(bs);
    }

-    QLIST_FOREACH(child, &bs->children, next) {
-        waited |= bdrv_drain_recurse(child->bs);
+    QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
+        BlockDriverState *bs = child->bs;
+        bool in_main_loop =
+            qemu_get_current_aio_context() == qemu_get_aio_context();
+        assert(bs->refcnt > 0);
+        if (in_main_loop) {
+            /* In case the recursive bdrv_drain_recurse processes a
+             * block_job_defer_to_main_loop BH and modifies the graph,
+             * let's hold a reference to bs until we are done.
+             *
+             * IOThread doesn't have such a BH, and it is not safe to call
+             * bdrv_unref without BQL, so skip doing it there.
+             */
+            bdrv_ref(bs);
+        }
+        waited |= bdrv_drain_recurse(bs);
+        if (in_main_loop) {
+            bdrv_unref(bs);
+        }
    }

    return waited;
@@ -616,7 +633,7 @@ static int bdrv_prwv_co(BdrvChild *child, int64_t offset,
        bdrv_rw_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_rw_co_entry, &rwco);
-        qemu_coroutine_enter(co);
+        bdrv_coroutine_enter(child->bs, co);
        BDRV_POLL_WHILE(child->bs, rwco.ret == NOT_DONE);
    }
    return rwco.ret;
@@ -945,7 +962,14 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
    size_t skip_bytes;
    int ret;

-    assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
+    /* FIXME We cannot require callers to have write permissions when all they
+     * are doing is a read request. If we did things right, write permissions
+     * would be obtained anyway, but internally by the copy-on-read code. As
+     * long as it is implemented here rather than in a separat filter driver,
+     * the copy-on-read code doesn't have its own BdrvChild, however, for which
+     * it could request permissions. Therefore we have to bypass the permission
+     * system for the moment. */
+    // assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));

    /* Cover entire cluster so no additional backing file I/O is required when
     * allocating cluster in the image file.
@@ -1338,8 +1362,16 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
    assert(!waited || !req->serialising);
    assert(req->overlap_offset <= offset);
    assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
-    assert(child->perm & BLK_PERM_WRITE);
-    assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);
+    /* FIXME: Block migration uses the BlockBackend of the guest device at a
+     *        point when it has not yet taken write permissions. This will be
+     *        fixed by a future patch, but for now we have to bypass this
+     *        assertion for block migration to work. */
+    // assert(child->perm & BLK_PERM_WRITE);
+    /* FIXME: Because of the above, we also cannot guarantee that all format
+     *        BDS take the BLK_PERM_RESIZE permission on their file BDS, since
+     *        they are not obligated to do so if they do not have any parent
+     *        that has taken the permission to write to them. */
+    // assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);

    ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);

@@ -1873,7 +1905,7 @@ int64_t bdrv_get_block_status_above(BlockDriverState *bs,
    } else {
        co = qemu_coroutine_create(bdrv_get_block_status_above_co_entry,
                                   &data);
-        qemu_coroutine_enter(co);
+        bdrv_coroutine_enter(bs, co);
        BDRV_POLL_WHILE(bs, !data.done);
    }
    return data.ret;
@@ -1999,7 +2031,7 @@ bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
        };
        Coroutine *co = qemu_coroutine_create(bdrv_co_rw_vmstate_entry, &data);

-        qemu_coroutine_enter(co);
+        bdrv_coroutine_enter(bs, co);
        while (data.ret == -EINPROGRESS) {
            aio_poll(bdrv_get_aio_context(bs), true);
        }
@@ -2216,7 +2248,7 @@ static BlockAIOCB *bdrv_co_aio_prw_vector(BdrvChild *child,
    acb->is_write = is_write;

    co = qemu_coroutine_create(bdrv_co_do_rw, acb);
-    qemu_coroutine_enter(co);
+    bdrv_coroutine_enter(child->bs, co);

    bdrv_co_maybe_schedule_bh(acb);
    return &acb->common;
@@ -2247,7 +2279,7 @@ BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
    acb->req.error = -EINPROGRESS;

    co = qemu_coroutine_create(bdrv_aio_flush_co_entry, acb);
-    qemu_coroutine_enter(co);
+    bdrv_coroutine_enter(bs, co);

    bdrv_co_maybe_schedule_bh(acb);
    return &acb->common;
@@ -2271,16 +2303,17 @@ static void coroutine_fn bdrv_flush_co_entry(void *opaque)

 int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
 {
-    int ret;
-
-    if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
-        bdrv_is_sg(bs)) {
-        return 0;
-    }
+    int current_gen;
+    int ret = 0;

    bdrv_inc_in_flight(bs);

-    int current_gen = bs->write_gen;
+    if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
+        bdrv_is_sg(bs)) {
+        goto early_exit;
+    }
+
+    current_gen = bs->write_gen;

    /* Wait until any previous flushes are completed */
    while (bs->active_flush_req) {
@@ -2363,6 +2396,7 @@ out:
    /* Return value is ignored - it's ok if wait queue is empty */
    qemu_co_queue_next(&bs->flush_queue);

+early_exit:
    bdrv_dec_in_flight(bs);
    return ret;
 }
@@ -2380,7 +2414,7 @@ int bdrv_flush(BlockDriverState *bs)
        bdrv_flush_co_entry(&flush_co);
    } else {
        co = qemu_coroutine_create(bdrv_flush_co_entry, &flush_co);
-        qemu_coroutine_enter(co);
+        bdrv_coroutine_enter(bs, co);
        BDRV_POLL_WHILE(bs, flush_co.ret == NOT_DONE);
    }

@@ -2527,7 +2561,7 @@ int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int count)
        bdrv_pdiscard_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_pdiscard_co_entry, &rwco);
-        qemu_coroutine_enter(co);
+        bdrv_coroutine_enter(bs, co);
        BDRV_POLL_WHILE(bs, rwco.ret == NOT_DONE);
    }

--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -103,7 +103,6 @@ typedef struct IscsiTask {

 typedef struct IscsiAIOCB {
    BlockAIOCB common;
-    QEMUIOVector *qiov;
    QEMUBH *bh;
    IscsiLun *iscsilun;
    struct scsi_task *task;
@@ -2093,6 +2092,7 @@ static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp)
    BlockDriverState *bs;
    IscsiLun *iscsilun = NULL;
    QDict *bs_options;
+    Error *local_err = NULL;

    bs = bdrv_new();

@@ -2103,8 +2103,13 @@ static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp)
    iscsilun = bs->opaque;

    bs_options = qdict_new();
-    qdict_put(bs_options, "filename", qstring_from_str(filename));
-    ret = iscsi_open(bs, bs_options, 0, NULL);
+    iscsi_parse_filename(filename, bs_options, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
+    } else {
+        ret = iscsi_open(bs, bs_options, 0, NULL);
+    }
    QDECREF(bs_options);

    if (ret != 0) {
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1112,10 +1112,11 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
                             BlockdevOnError on_target_error,
                             bool unmap,
                             BlockCompletionFunc *cb,
-                             void *opaque, Error **errp,
+                             void *opaque,
                             const BlockJobDriver *driver,
                             bool is_none_mode, BlockDriverState *base,
-                             bool auto_complete, const char *filter_node_name)
+                             bool auto_complete, const char *filter_node_name,
+                             Error **errp)
 {
    MirrorBlockJob *s;
    BlockDriverState *mirror_top_bs;
@@ -1148,9 +1149,10 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
        return;
    }
    mirror_top_bs->total_sectors = bs->total_sectors;
+    bdrv_set_aio_context(mirror_top_bs, bdrv_get_aio_context(bs));

    /* bdrv_append takes ownership of the mirror_top_bs reference, need to keep
-     * it alive until block_job_create() even if bs has no parent. */
+     * it alive until block_job_create() succeeds even if bs has no parent. */
    bdrv_ref(mirror_top_bs);
    bdrv_drained_begin(bs);
    bdrv_append(mirror_top_bs, bs, &local_err);
@@ -1168,10 +1170,12 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
                         BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
                         BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD, speed,
                         creation_flags, cb, opaque, errp);
-    bdrv_unref(mirror_top_bs);
    if (!s) {
        goto fail;
    }
+    /* The block job now has a reference to this node */
+    bdrv_unref(mirror_top_bs);
+
    s->source = bs;
    s->mirror_top_bs = mirror_top_bs;

@@ -1242,6 +1246,10 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,

 fail:
    if (s) {
+        /* Make sure this BDS does not go away until we have completed the graph
+         * changes below */
+        bdrv_ref(mirror_top_bs);
+
        g_free(s->replaces);
        blk_unref(s->target);
        block_job_unref(&s->common);
@@ -1250,6 +1258,8 @@ fail:
    bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL,
                            &error_abort);
    bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort);
+
+    bdrv_unref(mirror_top_bs);
 }

 void mirror_start(const char *job_id, BlockDriverState *bs,
@@ -1271,17 +1281,17 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
    base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
    mirror_start_job(job_id, bs, BLOCK_JOB_DEFAULT, target, replaces,
                     speed, granularity, buf_size, backing_mode,
-                     on_source_error, on_target_error, unmap, NULL, NULL, errp,
+                     on_source_error, on_target_error, unmap, NULL, NULL,
                     &mirror_job_driver, is_none_mode, base, false,
-                     filter_node_name);
+                     filter_node_name, errp);
 }

 void commit_active_start(const char *job_id, BlockDriverState *bs,
                         BlockDriverState *base, int creation_flags,
                         int64_t speed, BlockdevOnError on_error,
                         const char *filter_node_name,
-                         BlockCompletionFunc *cb, void *opaque, Error **errp,
-                         bool auto_complete)
+                         BlockCompletionFunc *cb, void *opaque,
+                         bool auto_complete, Error **errp)
 {
    int orig_base_flags;
    Error *local_err = NULL;
@@ -1294,9 +1304,9 @@ void commit_active_start(const char *job_id, BlockDriverState *bs,

    mirror_start_job(job_id, bs, creation_flags, base, NULL, speed, 0, 0,
                     MIRROR_LEAVE_BACKING_CHAIN,
-                     on_error, on_error, true, cb, opaque, &local_err,
+                     on_error, on_error, true, cb, opaque,
                     &commit_active_job_driver, false, base, auto_complete,
-                     filter_node_name);
+                     filter_node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto error_restore_flags;
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -47,7 +47,7 @@ typedef struct BDRVNBDState {
    NBDClientSession client;

    /* For nbd_refresh_filename() */
-    SocketAddress *saddr;
+    SocketAddressFlat *saddr;
    char *export, *tlscredsid;
 } BDRVNBDState;

@@ -95,7 +95,7 @@ static int nbd_parse_uri(const char *filename, QDict *options)
            goto out;
        }
        qdict_put(options, "server.type", qstring_from_str("unix"));
-        qdict_put(options, "server.data.path",
+        qdict_put(options, "server.path",
                  qstring_from_str(qp->p[0].value));
    } else {
        QString *host;
@@ -116,10 +116,10 @@ static int nbd_parse_uri(const char *filename, QDict *options)
        }

        qdict_put(options, "server.type", qstring_from_str("inet"));
-        qdict_put(options, "server.data.host", host);
+        qdict_put(options, "server.host", host);

        port_str = g_strdup_printf("%d", uri->port ?: NBD_DEFAULT_PORT);
-        qdict_put(options, "server.data.port", qstring_from_str(port_str));
+        qdict_put(options, "server.port", qstring_from_str(port_str));
        g_free(port_str);
    }

@@ -197,7 +197,7 @@ static void nbd_parse_filename(const char *filename, QDict *options,
    /* are we a UNIX or TCP socket? */
    if (strstart(host_spec, "unix:", &unixpath)) {
        qdict_put(options, "server.type", qstring_from_str("unix"));
-        qdict_put(options, "server.data.path", qstring_from_str(unixpath));
+        qdict_put(options, "server.path", qstring_from_str(unixpath));
    } else {
        InetSocketAddress *addr = NULL;

@@ -207,8 +207,8 @@ static void nbd_parse_filename(const char *filename, QDict *options,
        }

        qdict_put(options, "server.type", qstring_from_str("inet"));
-        qdict_put(options, "server.data.host", qstring_from_str(addr->host));
-        qdict_put(options, "server.data.port", qstring_from_str(addr->port));
+        qdict_put(options, "server.host", qstring_from_str(addr->host));
+        qdict_put(options, "server.port", qstring_from_str(addr->port));
        qapi_free_InetSocketAddress(addr);
    }

@@ -248,20 +248,21 @@ static bool nbd_process_legacy_socket_options(QDict *output_options,
        }

        qdict_put(output_options, "server.type", qstring_from_str("unix"));
-        qdict_put(output_options, "server.data.path", qstring_from_str(path));
+        qdict_put(output_options, "server.path", qstring_from_str(path));
    } else if (host) {
        qdict_put(output_options, "server.type", qstring_from_str("inet"));
-        qdict_put(output_options, "server.data.host", qstring_from_str(host));
-        qdict_put(output_options, "server.data.port",
+        qdict_put(output_options, "server.host", qstring_from_str(host));
+        qdict_put(output_options, "server.port",
                  qstring_from_str(port ?: stringify(NBD_DEFAULT_PORT)));
    }

    return true;
 }

-static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, Error **errp)
+static SocketAddressFlat *nbd_config(BDRVNBDState *s, QDict *options,
+                                     Error **errp)
 {
-    SocketAddress *saddr = NULL;
+    SocketAddressFlat *saddr = NULL;
    QDict *addr = NULL;
    QObject *crumpled_addr = NULL;
    Visitor *iv = NULL;
@@ -278,8 +279,16 @@ static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, Error **errp)
        goto done;
    }

+    /*
+     * FIXME .numeric, .to, .ipv4 or .ipv6 don't work with -drive
+     * server.type=inet.  .to doesn't matter, it's ignored anyway.
+     * That's because when @options come from -blockdev or
+     * blockdev_add, members are typed according to the QAPI schema,
+     * but when they come from -drive, they're all QString.  The
+     * visitor expects the former.
+     */
    iv = qobject_input_visitor_new(crumpled_addr);
-    visit_type_SocketAddress(iv, NULL, &saddr, &local_err);
+    visit_type_SocketAddressFlat(iv, NULL, &saddr, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto done;
@@ -298,9 +307,10 @@ NBDClientSession *nbd_get_client_session(BlockDriverState *bs)
    return &s->client;
 }

-static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
+static QIOChannelSocket *nbd_establish_connection(SocketAddressFlat *saddr_flat,
                                                  Error **errp)
 {
+    SocketAddress *saddr = socket_address_crumple(saddr_flat);
    QIOChannelSocket *sioc;
    Error *local_err = NULL;

@@ -310,7 +320,9 @@ static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
    qio_channel_socket_connect_sync(sioc,
                                    saddr,
                                    &local_err);
+    qapi_free_SocketAddress(saddr);
    if (local_err) {
+        object_unref(OBJECT(sioc));
        error_propagate(errp, local_err);
        return NULL;
    }
@@ -401,7 +413,7 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
        goto error;
    }

-    /* Translate @host, @port, and @path to a SocketAddress */
+    /* Translate @host, @port, and @path to a SocketAddressFlat */
    if (!nbd_process_legacy_socket_options(options, opts, errp)) {
        goto error;
    }
@@ -421,11 +433,12 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
            goto error;
        }

-        if (s->saddr->type != SOCKET_ADDRESS_KIND_INET) {
+        /* TODO SOCKET_ADDRESS_KIND_FD where fd has AF_INET or AF_INET6 */
+        if (s->saddr->type != SOCKET_ADDRESS_FLAT_TYPE_INET) {
            error_setg(errp, "TLS only supported over IP sockets");
            goto error;
        }
-        hostname = s->saddr->u.inet.data->host;
+        hostname = s->saddr->u.inet.host;
    }

    /* establish TCP connection, return error if it fails
@@ -448,7 +461,7 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
        object_unref(OBJECT(tlscreds));
    }
    if (ret < 0) {
-        qapi_free_SocketAddress(s->saddr);
+        qapi_free_SocketAddressFlat(s->saddr);
        g_free(s->export);
        g_free(s->tlscredsid);
    }
@@ -474,7 +487,7 @@ static void nbd_close(BlockDriverState *bs)

    nbd_client_close(bs);

-    qapi_free_SocketAddress(s->saddr);
+    qapi_free_SocketAddressFlat(s->saddr);
    g_free(s->export);
    g_free(s->tlscredsid);
 }
@@ -505,15 +518,15 @@ static void nbd_refresh_filename(BlockDriverState *bs, QDict *options)
    Visitor *ov;
    const char *host = NULL, *port = NULL, *path = NULL;

-    if (s->saddr->type == SOCKET_ADDRESS_KIND_INET) {
-        const InetSocketAddress *inet = s->saddr->u.inet.data;
+    if (s->saddr->type == SOCKET_ADDRESS_FLAT_TYPE_INET) {
+        const InetSocketAddress *inet = &s->saddr->u.inet;
        if (!inet->has_ipv4 && !inet->has_ipv6 && !inet->has_to) {
            host = inet->host;
            port = inet->port;
        }
-    } else if (s->saddr->type == SOCKET_ADDRESS_KIND_UNIX) {
-        path = s->saddr->u.q_unix.data->path;
-    }
+    } else if (s->saddr->type == SOCKET_ADDRESS_FLAT_TYPE_UNIX) {
+        path = s->saddr->u.q_unix.path;
+    } /* else can't represent as pseudo-filename */

    qdict_put(opts, "driver", qstring_from_str("nbd"));

@@ -532,7 +545,7 @@ static void nbd_refresh_filename(BlockDriverState *bs, QDict *options)
    }

    ov = qobject_output_visitor_new(&saddr_qdict);
-    visit_type_SocketAddress(ov, NULL, &s->saddr, &error_abort);
+    visit_type_SocketAddressFlat(ov, NULL, &s->saddr, &error_abort);
    visit_complete(ov, &saddr_qdict);
    visit_free(ov);
    qdict_put_obj(opts, "server", saddr_qdict);
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -474,6 +474,13 @@ static NFSServer *nfs_config(QDict *options, Error **errp)
        goto out;
    }

+    /*
+     * Caution: this works only because all scalar members of
+     * NFSServer are QString in @crumpled_addr.  The visitor expects
+     * @crumpled_addr to be typed according to the QAPI schema.  It
+     * is when @options come from -blockdev or blockdev_add.  But when
+     * they come from -drive, they're all QString.
+     */
    iv = qobject_input_visitor_new(crumpled_addr);
    visit_type_NFSServer(iv, NULL, &server, &local_error);
    if (local_error) {
@@ -490,7 +497,7 @@ out:


 static int64_t nfs_client_open(NFSClient *client, QDict *options,
-                               int flags, Error **errp, int open_flags)
+                               int flags, int open_flags, Error **errp)
 {
    int ret = -EINVAL;
    QemuOpts *opts = NULL;
@@ -656,7 +663,7 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,

    ret = nfs_client_open(client, options,
                          (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
-                          errp, bs->open_flags);
+                          bs->open_flags, errp);
    if (ret < 0) {
        return ret;
    }
@@ -698,7 +705,7 @@ static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp)
        goto out;
    }

-    ret = nfs_client_open(client, options, O_CREAT, errp, 0);
+    ret = nfs_client_open(client, options, O_CREAT, 0, errp);
    if (ret < 0) {
        goto out;
    }
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -192,8 +192,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
                                 int nb_sectors, int *pnum)
 {
    BDRVParallelsState *s = bs->opaque;
-    uint32_t idx, to_allocate, i;
-    int64_t pos, space;
+    int64_t pos, space, idx, to_allocate, i;

    pos = block_status(s, sector_num, nb_sectors, pnum);
    if (pos > 0) {
@@ -201,11 +200,19 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
    }

    idx = sector_num / s->tracks;
-    if (idx >= s->bat_size) {
-        return -EINVAL;
-    }
-
    to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx;
+
+    /* This function is called only by parallels_co_writev(), which will never
+     * pass a sector_num at or beyond the end of the image (because the block
+     * layer never passes such a sector_num to that function). Therefore, idx
+     * is always below s->bat_size.
+     * block_status() will limit *pnum so that sector_num + *pnum will not
+     * exceed the image end. Therefore, idx + to_allocate cannot exceed
+     * s->bat_size.
+     * Note that s->bat_size is an unsigned int, therefore idx + to_allocate
+     * will always fit into a uint32_t. */
+    assert(idx < s->bat_size && idx + to_allocate <= s->bat_size);
+
    space = to_allocate * s->tracks;
    if (s->data_end + space > bdrv_getlength(bs->file->bs) >> BDRV_SECTOR_BITS) {
        int ret;
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1519,12 +1519,10 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,

    end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS);

-    /* Round start up and end down */
-    offset = align_offset(offset, s->cluster_size);
-    end_offset = start_of_cluster(s, end_offset);
-
-    if (offset > end_offset) {
-        return 0;
+    /* The caller must cluster-align start; round end down except at EOF */
+    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
+    if (end_offset != bs->total_sectors * BDRV_SECTOR_SIZE) {
+        end_offset = start_of_cluster(s, end_offset);
    }

    nb_clusters = size_to_clusters(s, end_offset - offset);
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -20,6 +20,7 @@
 #include "crypto/secret.h"
 #include "qemu/cutils.h"
 #include "qapi/qmp/qstring.h"
+#include "qapi/qmp/qjson.h"

 /*
 * When specifying the image filename use:
@@ -93,7 +94,7 @@ typedef struct BDRVRBDState {
    rados_t cluster;
    rados_ioctx_t io_ctx;
    rbd_image_t image;
-    char *name;
+    char *image_name;
    char *snap;
 } BDRVRBDState;

@@ -135,18 +136,16 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options,
                                    Error **errp)
 {
    const char *start;
-    char *p, *buf, *keypairs;
+    char *p, *buf;
+    QList *keypairs = NULL;
    char *found_str;
-    size_t max_keypair_size;

    if (!strstart(filename, "rbd:", &start)) {
        error_setg(errp, "File name must start with 'rbd:'");
        return;
    }

-    max_keypair_size = strlen(start) + 1;
    buf = g_strdup(start);
-    keypairs = g_malloc0(max_keypair_size);
    p = buf;

    found_str = qemu_rbd_next_tok(p, '/', &p);
@@ -194,33 +193,30 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options,
        } else if (!strcmp(name, "id")) {
            qdict_put(options, "user" , qstring_from_str(value));
        } else {
-            /* FIXME: This is pretty ugly, and not the right way to do this.
-             *        These should be contained in a structure, and then
-             *        passed explicitly as individual key/value pairs to
-             *        rados.  Consider this legacy code that needs to be
-             *        updated. */
-            char *tmp = g_malloc0(max_keypair_size);
-            /* only use a delimiter if it is not the first keypair found */
-            /* These are sets of unknown key/value pairs we'll pass along
-             * to ceph */
-            if (keypairs[0]) {
-                snprintf(tmp, max_keypair_size, ":%s=%s", name, value);
-                pstrcat(keypairs, max_keypair_size, tmp);
-            } else {
-                snprintf(keypairs, max_keypair_size, "%s=%s", name, value);
+            /*
+             * We pass these internally to qemu_rbd_set_keypairs(), so
+             * we can get away with the simpler list of [ "key1",
+             * "value1", "key2", "value2" ] rather than a raw dict
+             * { "key1": "value1", "key2": "value2" } where we can't
+             * guarantee order, or even a more correct but complex
+             * [ { "key1": "value1" }, { "key2": "value2" } ]
+             */
+            if (!keypairs) {
+                keypairs = qlist_new();
            }
-            g_free(tmp);
+            qlist_append(keypairs, qstring_from_str(name));
+            qlist_append(keypairs, qstring_from_str(value));
        }
    }

-    if (keypairs[0]) {
-        qdict_put(options, "=keyvalue-pairs", qstring_from_str(keypairs));
+    if (keypairs) {
+        qdict_put(options, "=keyvalue-pairs",
+                  qobject_to_json(QOBJECT(keypairs)));
    }

-
 done:
    g_free(buf);
-    g_free(keypairs);
+    QDECREF(keypairs);
    return;
 }

@@ -244,36 +240,41 @@ static int qemu_rbd_set_auth(rados_t cluster, const char *secretid,
    return 0;
 }

-static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs,
+static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs_json,
                                 Error **errp)
 {
-    char *p, *buf;
-    char *name;
-    char *value;
+    QList *keypairs;
+    QString *name;
+    QString *value;
+    const char *key;
+    size_t remaining;
    int ret = 0;

-    buf = g_strdup(keypairs);
-    p = buf;
+    if (!keypairs_json) {
+        return ret;
+    }
+    keypairs = qobject_to_qlist(qobject_from_json(keypairs_json,
+                                                  &error_abort));
+    remaining = qlist_size(keypairs) / 2;
+    assert(remaining);

-    while (p) {
-        name = qemu_rbd_next_tok(p, '=', &p);
-        if (!p) {
-            error_setg(errp, "conf option %s has no value", name);
-            ret = -EINVAL;
-            break;
-        }
+    while (remaining--) {
+        name = qobject_to_qstring(qlist_pop(keypairs));
+        value = qobject_to_qstring(qlist_pop(keypairs));
+        assert(name && value);
+        key = qstring_get_str(name);

-        value = qemu_rbd_next_tok(p, ':', &p);
-
-        ret = rados_conf_set(cluster, name, value);
+        ret = rados_conf_set(cluster, key, qstring_get_str(value));
+        QDECREF(name);
+        QDECREF(value);
        if (ret < 0) {
-            error_setg_errno(errp, -ret, "invalid conf option %s", name);
+            error_setg_errno(errp, -ret, "invalid conf option %s", key);
            ret = -EINVAL;
            break;
        }
    }

-    g_free(buf);
+    QDECREF(keypairs);
    return ret;
 }

@@ -349,7 +350,7 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
    int64_t bytes = 0;
    int64_t objsize;
    int obj_order = 0;
-    const char *pool, *name, *conf, *clientname, *keypairs;
+    const char *pool, *image_name, *conf, *user, *keypairs;
    const char *secretid;
    rados_t cluster;
    rados_ioctx_t io_ctx;
@@ -384,13 +385,19 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
        goto exit;
    }

+    /*
+     * Caution: while qdict_get_try_str() is fine, getting non-string
+     * types would require more care.  When @options come from -blockdev
+     * or blockdev_add, its members are typed according to the QAPI
+     * schema, but when they come from -drive, they're all QString.
+     */
    pool       = qdict_get_try_str(options, "pool");
    conf       = qdict_get_try_str(options, "conf");
-    clientname = qdict_get_try_str(options, "user");
-    name       = qdict_get_try_str(options, "image");
+    user       = qdict_get_try_str(options, "user");
+    image_name = qdict_get_try_str(options, "image");
    keypairs   = qdict_get_try_str(options, "=keyvalue-pairs");

-    ret = rados_create(&cluster, clientname);
+    ret = rados_create(&cluster, user);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "error initializing");
        goto exit;
@@ -427,7 +434,7 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
        goto shutdown;
    }

-    ret = rbd_create(io_ctx, name, bytes, &obj_order);
+    ret = rbd_create(io_ctx, image_name, bytes, &obj_order);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "error rbd create");
    }
@@ -533,7 +540,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
                         Error **errp)
 {
    BDRVRBDState *s = bs->opaque;
-    const char *pool, *snap, *conf, *clientname, *name, *keypairs;
+    const char *pool, *snap, *conf, *user, *image_name, *keypairs;
    const char *secretid;
    QemuOpts *opts;
    Error *local_err = NULL;
@@ -560,24 +567,24 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
    pool           = qemu_opt_get(opts, "pool");
    conf           = qemu_opt_get(opts, "conf");
    snap           = qemu_opt_get(opts, "snapshot");
-    clientname     = qemu_opt_get(opts, "user");
-    name           = qemu_opt_get(opts, "image");
+    user           = qemu_opt_get(opts, "user");
+    image_name     = qemu_opt_get(opts, "image");
    keypairs       = qemu_opt_get(opts, "=keyvalue-pairs");

-    if (!pool || !name) {
+    if (!pool || !image_name) {
        error_setg(errp, "Parameters 'pool' and 'image' are required");
        r = -EINVAL;
        goto failed_opts;
    }

-    r = rados_create(&s->cluster, clientname);
+    r = rados_create(&s->cluster, user);
    if (r < 0) {
        error_setg_errno(errp, -r, "error initializing");
        goto failed_opts;
    }

    s->snap = g_strdup(snap);
-    s->name = g_strdup(name);
+    s->image_name = g_strdup(image_name);

    /* try default location when conf=NULL, but ignore failure */
    r = rados_conf_read_file(s->cluster, conf);
@@ -628,13 +635,23 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
        goto failed_shutdown;
    }

-    r = rbd_open(s->io_ctx, s->name, &s->image, s->snap);
+    /* rbd_open is always r/w */
+    r = rbd_open(s->io_ctx, s->image_name, &s->image, s->snap);
    if (r < 0) {
-        error_setg_errno(errp, -r, "error reading header from %s", s->name);
+        error_setg_errno(errp, -r, "error reading header from %s",
+                         s->image_name);
        goto failed_open;
    }

-    bs->read_only = (s->snap != NULL);
+    /* If we are using an rbd snapshot, we must be r/o, otherwise
+     * leave as-is */
+    if (s->snap != NULL) {
+        r = bdrv_set_read_only(bs, true, &local_err);
+        if (r < 0) {
+            error_propagate(errp, local_err);
+            goto failed_open;
+        }
+    }

    qemu_opts_del(opts);
    return 0;
@@ -644,13 +661,33 @@ failed_open:
 failed_shutdown:
    rados_shutdown(s->cluster);
    g_free(s->snap);
-    g_free(s->name);
+    g_free(s->image_name);
 failed_opts:
    qemu_opts_del(opts);
    g_free(mon_host);
    return r;
 }

+
+/* Since RBD is currently always opened R/W via the API,
+ * we just need to check if we are using a snapshot or not, in
+ * order to determine if we will allow it to be R/W */
+static int qemu_rbd_reopen_prepare(BDRVReopenState *state,
+                                   BlockReopenQueue *queue, Error **errp)
+{
+    BDRVRBDState *s = state->bs->opaque;
+    int ret = 0;
+
+    if (s->snap && state->flags & BDRV_O_RDWR) {
+        error_setg(errp,
+                   "Cannot change node '%s' to r/w when using RBD snapshot",
+                   bdrv_get_device_or_node_name(state->bs));
+        ret = -EINVAL;
+    }
+
+    return ret;
+}
+
 static void qemu_rbd_close(BlockDriverState *bs)
 {
    BDRVRBDState *s = bs->opaque;
@@ -658,7 +695,7 @@ static void qemu_rbd_close(BlockDriverState *bs)
    rbd_close(s->image);
    rados_ioctx_destroy(s->io_ctx);
    g_free(s->snap);
-    g_free(s->name);
+    g_free(s->image_name);
    rados_shutdown(s->cluster);
 }

@@ -1057,6 +1094,7 @@ static BlockDriver bdrv_rbd = {
    .bdrv_parse_filename    = qemu_rbd_parse_filename,
    .bdrv_file_open         = qemu_rbd_open,
    .bdrv_close             = qemu_rbd_close,
+    .bdrv_reopen_prepare    = qemu_rbd_reopen_prepare,
    .bdrv_create            = qemu_rbd_create,
    .bdrv_has_zero_init     = bdrv_has_zero_init_1,
    .bdrv_get_info          = qemu_rbd_getinfo,
--- a/block/replication.c
+++ b/block/replication.c
@@ -656,7 +656,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
        s->replication_state = BLOCK_REPLICATION_FAILOVER;
        commit_active_start(NULL, s->active_disk->bs, s->secondary_disk->bs,
                            BLOCK_JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT,
-                            NULL, replication_done, bs, errp, true);
+                            NULL, replication_done, bs, true, errp);
        break;
    default:
        aio_context_release(aio_context);
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -13,9 +13,11 @@
 */

 #include "qemu/osdep.h"
+#include "qapi-visit.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qint.h"
+#include "qapi/qobject-input-visitor.h"
 #include "qemu/uri.h"
 #include "qemu/error-report.h"
 #include "qemu/sockets.h"
@@ -547,12 +549,53 @@ static SocketAddress *sd_socket_address(const char *path,
    return addr;
 }

+static SocketAddress *sd_server_config(QDict *options, Error **errp)
+{
+    QDict *server = NULL;
+    QObject *crumpled_server = NULL;
+    Visitor *iv = NULL;
+    SocketAddressFlat *saddr_flat = NULL;
+    SocketAddress *saddr = NULL;
+    Error *local_err = NULL;
+
+    qdict_extract_subqdict(options, &server, "server.");
+
+    crumpled_server = qdict_crumple(server, errp);
+    if (!crumpled_server) {
+        goto done;
+    }
+
+    /*
+     * FIXME .numeric, .to, .ipv4 or .ipv6 don't work with -drive
+     * server.type=inet.  .to doesn't matter, it's ignored anyway.
+     * That's because when @options come from -blockdev or
+     * blockdev_add, members are typed according to the QAPI schema,
+     * but when they come from -drive, they're all QString.  The
+     * visitor expects the former.
+     */
+    iv = qobject_input_visitor_new(crumpled_server);
+    visit_type_SocketAddressFlat(iv, NULL, &saddr_flat, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        goto done;
+    }
+
+    saddr = socket_address_crumple(saddr_flat);
+
+done:
+    qapi_free_SocketAddressFlat(saddr_flat);
+    visit_free(iv);
+    qobject_decref(crumpled_server);
+    QDECREF(server);
+    return saddr;
+}
+
 /* Return -EIO in case of error, file descriptor on success */
 static int connect_to_sdog(BDRVSheepdogState *s, Error **errp)
 {
    int fd;

-    fd = socket_connect(s->addr, errp, NULL, NULL);
+    fd = socket_connect(s->addr, NULL, NULL, errp);

    if (s->addr->type == SOCKET_ADDRESS_KIND_INET && fd >= 0) {
        int ret = socket_set_nodelay(fd);
@@ -693,7 +736,7 @@ static int do_req(int sockfd, BlockDriverState *bs, SheepdogReq *hdr,
    } else {
        co = qemu_coroutine_create(do_co_req, &srco);
        if (bs) {
-            qemu_coroutine_enter(co);
+            bdrv_coroutine_enter(bs, co);
            BDRV_POLL_WHILE(bs, !srco.finished);
        } else {
            qemu_coroutine_enter(co);
@@ -899,7 +942,7 @@ static void co_read_response(void *opaque)
        s->co_recv = qemu_coroutine_create(aio_read_response, opaque);
    }

-    aio_co_wake(s->co_recv);
+    aio_co_enter(s->aio_context, s->co_recv);
 }

 static void co_write_request(void *opaque)
@@ -1174,15 +1217,15 @@ static void sd_parse_filename(const char *filename, QDict *options,
        return;
    }

-    if (cfg.host) {
-        qdict_set_default_str(options, "host", cfg.host);
-    }
-    if (cfg.port) {
-        snprintf(buf, sizeof(buf), "%d", cfg.port);
-        qdict_set_default_str(options, "port", buf);
-    }
    if (cfg.path) {
-        qdict_set_default_str(options, "path", cfg.path);
+        qdict_set_default_str(options, "server.path", cfg.path);
+        qdict_set_default_str(options, "server.type", "unix");
+    } else {
+        qdict_set_default_str(options, "server.type", "inet");
+        qdict_set_default_str(options, "server.host",
+                              cfg.host ?: SD_DEFAULT_ADDR);
+        snprintf(buf, sizeof(buf), "%d", cfg.port ?: SD_DEFAULT_PORT);
+        qdict_set_default_str(options, "server.port", buf);
    }
    qdict_set_default_str(options, "vdi", cfg.vdi);
    qdict_set_default_str(options, "tag", cfg.tag);
@@ -1509,18 +1552,6 @@ static QemuOptsList runtime_opts = {
    .name = "sheepdog",
    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
    .desc = {
-        {
-            .name = "host",
-            .type = QEMU_OPT_STRING,
-        },
-        {
-            .name = "port",
-            .type = QEMU_OPT_STRING,
-        },
-        {
-            .name = "path",
-            .type = QEMU_OPT_STRING,
-        },
        {
            .name = "vdi",
            .type = QEMU_OPT_STRING,
@@ -1543,7 +1574,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
    int ret, fd;
    uint32_t vid = 0;
    BDRVSheepdogState *s = bs->opaque;
-    const char *host, *port, *path, *vdi, *snap_id_str, *tag;
+    const char *vdi, *snap_id_str, *tag;
    uint64_t snap_id;
    char *buf = NULL;
    QemuOpts *opts;
@@ -1560,20 +1591,17 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
        goto err_no_fd;
    }

-    host = qemu_opt_get(opts, "host");
-    port = qemu_opt_get(opts, "port");
-    path = qemu_opt_get(opts, "path");
+    s->addr = sd_server_config(options, errp);
+    if (!s->addr) {
+        ret = -EINVAL;
+        goto err_no_fd;
+    }
+
    vdi = qemu_opt_get(opts, "vdi");
    snap_id_str = qemu_opt_get(opts, "snap-id");
    snap_id = qemu_opt_get_number(opts, "snap-id", CURRENT_VDI_ID);
    tag = qemu_opt_get(opts, "tag");

-    if ((host || port) && path) {
-        error_setg(errp, "can't use 'path' together with 'host' or 'port'");
-        ret = -EINVAL;
-        goto err_no_fd;
-    }
-
    if (!vdi) {
        error_setg(errp, "parameter 'vdi' is missing");
        ret = -EINVAL;
@@ -1604,8 +1632,6 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
        goto err_no_fd;
    }

-    s->addr = sd_socket_address(path, host, port);
-
    QLIST_INIT(&s->inflight_aio_head);
    QLIST_INIT(&s->failed_aio_head);
    QLIST_INIT(&s->inflight_aiocb_head);
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -27,6 +27,7 @@
 #include "block/block_int.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qstring.h"

 QemuOptsList internal_snapshot_opts = {
    .name = "snapshot",
@@ -189,14 +190,33 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
    }

    if (bs->file) {
+        BlockDriverState *file;
+        QDict *options = qdict_clone_shallow(bs->options);
+        QDict *file_options;
+
+        file = bs->file->bs;
+        /* Prevent it from getting deleted when detached from bs */
+        bdrv_ref(file);
+
+        qdict_extract_subqdict(options, &file_options, "file.");
+        QDECREF(file_options);
+        qdict_put(options, "file", qstring_from_str(bdrv_get_node_name(file)));
+
        drv->bdrv_close(bs);
-        ret = bdrv_snapshot_goto(bs->file->bs, snapshot_id);
-        open_ret = drv->bdrv_open(bs, NULL, bs->open_flags, NULL);
+        bdrv_unref_child(bs, bs->file);
+        bs->file = NULL;
+
+        ret = bdrv_snapshot_goto(file, snapshot_id);
+        open_ret = drv->bdrv_open(bs, options, bs->open_flags, NULL);
+        QDECREF(options);
        if (open_ret < 0) {
-            bdrv_unref(bs->file->bs);
+            bdrv_unref(file);
            bs->drv = NULL;
            return open_ret;
        }
+
+        assert(bs->file->bs == file);
+        bdrv_unref(file);
        return ret;
    }

--- a/block/ssh.c
+++ b/block/ssh.c
@@ -601,6 +601,14 @@ static InetSocketAddress *ssh_config(QDict *options, Error **errp)
        goto out;
    }

+    /*
+     * FIXME .numeric, .to, .ipv4 or .ipv6 don't work with -drive.
+     * .to doesn't matter, it's ignored anyway.
+     * That's because when @options come from -blockdev or
+     * blockdev_add, members are typed according to the QAPI schema,
+     * but when they come from -drive, they're all QString.  The
+     * visitor expects the former.
+     */
    iv = qobject_input_visitor_new(crumpled_addr);
    visit_type_InetSocketAddress(iv, NULL, &inet, &local_error);
    if (local_error) {
@@ -673,7 +681,7 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
    }

    /* Open the socket and connect. */
-    s->sock = inet_connect_saddr(s->inet, errp, NULL, NULL);
+    s->sock = inet_connect_saddr(s->inet, NULL, NULL, errp);
    if (s->sock < 0) {
        ret = -EIO;
        goto err;
--- a/block/trace-events
+++ b/block/trace-events
@@ -110,3 +110,20 @@ qed_aio_write_data(void *s, void *acb, int ret, uint64_t offset, size_t len) "s
 qed_aio_write_prefill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64
 qed_aio_write_postfill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64
 qed_aio_write_main(void *s, void *acb, int ret, uint64_t offset, size_t len) "s %p acb %p ret %d offset %"PRIu64" len %zu"
+
+# block/vxhs.c
+vxhs_iio_callback(int error) "ctx is NULL: error %d"
+vxhs_iio_callback_chnfail(int err, int error) "QNIO channel failed, no i/o %d, %d"
+vxhs_iio_callback_unknwn(int opcode, int err) "unexpected opcode %d, errno %d"
+vxhs_aio_rw_invalid(int req) "Invalid I/O request iodir %d"
+vxhs_aio_rw_ioerr(char *guid, int iodir, uint64_t size, uint64_t off, void *acb, int ret, int err) "IO ERROR (vDisk %s) FOR : Read/Write = %d size = %"PRIu64" offset = %"PRIu64" ACB = %p. Error = %d, errno = %d"
+vxhs_get_vdisk_stat_err(char *guid, int ret, int err) "vDisk (%s) stat ioctl failed, ret = %d, errno = %d"
+vxhs_get_vdisk_stat(char *vdisk_guid, uint64_t vdisk_size) "vDisk %s stat ioctl returned size %"PRIu64
+vxhs_complete_aio(void *acb, uint64_t ret) "aio failed acb %p ret %"PRIu64
+vxhs_parse_uri_filename(const char *filename) "URI passed via bdrv_parse_filename %s"
+vxhs_open_vdiskid(const char *vdisk_id) "Opening vdisk-id %s"
+vxhs_open_hostinfo(char *of_vsa_addr, int port) "Adding host %s:%d to BDRVVXHSState"
+vxhs_open_iio_open(const char *host) "Failed to connect to storage agent on host %s"
+vxhs_parse_uri_hostinfo(char *host, int port) "Host: IP %s, Port %d"
+vxhs_close(char *vdisk_guid) "Closing vdisk %s"
+vxhs_get_creds(const char *cacert, const char *client_key, const char *client_cert) "cacert %s, client_key %s, client_cert %s"
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -1156,8 +1156,6 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,

    s->current_cluster=0xffffffff;

-    /* read only is the default for safety */
-    bs->read_only = true;
    s->qcow = NULL;
    s->qcow_filename = NULL;
    s->fat2 = NULL;
@@ -1169,11 +1167,24 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
    s->sector_count = cyls * heads * secs - (s->first_sectors_number - 1);

    if (qemu_opt_get_bool(opts, "rw", false)) {
-        ret = enable_write_target(bs, errp);
-        if (ret < 0) {
+        if (!bdrv_is_read_only(bs)) {
+            ret = enable_write_target(bs, errp);
+            if (ret < 0) {
+                goto fail;
+            }
+        } else {
+            ret = -EPERM;
+            error_setg(errp,
+                       "Unable to set VVFAT to 'rw' when drive is read-only");
+            goto fail;
+        }
+    } else  {
+        /* read only is the default for safety */
+        ret = bdrv_set_read_only(bs, true, &local_err);
+        if (ret < 0) {
+            error_propagate(errp, local_err);
            goto fail;
        }
-        bs->read_only = false;
    }

    bs->total_sectors = cyls * heads * secs;
--- a/block/vxhs.c
+++ b/block/vxhs.c
@@ -0,0 +1,575 @@
+/*
+ * QEMU Block driver for Veritas HyperScale (VxHS)
+ *
+ * Copyright (c) 2017 Veritas Technologies LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include <qnio/qnio_api.h>
+#include <sys/param.h>
+#include "block/block_int.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
+#include "trace.h"
+#include "qemu/uri.h"
+#include "qapi/error.h"
+#include "qemu/uuid.h"
+#include "crypto/tlscredsx509.h"
+
+#define VXHS_OPT_FILENAME           "filename"
+#define VXHS_OPT_VDISK_ID           "vdisk-id"
+#define VXHS_OPT_SERVER             "server"
+#define VXHS_OPT_HOST               "host"
+#define VXHS_OPT_PORT               "port"
+
+/* Only accessed under QEMU global mutex */
+static uint32_t vxhs_ref;
+
+typedef enum {
+    VDISK_AIO_READ,
+    VDISK_AIO_WRITE,
+} VDISKAIOCmd;
+
+/*
+ * HyperScale AIO callbacks structure
+ */
+typedef struct VXHSAIOCB {
+    BlockAIOCB common;
+    int err;
+} VXHSAIOCB;
+
+typedef struct VXHSvDiskHostsInfo {
+    void *dev_handle; /* Device handle */
+    char *host; /* Host name or IP */
+    int port; /* Host's port number */
+} VXHSvDiskHostsInfo;
+
+/*
+ * Structure per vDisk maintained for state
+ */
+typedef struct BDRVVXHSState {
+    VXHSvDiskHostsInfo vdisk_hostinfo; /* Per host info */
+    char *vdisk_guid;
+    char *tlscredsid; /* tlscredsid */
+} BDRVVXHSState;
+
+static void vxhs_complete_aio_bh(void *opaque)
+{
+    VXHSAIOCB *acb = opaque;
+    BlockCompletionFunc *cb = acb->common.cb;
+    void *cb_opaque = acb->common.opaque;
+    int ret = 0;
+
+    if (acb->err != 0) {
+        trace_vxhs_complete_aio(acb, acb->err);
+        ret = (-EIO);
+    }
+
+    qemu_aio_unref(acb);
+    cb(cb_opaque, ret);
+}
+
+/*
+ * Called from a libqnio thread
+ */
+static void vxhs_iio_callback(void *ctx, uint32_t opcode, uint32_t error)
+{
+    VXHSAIOCB *acb = NULL;
+
+    switch (opcode) {
+    case IRP_READ_REQUEST:
+    case IRP_WRITE_REQUEST:
+
+        /*
+         * ctx is VXHSAIOCB*
+         * ctx is NULL if error is QNIOERROR_CHANNEL_HUP
+         */
+        if (ctx) {
+            acb = ctx;
+        } else {
+            trace_vxhs_iio_callback(error);
+            goto out;
+        }
+
+        if (error) {
+            if (!acb->err) {
+                acb->err = error;
+            }
+            trace_vxhs_iio_callback(error);
+        }
+
+        aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs),
+                                vxhs_complete_aio_bh, acb);
+        break;
+
+    default:
+        if (error == QNIOERROR_HUP) {
+            /*
+             * Channel failed, spontaneous notification,
+             * not in response to I/O
+             */
+            trace_vxhs_iio_callback_chnfail(error, errno);
+        } else {
+            trace_vxhs_iio_callback_unknwn(opcode, error);
+        }
+        break;
+    }
+out:
+    return;
+}
+
+static QemuOptsList runtime_opts = {
+    .name = "vxhs",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+    .desc = {
+        {
+            .name = VXHS_OPT_FILENAME,
+            .type = QEMU_OPT_STRING,
+            .help = "URI to the Veritas HyperScale image",
+        },
+        {
+            .name = VXHS_OPT_VDISK_ID,
+            .type = QEMU_OPT_STRING,
+            .help = "UUID of the VxHS vdisk",
+        },
+        {
+            .name = "tls-creds",
+            .type = QEMU_OPT_STRING,
+            .help = "ID of the TLS/SSL credentials to use",
+        },
+        { /* end of list */ }
+    },
+};
+
+static QemuOptsList runtime_tcp_opts = {
+    .name = "vxhs_tcp",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_tcp_opts.head),
+    .desc = {
+        {
+            .name = VXHS_OPT_HOST,
+            .type = QEMU_OPT_STRING,
+            .help = "host address (ipv4 addresses)",
+        },
+        {
+            .name = VXHS_OPT_PORT,
+            .type = QEMU_OPT_NUMBER,
+            .help = "port number on which VxHSD is listening (default 9999)",
+            .def_value_str = "9999"
+        },
+        { /* end of list */ }
+    },
+};
+
+/*
+ * Parse incoming URI and populate *options with the host
+ * and device information
+ */
+static int vxhs_parse_uri(const char *filename, QDict *options)
+{
+    URI *uri = NULL;
+    char *port;
+    int ret = 0;
+
+    trace_vxhs_parse_uri_filename(filename);
+    uri = uri_parse(filename);
+    if (!uri || !uri->server || !uri->path) {
+        uri_free(uri);
+        return -EINVAL;
+    }
+
+    qdict_put(options, VXHS_OPT_SERVER".host", qstring_from_str(uri->server));
+
+    if (uri->port) {
+        port = g_strdup_printf("%d", uri->port);
+        qdict_put(options, VXHS_OPT_SERVER".port", qstring_from_str(port));
+        g_free(port);
+    }
+
+    qdict_put(options, "vdisk-id", qstring_from_str(uri->path));
+
+    trace_vxhs_parse_uri_hostinfo(uri->server, uri->port);
+    uri_free(uri);
+
+    return ret;
+}
+
+static void vxhs_parse_filename(const char *filename, QDict *options,
+                                Error **errp)
+{
+    if (qdict_haskey(options, "vdisk-id") || qdict_haskey(options, "server")) {
+        error_setg(errp, "vdisk-id/server and a file name may not be specified "
+                         "at the same time");
+        return;
+    }
+
+    if (strstr(filename, "://")) {
+        int ret = vxhs_parse_uri(filename, options);
+        if (ret < 0) {
+            error_setg(errp, "Invalid URI. URI should be of the form "
+                       "  vxhs://<host_ip>:<port>/<vdisk-id>");
+        }
+    }
+}
+
+static int vxhs_init_and_ref(void)
+{
+    if (vxhs_ref++ == 0) {
+        if (iio_init(QNIO_VERSION, vxhs_iio_callback)) {
+            return -ENODEV;
+        }
+    }
+    return 0;
+}
+
+static void vxhs_unref(void)
+{
+    if (--vxhs_ref == 0) {
+        iio_fini();
+    }
+}
+
+static void vxhs_get_tls_creds(const char *id, char **cacert,
+                               char **key, char **cert, Error **errp)
+{
+    Object *obj;
+    QCryptoTLSCreds *creds;
+    QCryptoTLSCredsX509 *creds_x509;
+
+    obj = object_resolve_path_component(
+        object_get_objects_root(), id);
+
+    if (!obj) {
+        error_setg(errp, "No TLS credentials with id '%s'",
+                   id);
+        return;
+    }
+
+    creds_x509 = (QCryptoTLSCredsX509 *)
+        object_dynamic_cast(obj, TYPE_QCRYPTO_TLS_CREDS_X509);
+
+    if (!creds_x509) {
+        error_setg(errp, "Object with id '%s' is not TLS credentials",
+                   id);
+        return;
+    }
+
+    creds = &creds_x509->parent_obj;
+
+    if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) {
+        error_setg(errp,
+                   "Expecting TLS credentials with a client endpoint");
+        return;
+    }
+
+    /*
+     * Get the cacert, client_cert and client_key file names.
+     */
+    if (!creds->dir) {
+        error_setg(errp, "TLS object missing 'dir' property value");
+        return;
+    }
+
+    *cacert = g_strdup_printf("%s/%s", creds->dir,
+                              QCRYPTO_TLS_CREDS_X509_CA_CERT);
+    *cert = g_strdup_printf("%s/%s", creds->dir,
+                            QCRYPTO_TLS_CREDS_X509_CLIENT_CERT);
+    *key = g_strdup_printf("%s/%s", creds->dir,
+                           QCRYPTO_TLS_CREDS_X509_CLIENT_KEY);
+}
+
+static int vxhs_open(BlockDriverState *bs, QDict *options,
+                     int bdrv_flags, Error **errp)
+{
+    BDRVVXHSState *s = bs->opaque;
+    void *dev_handlep;
+    QDict *backing_options = NULL;
+    QemuOpts *opts = NULL;
+    QemuOpts *tcp_opts = NULL;
+    char *of_vsa_addr = NULL;
+    Error *local_err = NULL;
+    const char *vdisk_id_opt;
+    const char *server_host_opt;
+    int ret = 0;
+    char *cacert = NULL;
+    char *client_key = NULL;
+    char *client_cert = NULL;
+
+    ret = vxhs_init_and_ref();
+    if (ret < 0) {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    /* Create opts info from runtime_opts and runtime_tcp_opts list */
+    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+    tcp_opts = qemu_opts_create(&runtime_tcp_opts, NULL, 0, &error_abort);
+
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (local_err) {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    /* vdisk-id is the disk UUID */
+    vdisk_id_opt = qemu_opt_get(opts, VXHS_OPT_VDISK_ID);
+    if (!vdisk_id_opt) {
+        error_setg(&local_err, QERR_MISSING_PARAMETER, VXHS_OPT_VDISK_ID);
+        ret = -EINVAL;
+        goto out;
+    }
+
+    /* vdisk-id may contain a leading '/' */
+    if (strlen(vdisk_id_opt) > UUID_FMT_LEN + 1) {
+        error_setg(&local_err, "vdisk-id cannot be more than %d characters",
+                   UUID_FMT_LEN);
+        ret = -EINVAL;
+        goto out;
+    }
+
+    s->vdisk_guid = g_strdup(vdisk_id_opt);
+    trace_vxhs_open_vdiskid(vdisk_id_opt);
+
+    /* get the 'server.' arguments */
+    qdict_extract_subqdict(options, &backing_options, VXHS_OPT_SERVER".");
+
+    qemu_opts_absorb_qdict(tcp_opts, backing_options, &local_err);
+    if (local_err != NULL) {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    server_host_opt = qemu_opt_get(tcp_opts, VXHS_OPT_HOST);
+    if (!server_host_opt) {
+        error_setg(&local_err, QERR_MISSING_PARAMETER,
+                   VXHS_OPT_SERVER"."VXHS_OPT_HOST);
+        ret = -EINVAL;
+        goto out;
+    }
+
+    if (strlen(server_host_opt) > MAXHOSTNAMELEN) {
+        error_setg(&local_err, "server.host cannot be more than %d characters",
+                   MAXHOSTNAMELEN);
+        ret = -EINVAL;
+        goto out;
+    }
+
+    /* check if we got tls-creds via the --object argument */
+    s->tlscredsid = g_strdup(qemu_opt_get(opts, "tls-creds"));
+    if (s->tlscredsid) {
+        vxhs_get_tls_creds(s->tlscredsid, &cacert, &client_key,
+                           &client_cert, &local_err);
+        if (local_err != NULL) {
+            ret = -EINVAL;
+            goto out;
+        }
+        trace_vxhs_get_creds(cacert, client_key, client_cert);
+    }
+
+    s->vdisk_hostinfo.host = g_strdup(server_host_opt);
+    s->vdisk_hostinfo.port = g_ascii_strtoll(qemu_opt_get(tcp_opts,
+                                                          VXHS_OPT_PORT),
+                                                          NULL, 0);
+
+    trace_vxhs_open_hostinfo(s->vdisk_hostinfo.host,
+                             s->vdisk_hostinfo.port);
+
+    of_vsa_addr = g_strdup_printf("of://%s:%d",
+                                  s->vdisk_hostinfo.host,
+                                  s->vdisk_hostinfo.port);
+
+    /*
+     * Open qnio channel to storage agent if not opened before
+     */
+    dev_handlep = iio_open(of_vsa_addr, s->vdisk_guid, 0,
+                           cacert, client_key, client_cert);
+    if (dev_handlep == NULL) {
+        trace_vxhs_open_iio_open(of_vsa_addr);
+        ret = -ENODEV;
+        goto out;
+    }
+    s->vdisk_hostinfo.dev_handle = dev_handlep;
+
+out:
+    g_free(of_vsa_addr);
+    QDECREF(backing_options);
+    qemu_opts_del(tcp_opts);
+    qemu_opts_del(opts);
+    g_free(cacert);
+    g_free(client_key);
+    g_free(client_cert);
+
+    if (ret < 0) {
+        vxhs_unref();
+        error_propagate(errp, local_err);
+        g_free(s->vdisk_hostinfo.host);
+        g_free(s->vdisk_guid);
+        g_free(s->tlscredsid);
+        s->vdisk_guid = NULL;
+    }
+
+    return ret;
+}
+
+static const AIOCBInfo vxhs_aiocb_info = {
+    .aiocb_size = sizeof(VXHSAIOCB)
+};
+
+/*
+ * This allocates QEMU-VXHS callback for each IO
+ * and is passed to QNIO. When QNIO completes the work,
+ * it will be passed back through the callback.
+ */
+static BlockAIOCB *vxhs_aio_rw(BlockDriverState *bs, int64_t sector_num,
+                               QEMUIOVector *qiov, int nb_sectors,
+                               BlockCompletionFunc *cb, void *opaque,
+                               VDISKAIOCmd iodir)
+{
+    VXHSAIOCB *acb = NULL;
+    BDRVVXHSState *s = bs->opaque;
+    size_t size;
+    uint64_t offset;
+    int iio_flags = 0;
+    int ret = 0;
+    void *dev_handle = s->vdisk_hostinfo.dev_handle;
+
+    offset = sector_num * BDRV_SECTOR_SIZE;
+    size = nb_sectors * BDRV_SECTOR_SIZE;
+    acb = qemu_aio_get(&vxhs_aiocb_info, bs, cb, opaque);
+
+    /*
+     * Initialize VXHSAIOCB.
+     */
+    acb->err = 0;
+
+    iio_flags = IIO_FLAG_ASYNC;
+
+    switch (iodir) {
+    case VDISK_AIO_WRITE:
+            ret = iio_writev(dev_handle, acb, qiov->iov, qiov->niov,
+                             offset, (uint64_t)size, iio_flags);
+            break;
+    case VDISK_AIO_READ:
+            ret = iio_readv(dev_handle, acb, qiov->iov, qiov->niov,
+                            offset, (uint64_t)size, iio_flags);
+            break;
+    default:
+            trace_vxhs_aio_rw_invalid(iodir);
+            goto errout;
+    }
+
+    if (ret != 0) {
+        trace_vxhs_aio_rw_ioerr(s->vdisk_guid, iodir, size, offset,
+                                acb, ret, errno);
+        goto errout;
+    }
+    return &acb->common;
+
+errout:
+    qemu_aio_unref(acb);
+    return NULL;
+}
+
+static BlockAIOCB *vxhs_aio_readv(BlockDriverState *bs,
+                                   int64_t sector_num, QEMUIOVector *qiov,
+                                   int nb_sectors,
+                                   BlockCompletionFunc *cb, void *opaque)
+{
+    return vxhs_aio_rw(bs, sector_num, qiov, nb_sectors, cb,
+                       opaque, VDISK_AIO_READ);
+}
+
+static BlockAIOCB *vxhs_aio_writev(BlockDriverState *bs,
+                                   int64_t sector_num, QEMUIOVector *qiov,
+                                   int nb_sectors,
+                                   BlockCompletionFunc *cb, void *opaque)
+{
+    return vxhs_aio_rw(bs, sector_num, qiov, nb_sectors,
+                       cb, opaque, VDISK_AIO_WRITE);
+}
+
+static void vxhs_close(BlockDriverState *bs)
+{
+    BDRVVXHSState *s = bs->opaque;
+
+    trace_vxhs_close(s->vdisk_guid);
+
+    g_free(s->vdisk_guid);
+    s->vdisk_guid = NULL;
+
+    /*
+     * Close vDisk device
+     */
+    if (s->vdisk_hostinfo.dev_handle) {
+        iio_close(s->vdisk_hostinfo.dev_handle);
+        s->vdisk_hostinfo.dev_handle = NULL;
+    }
+
+    vxhs_unref();
+
+    /*
+     * Free the dynamically allocated host string etc
+     */
+    g_free(s->vdisk_hostinfo.host);
+    g_free(s->tlscredsid);
+    s->tlscredsid = NULL;
+    s->vdisk_hostinfo.host = NULL;
+    s->vdisk_hostinfo.port = 0;
+}
+
+static int64_t vxhs_get_vdisk_stat(BDRVVXHSState *s)
+{
+    int64_t vdisk_size = -1;
+    int ret = 0;
+    void *dev_handle = s->vdisk_hostinfo.dev_handle;
+
+    ret = iio_ioctl(dev_handle, IOR_VDISK_STAT, &vdisk_size, 0);
+    if (ret < 0) {
+        trace_vxhs_get_vdisk_stat_err(s->vdisk_guid, ret, errno);
+        return -EIO;
+    }
+
+    trace_vxhs_get_vdisk_stat(s->vdisk_guid, vdisk_size);
+    return vdisk_size;
+}
+
+/*
+ * Returns the size of vDisk in bytes. This is required
+ * by QEMU block upper block layer so that it is visible
+ * to guest.
+ */
+static int64_t vxhs_getlength(BlockDriverState *bs)
+{
+    BDRVVXHSState *s = bs->opaque;
+    int64_t vdisk_size;
+
+    vdisk_size = vxhs_get_vdisk_stat(s);
+    if (vdisk_size < 0) {
+        return -EIO;
+    }
+
+    return vdisk_size;
+}
+
+static BlockDriver bdrv_vxhs = {
+    .format_name                  = "vxhs",
+    .protocol_name                = "vxhs",
+    .instance_size                = sizeof(BDRVVXHSState),
+    .bdrv_file_open               = vxhs_open,
+    .bdrv_parse_filename          = vxhs_parse_filename,
+    .bdrv_close                   = vxhs_close,
+    .bdrv_getlength               = vxhs_getlength,
+    .bdrv_aio_readv               = vxhs_aio_readv,
+    .bdrv_aio_writev              = vxhs_aio_writev,
+};
+
+static void bdrv_vxhs_init(void)
+{
+    bdrv_register(&bdrv_vxhs);
+}
+
+block_init(bdrv_vxhs_init);
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -124,6 +124,7 @@ void qmp_nbd_server_start(SocketAddress *addr,
            goto error;
        }

+        /* TODO SOCKET_ADDRESS_KIND_FD where fd has AF_INET or AF_INET6 */
        if (addr->type != SOCKET_ADDRESS_KIND_INET) {
            error_setg(errp, "TLS is only supported with IPv4/IPv6");
            goto error;
--- a/blockdev.c
+++ b/blockdev.c
@@ -1728,7 +1728,7 @@ static void external_snapshot_prepare(BlkActionState *common,
            bdrv_img_create(new_image_file, format,
                            state->old_bs->filename,
                            state->old_bs->drv->format_name,
-                            NULL, size, flags, &local_err, false);
+                            NULL, size, flags, false, &local_err);
            if (local_err) {
                error_propagate(errp, local_err);
                return;
@@ -1772,6 +1772,8 @@ static void external_snapshot_prepare(BlkActionState *common,
        return;
    }

+    bdrv_set_aio_context(state->new_bs, state->aio_context);
+
    /* This removes our old bs and adds the new bs. This is an operation that
     * can fail, so we need to do it in .prepare; undoing it for abort is
     * always possible. */
@@ -1789,8 +1791,6 @@ static void external_snapshot_commit(BlkActionState *common)
    ExternalSnapshotState *state =
                             DO_UPCAST(ExternalSnapshotState, common, common);

-    bdrv_set_aio_context(state->new_bs, state->aio_context);
-
    /* We don't need (or want) to use the transactional
     * bdrv_reopen_multiple() across all the entries at once, because we
     * don't want to abort all of them if one of them fails the reopen */
@@ -3142,7 +3142,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
        }
        commit_active_start(has_job_id ? job_id : NULL, bs, base_bs,
                            BLOCK_JOB_DEFAULT, speed, on_error,
-                            filter_node_name, NULL, NULL, &local_err, false);
+                            filter_node_name, NULL, NULL, false, &local_err);
    } else {
        BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs);
        if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
@@ -3237,10 +3237,10 @@ static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn,
        if (source) {
            bdrv_img_create(backup->target, backup->format, source->filename,
                            source->drv->format_name, NULL,
-                            size, flags, &local_err, false);
+                            size, flags, false, &local_err);
        } else {
            bdrv_img_create(backup->target, backup->format, NULL, NULL, NULL,
-                            size, flags, &local_err, false);
+                            size, flags, false, &local_err);
        }
    }

@@ -3531,7 +3531,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
        /* create new image w/o backing file */
        assert(format);
        bdrv_img_create(arg->target, format,
-                        NULL, NULL, NULL, size, flags, &local_err, false);
+                        NULL, NULL, NULL, size, flags, false, &local_err);
    } else {
        switch (arg->mode) {
        case NEW_IMAGE_MODE_EXISTING:
@@ -3541,7 +3541,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
            bdrv_img_create(arg->target, format,
                            source->filename,
                            source->drv->format_name,
-                            NULL, size, flags, &local_err, false);
+                            NULL, size, flags, false, &local_err);
            break;
        default:
            abort();
--- a/blockjob.c
+++ b/blockjob.c
@@ -290,7 +290,7 @@ void block_job_start(BlockJob *job)
    job->pause_count--;
    job->busy = true;
    job->paused = false;
-    qemu_coroutine_enter(job->co);
+    bdrv_coroutine_enter(blk_bs(job->blk), job->co);
 }

 void block_job_ref(BlockJob *job)
@@ -532,7 +532,7 @@ void block_job_user_resume(BlockJob *job)
 void block_job_enter(BlockJob *job)
 {
    if (job->co && !job->busy) {
-        qemu_coroutine_enter(job->co);
+        bdrv_coroutine_enter(blk_bs(job->blk), job->co);
    }
 }

--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -47,7 +47,6 @@ typedef struct {
    int max_size;
    int do_telnetopt;
    int do_nodelay;
-    int is_unix;
    int *read_msgfds;
    size_t read_msgfds_num;
    int *write_msgfds;
@@ -358,6 +357,10 @@ static char *SocketAddress_to_str(const char *prefix, SocketAddress *addr,
        return g_strdup_printf("%sfd:%s%s", prefix, addr->u.fd.data->str,
                               is_listen ? ",server" : "");
        break;
+    case SOCKET_ADDRESS_KIND_VSOCK:
+        return g_strdup_printf("%svsock:%s:%s", prefix,
+                               addr->u.vsock.data->cid,
+                               addr->u.vsock.data->port);
    default:
        abort();
    }
@@ -825,7 +828,6 @@ static void qmp_chardev_open_socket(Chardev *chr,
    int64_t reconnect   = sock->has_reconnect ? sock->reconnect : 0;
    QIOChannelSocket *sioc = NULL;

-    s->is_unix = addr->type == SOCKET_ADDRESS_KIND_UNIX;
    s->is_listen = is_listen;
    s->is_telnet = is_telnet;
    s->do_nodelay = do_nodelay;
@@ -865,7 +867,8 @@ static void qmp_chardev_open_socket(Chardev *chr,
    s->addr = QAPI_CLONE(SocketAddress, sock->addr);

    qemu_chr_set_feature(chr, QEMU_CHAR_FEATURE_RECONNECTABLE);
-    if (s->is_unix) {
+    /* TODO SOCKET_ADDRESS_FD where fd has AF_UNIX */
+    if (addr->type == SOCKET_ADDRESS_KIND_UNIX) {
        qemu_chr_set_feature(chr, QEMU_CHAR_FEATURE_FD_PASS);
    }

--- a/268
+++ b/268
@@ -320,9 +320,11 @@ numa=""
 tcmalloc="no"
 jemalloc="no"
 replication="yes"
+vxhs=""

 supported_cpu="no"
 supported_os="no"
+bogus_os="no"

 # parse CC options first
 for opt do
@@ -520,11 +522,11 @@ ARCH=
 # Normalise host CPU name and set ARCH.
 # Note that this case should only have supported host CPUs, not guests.
 case "$cpu" in
-  ppc|ppc64|s390|s390x|x32)
+  ppc|ppc64|s390|s390x|sparc64|x32)
    cpu="$cpu"
    supported_cpu="yes"
  ;;
-  ia64|sparc64)
+  ia64)
    cpu="$cpu"
  ;;
  i386|i486|i586|i686|i86pc|BePC)
@@ -549,6 +551,7 @@ case "$cpu" in
  ;;
  sparc|sun4[cdmuv])
    cpu="sparc"
+    supported_cpu="yes"
  ;;
  *)
    # This will result in either an error or falling back to TCI later
@@ -694,7 +697,10 @@ Linux)
  supported_os="yes"
 ;;
 *)
-  error_exit "Unsupported host OS $targetos"
+  # This is a fatal error, but don't report it yet, because we
+  # might be going to just print the --help text, or it might
+  # be the result of a missing compiler.
+  bogus_os="yes"
 ;;
 esac

@@ -737,7 +743,7 @@ if test "$mingw32" = "yes" ; then
  sysconfdir="\${prefix}"
  local_statedir=
  confsuffix=""
-  libs_qga="-lws2_32 -lwinmm -lpowrprof -liphlpapi -lnetapi32 $libs_qga"
+  libs_qga="-lws2_32 -lwinmm -lpowrprof -lwtsapi32 -liphlpapi -lnetapi32 $libs_qga"
 fi

 werror=""
@@ -1178,6 +1184,10 @@ for opt do
  ;;
  --enable-replication) replication="yes"
  ;;
+  --disable-vxhs) vxhs="no"
+  ;;
+  --enable-vxhs) vxhs="yes"
+  ;;
  *)
      echo "ERROR: unknown option $opt"
      echo "Try '$0 --help' for more information"
@@ -1186,21 +1196,6 @@ for opt do
  esac
 done

-if ! has $python; then
-  error_exit "Python not found. Use --python=/path/to/python"
-fi
-
-# Note that if the Python conditional here evaluates True we will exit
-# with status 1 which is a shell 'false' value.
-if ! $python -c 'import sys; sys.exit(sys.version_info < (2,6) or sys.version_info >= (3,))'; then
-  error_exit "Cannot use '$python', Python 2.6 or later is required." \
-      "Note that Python 3 or later is not yet supported." \
-      "Use --python=/path/to/python to specify a supported Python."
-fi
-
-# Suppress writing compiled files
-python="$python -B"
-
 case "$cpu" in
    ppc)
           CPU_CFLAGS="-m32"
@@ -1275,6 +1270,9 @@ for config in $mak_wilds; do
    default_target_list="${default_target_list} $(basename "$config" .mak)"
 done

+# Enumerate public trace backends for --help output
+trace_backend_list=$(echo $(grep -le '^PUBLIC = True$' "$source_path"/scripts/tracetool/backend/*.py | sed -e 's/^.*\/\(.*\)\.py$/\1/'))
+
 if test x"$show_help" = x"yes" ; then
 cat << EOF

@@ -1328,7 +1326,7 @@ Advanced options (experts only):
                           set block driver read-only whitelist
                           (affects only QEMU, not qemu-img)
  --enable-trace-backends=B Set trace backend
-                           Available backends: $($python $source_path/scripts/tracetool.py --list-backends)
+                           Available backends: $trace_backend_list
  --with-trace-file=NAME   Full PATH,NAME of file to store traces
                           Default:trace-<pid>
  --disable-slirp          disable SLIRP userspace network connectivity
@@ -1422,12 +1420,28 @@ disabled with --disable-FEATURE, default is enabled if available:
  xfsctl          xfsctl support
  qom-cast-debug  cast debugging support
  tools           build qemu-io, qemu-nbd and qemu-image tools
+  vxhs            Veritas HyperScale vDisk backend support

 NOTE: The object files are built at the place where configure is launched
 EOF
 exit 0
 fi

+if ! has $python; then
+  error_exit "Python not found. Use --python=/path/to/python"
+fi
+
+# Note that if the Python conditional here evaluates True we will exit
+# with status 1 which is a shell 'false' value.
+if ! $python -c 'import sys; sys.exit(sys.version_info < (2,6) or sys.version_info >= (3,))'; then
+  error_exit "Cannot use '$python', Python 2.6 or later is required." \
+      "Note that Python 3 or later is not yet supported." \
+      "Use --python=/path/to/python to specify a supported Python."
+fi
+
+# Suppress writing compiled files
+python="$python -B"
+
 # Now we have handled --enable-tcg-interpreter and know we're not just
 # printing the help message, bail out if the host CPU isn't supported.
 if test "$ARCH" = "unknown"; then
@@ -1460,6 +1474,14 @@ if ! compile_prog ; then
    error_exit "\"$cc\" cannot build an executable (is your linker broken?)"
 fi

+if test "$bogus_os" = "yes"; then
+    # Now that we know that we're not printing the help and that
+    # the compiler works (so the results of the check_defines we used
+    # to identify the OS are reliable), if we didn't recognize the
+    # host OS we should stop now.
+    error_exit "Unrecognized host OS $targetos"
+fi
+
 # Check that the C++ compiler exists and works with the C compiler
 if has $cxx; then
    cat > $TMPC <<EOF
@@ -1975,30 +1997,65 @@ fi
 # xen probe

 if test "$xen" != "no" ; then
-  xen_libs="-lxenstore -lxenctrl -lxenguest"
-  xen_stable_libs="-lxenforeignmemory -lxengnttab -lxenevtchn"
+  # Check whether Xen library path is specified via --extra-ldflags to avoid
+  # overriding this setting with pkg-config output. If not, try pkg-config
+  # to obtain all needed flags.

-  # First we test whether Xen headers and libraries are available.
-  # If no, we are done and there is no Xen support.
-  # If yes, more tests are run to detect the Xen version.
+  if ! echo $EXTRA_LDFLAGS | grep tools/libxc > /dev/null && \
+     $pkg_config --exists xencontrol ; then
+    xen_ctrl_version="$(printf '%d%02d%02d' \
+      $($pkg_config --modversion xencontrol | sed 's/\./ /g') )"
+    xen=yes
+    xen_pc="xencontrol xenstore xenguest xenforeignmemory xengnttab"
+    xen_pc="$xen_pc xenevtchn xendevicemodel"
+    QEMU_CFLAGS="$QEMU_CFLAGS $($pkg_config --cflags $xen_pc)"
+    libs_softmmu="$($pkg_config --libs $xen_pc) $libs_softmmu"
+    LDFLAGS="$($pkg_config --libs $xen_pc) $LDFLAGS"
+  else

-  # Xen (any)
-  cat > $TMPC <<EOF
+    xen_libs="-lxenstore -lxenctrl -lxenguest"
+    xen_stable_libs="-lxencall -lxenforeignmemory -lxengnttab -lxenevtchn"
+
+    # First we test whether Xen headers and libraries are available.
+    # If no, we are done and there is no Xen support.
+    # If yes, more tests are run to detect the Xen version.
+
+    # Xen (any)
+    cat > $TMPC <<EOF
 #include <xenctrl.h>
 int main(void) {
  return 0;
 }
 EOF
-  if ! compile_prog "" "$xen_libs" ; then
-    # Xen not found
-    if test "$xen" = "yes" ; then
-      feature_not_found "xen" "Install xen devel"
-    fi
-    xen=no
+    if ! compile_prog "" "$xen_libs" ; then
+      # Xen not found
+      if test "$xen" = "yes" ; then
+        feature_not_found "xen" "Install xen devel"
+      fi
+      xen=no

-  # Xen unstable
-  elif
-      cat > $TMPC <<EOF &&
+    # Xen unstable
+    elif
+        cat > $TMPC <<EOF &&
+#undef XC_WANT_COMPAT_DEVICEMODEL_API
+#define __XEN_TOOLS__
+#include <xendevicemodel.h>
+int main(void) {
+  xendevicemodel_handle *xd;
+
+  xd = xendevicemodel_open(0, 0);
+  xendevicemodel_close(xd);
+
+  return 0;
+}
+EOF
+        compile_prog "" "$xen_libs -lxendevicemodel $xen_stable_libs"
+      then
+      xen_stable_libs="-lxendevicemodel $xen_stable_libs"
+      xen_ctrl_version=40900
+      xen=yes
+    elif
+        cat > $TMPC <<EOF &&
 /*
 * If we have stable libs the we don't want the libxc compat
 * layers, regardless of what CFLAGS we may have been given.
@@ -2048,12 +2105,12 @@ int main(void) {
  return 0;
 }
 EOF
-      compile_prog "" "$xen_libs $xen_stable_libs"
-    then
-    xen_ctrl_version=480
-    xen=yes
-  elif
-      cat > $TMPC <<EOF &&
+        compile_prog "" "$xen_libs $xen_stable_libs"
+      then
+      xen_ctrl_version=40800
+      xen=yes
+    elif
+        cat > $TMPC <<EOF &&
 /*
 * If we have stable libs the we don't want the libxc compat
 * layers, regardless of what CFLAGS we may have been given.
@@ -2099,12 +2156,12 @@ int main(void) {
  return 0;
 }
 EOF
-      compile_prog "" "$xen_libs $xen_stable_libs"
-    then
-    xen_ctrl_version=471
-    xen=yes
-  elif
-      cat > $TMPC <<EOF &&
+        compile_prog "" "$xen_libs $xen_stable_libs"
+      then
+      xen_ctrl_version=40701
+      xen=yes
+    elif
+        cat > $TMPC <<EOF &&
 #include <xenctrl.h>
 #include <stdint.h>
 int main(void) {
@@ -2114,14 +2171,14 @@ int main(void) {
  return 0;
 }
 EOF
-      compile_prog "" "$xen_libs"
-    then
-    xen_ctrl_version=470
-    xen=yes
+        compile_prog "" "$xen_libs"
+      then
+      xen_ctrl_version=40700
+      xen=yes

-  # Xen 4.6
-  elif
-      cat > $TMPC <<EOF &&
+    # Xen 4.6
+    elif
+        cat > $TMPC <<EOF &&
 #include <xenctrl.h>
 #include <xenstore.h>
 #include <stdint.h>
@@ -2142,14 +2199,14 @@ int main(void) {
  return 0;
 }
 EOF
-      compile_prog "" "$xen_libs"
-    then
-    xen_ctrl_version=460
-    xen=yes
+        compile_prog "" "$xen_libs"
+      then
+      xen_ctrl_version=40600
+      xen=yes

-  # Xen 4.5
-  elif
-      cat > $TMPC <<EOF &&
+    # Xen 4.5
+    elif
+        cat > $TMPC <<EOF &&
 #include <xenctrl.h>
 #include <xenstore.h>
 #include <stdint.h>
@@ -2169,13 +2226,13 @@ int main(void) {
  return 0;
 }
 EOF
-      compile_prog "" "$xen_libs"
-    then
-    xen_ctrl_version=450
-    xen=yes
+        compile_prog "" "$xen_libs"
+      then
+      xen_ctrl_version=40500
+      xen=yes

-  elif
-      cat > $TMPC <<EOF &&
+    elif
+        cat > $TMPC <<EOF &&
 #include <xenctrl.h>
 #include <xenstore.h>
 #include <stdint.h>
@@ -2194,24 +2251,25 @@ int main(void) {
  return 0;
 }
 EOF
-      compile_prog "" "$xen_libs"
-    then
-    xen_ctrl_version=420
-    xen=yes
+        compile_prog "" "$xen_libs"
+      then
+      xen_ctrl_version=40200
+      xen=yes

-  else
-    if test "$xen" = "yes" ; then
-      feature_not_found "xen (unsupported version)" \
-                        "Install a supported xen (xen 4.2 or newer)"
+    else
+      if test "$xen" = "yes" ; then
+        feature_not_found "xen (unsupported version)" \
+                          "Install a supported xen (xen 4.2 or newer)"
+      fi
+      xen=no
    fi
-    xen=no
-  fi

-  if test "$xen" = yes; then
-    if test $xen_ctrl_version -ge 471  ; then
-      libs_softmmu="$xen_stable_libs $libs_softmmu"
+    if test "$xen" = yes; then
+      if test $xen_ctrl_version -ge 40701  ; then
+        libs_softmmu="$xen_stable_libs $libs_softmmu"
+      fi
+      libs_softmmu="$xen_libs $libs_softmmu"
    fi
-    libs_softmmu="$xen_libs $libs_softmmu"
  fi
 fi

@@ -3060,12 +3118,23 @@ fi
 ##########################################
 # glib support probe

-glib_req_ver=2.22
+if test "$mingw32" = yes; then
+    glib_req_ver=2.30
+else
+    glib_req_ver=2.22
+fi
 glib_modules=gthread-2.0
 if test "$modules" = yes; then
    glib_modules="$glib_modules gmodule-2.0"
 fi

+# This workaround is required due to a bug in pkg-config file for glib as it
+# doesn't define GLIB_STATIC_COMPILATION for pkg-config --static
+
+if test "$static" = yes -a "$mingw32" = yes; then
+    QEMU_CFLAGS="-DGLIB_STATIC_COMPILATION $QEMU_CFLAGS"
+fi
+
 for i in $glib_modules; do
    if $pkg_config --atleast-version=$glib_req_ver $i; then
        glib_cflags=$($pkg_config --cflags $i)
@@ -4756,6 +4825,33 @@ if compile_prog "" "" ; then
    have_sysmacros=yes
 fi

+##########################################
+# Veritas HyperScale block driver VxHS
+# Check if libvxhs is installed
+
+if test "$vxhs" != "no" ; then
+  cat > $TMPC <<EOF
+#include <stdint.h>
+#include <qnio/qnio_api.h>
+
+void *vxhs_callback;
+
+int main(void) {
+    iio_init(QNIO_VERSION, vxhs_callback);
+    return 0;
+}
+EOF
+  vxhs_libs="-lvxhs -lssl"
+  if compile_prog "" "$vxhs_libs" ; then
+    vxhs=yes
+  else
+    if test "$vxhs" = "yes" ; then
+      feature_not_found "vxhs block device" "Install libvxhs See github"
+    fi
+    vxhs=no
+  fi
+fi
+
 ##########################################
 # End of CC checks
 # After here, no more $cc or $ld runs
@@ -5122,6 +5218,7 @@ echo "tcmalloc support  $tcmalloc"
 echo "jemalloc support  $jemalloc"
 echo "avx2 optimization $avx2_opt"
 echo "replication support $replication"
+echo "VxHS block device $vxhs"

 if test "$sdl_too_old" = "yes"; then
 echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -5761,6 +5858,11 @@ if test "$pthread_setname_np" = "yes" ; then
  echo "CONFIG_PTHREAD_SETNAME_NP=y" >> $config_host_mak
 fi

+if test "$vxhs" = "yes" ; then
+  echo "CONFIG_VXHS=y" >> $config_host_mak
+  echo "VXHS_LIBS=$vxhs_libs" >> $config_host_mak
+fi
+
 if test "$tcg_interpreter" = "yes"; then
  QEMU_INCLUDES="-I\$(SRC_PATH)/tcg/tci $QEMU_INCLUDES"
 elif test "$ARCH" = "sparc64" ; then
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -600,13 +600,13 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
    /* Instruction counter expired.  */
    assert(use_icount);
 #ifndef CONFIG_USER_ONLY
-    if (cpu->icount_extra) {
-        /* Refill decrementer and continue execution.  */
-        cpu->icount_extra += insns_left;
-        insns_left = MIN(0xffff, cpu->icount_extra);
-        cpu->icount_extra -= insns_left;
-        cpu->icount_decr.u16.low = insns_left;
-    } else {
+    /* Ensure global icount has gone forward */
+    cpu_update_icount(cpu);
+    /* Refill decrementer and continue execution.  */
+    insns_left = MIN(0xffff, cpu->icount_budget);
+    cpu->icount_decr.u16.low = insns_left;
+    cpu->icount_extra = cpu->icount_budget - insns_left;
+    if (!cpu->icount_extra) {
        /* Execute any remaining instructions, then let the main loop
         * handle the next event.
         */
--- a/cpus.c
+++ b/cpus.c
@@ -202,7 +202,7 @@ void qemu_tcg_configure(QemuOpts *opts, Error **errp)
            } else if (use_icount) {
                error_setg(errp, "No MTTCG when icount is enabled");
            } else {
-#ifndef TARGET_SUPPORT_MTTCG
+#ifndef TARGET_SUPPORTS_MTTCG
                error_report("Guest not yet converted to MTTCG - "
                             "you may get unexpected results");
 #endif
@@ -223,20 +223,51 @@ void qemu_tcg_configure(QemuOpts *opts, Error **errp)
    }
 }

+/* The current number of executed instructions is based on what we
+ * originally budgeted minus the current state of the decrementing
+ * icount counters in extra/u16.low.
+ */
+static int64_t cpu_get_icount_executed(CPUState *cpu)
+{
+    return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
+}
+
+/*
+ * Update the global shared timer_state.qemu_icount to take into
+ * account executed instructions. This is done by the TCG vCPU
+ * thread so the main-loop can see time has moved forward.
+ */
+void cpu_update_icount(CPUState *cpu)
+{
+    int64_t executed = cpu_get_icount_executed(cpu);
+    cpu->icount_budget -= executed;
+
+#ifdef CONFIG_ATOMIC64
+    atomic_set__nocheck(&timers_state.qemu_icount,
+                        atomic_read__nocheck(&timers_state.qemu_icount) +
+                        executed);
+#else /* FIXME: we need 64bit atomics to do this safely */
+    timers_state.qemu_icount += executed;
+#endif
+}
+
 int64_t cpu_get_icount_raw(void)
 {
-    int64_t icount;
    CPUState *cpu = current_cpu;

-    icount = timers_state.qemu_icount;
-    if (cpu) {
+    if (cpu && cpu->running) {
        if (!cpu->can_do_io) {
            fprintf(stderr, "Bad icount read\n");
            exit(1);
        }
-        icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
+        /* Take into account what has run */
+        cpu_update_icount(cpu);
    }
-    return icount;
+#ifdef CONFIG_ATOMIC64
+    return atomic_read__nocheck(&timers_state.qemu_icount);
+#else /* FIXME: we need 64bit atomics to do this safely */
+    return timers_state.qemu_icount;
+#endif
 }

 /* Return the virtual CPU time, based on the instruction counter.  */
@@ -1179,6 +1210,41 @@ static void handle_icount_deadline(void)
    }
 }

+static void prepare_icount_for_run(CPUState *cpu)
+{
+    if (use_icount) {
+        int insns_left;
+
+        /* These should always be cleared by process_icount_data after
+         * each vCPU execution. However u16.high can be raised
+         * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
+         */
+        g_assert(cpu->icount_decr.u16.low == 0);
+        g_assert(cpu->icount_extra == 0);
+
+        cpu->icount_budget = tcg_get_icount_limit();
+        insns_left = MIN(0xffff, cpu->icount_budget);
+        cpu->icount_decr.u16.low = insns_left;
+        cpu->icount_extra = cpu->icount_budget - insns_left;
+    }
+}
+
+static void process_icount_data(CPUState *cpu)
+{
+    if (use_icount) {
+        /* Account for executed instructions */
+        cpu_update_icount(cpu);
+
+        /* Reset the counters */
+        cpu->icount_decr.u16.low = 0;
+        cpu->icount_extra = 0;
+        cpu->icount_budget = 0;
+
+        replay_account_executed_instructions();
+    }
+}
+
+
 static int tcg_cpu_exec(CPUState *cpu)
 {
    int ret;
@@ -1189,20 +1255,6 @@ static int tcg_cpu_exec(CPUState *cpu)
 #ifdef CONFIG_PROFILER
    ti = profile_getclock();
 #endif
-    if (use_icount) {
-        int64_t count;
-        int decr;
-        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
-                                    + cpu->icount_extra);
-        cpu->icount_decr.u16.low = 0;
-        cpu->icount_extra = 0;
-        count = tcg_get_icount_limit();
-        timers_state.qemu_icount += count;
-        decr = (count > 0xffff) ? 0xffff : count;
-        count -= decr;
-        cpu->icount_decr.u16.low = decr;
-        cpu->icount_extra = count;
-    }
    qemu_mutex_unlock_iothread();
    cpu_exec_start(cpu);
    ret = cpu_exec(cpu);
@@ -1211,15 +1263,6 @@ static int tcg_cpu_exec(CPUState *cpu)
 #ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
 #endif
-    if (use_icount) {
-        /* Fold pending instructions back into the
-           instruction counter, and clear the interrupt flag.  */
-        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
-                        + cpu->icount_extra);
-        cpu->icount_decr.u32 = 0;
-        cpu->icount_extra = 0;
-        replay_account_executed_instructions();
-    }
    return ret;
 }

@@ -1306,7 +1349,13 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg)

            if (cpu_can_run(cpu)) {
                int r;
+
+                prepare_icount_for_run(cpu);
+
                r = tcg_cpu_exec(cpu);
+
+                process_icount_data(cpu);
+
                if (r == EXCP_DEBUG) {
                    cpu_handle_guest_debug(cpu);
                    break;
@@ -1392,6 +1441,8 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
 {
    CPUState *cpu = arg;

+    g_assert(!use_icount);
+
    rcu_register_thread();

    qemu_mutex_lock_iothread();
@@ -1434,8 +1485,6 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
            }
        }

-        handle_icount_deadline();
-
        atomic_mb_set(&cpu->exit_request, 0);
        qemu_tcg_wait_io_event(cpu);
    }
--- a/crypto/block-luks.c
+++ b/crypto/block-luks.c
@@ -473,10 +473,10 @@ qcrypto_block_luks_load_key(QCryptoBlock *block,
     * then encrypted.
     */
    rv = readfunc(block,
+                  opaque,
                  slot->key_offset * QCRYPTO_BLOCK_LUKS_SECTOR_SIZE,
                  splitkey, splitkeylen,
-                  errp,
-                  opaque);
+                  errp);
    if (rv < 0) {
        goto cleanup;
    }
@@ -676,11 +676,10 @@ qcrypto_block_luks_open(QCryptoBlock *block,

    /* Read the entire LUKS header, minus the key material from
     * the underlying device */
-    rv = readfunc(block, 0,
+    rv = readfunc(block, opaque, 0,
                  (uint8_t *)&luks->header,
                  sizeof(luks->header),
-                  errp,
-                  opaque);
+                  errp);
    if (rv < 0) {
        ret = rv;
        goto fail;
@@ -1246,7 +1245,7 @@ qcrypto_block_luks_create(QCryptoBlock *block,
        QCRYPTO_BLOCK_LUKS_SECTOR_SIZE;

    /* Reserve header space to match payload offset */
-    initfunc(block, block->payload_offset, &local_err, opaque);
+    initfunc(block, opaque, block->payload_offset, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto error;
@@ -1268,11 +1267,10 @@ qcrypto_block_luks_create(QCryptoBlock *block,


    /* Write out the partition header and key slot headers */
-    writefunc(block, 0,
+    writefunc(block, opaque, 0,
              (const uint8_t *)&luks->header,
              sizeof(luks->header),
-              &local_err,
-              opaque);
+              &local_err);

    /* Delay checking local_err until we've byte-swapped */

@@ -1297,12 +1295,11 @@ qcrypto_block_luks_create(QCryptoBlock *block,

    /* Write out the master key material, starting at the
     * sector immediately following the partition header. */
-    if (writefunc(block,
+    if (writefunc(block, opaque,
                  luks->header.key_slots[0].key_offset *
                  QCRYPTO_BLOCK_LUKS_SECTOR_SIZE,
                  splitkey, splitkeylen,
-                  errp,
-                  opaque) != splitkeylen) {
+                  errp) != splitkeylen) {
        goto error;
    }

--- a/default-configs/arm-softmmu.mak
+++ b/default-configs/arm-softmmu.mak
@@ -29,6 +29,7 @@ CONFIG_LAN9118=y
 CONFIG_SMC91C111=y
 CONFIG_ALLWINNER_EMAC=y
 CONFIG_IMX_FEC=y
+CONFIG_FTGMAC100=y
 CONFIG_DS1338=y
 CONFIG_PFLASH_CFI01=y
 CONFIG_PFLASH_CFI02=y
--- a/default-configs/i386-softmmu.mak
+++ b/default-configs/i386-softmmu.mak
@@ -39,7 +39,6 @@ CONFIG_TPM_TIS=$(CONFIG_TPM)
 CONFIG_MC146818RTC=y
 CONFIG_PCI_PIIX=y
 CONFIG_WDT_IB700=y
-CONFIG_XEN_I386=$(CONFIG_XEN)
 CONFIG_ISA_DEBUG=y
 CONFIG_ISA_TESTDEV=y
 CONFIG_VMPORT=y
--- a/default-configs/ppc-softmmu.mak
+++ b/default-configs/ppc-softmmu.mak
@@ -45,6 +45,7 @@ CONFIG_OPENPIC_KVM=$(and $(CONFIG_E500),$(CONFIG_KVM))
 CONFIG_PLATFORM_BUS=y
 CONFIG_ETSEC=y
 CONFIG_LIBDECNUMBER=y
+CONFIG_SM501=y
 # For PReP
 CONFIG_SERIAL_ISA=y
 CONFIG_MC146818RTC=y
--- a/default-configs/ppc64-softmmu.mak
+++ b/default-configs/ppc64-softmmu.mak
@@ -6,6 +6,10 @@ include usb.mak
 CONFIG_VIRTIO_VGA=y
 CONFIG_ESCC=y
 CONFIG_M48T59=y
+CONFIG_IPMI=y
+CONFIG_IPMI_LOCAL=y
+CONFIG_IPMI_EXTERN=y
+CONFIG_ISA_IPMI_BT=y
 CONFIG_SERIAL=y
 CONFIG_PARALLEL=y
 CONFIG_I8254=y
@@ -47,6 +51,7 @@ CONFIG_OPENPIC_KVM=$(and $(CONFIG_E500),$(CONFIG_KVM))
 CONFIG_PLATFORM_BUS=y
 CONFIG_ETSEC=y
 CONFIG_LIBDECNUMBER=y
+CONFIG_SM501=y
 # For pSeries
 CONFIG_XICS=$(CONFIG_PSERIES)
 CONFIG_XICS_SPAPR=$(CONFIG_PSERIES)
--- a/default-configs/ppcemb-softmmu.mak
+++ b/default-configs/ppcemb-softmmu.mak
@@ -15,3 +15,4 @@ CONFIG_I8259=y
 CONFIG_XILINX=y
 CONFIG_XILINX_ETHLITE=y
 CONFIG_LIBDECNUMBER=y
+CONFIG_SM501=y
--- a/default-configs/x86_64-softmmu.mak
+++ b/default-configs/x86_64-softmmu.mak
@@ -39,7 +39,6 @@ CONFIG_TPM_TIS=$(CONFIG_TPM)
 CONFIG_MC146818RTC=y
 CONFIG_PCI_PIIX=y
 CONFIG_WDT_IB700=y
-CONFIG_XEN_I386=$(CONFIG_XEN)
 CONFIG_ISA_DEBUG=y
 CONFIG_ISA_TESTDEV=y
 CONFIG_VMPORT=y
--- a/disas/cris.c
+++ b/disas/cris.c
@@ -2048,7 +2048,7 @@ print_with_operands (const struct cris_opcode *opcodep,
 	  {
 	    /* We're looking at [pc+], i.e. we need to output an immediate
 	       number, where the size can depend on different things.  */
-	    long number;
+	    int32_t number;
 	    int signedp
 	      = ((*cs == 'z' && (insn & 0x20))
 		 || opcodep->match == BDAP_QUICK_OPCODE);
@@ -2290,7 +2290,7 @@ print_with_operands (const struct cris_opcode *opcodep,

 		    if ((prefix_insn & 0x400) && (prefix_insn & 15) == 15)
 		      {
-			long number;
+			int32_t number;
 			unsigned int nbytes;

 			/* It's a value.  Get its size.  */
--- a/exec.c
+++ b/exec.c
@@ -223,6 +223,12 @@ struct CPUAddressSpace {
    MemoryListener tcg_as_listener;
 };

+struct DirtyBitmapSnapshot {
+    ram_addr_t start;
+    ram_addr_t end;
+    unsigned long dirty[];
+};
+
 #endif

 #if !defined(CONFIG_USER_ONLY)
@@ -1061,6 +1067,75 @@ bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
    return dirty;
 }

+DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
+     (ram_addr_t start, ram_addr_t length, unsigned client)
+{
+    DirtyMemoryBlocks *blocks;
+    unsigned long align = 1UL << (TARGET_PAGE_BITS + BITS_PER_LEVEL);
+    ram_addr_t first = QEMU_ALIGN_DOWN(start, align);
+    ram_addr_t last  = QEMU_ALIGN_UP(start + length, align);
+    DirtyBitmapSnapshot *snap;
+    unsigned long page, end, dest;
+
+    snap = g_malloc0(sizeof(*snap) +
+                     ((last - first) >> (TARGET_PAGE_BITS + 3)));
+    snap->start = first;
+    snap->end   = last;
+
+    page = first >> TARGET_PAGE_BITS;
+    end  = last  >> TARGET_PAGE_BITS;
+    dest = 0;
+
+    rcu_read_lock();
+
+    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
+
+    while (page < end) {
+        unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
+        unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
+        unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
+
+        assert(QEMU_IS_ALIGNED(offset, (1 << BITS_PER_LEVEL)));
+        assert(QEMU_IS_ALIGNED(num,    (1 << BITS_PER_LEVEL)));
+        offset >>= BITS_PER_LEVEL;
+
+        bitmap_copy_and_clear_atomic(snap->dirty + dest,
+                                     blocks->blocks[idx] + offset,
+                                     num);
+        page += num;
+        dest += num >> BITS_PER_LEVEL;
+    }
+
+    rcu_read_unlock();
+
+    if (tcg_enabled()) {
+        tlb_reset_dirty_range_all(start, length);
+    }
+
+    return snap;
+}
+
+bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
+                                            ram_addr_t start,
+                                            ram_addr_t length)
+{
+    unsigned long page, end;
+
+    assert(start >= snap->start);
+    assert(start + length <= snap->end);
+
+    end = TARGET_PAGE_ALIGN(start + length - snap->start) >> TARGET_PAGE_BITS;
+    page = (start - snap->start) >> TARGET_PAGE_BITS;
+
+    while (page < end) {
+        if (test_bit(page, snap->dirty)) {
+            return true;
+        }
+        page++;
+    }
+    return false;
+}
+
 /* Called from RCU critical section */
 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
                                       MemoryRegionSection *section,
@@ -1528,7 +1603,7 @@ static ram_addr_t find_ram_offset(ram_addr_t size)
    return offset;
 }

-ram_addr_t last_ram_offset(void)
+unsigned long last_ram_page(void)
 {
    RAMBlock *block;
    ram_addr_t last = 0;
@@ -1538,7 +1613,7 @@ ram_addr_t last_ram_offset(void)
        last = MAX(last, block->offset + block->max_length);
    }
    rcu_read_unlock();
-    return last;
+    return last >> TARGET_PAGE_BITS;
 }

 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
@@ -1727,7 +1802,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
    ram_addr_t old_ram_size, new_ram_size;
    Error *err = NULL;

-    old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
+    old_ram_size = last_ram_page();

    qemu_mutex_lock_ramlist();
    new_block->offset = find_ram_offset(new_block->max_length);
@@ -1758,7 +1833,6 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
    new_ram_size = MAX(old_ram_size,
              (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
    if (new_ram_size > old_ram_size) {
-        migration_bitmap_extend(old_ram_size, new_ram_size);
        dirty_memory_extend(old_ram_size, new_ram_size);
    }
    /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
@@ -3236,75 +3310,33 @@ int64_t address_space_cache_init(MemoryRegionCache *cache,
                                 hwaddr len,
                                 bool is_write)
 {
-    hwaddr l, xlat;
-    MemoryRegion *mr;
-    void *ptr;
-
-    assert(len > 0);
-
-    l = len;
-    mr = address_space_translate(as, addr, &xlat, &l, is_write);
-    if (!memory_access_is_direct(mr, is_write)) {
-        return -EINVAL;
-    }
-
-    l = address_space_extend_translation(as, addr, len, mr, xlat, l, is_write);
-    ptr = qemu_ram_ptr_length(mr->ram_block, xlat, &l);
-
-    cache->xlat = xlat;
-    cache->is_write = is_write;
-    cache->mr = mr;
-    cache->ptr = ptr;
-    cache->len = l;
-    memory_region_ref(cache->mr);
-
-    return l;
+    cache->len = len;
+    cache->as = as;
+    cache->xlat = addr;
+    return len;
 }

 void address_space_cache_invalidate(MemoryRegionCache *cache,
                                    hwaddr addr,
                                    hwaddr access_len)
 {
-    assert(cache->is_write);
-    invalidate_and_set_dirty(cache->mr, addr + cache->xlat, access_len);
 }

 void address_space_cache_destroy(MemoryRegionCache *cache)
 {
-    if (!cache->mr) {
-        return;
-    }
-
-    if (xen_enabled()) {
-        xen_invalidate_map_cache_entry(cache->ptr);
-    }
-    memory_region_unref(cache->mr);
-    cache->mr = NULL;
-}
-
-/* Called from RCU critical section.  This function has the same
- * semantics as address_space_translate, but it only works on a
- * predefined range of a MemoryRegion that was mapped with
- * address_space_cache_init.
- */
-static inline MemoryRegion *address_space_translate_cached(
-    MemoryRegionCache *cache, hwaddr addr, hwaddr *xlat,
-    hwaddr *plen, bool is_write)
-{
-    assert(addr < cache->len && *plen <= cache->len - addr);
-    *xlat = addr + cache->xlat;
-    return cache->mr;
+    cache->as = NULL;
 }

 #define ARG1_DECL                MemoryRegionCache *cache
 #define ARG1                     cache
 #define SUFFIX                   _cached
-#define TRANSLATE(...)           address_space_translate_cached(cache, __VA_ARGS__)
+#define TRANSLATE(addr, ...)     \
+    address_space_translate(cache->as, cache->xlat + (addr), __VA_ARGS__)
 #define IS_DIRECT(mr, is_write)  true
-#define MAP_RAM(mr, ofs)         (cache->ptr + (ofs - cache->xlat))
-#define INVALIDATE(mr, ofs, len) ((void)0)
-#define RCU_READ_LOCK()          ((void)0)
-#define RCU_READ_UNLOCK()        ((void)0)
+#define MAP_RAM(mr, ofs)         qemu_map_ram_ptr((mr)->ram_block, ofs)
+#define INVALIDATE(mr, ofs, len) invalidate_and_set_dirty(mr, ofs, len)
+#define RCU_READ_LOCK()          rcu_read_lock()
+#define RCU_READ_UNLOCK()        rcu_read_unlock()
 #include "memory_ldst.inc.c"

 /* virtual memory access for debug (includes writing to ROM) */
@@ -3349,9 +3381,9 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
 * Allows code that needs to deal with migration bitmaps etc to still be built
 * target independent.
 */
-size_t qemu_target_page_bits(void)
+size_t qemu_target_page_size(void)
 {
-    return TARGET_PAGE_BITS;
+    return TARGET_PAGE_SIZE;
 }

 #endif
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -523,6 +523,38 @@ Dump 80 16 bit values at the start of the video memory.
 0x000b8090: 0x0720 0x0720 0x0720 0x0720 0x0720 0x0720 0x0720 0x0720
@end smallexample
@end itemize
+ETEXI
+
+    {
+        .name       = "gpa2hva",
+        .args_type  = "addr:l",
+        .params     = "addr",
+        .help       = "print the host virtual address corresponding to a guest physical address",
+        .cmd        = hmp_gpa2hva,
+    },
+
+STEXI
+@item gpa2hva @var{addr}
+@findex gpa2hva
+Print the host virtual address at which the guest's physical address @var{addr}
+is mapped.
+ETEXI
+
+#ifdef CONFIG_LINUX
+    {
+        .name       = "gpa2hpa",
+        .args_type  = "addr:l",
+        .params     = "addr",
+        .help       = "print the host physical address corresponding to a guest physical address",
+        .cmd        = hmp_gpa2hpa,
+    },
+#endif
+
+STEXI
+@item gpa2hpa @var{addr}
+@findex gpa2hpa
+Print the host physical address at which the guest's physical address @var{addr}
+is mapped.
 ETEXI

    {
--- a/hmp.c
+++ b/hmp.c
@@ -215,6 +215,9 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
                       info->ram->normal_bytes >> 10);
        monitor_printf(mon, "dirty sync count: %" PRIu64 "\n",
                       info->ram->dirty_sync_count);
+        monitor_printf(mon, "page size: %" PRIu64 " kbytes\n",
+                       info->ram->page_size >> 10);
+
        if (info->ram->dirty_pages_rate) {
            monitor_printf(mon, "dirty pages rate: %" PRIu64 " pages\n",
                           info->ram->dirty_pages_rate);
@@ -265,13 +268,11 @@ void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict)
    caps = qmp_query_migrate_capabilities(NULL);

    if (caps) {
-        monitor_printf(mon, "capabilities: ");
        for (cap = caps; cap; cap = cap->next) {
-            monitor_printf(mon, "%s: %s ",
+            monitor_printf(mon, "%s: %s\n",
                           MigrationCapability_lookup[cap->value->capability],
                           cap->value->state ? "on" : "off");
        }
-        monitor_printf(mon, "\n");
    }

    qapi_free_MigrationCapabilityStatusList(caps);
@@ -284,46 +285,44 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
    params = qmp_query_migrate_parameters(NULL);

    if (params) {
-        monitor_printf(mon, "parameters:");
        assert(params->has_compress_level);
-        monitor_printf(mon, " %s: %" PRId64,
+        monitor_printf(mon, "%s: %" PRId64 "\n",
            MigrationParameter_lookup[MIGRATION_PARAMETER_COMPRESS_LEVEL],
            params->compress_level);
        assert(params->has_compress_threads);
-        monitor_printf(mon, " %s: %" PRId64,
+        monitor_printf(mon, "%s: %" PRId64 "\n",
            MigrationParameter_lookup[MIGRATION_PARAMETER_COMPRESS_THREADS],
            params->compress_threads);
        assert(params->has_decompress_threads);
-        monitor_printf(mon, " %s: %" PRId64,
+        monitor_printf(mon, "%s: %" PRId64 "\n",
            MigrationParameter_lookup[MIGRATION_PARAMETER_DECOMPRESS_THREADS],
            params->decompress_threads);
        assert(params->has_cpu_throttle_initial);
-        monitor_printf(mon, " %s: %" PRId64,
+        monitor_printf(mon, "%s: %" PRId64 "\n",
            MigrationParameter_lookup[MIGRATION_PARAMETER_CPU_THROTTLE_INITIAL],
            params->cpu_throttle_initial);
        assert(params->has_cpu_throttle_increment);
-        monitor_printf(mon, " %s: %" PRId64,
+        monitor_printf(mon, "%s: %" PRId64 "\n",
            MigrationParameter_lookup[MIGRATION_PARAMETER_CPU_THROTTLE_INCREMENT],
            params->cpu_throttle_increment);
-        monitor_printf(mon, " %s: '%s'",
+        monitor_printf(mon, "%s: '%s'\n",
            MigrationParameter_lookup[MIGRATION_PARAMETER_TLS_CREDS],
            params->has_tls_creds ? params->tls_creds : "");
-        monitor_printf(mon, " %s: '%s'",
+        monitor_printf(mon, "%s: '%s'\n",
            MigrationParameter_lookup[MIGRATION_PARAMETER_TLS_HOSTNAME],
            params->has_tls_hostname ? params->tls_hostname : "");
        assert(params->has_max_bandwidth);
-        monitor_printf(mon, " %s: %" PRId64 " bytes/second",
+        monitor_printf(mon, "%s: %" PRId64 " bytes/second\n",
            MigrationParameter_lookup[MIGRATION_PARAMETER_MAX_BANDWIDTH],
            params->max_bandwidth);
        assert(params->has_downtime_limit);
-        monitor_printf(mon, " %s: %" PRId64 " milliseconds",
+        monitor_printf(mon, "%s: %" PRId64 " milliseconds\n",
            MigrationParameter_lookup[MIGRATION_PARAMETER_DOWNTIME_LIMIT],
            params->downtime_limit);
        assert(params->has_x_checkpoint_delay);
-        monitor_printf(mon, " %s: %" PRId64,
+        monitor_printf(mon, "%s: %" PRId64 "\n",
            MigrationParameter_lookup[MIGRATION_PARAMETER_X_CHECKPOINT_DELAY],
            params->x_checkpoint_delay);
-        monitor_printf(mon, "\n");
    }

    qapi_free_MigrationParameters(params);
--- a/hw/9pfs/9p-local.c
+++ b/hw/9pfs/9p-local.c
@@ -1098,8 +1098,13 @@ static int local_name_to_path(FsContext *ctx, V9fsPath *dir_path,
 {
    if (dir_path) {
        v9fs_path_sprintf(target, "%s/%s", dir_path->data, name);
-    } else {
+    } else if (strcmp(name, "/")) {
        v9fs_path_sprintf(target, "%s", name);
+    } else {
+        /* We want the path of the export root to be relative, otherwise
+         * "*at()" syscalls would treat it as "/" in the host.
+         */
+        v9fs_path_sprintf(target, "%s", ".");
    }
    return 0;
 }
--- a/hw/9pfs/9p-xattr.c
+++ b/hw/9pfs/9p-xattr.c
@@ -108,6 +108,7 @@ ssize_t v9fs_list_xattr(FsContext *ctx, const char *path,
    g_free(name);
    close_preserve_errno(dirfd);
    if (xattr_len < 0) {
+        g_free(orig_value);
        return -1;
    }

--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -539,14 +539,15 @@ static void coroutine_fn virtfs_reset(V9fsPDU *pdu)

    /* Free all fids */
    while (s->fid_list) {
+        /* Get fid */
        fidp = s->fid_list;
-        s->fid_list = fidp->next;
+        fidp->ref++;

-        if (fidp->ref) {
-            fidp->clunked = 1;
-        } else {
-            free_fid(pdu, fidp);
-        }
+        /* Clunk fid */
+        s->fid_list = fidp->next;
+        fidp->clunked = 1;
+
+        put_fid(pdu, fidp);
    }
 }

@@ -2387,8 +2388,10 @@ static void coroutine_fn v9fs_flush(void *opaque)
         * Wait for pdu to complete.
         */
        qemu_co_queue_wait(&cancel_pdu->complete, NULL);
-        cancel_pdu->cancelled = 0;
-        pdu_free(cancel_pdu);
+        if (!qemu_co_queue_next(&cancel_pdu->complete)) {
+            cancel_pdu->cancelled = 0;
+            pdu_free(cancel_pdu);
+        }
    }
    pdu_complete(pdu, 7);
 }
--- a/hw/9pfs/9p.h
+++ b/hw/9pfs/9p.h
@@ -119,6 +119,12 @@ static inline char *rpath(FsContext *ctx, const char *path)
 typedef struct V9fsPDU V9fsPDU;
 struct V9fsState;

+typedef struct {
+    uint32_t size_le;
+    uint8_t id;
+    uint16_t tag_le;
+} QEMU_PACKED P9MsgHeader;
+
 struct V9fsPDU
 {
    uint32_t size;
--- a/hw/9pfs/Makefile.objs
+++ b/hw/9pfs/Makefile.objs
@@ -5,5 +5,6 @@ common-obj-y += coth.o cofs.o codir.o cofile.o
 common-obj-y += coxattr.o 9p-synth.o
 common-obj-$(CONFIG_OPEN_BY_HANDLE) +=  9p-handle.o
 common-obj-y += 9p-proxy.o
+common-obj-$(CONFIG_XEN) += xen-9p-backend.o

 obj-y += virtio-9p-device.o
--- a/hw/9pfs/virtio-9p-device.c
+++ b/hw/9pfs/virtio-9p-device.c
@@ -46,11 +46,7 @@ static void handle_9p_output(VirtIODevice *vdev, VirtQueue *vq)
    VirtQueueElement *elem;

    while ((pdu = pdu_alloc(s))) {
-        struct {
-            uint32_t size_le;
-            uint8_t id;
-            uint16_t tag_le;
-        } QEMU_PACKED out;
+        P9MsgHeader out;

        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
--- a/hw/9pfs/xen-9p-backend.c
+++ b/hw/9pfs/xen-9p-backend.c
@@ -0,0 +1,440 @@
+/*
+ * Xen 9p backend
+ *
+ * Copyright Aporeto 2017
+ *
+ * Authors:
+ *  Stefano Stabellini <stefano@aporeto.com>
+ *
+ */
+
+#include "qemu/osdep.h"
+
+#include "hw/hw.h"
+#include "hw/9pfs/9p.h"
+#include "hw/xen/xen_backend.h"
+#include "hw/9pfs/xen-9pfs.h"
+#include "qemu/config-file.h"
+#include "fsdev/qemu-fsdev.h"
+
+#define VERSIONS "1"
+#define MAX_RINGS 8
+#define MAX_RING_ORDER 8
+
+typedef struct Xen9pfsRing {
+    struct Xen9pfsDev *priv;
+
+    int ref;
+    xenevtchn_handle   *evtchndev;
+    int evtchn;
+    int local_port;
+    int ring_order;
+    struct xen_9pfs_data_intf *intf;
+    unsigned char *data;
+    struct xen_9pfs_data ring;
+
+    struct iovec *sg;
+    QEMUBH *bh;
+
+    /* local copies, so that we can read/write PDU data directly from
+     * the ring */
+    RING_IDX out_cons, out_size, in_cons;
+    bool inprogress;
+} Xen9pfsRing;
+
+typedef struct Xen9pfsDev {
+    struct XenDevice xendev;  /* must be first */
+    V9fsState state;
+    char *path;
+    char *security_model;
+    char *tag;
+    char *id;
+
+    int num_rings;
+    Xen9pfsRing *rings;
+} Xen9pfsDev;
+
+static void xen_9pfs_in_sg(Xen9pfsRing *ring,
+                           struct iovec *in_sg,
+                           int *num,
+                           uint32_t idx,
+                           uint32_t size)
+{
+    RING_IDX cons, prod, masked_prod, masked_cons;
+
+    cons = ring->intf->in_cons;
+    prod = ring->intf->in_prod;
+    xen_rmb();
+    masked_prod = xen_9pfs_mask(prod, XEN_FLEX_RING_SIZE(ring->ring_order));
+    masked_cons = xen_9pfs_mask(cons, XEN_FLEX_RING_SIZE(ring->ring_order));
+
+    if (masked_prod < masked_cons) {
+        in_sg[0].iov_base = ring->ring.in + masked_prod;
+        in_sg[0].iov_len = masked_cons - masked_prod;
+        *num = 1;
+    } else {
+        in_sg[0].iov_base = ring->ring.in + masked_prod;
+        in_sg[0].iov_len = XEN_FLEX_RING_SIZE(ring->ring_order) - masked_prod;
+        in_sg[1].iov_base = ring->ring.in;
+        in_sg[1].iov_len = masked_cons;
+        *num = 2;
+    }
+}
+
+static void xen_9pfs_out_sg(Xen9pfsRing *ring,
+                            struct iovec *out_sg,
+                            int *num,
+                            uint32_t idx)
+{
+    RING_IDX cons, prod, masked_prod, masked_cons;
+
+    cons = ring->intf->out_cons;
+    prod = ring->intf->out_prod;
+    xen_rmb();
+    masked_prod = xen_9pfs_mask(prod, XEN_FLEX_RING_SIZE(ring->ring_order));
+    masked_cons = xen_9pfs_mask(cons, XEN_FLEX_RING_SIZE(ring->ring_order));
+
+    if (masked_cons < masked_prod) {
+        out_sg[0].iov_base = ring->ring.out + masked_cons;
+        out_sg[0].iov_len = ring->out_size;
+        *num = 1;
+    } else {
+        if (ring->out_size >
+            (XEN_FLEX_RING_SIZE(ring->ring_order) - masked_cons)) {
+            out_sg[0].iov_base = ring->ring.out + masked_cons;
+            out_sg[0].iov_len = XEN_FLEX_RING_SIZE(ring->ring_order) -
+                                masked_cons;
+            out_sg[1].iov_base = ring->ring.out;
+            out_sg[1].iov_len = ring->out_size -
+                                (XEN_FLEX_RING_SIZE(ring->ring_order) -
+                                 masked_cons);
+            *num = 2;
+        } else {
+            out_sg[0].iov_base = ring->ring.out + masked_cons;
+            out_sg[0].iov_len = ring->out_size;
+            *num = 1;
+        }
+    }
+}
+
+static ssize_t xen_9pfs_pdu_vmarshal(V9fsPDU *pdu,
+                                     size_t offset,
+                                     const char *fmt,
+                                     va_list ap)
+{
+    Xen9pfsDev *xen_9pfs = container_of(pdu->s, Xen9pfsDev, state);
+    struct iovec in_sg[2];
+    int num;
+
+    xen_9pfs_in_sg(&xen_9pfs->rings[pdu->tag % xen_9pfs->num_rings],
+                   in_sg, &num, pdu->idx, ROUND_UP(offset + 128, 512));
+    return v9fs_iov_vmarshal(in_sg, num, offset, 0, fmt, ap);
+}
+
+static ssize_t xen_9pfs_pdu_vunmarshal(V9fsPDU *pdu,
+                                       size_t offset,
+                                       const char *fmt,
+                                       va_list ap)
+{
+    Xen9pfsDev *xen_9pfs = container_of(pdu->s, Xen9pfsDev, state);
+    struct iovec out_sg[2];
+    int num;
+
+    xen_9pfs_out_sg(&xen_9pfs->rings[pdu->tag % xen_9pfs->num_rings],
+                    out_sg, &num, pdu->idx);
+    return v9fs_iov_vunmarshal(out_sg, num, offset, 0, fmt, ap);
+}
+
+static void xen_9pfs_init_out_iov_from_pdu(V9fsPDU *pdu,
+                                           struct iovec **piov,
+                                           unsigned int *pniov)
+{
+    Xen9pfsDev *xen_9pfs = container_of(pdu->s, Xen9pfsDev, state);
+    Xen9pfsRing *ring = &xen_9pfs->rings[pdu->tag % xen_9pfs->num_rings];
+    int num;
+
+    g_free(ring->sg);
+
+    ring->sg = g_malloc0(sizeof(*ring->sg) * 2);
+    xen_9pfs_out_sg(ring, ring->sg, &num, pdu->idx);
+    *piov = ring->sg;
+    *pniov = num;
+}
+
+static void xen_9pfs_init_in_iov_from_pdu(V9fsPDU *pdu,
+                                          struct iovec **piov,
+                                          unsigned int *pniov,
+                                          size_t size)
+{
+    Xen9pfsDev *xen_9pfs = container_of(pdu->s, Xen9pfsDev, state);
+    Xen9pfsRing *ring = &xen_9pfs->rings[pdu->tag % xen_9pfs->num_rings];
+    int num;
+
+    g_free(ring->sg);
+
+    ring->sg = g_malloc0(sizeof(*ring->sg) * 2);
+    xen_9pfs_in_sg(ring, ring->sg, &num, pdu->idx, size);
+    *piov = ring->sg;
+    *pniov = num;
+}
+
+static void xen_9pfs_push_and_notify(V9fsPDU *pdu)
+{
+    RING_IDX prod;
+    Xen9pfsDev *priv = container_of(pdu->s, Xen9pfsDev, state);
+    Xen9pfsRing *ring = &priv->rings[pdu->tag % priv->num_rings];
+
+    g_free(ring->sg);
+    ring->sg = NULL;
+
+    ring->intf->out_cons = ring->out_cons;
+    xen_wmb();
+
+    prod = ring->intf->in_prod;
+    xen_rmb();
+    ring->intf->in_prod = prod + pdu->size;
+    xen_wmb();
+
+    ring->inprogress = false;
+    xenevtchn_notify(ring->evtchndev, ring->local_port);
+
+    qemu_bh_schedule(ring->bh);
+}
+
+static const struct V9fsTransport xen_9p_transport = {
+    .pdu_vmarshal = xen_9pfs_pdu_vmarshal,
+    .pdu_vunmarshal = xen_9pfs_pdu_vunmarshal,
+    .init_in_iov_from_pdu = xen_9pfs_init_in_iov_from_pdu,
+    .init_out_iov_from_pdu = xen_9pfs_init_out_iov_from_pdu,
+    .push_and_notify = xen_9pfs_push_and_notify,
+};
+
+static int xen_9pfs_init(struct XenDevice *xendev)
+{
+    return 0;
+}
+
+static int xen_9pfs_receive(Xen9pfsRing *ring)
+{
+    P9MsgHeader h;
+    RING_IDX cons, prod, masked_prod, masked_cons;
+    V9fsPDU *pdu;
+
+    if (ring->inprogress) {
+        return 0;
+    }
+
+    cons = ring->intf->out_cons;
+    prod = ring->intf->out_prod;
+    xen_rmb();
+
+    if (xen_9pfs_queued(prod, cons, XEN_FLEX_RING_SIZE(ring->ring_order)) <
+        sizeof(h)) {
+        return 0;
+    }
+    ring->inprogress = true;
+
+    masked_prod = xen_9pfs_mask(prod, XEN_FLEX_RING_SIZE(ring->ring_order));
+    masked_cons = xen_9pfs_mask(cons, XEN_FLEX_RING_SIZE(ring->ring_order));
+
+    xen_9pfs_read_packet((uint8_t *) &h, ring->ring.out, sizeof(h),
+                         masked_prod, &masked_cons,
+                         XEN_FLEX_RING_SIZE(ring->ring_order));
+
+    /* cannot fail, because we only handle one request per ring at a time */
+    pdu = pdu_alloc(&ring->priv->state);
+    pdu->size = le32_to_cpu(h.size_le);
+    pdu->id = h.id;
+    pdu->tag = le32_to_cpu(h.tag_le);
+    ring->out_size = le32_to_cpu(h.size_le);
+    ring->out_cons = cons + le32_to_cpu(h.size_le);
+
+    qemu_co_queue_init(&pdu->complete);
+    pdu_submit(pdu);
+
+    return 0;
+}
+
+static void xen_9pfs_bh(void *opaque)
+{
+    Xen9pfsRing *ring = opaque;
+    xen_9pfs_receive(ring);
+}
+
+static void xen_9pfs_evtchn_event(void *opaque)
+{
+    Xen9pfsRing *ring = opaque;
+    evtchn_port_t port;
+
+    port = xenevtchn_pending(ring->evtchndev);
+    xenevtchn_unmask(ring->evtchndev, port);
+
+    qemu_bh_schedule(ring->bh);
+}
+
+static int xen_9pfs_free(struct XenDevice *xendev)
+{
+    int i;
+    Xen9pfsDev *xen_9pdev = container_of(xendev, Xen9pfsDev, xendev);
+
+    g_free(xen_9pdev->id);
+    g_free(xen_9pdev->tag);
+    g_free(xen_9pdev->path);
+    g_free(xen_9pdev->security_model);
+
+    for (i = 0; i < xen_9pdev->num_rings; i++) {
+        if (xen_9pdev->rings[i].data != NULL) {
+            xengnttab_unmap(xen_9pdev->xendev.gnttabdev,
+                    xen_9pdev->rings[i].data,
+                    (1 << xen_9pdev->rings[i].ring_order));
+        }
+        if (xen_9pdev->rings[i].intf != NULL) {
+            xengnttab_unmap(xen_9pdev->xendev.gnttabdev,
+                    xen_9pdev->rings[i].intf,
+                    1);
+        }
+        if (xen_9pdev->rings[i].evtchndev > 0) {
+            qemu_set_fd_handler(xenevtchn_fd(xen_9pdev->rings[i].evtchndev),
+                    NULL, NULL, NULL);
+            xenevtchn_unbind(xen_9pdev->rings[i].evtchndev,
+                             xen_9pdev->rings[i].local_port);
+        }
+        if (xen_9pdev->rings[i].bh != NULL) {
+            qemu_bh_delete(xen_9pdev->rings[i].bh);
+        }
+    }
+    g_free(xen_9pdev->rings);
+    return 0;
+}
+
+static int xen_9pfs_connect(struct XenDevice *xendev)
+{
+    int i;
+    Xen9pfsDev *xen_9pdev = container_of(xendev, Xen9pfsDev, xendev);
+    V9fsState *s = &xen_9pdev->state;
+    QemuOpts *fsdev;
+
+    if (xenstore_read_fe_int(&xen_9pdev->xendev, "num-rings",
+                             &xen_9pdev->num_rings) == -1 ||
+        xen_9pdev->num_rings > MAX_RINGS || xen_9pdev->num_rings < 1) {
+        return -1;
+    }
+
+    xen_9pdev->rings = g_malloc0(xen_9pdev->num_rings * sizeof(Xen9pfsRing));
+    for (i = 0; i < xen_9pdev->num_rings; i++) {
+        char *str;
+        int ring_order;
+
+        xen_9pdev->rings[i].priv = xen_9pdev;
+        xen_9pdev->rings[i].evtchn = -1;
+        xen_9pdev->rings[i].local_port = -1;
+
+        str = g_strdup_printf("ring-ref%u", i);
+        if (xenstore_read_fe_int(&xen_9pdev->xendev, str,
+                                 &xen_9pdev->rings[i].ref) == -1) {
+            goto out;
+        }
+        g_free(str);
+        str = g_strdup_printf("event-channel-%u", i);
+        if (xenstore_read_fe_int(&xen_9pdev->xendev, str,
+                                 &xen_9pdev->rings[i].evtchn) == -1) {
+            goto out;
+        }
+        g_free(str);
+
+        xen_9pdev->rings[i].intf =  xengnttab_map_grant_ref(
+                xen_9pdev->xendev.gnttabdev,
+                xen_9pdev->xendev.dom,
+                xen_9pdev->rings[i].ref,
+                PROT_READ | PROT_WRITE);
+        if (!xen_9pdev->rings[i].intf) {
+            goto out;
+        }
+        ring_order = xen_9pdev->rings[i].intf->ring_order;
+        if (ring_order > MAX_RING_ORDER) {
+            goto out;
+        }
+        xen_9pdev->rings[i].ring_order = ring_order;
+        xen_9pdev->rings[i].data = xengnttab_map_domain_grant_refs(
+                xen_9pdev->xendev.gnttabdev,
+                (1 << ring_order),
+                xen_9pdev->xendev.dom,
+                xen_9pdev->rings[i].intf->ref,
+                PROT_READ | PROT_WRITE);
+        if (!xen_9pdev->rings[i].data) {
+            goto out;
+        }
+        xen_9pdev->rings[i].ring.in = xen_9pdev->rings[i].data;
+        xen_9pdev->rings[i].ring.out = xen_9pdev->rings[i].data +
+                                       XEN_FLEX_RING_SIZE(ring_order);
+
+        xen_9pdev->rings[i].bh = qemu_bh_new(xen_9pfs_bh, &xen_9pdev->rings[i]);
+        xen_9pdev->rings[i].out_cons = 0;
+        xen_9pdev->rings[i].out_size = 0;
+        xen_9pdev->rings[i].inprogress = false;
+
+
+        xen_9pdev->rings[i].evtchndev = xenevtchn_open(NULL, 0);
+        if (xen_9pdev->rings[i].evtchndev == NULL) {
+            goto out;
+        }
+        fcntl(xenevtchn_fd(xen_9pdev->rings[i].evtchndev), F_SETFD, FD_CLOEXEC);
+        xen_9pdev->rings[i].local_port = xenevtchn_bind_interdomain
+                                            (xen_9pdev->rings[i].evtchndev,
+                                             xendev->dom,
+                                             xen_9pdev->rings[i].evtchn);
+        if (xen_9pdev->rings[i].local_port == -1) {
+            xen_pv_printf(xendev, 0,
+                          "xenevtchn_bind_interdomain failed port=%d\n",
+                          xen_9pdev->rings[i].evtchn);
+            goto out;
+        }
+        xen_pv_printf(xendev, 2, "bind evtchn port %d\n", xendev->local_port);
+        qemu_set_fd_handler(xenevtchn_fd(xen_9pdev->rings[i].evtchndev),
+                xen_9pfs_evtchn_event, NULL, &xen_9pdev->rings[i]);
+    }
+
+    xen_9pdev->security_model = xenstore_read_be_str(xendev, "security_model");
+    xen_9pdev->path = xenstore_read_be_str(xendev, "path");
+    xen_9pdev->id = s->fsconf.fsdev_id =
+        g_strdup_printf("xen9p%d", xendev->dev);
+    xen_9pdev->tag = s->fsconf.tag = xenstore_read_fe_str(xendev, "tag");
+    v9fs_register_transport(s, &xen_9p_transport);
+    fsdev = qemu_opts_create(qemu_find_opts("fsdev"),
+            s->fsconf.tag,
+            1, NULL);
+    qemu_opt_set(fsdev, "fsdriver", "local", NULL);
+    qemu_opt_set(fsdev, "path", xen_9pdev->path, NULL);
+    qemu_opt_set(fsdev, "security_model", xen_9pdev->security_model, NULL);
+    qemu_opts_set_id(fsdev, s->fsconf.fsdev_id);
+    qemu_fsdev_add(fsdev);
+    v9fs_device_realize_common(s, NULL);
+
+    return 0;
+
+out:
+    xen_9pfs_free(xendev);
+    return -1;
+}
+
+static void xen_9pfs_alloc(struct XenDevice *xendev)
+{
+    xenstore_write_be_str(xendev, "versions", VERSIONS);
+    xenstore_write_be_int(xendev, "max-rings", MAX_RINGS);
+    xenstore_write_be_int(xendev, "max-ring-page-order", MAX_RING_ORDER);
+}
+
+static void xen_9pfs_disconnect(struct XenDevice *xendev)
+{
+    /* Dynamic hotplug of PV filesystems at runtime is not supported. */
+}
+
+struct XenDevOps xen_9pfs_ops = {
+    .size       = sizeof(Xen9pfsDev),
+    .flags      = DEVOPS_FLAG_NEED_GNTDEV,
+    .alloc      = xen_9pfs_alloc,
+    .init       = xen_9pfs_init,
+    .initialise = xen_9pfs_connect,
+    .disconnect = xen_9pfs_disconnect,
+    .free       = xen_9pfs_free,
+};
--- a/hw/9pfs/xen-9pfs.h
+++ b/hw/9pfs/xen-9pfs.h
@@ -0,0 +1,21 @@
+/*
+ * Xen 9p backend
+ *
+ * Copyright Aporeto 2017
+ *
+ * Authors:
+ *  Stefano Stabellini <stefano@aporeto.com>
+ *
+ * This work is licensed under the terms of the GNU GPL version 2 or
+ * later. See the COPYING file in the top-level directory.
+ *
+ */
+
+#include <xen/io/protocols.h>
+#include "hw/xen/io/ring.h"
+
+/*
+ * Do not merge into xen-9p-backend.c: clang doesn't allow unused static
+ * inline functions in c files.
+ */
+DEFINE_XEN_FLEX_RING_AND_INTF(xen_9pfs);
--- a/hw/acpi/tco.c
+++ b/hw/acpi/tco.c
@@ -75,8 +75,6 @@ static void tco_timer_expired(void *opaque)

    if (pm->smi_en & ICH9_PMIO_SMI_EN_TCO_EN) {
        ich9_generate_smi();
-    } else {
-        ich9_generate_nmi();
    }
    tr->tco.rld = tr->tco.tmr;
    tco_timer_reload(tr);
--- a/hw/arm/allwinner-a10.c
+++ b/hw/arm/allwinner-a10.c
@@ -118,12 +118,6 @@ static void aw_a10_class_init(ObjectClass *oc, void *data)
    DeviceClass *dc = DEVICE_CLASS(oc);

    dc->realize = aw_a10_realize;
-
-    /*
-     * Reason: creates an ARM CPU, thus use after free(), see
-     * arm_cpu_class_init()
-     */
-    dc->cannot_destroy_with_object_finalize_yet = true;
 }

 static const TypeInfo aw_a10_type_info = {
--- a/hw/arm/aspeed_soc.c
+++ b/hw/arm/aspeed_soc.c
@@ -19,6 +19,7 @@
 #include "hw/char/serial.h"
 #include "qemu/log.h"
 #include "hw/i2c/aspeed_i2c.h"
+#include "net/net.h"

 #define ASPEED_SOC_UART_5_BASE      0x00184000
 #define ASPEED_SOC_IOMEM_SIZE       0x00200000
@@ -33,6 +34,8 @@
 #define ASPEED_SOC_TIMER_BASE       0x1E782000
 #define ASPEED_SOC_WDT_BASE         0x1E785000
 #define ASPEED_SOC_I2C_BASE         0x1E78A000
+#define ASPEED_SOC_ETH1_BASE        0x1E660000
+#define ASPEED_SOC_ETH2_BASE        0x1E680000

 static const int uart_irqs[] = { 9, 32, 33, 34, 10 };
 static const int timer_irqs[] = { 16, 17, 18, 35, 36, 37, 38, 39, };
@@ -175,6 +178,10 @@ static void aspeed_soc_init(Object *obj)
    object_initialize(&s->wdt, sizeof(s->wdt), TYPE_ASPEED_WDT);
    object_property_add_child(obj, "wdt", OBJECT(&s->wdt), NULL);
    qdev_set_parent_bus(DEVICE(&s->wdt), sysbus_get_default());
+
+    object_initialize(&s->ftgmac100, sizeof(s->ftgmac100), TYPE_FTGMAC100);
+    object_property_add_child(obj, "ftgmac100", OBJECT(&s->ftgmac100), NULL);
+    qdev_set_parent_bus(DEVICE(&s->ftgmac100), sysbus_get_default());
 }

 static void aspeed_soc_realize(DeviceState *dev, Error **errp)
@@ -299,6 +306,20 @@ static void aspeed_soc_realize(DeviceState *dev, Error **errp)
        return;
    }
    sysbus_mmio_map(SYS_BUS_DEVICE(&s->wdt), 0, ASPEED_SOC_WDT_BASE);
+
+    /* Net */
+    qdev_set_nic_properties(DEVICE(&s->ftgmac100), &nd_table[0]);
+    object_property_set_bool(OBJECT(&s->ftgmac100), true, "aspeed", &err);
+    object_property_set_bool(OBJECT(&s->ftgmac100), true, "realized",
+                             &local_err);
+    error_propagate(&err, local_err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+    sysbus_mmio_map(SYS_BUS_DEVICE(&s->ftgmac100), 0, ASPEED_SOC_ETH1_BASE);
+    sysbus_connect_irq(SYS_BUS_DEVICE(&s->ftgmac100), 0,
+                       qdev_get_gpio_in(DEVICE(&s->vic), 2));
 }

 static void aspeed_soc_class_init(ObjectClass *oc, void *data)
--- a/hw/arm/bcm2836.c
+++ b/hw/arm/bcm2836.c
@@ -160,12 +160,6 @@ static void bcm2836_class_init(ObjectClass *oc, void *data)

    dc->props = bcm2836_props;
    dc->realize = bcm2836_realize;
-
-    /*
-     * Reason: creates an ARM CPU, thus use after free(), see
-     * arm_cpu_class_init()
-     */
-    dc->cannot_destroy_with_object_finalize_yet = true;
 }

 static const TypeInfo bcm2836_type_info = {
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -31,6 +31,9 @@
 #define KERNEL_LOAD_ADDR 0x00010000
 #define KERNEL64_LOAD_ADDR 0x00080000

+#define ARM64_TEXT_OFFSET_OFFSET    8
+#define ARM64_MAGIC_OFFSET          56
+
 typedef enum {
    FIXUP_NONE = 0,     /* do nothing */
    FIXUP_TERMINATOR,   /* end of insns */
@@ -768,6 +771,49 @@ static uint64_t arm_load_elf(struct arm_boot_info *info, uint64_t *pentry,
    return ret;
 }

+static uint64_t load_aarch64_image(const char *filename, hwaddr mem_base,
+                                   hwaddr *entry)
+{
+    hwaddr kernel_load_offset = KERNEL64_LOAD_ADDR;
+    uint8_t *buffer;
+    int size;
+
+    /* On aarch64, it's the bootloader's job to uncompress the kernel. */
+    size = load_image_gzipped_buffer(filename, LOAD_IMAGE_MAX_GUNZIP_BYTES,
+                                     &buffer);
+
+    if (size < 0) {
+        gsize len;
+
+        /* Load as raw file otherwise */
+        if (!g_file_get_contents(filename, (char **)&buffer, &len, NULL)) {
+            return -1;
+        }
+        size = len;
+    }
+
+    /* check the arm64 magic header value -- very old kernels may not have it */
+    if (memcmp(buffer + ARM64_MAGIC_OFFSET, "ARM\x64", 4) == 0) {
+        uint64_t hdrvals[2];
+
+        /* The arm64 Image header has text_offset and image_size fields at 8 and
+         * 16 bytes into the Image header, respectively. The text_offset field
+         * is only valid if the image_size is non-zero.
+         */
+        memcpy(&hdrvals, buffer + ARM64_TEXT_OFFSET_OFFSET, sizeof(hdrvals));
+        if (hdrvals[1] != 0) {
+            kernel_load_offset = le64_to_cpu(hdrvals[0]);
+        }
+    }
+
+    *entry = mem_base + kernel_load_offset;
+    rom_add_blob_fixed(filename, buffer, size, *entry);
+
+    g_free(buffer);
+
+    return size;
+}
+
 static void arm_load_kernel_notify(Notifier *notifier, void *data)
 {
    CPUState *cs;
@@ -776,7 +822,7 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data)
    int is_linux = 0;
    uint64_t elf_entry, elf_low_addr, elf_high_addr;
    int elf_machine;
-    hwaddr entry, kernel_load_offset;
+    hwaddr entry;
    static const ARMInsnFixup *primary_loader;
    ArmLoadKernelNotifier *n = DO_UPCAST(ArmLoadKernelNotifier,
                                         notifier, notifier);
@@ -841,14 +887,12 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data)

    if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
        primary_loader = bootloader_aarch64;
-        kernel_load_offset = KERNEL64_LOAD_ADDR;
        elf_machine = EM_AARCH64;
    } else {
        primary_loader = bootloader;
        if (!info->write_board_setup) {
            primary_loader += BOOTLOADER_NO_BOARD_SETUP_OFFSET;
        }
-        kernel_load_offset = KERNEL_LOAD_ADDR;
        elf_machine = EM_ARM;
    }

@@ -900,17 +944,15 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data)
        kernel_size = load_uimage(info->kernel_filename, &entry, NULL,
                                  &is_linux, NULL, NULL);
    }
-    /* On aarch64, it's the bootloader's job to uncompress the kernel. */
    if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64) && kernel_size < 0) {
-        entry = info->loader_start + kernel_load_offset;
-        kernel_size = load_image_gzipped(info->kernel_filename, entry,
-                                         info->ram_size - kernel_load_offset);
+        kernel_size = load_aarch64_image(info->kernel_filename,
+                                         info->loader_start, &entry);
        is_linux = 1;
-    }
-    if (kernel_size < 0) {
-        entry = info->loader_start + kernel_load_offset;
+    } else if (kernel_size < 0) {
+        /* 32-bit ARM */
+        entry = info->loader_start + KERNEL_LOAD_ADDR;
        kernel_size = load_image_targphys(info->kernel_filename, entry,
-                                          info->ram_size - kernel_load_offset);
+                                          info->ram_size - KERNEL_LOAD_ADDR);
        is_linux = 1;
    }
    if (kernel_size < 0) {
--- a/hw/arm/digic.c
+++ b/hw/arm/digic.c
@@ -101,12 +101,6 @@ static void digic_class_init(ObjectClass *oc, void *data)
    DeviceClass *dc = DEVICE_CLASS(oc);

    dc->realize = digic_realize;
-
-    /*
-     * Reason: creates an ARM CPU, thus use after free(), see
-     * arm_cpu_class_init()
-     */
-    dc->cannot_destroy_with_object_finalize_yet = true;
 }

 static const TypeInfo digic_type_info = {
--- a/hw/arm/exynos4210.c
+++ b/hw/arm/exynos4210.c
@@ -32,6 +32,7 @@
 #include "hw/arm/arm.h"
 #include "hw/loader.h"
 #include "hw/arm/exynos4210.h"
+#include "hw/sd/sd.h"
 #include "hw/usb/hcd-ehci.h"

 #define EXYNOS4210_CHIPID_ADDR         0x10000000
@@ -72,6 +73,13 @@
 #define EXYNOS4210_EXT_COMBINER_BASE_ADDR   0x10440000
 #define EXYNOS4210_INT_COMBINER_BASE_ADDR   0x10448000

+/* SD/MMC host controllers */
+#define EXYNOS4210_SDHCI_CAPABILITIES       0x05E80080
+#define EXYNOS4210_SDHCI_BASE_ADDR          0x12510000
+#define EXYNOS4210_SDHCI_ADDR(n)            (EXYNOS4210_SDHCI_BASE_ADDR + \
+                                                0x00010000 * (n))
+#define EXYNOS4210_SDHCI_NUMBER             4
+
 /* PMU SFR base address */
 #define EXYNOS4210_PMU_BASE_ADDR            0x10020000

@@ -382,6 +390,27 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem,
                           EXYNOS4210_UART3_FIFO_SIZE, 3, NULL,
                  s->irq_table[exynos4210_get_irq(EXYNOS4210_UART_INT_GRP, 3)]);

+    /*** SD/MMC host controllers ***/
+    for (n = 0; n < EXYNOS4210_SDHCI_NUMBER; n++) {
+        DeviceState *carddev;
+        BlockBackend *blk;
+        DriveInfo *di;
+
+        dev = qdev_create(NULL, "generic-sdhci");
+        qdev_prop_set_uint32(dev, "capareg", EXYNOS4210_SDHCI_CAPABILITIES);
+        qdev_init_nofail(dev);
+
+        busdev = SYS_BUS_DEVICE(dev);
+        sysbus_mmio_map(busdev, 0, EXYNOS4210_SDHCI_ADDR(n));
+        sysbus_connect_irq(busdev, 0, s->irq_table[exynos4210_get_irq(29, n)]);
+
+        di = drive_get(IF_SD, 0, n);
+        blk = di ? blk_by_legacy_dinfo(di) : NULL;
+        carddev = qdev_create(qdev_get_child_bus(dev, "sd-bus"), TYPE_SD_CARD);
+        qdev_prop_set_drive(carddev, "drive", blk, &error_abort);
+        qdev_init_nofail(carddev);
+    }
+
    /*** Display controller (FIMD) ***/
    sysbus_create_varargs("exynos4210.fimd", EXYNOS4210_FIMD0_BASE_ADDR,
            s->irq_table[exynos4210_get_irq(11, 0)],
--- a/hw/arm/exynos4_boards.c
+++ b/hw/arm/exynos4_boards.c
@@ -22,6 +22,7 @@
 */

 #include "qemu/osdep.h"
+#include "qemu/error-report.h"
 #include "qemu-common.h"
 #include "cpu.h"
 #include "sysemu/sysemu.h"
@@ -101,9 +102,9 @@ static Exynos4210State *exynos4_boards_init_common(MachineState *machine,
    MachineClass *mc = MACHINE_GET_CLASS(machine);

    if (smp_cpus != EXYNOS4210_NCPUS && !qtest_enabled()) {
-        fprintf(stderr, "%s board supports only %d CPU cores. Ignoring smp_cpus"
-                " value.\n",
-                mc->name, EXYNOS4210_NCPUS);
+        error_report("%s board supports only %d CPU cores, ignoring smp_cpus"
+                     " value",
+                     mc->name, EXYNOS4210_NCPUS);
    }

    exynos4_board_binfo.ram_size = exynos4_board_ram_size[board_type];
--- a/hw/arm/fsl-imx25.c
+++ b/hw/arm/fsl-imx25.c
@@ -290,11 +290,6 @@ static void fsl_imx25_class_init(ObjectClass *oc, void *data)

    dc->realize = fsl_imx25_realize;

-    /*
-     * Reason: creates an ARM CPU, thus use after free(), see
-     * arm_cpu_class_init()
-     */
-    dc->cannot_destroy_with_object_finalize_yet = true;
    dc->desc = "i.MX25 SOC";
 }

--- a/hw/arm/fsl-imx31.c
+++ b/hw/arm/fsl-imx31.c
@@ -262,11 +262,6 @@ static void fsl_imx31_class_init(ObjectClass *oc, void *data)

    dc->realize = fsl_imx31_realize;

-    /*
-     * Reason: creates an ARM CPU, thus use after free(), see
-     * arm_cpu_class_init()
-     */
-    dc->cannot_destroy_with_object_finalize_yet = true;
    dc->desc = "i.MX31 SOC";
 }

--- a/hw/arm/fsl-imx6.c
+++ b/hw/arm/fsl-imx6.c
@@ -442,11 +442,6 @@ static void fsl_imx6_class_init(ObjectClass *oc, void *data)

    dc->realize = fsl_imx6_realize;

-    /*
-     * Reason: creates an ARM CPU, thus use after free(), see
-     * arm_cpu_class_init()
-     */
-    dc->cannot_destroy_with_object_finalize_yet = true;
    dc->desc = "i.MX6 SOC";
 }

--- a/hw/arm/pxa2xx.c
+++ b/hw/arm/pxa2xx.c
@@ -755,19 +755,18 @@ static void pxa2xx_ssp_reset(DeviceState *d)
    s->rx_start = s->rx_level = 0;
 }

-static int pxa2xx_ssp_init(SysBusDevice *sbd)
+static void pxa2xx_ssp_init(Object *obj)
 {
-    DeviceState *dev = DEVICE(sbd);
-    PXA2xxSSPState *s = PXA2XX_SSP(dev);
-
+    DeviceState *dev = DEVICE(obj);
+    PXA2xxSSPState *s = PXA2XX_SSP(obj);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
    sysbus_init_irq(sbd, &s->irq);

-    memory_region_init_io(&s->iomem, OBJECT(s), &pxa2xx_ssp_ops, s,
+    memory_region_init_io(&s->iomem, obj, &pxa2xx_ssp_ops, s,
                          "pxa2xx-ssp", 0x1000);
    sysbus_init_mmio(sbd, &s->iomem);

    s->bus = ssi_create_bus(dev, "ssi");
-    return 0;
 }

 /* Real-Time Clock */
@@ -2321,10 +2320,8 @@ PXA2xxState *pxa255_init(MemoryRegion *address_space, unsigned int sdram_size)

 static void pxa2xx_ssp_class_init(ObjectClass *klass, void *data)
 {
-    SysBusDeviceClass *sdc = SYS_BUS_DEVICE_CLASS(klass);
    DeviceClass *dc = DEVICE_CLASS(klass);

-    sdc->init = pxa2xx_ssp_init;
    dc->reset = pxa2xx_ssp_reset;
    dc->vmsd = &vmstate_pxa2xx_ssp;
 }
@@ -2333,6 +2330,7 @@ static const TypeInfo pxa2xx_ssp_info = {
    .name          = TYPE_PXA2XX_SSP,
    .parent        = TYPE_SYS_BUS_DEVICE,
    .instance_size = sizeof(PXA2xxSSPState),
+    .instance_init = pxa2xx_ssp_init,
    .class_init    = pxa2xx_ssp_class_init,
 };

--- a/hw/arm/stellaris.c
+++ b/hw/arm/stellaris.c
@@ -108,7 +108,10 @@ static void gptm_reload(gptm_state *s, int n, int reset)
    } else if (s->mode[n] == 0xa) {
        /* PWM mode.  Not implemented.  */
    } else {
-        hw_error("TODO: 16-bit timer mode 0x%x\n", s->mode[n]);
+        qemu_log_mask(LOG_UNIMP,
+                      "GPTM: 16-bit timer mode unimplemented: 0x%x\n",
+                      s->mode[n]);
+        return;
    }
    s->tick[n] = tick;
    timer_mod(s->timer[n], tick);
@@ -149,7 +152,9 @@ static void gptm_tick(void *opaque)
    } else if (s->mode[n] == 0xa) {
        /* PWM mode.  Not implemented.  */
    } else {
-        hw_error("TODO: 16-bit timer mode 0x%x\n", s->mode[n]);
+        qemu_log_mask(LOG_UNIMP,
+                      "GPTM: 16-bit timer mode unimplemented: 0x%x\n",
+                      s->mode[n]);
    }
    gptm_update_irq(s);
 }
@@ -286,7 +291,8 @@ static void gptm_write(void *opaque, hwaddr offset,
        s->match_prescale[0] = value;
        break;
    default:
-        hw_error("gptm_write: Bad offset 0x%x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "GPTM: read at bad offset 0x%x\n", (int)offset);
    }
    gptm_update_irq(s);
 }
@@ -425,7 +431,10 @@ static int ssys_board_class(const ssys_state *s)
        }
        /* for unknown classes, fall through */
    default:
-        hw_error("ssys_board_class: Unknown class 0x%08x\n", did0);
+        /* This can only happen if the hardwired constant did0 value
+         * in this board's stellaris_board_info struct is wrong.
+         */
+        g_assert_not_reached();
    }
 }

@@ -479,8 +488,7 @@ static uint64_t ssys_read(void *opaque, hwaddr offset,
            case DID0_CLASS_SANDSTORM:
                return pllcfg_sandstorm[xtal];
            default:
-                hw_error("ssys_read: Unhandled class for PLLCFG read.\n");
-                return 0;
+                g_assert_not_reached();
            }
        }
    case 0x070: /* RCC2 */
@@ -512,7 +520,8 @@ static uint64_t ssys_read(void *opaque, hwaddr offset,
    case 0x1e4: /* USER1 */
        return s->user1;
    default:
-        hw_error("ssys_read: Bad offset 0x%x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "SSYS: read at bad offset 0x%x\n", (int)offset);
        return 0;
    }
 }
@@ -614,7 +623,8 @@ static void ssys_write(void *opaque, hwaddr offset,
        s->ldoarst = value;
        break;
    default:
-        hw_error("ssys_write: Bad offset 0x%x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "SSYS: write at bad offset 0x%x\n", (int)offset);
    }
    ssys_update(s);
 }
@@ -748,7 +758,8 @@ static uint64_t stellaris_i2c_read(void *opaque, hwaddr offset,
    case 0x20: /* MCR */
        return s->mcr;
    default:
-        hw_error("strllaris_i2c_read: Bad offset 0x%x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "stellaris_i2c: read at bad offset 0x%x\n", (int)offset);
        return 0;
    }
 }
@@ -823,17 +834,18 @@ static void stellaris_i2c_write(void *opaque, hwaddr offset,
        s->mris &= ~value;
        break;
    case 0x20: /* MCR */
-        if (value & 1)
-            hw_error(
-                      "stellaris_i2c_write: Loopback not implemented\n");
-        if (value & 0x20)
-            hw_error(
-                      "stellaris_i2c_write: Slave mode not implemented\n");
+        if (value & 1) {
+            qemu_log_mask(LOG_UNIMP, "stellaris_i2c: Loopback not implemented");
+        }
+        if (value & 0x20) {
+            qemu_log_mask(LOG_UNIMP,
+                          "stellaris_i2c: Slave mode not implemented");
+        }
        s->mcr = value & 0x31;
        break;
    default:
-        hw_error("stellaris_i2c_write: Bad offset 0x%x\n",
-                  (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "stellaris_i2c: write at bad offset 0x%x\n", (int)offset);
    }
    stellaris_i2c_update(s);
 }
@@ -1057,8 +1069,8 @@ static uint64_t stellaris_adc_read(void *opaque, hwaddr offset,
    case 0x30: /* SAC */
        return s->sac;
    default:
-        hw_error("strllaris_adc_read: Bad offset 0x%x\n",
-                  (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "stellaris_adc: read at bad offset 0x%x\n", (int)offset);
        return 0;
    }
 }
@@ -1078,8 +1090,9 @@ static void stellaris_adc_write(void *opaque, hwaddr offset,
            return;
        case 0x04: /* SSCTL */
            if (value != 6) {
-                hw_error("ADC: Unimplemented sequence %" PRIx64 "\n",
-                          value);
+                qemu_log_mask(LOG_UNIMP,
+                              "ADC: Unimplemented sequence %" PRIx64 "\n",
+                              value);
            }
            s->ssctl[n] = value;
            return;
@@ -1110,13 +1123,14 @@ static void stellaris_adc_write(void *opaque, hwaddr offset,
        s->sspri = value;
        break;
    case 0x28: /* PSSI */
-        hw_error("Not implemented:  ADC sample initiate\n");
+        qemu_log_mask(LOG_UNIMP, "ADC: sample initiate unimplemented");
        break;
    case 0x30: /* SAC */
        s->sac = value;
        break;
    default:
-        hw_error("stellaris_adc_write: Bad offset 0x%x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "stellaris_adc: write at bad offset 0x%x\n", (int)offset);
    }
    stellaris_adc_update(s);
 }
--- a/hw/arm/xlnx-zynqmp.c
+++ b/hw/arm/xlnx-zynqmp.c
@@ -30,6 +30,8 @@
 #define ARM_PHYS_TIMER_PPI  30
 #define ARM_VIRT_TIMER_PPI  27

+#define GEM_REVISION        0x40070106
+
 #define GIC_BASE_ADDR       0xf9000000
 #define GIC_DIST_ADDR       0xf9010000
 #define GIC_CPU_ADDR        0xf9020000
@@ -334,8 +336,10 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp)
            qemu_check_nic_model(nd, TYPE_CADENCE_GEM);
            qdev_set_nic_properties(DEVICE(&s->gem[i]), nd);
        }
+        object_property_set_int(OBJECT(&s->gem[i]), GEM_REVISION, "revision",
+                                &error_abort);
        object_property_set_int(OBJECT(&s->gem[i]), 2, "num-priority-queues",
-                                  &error_abort);
+                                &error_abort);
        object_property_set_bool(OBJECT(&s->gem[i]), true, "realized", &err);
        if (err) {
            error_propagate(errp, err);
@@ -439,12 +443,6 @@ static void xlnx_zynqmp_class_init(ObjectClass *oc, void *data)

    dc->props = xlnx_zynqmp_props;
    dc->realize = xlnx_zynqmp_realize;
-
-    /*
-     * Reason: creates an ARM CPU, thus use after free(), see
-     * arm_cpu_class_init()
-     */
-    dc->cannot_destroy_with_object_finalize_yet = true;
 }

 static const TypeInfo xlnx_zynqmp_type_info = {
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -2521,8 +2521,8 @@ static void fdctrl_result_timer(void *opaque)
 }

 /* Init functions */
-static void fdctrl_connect_drives(FDCtrl *fdctrl, Error **errp,
-                                  DeviceState *fdc_dev)
+static void fdctrl_connect_drives(FDCtrl *fdctrl, DeviceState *fdc_dev,
+                                  Error **errp)
 {
    unsigned int i;
    FDrive *drive;
@@ -2675,7 +2675,7 @@ static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl,
    }

    floppy_bus_create(fdctrl, &fdctrl->bus, dev);
-    fdctrl_connect_drives(fdctrl, errp, dev);
+    fdctrl_connect_drives(fdctrl, dev, errp);
 }

 static const MemoryRegionPortio fdc_portio_list[] = {
--- a/hw/block/xen_blkif.h
+++ b/hw/block/xen_blkif.h
@@ -1,7 +1,7 @@
 #ifndef XEN_BLKIF_H
 #define XEN_BLKIF_H

-#include <xen/io/ring.h>
+#include "hw/xen/io/ring.h"
 #include <xen/io/blkif.h>
 #include <xen/io/protocols.h>

--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -492,7 +492,7 @@ static int ioreq_map(struct ioreq *ioreq)
    return 0;
 }

-#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 480
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40800

 static void ioreq_free_copy_buffers(struct ioreq *ioreq)
 {
--- a/hw/char/exynos4210_uart.c
+++ b/hw/char/exynos4210_uart.c
@@ -102,7 +102,7 @@ typedef struct Exynos4210UartReg {
    uint32_t            reset_value;
 } Exynos4210UartReg;

-static Exynos4210UartReg exynos4210_uart_regs[] = {
+static const Exynos4210UartReg exynos4210_uart_regs[] = {
    {"ULCON",    ULCON,    0x00000000},
    {"UCON",     UCON,     0x00003000},
    {"UFCON",    UFCON,    0x00000000},
@@ -220,7 +220,7 @@ static uint8_t fifo_retrieve(Exynos4210UartFIFO *q)
    return  ret;
 }

-static int fifo_elements_number(Exynos4210UartFIFO *q)
+static int fifo_elements_number(const Exynos4210UartFIFO *q)
 {
    if (q->sp < q->rp) {
        return q->size - q->rp + q->sp;
@@ -229,7 +229,7 @@ static int fifo_elements_number(Exynos4210UartFIFO *q)
    return q->sp - q->rp;
 }

-static int fifo_empty_elements_number(Exynos4210UartFIFO *q)
+static int fifo_empty_elements_number(const Exynos4210UartFIFO *q)
 {
    return q->size - fifo_elements_number(q);
 }
@@ -245,7 +245,7 @@ static void fifo_reset(Exynos4210UartFIFO *q)
    q->rp = 0;
 }

-static uint32_t exynos4210_uart_Tx_FIFO_trigger_level(Exynos4210UartState *s)
+static uint32_t exynos4210_uart_Tx_FIFO_trigger_level(const Exynos4210UartState *s)
 {
    uint32_t level = 0;
    uint32_t reg;
--- a/hw/core/null-machine.c
+++ b/hw/core/null-machine.c
@@ -40,6 +40,12 @@ static void machine_none_init(MachineState *mch)
        memory_region_allocate_system_memory(ram, NULL, "ram", mch->ram_size);
        memory_region_add_subregion(get_system_memory(), 0, ram);
    }
+
+    if (mch->kernel_filename) {
+        error_report("The -kernel parameter is not supported "
+                     "(use the generic 'loader' device instead).");
+        exit(1);
+    }
 }

 static void machine_none_machine_init(MachineClass *mc)
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -124,8 +124,12 @@ static void release_drive(Object *obj, const char *name, void *opaque)
    BlockBackend **ptr = qdev_get_prop_ptr(dev, prop);

    if (*ptr) {
+        AioContext *ctx = blk_get_aio_context(*ptr);
+
+        aio_context_acquire(ctx);
        blockdev_auto_del(*ptr);
        blk_detach_dev(*ptr, dev);
+        aio_context_release(ctx);
    }
 }

@@ -405,7 +409,7 @@ void qdev_prop_set_drive(DeviceState *dev, const char *name,
    if (value) {
        ref = blk_name(value);
        if (!*ref) {
-            BlockDriverState *bs = blk_bs(value);
+            const BlockDriverState *bs = blk_bs(value);
            if (bs) {
                ref = bdrv_get_node_name(bs);
            }
--- a/hw/core/qdev-properties.c
+++ b/hw/core/qdev-properties.c
@@ -1010,7 +1010,8 @@ void qdev_prop_set_string(DeviceState *dev, const char *name, const char *value)
    object_property_set_str(OBJECT(dev), value, name, &error_abort);
 }

-void qdev_prop_set_macaddr(DeviceState *dev, const char *name, uint8_t *value)
+void qdev_prop_set_macaddr(DeviceState *dev, const char *name,
+                           const uint8_t *value)
 {
    char str[2 * 6 + 5 + 1];
    snprintf(str, sizeof(str), "%02x:%02x:%02x:%02x:%02x:%02x",
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -39,9 +39,9 @@
 #include "qapi-event.h"
 #include "migration/migration.h"

-int qdev_hotplug = 0;
+bool qdev_hotplug = false;
 static bool qdev_hot_added = false;
-static bool qdev_hot_removed = false;
+bool qdev_hot_removed = false;

 const VMStateDescription *qdev_get_vmsd(DeviceState *dev)
 {
@@ -271,40 +271,6 @@ HotplugHandler *qdev_get_hotplug_handler(DeviceState *dev)
    return hotplug_ctrl;
 }

-void qdev_unplug(DeviceState *dev, Error **errp)
-{
-    DeviceClass *dc = DEVICE_GET_CLASS(dev);
-    HotplugHandler *hotplug_ctrl;
-    HotplugHandlerClass *hdc;
-
-    if (dev->parent_bus && !qbus_is_hotpluggable(dev->parent_bus)) {
-        error_setg(errp, QERR_BUS_NO_HOTPLUG, dev->parent_bus->name);
-        return;
-    }
-
-    if (!dc->hotpluggable) {
-        error_setg(errp, QERR_DEVICE_NO_HOTPLUG,
-                   object_get_typename(OBJECT(dev)));
-        return;
-    }
-
-    qdev_hot_removed = true;
-
-    hotplug_ctrl = qdev_get_hotplug_handler(dev);
-    /* hotpluggable device MUST have HotplugHandler, if it doesn't
-     * then something is very wrong with it */
-    g_assert(hotplug_ctrl);
-
-    /* If device supports async unplug just request it to be done,
-     * otherwise just remove it synchronously */
-    hdc = HOTPLUG_HANDLER_GET_CLASS(hotplug_ctrl);
-    if (hdc->unplug_request) {
-        hotplug_handler_unplug_request(hotplug_ctrl, dev, errp);
-    } else {
-        hotplug_handler_unplug(hotplug_ctrl, dev, errp);
-    }
-}
-
 static int qdev_reset_one(DeviceState *dev, void *opaque)
 {
    device_reset(dev);
@@ -385,7 +351,7 @@ void qdev_machine_creation_done(void)
     * ok, initial machine setup is done, starting from now we can
     * only create hotpluggable devices
     */
-    qdev_hotplug = 1;
+    qdev_hotplug = true;
 }

 bool qdev_machine_modified(void)
@@ -1037,13 +1003,6 @@ static bool device_get_hotplugged(Object *obj, Error **err)
    return dev->hotplugged;
 }

-static void device_set_hotplugged(Object *obj, bool value, Error **err)
-{
-    DeviceState *dev = DEVICE(obj);
-
-    dev->hotplugged = value;
-}
-
 static void device_initfn(Object *obj)
 {
    DeviceState *dev = DEVICE(obj);
@@ -1063,7 +1022,7 @@ static void device_initfn(Object *obj)
    object_property_add_bool(obj, "hotpluggable",
                             device_get_hotpluggable, NULL, NULL);
    object_property_add_bool(obj, "hotplugged",
-                             device_get_hotplugged, device_set_hotplugged,
+                             device_get_hotplugged, NULL,
                             &error_abort);

    class = object_get_class(OBJECT(dev));
--- a/hw/display/cg3.c
+++ b/hw/display/cg3.c
@@ -26,7 +26,6 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
-#include "cpu.h"
 #include "qemu/error-report.h"
 #include "ui/console.h"
 #include "hw/sysbus.h"
@@ -114,8 +113,8 @@ static void cg3_update_display(void *opaque)
    for (y = 0; y < height; y++) {
        int update = s->full_update;

-        page = (y * width) & TARGET_PAGE_MASK;
-        update |= memory_region_get_dirty(&s->vram_mem, page, page + width,
+        page = y * width;
+        update |= memory_region_get_dirty(&s->vram_mem, page, width,
                                          DIRTY_MEMORY_VGA);
        if (update) {
            if (y_start < 0) {
@@ -148,8 +147,7 @@ static void cg3_update_display(void *opaque)
    }
    if (page_max >= page_min) {
        memory_region_reset_dirty(&s->vram_mem,
-                              page_min, page_max - page_min + TARGET_PAGE_SIZE,
-                              DIRTY_MEMORY_VGA);
+                              page_min, page_max - page_min, DIRTY_MEMORY_VGA);
    }
    /* vsync interrupt? */
    if (s->regs[0] & CG3_CR_ENABLE_INTS) {
@@ -305,8 +303,7 @@ static void cg3_realizefn(DeviceState *dev, Error **errp)
    vmstate_register_ram_global(&s->rom);
    fcode_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, CG3_ROM_FILE);
    if (fcode_filename) {
-        ret = load_image_targphys(fcode_filename, s->prom_addr,
-                                  FCODE_MAX_ROM_SIZE);
+        ret = load_image_mr(fcode_filename, &s->rom);
        g_free(fcode_filename);
        if (ret < 0 || ret > FCODE_MAX_ROM_SIZE) {
            error_report("cg3: could not load prom '%s'", CG3_ROM_FILE);
@@ -371,7 +368,6 @@ static Property cg3_properties[] = {
    DEFINE_PROP_UINT16("width",        CG3State, width,     -1),
    DEFINE_PROP_UINT16("height",       CG3State, height,    -1),
    DEFINE_PROP_UINT16("depth",        CG3State, depth,     -1),
-    DEFINE_PROP_UINT64("prom-addr",    CG3State, prom_addr, -1),
    DEFINE_PROP_END_OF_LIST(),
 };

--- a/hw/display/exynos4210_fimd.c
+++ b/hw/display/exynos4210_fimd.c
@@ -1263,6 +1263,7 @@ static void exynos4210_fimd_update(void *opaque)
    Exynos4210fimdState *s = (Exynos4210fimdState *)opaque;
    DisplaySurface *surface;
    Exynos4210fimdWindow *w;
+    DirtyBitmapSnapshot *snap;
    int i, line;
    hwaddr fb_line_addr, inc_size;
    int scrn_height;
@@ -1291,10 +1292,12 @@ static void exynos4210_fimd_update(void *opaque)
            memory_region_sync_dirty_bitmap(w->mem_section.mr);
            host_fb_addr = w->host_fb_addr;
            fb_line_addr = w->mem_section.offset_within_region;
+            snap = memory_region_snapshot_and_clear_dirty(w->mem_section.mr,
+                    fb_line_addr, inc_size * scrn_height, DIRTY_MEMORY_VGA);

            for (line = 0; line < scrn_height; line++) {
-                is_dirty = memory_region_get_dirty(w->mem_section.mr,
-                            fb_line_addr, scrn_width, DIRTY_MEMORY_VGA);
+                is_dirty = memory_region_snapshot_get_dirty(w->mem_section.mr,
+                            snap, fb_line_addr, scrn_width);

                if (s->invalidate || is_dirty) {
                    if (first_line == -1) {
@@ -1309,9 +1312,7 @@ static void exynos4210_fimd_update(void *opaque)
                fb_line_addr += inc_size;
                is_dirty = false;
            }
-            memory_region_reset_dirty(w->mem_section.mr,
-                w->mem_section.offset_within_region,
-                w->fb_len, DIRTY_MEMORY_VGA);
+            g_free(snap);
            blend = true;
        }
    }
--- a/hw/display/framebuffer.c
+++ b/hw/display/framebuffer.c
@@ -67,7 +67,7 @@ void framebuffer_update_display(
    int *first_row, /* Input and output.  */
    int *last_row /* Output only */)
 {
-    hwaddr src_len;
+    DirtyBitmapSnapshot *snap;
    uint8_t *dest;
    uint8_t *src;
    int first, last = 0;
@@ -78,7 +78,6 @@ void framebuffer_update_display(

    i = *first_row;
    *first_row = -1;
-    src_len = (hwaddr)src_width * rows;

    mem = mem_section->mr;
    if (!mem) {
@@ -102,9 +101,10 @@ void framebuffer_update_display(
    src += i * src_width;
    dest += i * dest_row_pitch;

+    snap = memory_region_snapshot_and_clear_dirty(mem, addr, src_width * rows,
+                                                  DIRTY_MEMORY_VGA);
    for (; i < rows; i++) {
-        dirty = memory_region_get_dirty(mem, addr, src_width,
-                                             DIRTY_MEMORY_VGA);
+        dirty = memory_region_snapshot_get_dirty(mem, snap, addr, src_width);
        if (dirty || invalidate) {
            fn(opaque, dest, src, cols, dest_col_pitch);
            if (first == -1)
@@ -115,11 +115,10 @@ void framebuffer_update_display(
        src += src_width;
        dest += dest_row_pitch;
    }
+    g_free(snap);
    if (first < 0) {
        return;
    }
-    memory_region_reset_dirty(mem, mem_section->offset_within_region, src_len,
-                              DIRTY_MEMORY_VGA);
    *first_row = first;
    *last_row = last;
 }
--- a/hw/display/g364fb.c
+++ b/hw/display/g364fb.c
@@ -64,17 +64,8 @@ typedef struct G364State {

 static inline int check_dirty(G364State *s, ram_addr_t page)
 {
-    return memory_region_get_dirty(&s->mem_vram, page, G364_PAGE_SIZE,
-                                   DIRTY_MEMORY_VGA);
-}
-
-static inline void reset_dirty(G364State *s,
-                               ram_addr_t page_min, ram_addr_t page_max)
-{
-    memory_region_reset_dirty(&s->mem_vram,
-                              page_min,
-                              page_max + G364_PAGE_SIZE - page_min - 1,
-                              DIRTY_MEMORY_VGA);
+    return memory_region_test_and_clear_dirty(&s->mem_vram, page, G364_PAGE_SIZE,
+                                              DIRTY_MEMORY_VGA);
 }

 static void g364fb_draw_graphic8(G364State *s)
@@ -83,7 +74,7 @@ static void g364fb_draw_graphic8(G364State *s)
    int i, w;
    uint8_t *vram;
    uint8_t *data_display, *dd;
-    ram_addr_t page, page_min, page_max;
+    ram_addr_t page;
    int x, y;
    int xmin, xmax;
    int ymin, ymax;
@@ -114,8 +105,6 @@ static void g364fb_draw_graphic8(G364State *s)
    }

    page = 0;
-    page_min = (ram_addr_t)-1;
-    page_max = 0;

    x = y = 0;
    xmin = s->width;
@@ -137,9 +126,6 @@ static void g364fb_draw_graphic8(G364State *s)
        if (check_dirty(s, page)) {
            if (y < ymin)
                ymin = ymax = y;
-            if (page_min == (ram_addr_t)-1)
-                page_min = page;
-            page_max = page;
            if (x < xmin)
                xmin = x;
            for (i = 0; i < G364_PAGE_SIZE; i++) {
@@ -196,10 +182,7 @@ static void g364fb_draw_graphic8(G364State *s)
                ymax = y;
        } else {
            int dy;
-            if (page_min != (ram_addr_t)-1) {
-                reset_dirty(s, page_min, page_max);
-                page_min = (ram_addr_t)-1;
-                page_max = 0;
+            if (xmax || ymax) {
                dpy_gfx_update(s->con, xmin, ymin,
                               xmax - xmin + 1, ymax - ymin + 1);
                xmin = s->width;
@@ -219,9 +202,8 @@ static void g364fb_draw_graphic8(G364State *s)
    }

 done:
-    if (page_min != (ram_addr_t)-1) {
+    if (xmax || ymax) {
        dpy_gfx_update(s->con, xmin, ymin, xmax - xmin + 1, ymax - ymin + 1);
-        reset_dirty(s, page_min, page_max);
    }
 }

--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -26,6 +26,7 @@
 #include "qemu/queue.h"
 #include "qemu/atomic.h"
 #include "sysemu/sysemu.h"
+#include "migration/migration.h"
 #include "trace.h"

 #include "qxl.h"
@@ -304,6 +305,16 @@ void qxl_spice_reset_cursor(PCIQXLDevice *qxl)
    qxl->ssd.cursor = cursor_builtin_hidden();
 }

+static uint32_t qxl_crc32(const uint8_t *p, unsigned len)
+{
+    /*
+     * zlib xors the seed with 0xffffffff, and xors the result
+     * again with 0xffffffff; Both are not done with linux's crc32,
+     * which we want to be compatible with, so undo that.
+     */
+    return crc32(0xffffffff, p, len) ^ 0xffffffff;
+}
+
 static ram_addr_t qxl_rom_size(void)
 {
 #define QXL_REQUIRED_SZ (sizeof(QXLRom) + sizeof(QXLModes) + sizeof(qxl_modes))
@@ -368,6 +379,18 @@ static void init_qxl_rom(PCIQXLDevice *d)
    rom->num_pages          = cpu_to_le32(num_pages);
    rom->ram_header_offset  = cpu_to_le32(d->vga.vram_size - ram_header_size);

+    if (d->xres && d->yres) {
+        /* needs linux kernel 4.12+ to work */
+        rom->client_monitors_config.count = 1;
+        rom->client_monitors_config.heads[0].left = 0;
+        rom->client_monitors_config.heads[0].top = 0;
+        rom->client_monitors_config.heads[0].right = cpu_to_le32(d->xres);
+        rom->client_monitors_config.heads[0].bottom = cpu_to_le32(d->yres);
+        rom->client_monitors_config_crc = qxl_crc32(
+            (const uint8_t *)&rom->client_monitors_config,
+            sizeof(rom->client_monitors_config));
+    }
+
    d->shadow_rom = *rom;
    d->rom        = rom;
    d->modes      = modes;
@@ -639,6 +662,30 @@ static int interface_get_command(QXLInstance *sin, struct QXLCommandExt *ext)
        qxl->guest_primary.commands++;
        qxl_track_command(qxl, ext);
        qxl_log_command(qxl, "cmd", ext);
+        {
+            /*
+             * Windows 8 drivers place qxl commands in the vram
+             * (instead of the ram) bar.  We can't live migrate such a
+             * guest, so add a migration blocker in case we detect
+             * this, to avoid triggering the assert in pre_save().
+             *
+             * https://cgit.freedesktop.org/spice/win32/qxl-wddm-dod/commit/?id=f6e099db39e7d0787f294d5fd0dce328b5210faa
+             */
+            void *msg = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id);
+            if (msg != NULL && (
+                    msg < (void *)qxl->vga.vram_ptr ||
+                    msg > ((void *)qxl->vga.vram_ptr + qxl->vga.vram_size))) {
+                if (!qxl->migration_blocker) {
+                    Error *local_err = NULL;
+                    error_setg(&qxl->migration_blocker,
+                               "qxl: guest bug: command not in ram bar");
+                    migrate_add_blocker(qxl->migration_blocker, &local_err);
+                    if (local_err) {
+                        error_report_err(local_err);
+                    }
+                }
+            }
+        }
        trace_qxl_ring_command_get(qxl->id, qxl_mode_to_string(qxl->mode));
        return true;
    default:
@@ -986,16 +1033,6 @@ static void interface_set_client_capabilities(QXLInstance *sin,
    qxl_send_events(qxl, QXL_INTERRUPT_CLIENT);
 }

-static uint32_t qxl_crc32(const uint8_t *p, unsigned len)
-{
-    /*
-     * zlib xors the seed with 0xffffffff, and xors the result
-     * again with 0xffffffff; Both are not done with linux's crc32,
-     * which we want to be compatible with, so undo that.
-     */
-    return crc32(0xffffffff, p, len) ^ 0xffffffff;
-}
-
 static bool qxl_rom_monitors_config_changed(QXLRom *rom,
        VDAgentMonitorsConfig *monitors_config,
        unsigned int max_outputs)
@@ -1146,6 +1183,7 @@ static void qxl_enter_vga_mode(PCIQXLDevice *d)
    update_displaychangelistener(&d->ssd.dcl, GUI_REFRESH_INTERVAL_DEFAULT);
    qemu_spice_create_host_primary(&d->ssd);
    d->mode = QXL_MODE_VGA;
+    qemu_spice_display_switch(&d->ssd, d->ssd.ds);
    vga_dirty_log_start(&d->vga);
    graphic_hw_update(d->vga.con);
 }
@@ -1235,6 +1273,12 @@ static void qxl_hard_reset(PCIQXLDevice *d, int loadvm)
    qemu_spice_create_host_memslot(&d->ssd);
    qxl_soft_reset(d);

+    if (d->migration_blocker) {
+        migrate_del_blocker(d->migration_blocker);
+        error_free(d->migration_blocker);
+        d->migration_blocker = NULL;
+    }
+
    if (startstop) {
        qemu_spice_display_start();
    }
@@ -2365,6 +2409,8 @@ static Property qxl_properties[] = {
 #if SPICE_SERVER_VERSION >= 0x000c06 /* release 0.12.6 */
        DEFINE_PROP_UINT16("max_outputs", PCIQXLDevice, max_outputs, 0),
 #endif
+        DEFINE_PROP_UINT32("xres", PCIQXLDevice, xres, 0),
+        DEFINE_PROP_UINT32("yres", PCIQXLDevice, yres, 0),
        DEFINE_PROP_END_OF_LIST(),
 };

--- a/hw/display/qxl.h
+++ b/hw/display/qxl.h
@@ -40,6 +40,7 @@ typedef struct PCIQXLDevice {
    uint32_t           cmdlog;

    uint32_t           guest_bug;
+    Error              *migration_blocker;

    enum qxl_mode      mode;
    uint32_t           cmdflags;
@@ -118,6 +119,8 @@ typedef struct PCIQXLDevice {
    uint32_t          vram_size_mb;
    uint32_t          vram32_size_mb;
    uint32_t          vgamem_size_mb;
+    uint32_t          xres;
+    uint32_t          yres;

    /* qxl_render_update state */
    int                render_update_cookie_num;
--- a/hw/display/sm501.c
+++ b/hw/display/sm501.c
--- a/hw/display/sm501_template.h
+++ b/hw/display/sm501_template.h
@@ -47,81 +47,67 @@ static void glue(draw_line8_, PIXEL_NAME)(
 {
    uint8_t v, r, g, b;
    do {
-	v = ldub_p(s);
-	r = (pal[v] >> 16) & 0xff;
-	g = (pal[v] >>  8) & 0xff;
-	b = (pal[v] >>  0) & 0xff;
-	((PIXEL_TYPE *) d)[0] = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
-	s ++;
-	d += BPP;
-    } while (-- width != 0);
+        v = ldub_p(s);
+        r = (pal[v] >> 16) & 0xff;
+        g = (pal[v] >>  8) & 0xff;
+        b = (pal[v] >>  0) & 0xff;
+        *(PIXEL_TYPE *)d = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
+        s++;
+        d += BPP;
+    } while (--width != 0);
 }

 static void glue(draw_line16_, PIXEL_NAME)(
-		 uint8_t *d, const uint8_t *s, int width, const uint32_t *pal)
+                 uint8_t *d, const uint8_t *s, int width, const uint32_t *pal)
 {
    uint16_t rgb565;
    uint8_t r, g, b;

    do {
-	rgb565 = lduw_p(s);
-	r = ((rgb565 >> 11) & 0x1f) << 3;
-	g = ((rgb565 >>  5) & 0x3f) << 2;
-	b = ((rgb565 >>  0) & 0x1f) << 3;
-	((PIXEL_TYPE *) d)[0] = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
-	s += 2;
-	d += BPP;
-    } while (-- width != 0);
+        rgb565 = lduw_le_p(s);
+        r = (rgb565 >> 8) & 0xf8;
+        g = (rgb565 >> 3) & 0xfc;
+        b = (rgb565 << 3) & 0xf8;
+        *(PIXEL_TYPE *)d = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
+        s += 2;
+        d += BPP;
+    } while (--width != 0);
 }

 static void glue(draw_line32_, PIXEL_NAME)(
-		 uint8_t *d, const uint8_t *s, int width, const uint32_t *pal)
+                 uint8_t *d, const uint8_t *s, int width, const uint32_t *pal)
 {
    uint8_t r, g, b;

    do {
-	ldub_p(s);
-#if defined(TARGET_WORDS_BIGENDIAN)
-        r = s[1];
-        g = s[2];
-        b = s[3];
-#else
-        b = s[0];
-        g = s[1];
        r = s[2];
-#endif
-	((PIXEL_TYPE *) d)[0] = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
-	s += 4;
-	d += BPP;
-    } while (-- width != 0);
+        g = s[1];
+        b = s[0];
+        *(PIXEL_TYPE *)d = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
+        s += 4;
+        d += BPP;
+    } while (--width != 0);
 }

 /**
 * Draw hardware cursor image on the given line.
 */
-static void glue(draw_hwc_line_, PIXEL_NAME)(SM501State * s, int crt,
-                         uint8_t * palette, int c_y, uint8_t *d, int width)
+static void glue(draw_hwc_line_, PIXEL_NAME)(uint8_t *d, const uint8_t *s,
+                 int width, const uint8_t *palette, int c_x, int c_y)
 {
-    int x, i;
-    uint8_t bitset = 0;
-
-    /* get hardware cursor pattern */
-    uint32_t cursor_addr = get_hwc_address(s, crt);
-    assert(0 <= c_y && c_y < SM501_HWC_HEIGHT);
-    cursor_addr += 64 * c_y / 4;  /* 4 pixels per byte */
-    cursor_addr += s->base;
+    int i;
+    uint8_t r, g, b, v, bitset = 0;

    /* get cursor position */
-    x = get_hwc_x(s, crt);
-    d += x * BPP;
-
-    for (i = 0; i < SM501_HWC_WIDTH && x + i < width; i++) {
-        uint8_t v;
+    assert(0 <= c_y && c_y < SM501_HWC_HEIGHT);
+    s += SM501_HWC_WIDTH * c_y / 4;  /* 4 pixels per byte */
+    d += c_x * BPP;

+    for (i = 0; i < SM501_HWC_WIDTH && c_x + i < width; i++) {
        /* get pixel value */
        if (i % 4 == 0) {
-            bitset = ldub_phys(&address_space_memory, cursor_addr);
-            cursor_addr++;
+            bitset = ldub_p(s);
+            s++;
        }
        v = bitset & 3;
        bitset >>= 2;
@@ -129,10 +115,10 @@ static void glue(draw_hwc_line_, PIXEL_NAME)(SM501State * s, int crt,
        /* write pixel */
        if (v) {
            v--;
-            uint8_t r = palette[v * 3 + 0];
-            uint8_t g = palette[v * 3 + 1];
-            uint8_t b = palette[v * 3 + 2];
-            ((PIXEL_TYPE *) d)[0] = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
+            r = palette[v * 3 + 0];
+            g = palette[v * 3 + 1];
+            b = palette[v * 3 + 2];
+            *(PIXEL_TYPE *)d = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
        }
        d += BPP;
    }
--- a/hw/display/tcx.c
+++ b/hw/display/tcx.c
@@ -25,7 +25,6 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
-#include "cpu.h" /* FIXME shouldn't use TARGET_PAGE_SIZE */
 #include "ui/console.h"
 #include "ui/pixel_ops.h"
 #include "hw/loader.h"
@@ -93,41 +92,46 @@ typedef struct TCXState {
    uint16_t cursy;
 } TCXState;

-static void tcx_set_dirty(TCXState *s)
+static void tcx_set_dirty(TCXState *s, ram_addr_t addr, int len)
 {
-    memory_region_set_dirty(&s->vram_mem, 0, MAXX * MAXY);
+    memory_region_set_dirty(&s->vram_mem, addr, len);
+
+    if (s->depth == 24) {
+        memory_region_set_dirty(&s->vram_mem, s->vram24_offset + addr * 4,
+                                len * 4);
+        memory_region_set_dirty(&s->vram_mem, s->cplane_offset + addr * 4,
+                                len * 4);
+    }
 }

-static inline int tcx24_check_dirty(TCXState *s, ram_addr_t page,
-                                    ram_addr_t page24, ram_addr_t cpage)
+static int tcx_check_dirty(TCXState *s, ram_addr_t addr, int len)
 {
    int ret;

-    ret = memory_region_get_dirty(&s->vram_mem, page, TARGET_PAGE_SIZE,
-                                  DIRTY_MEMORY_VGA);
-    ret |= memory_region_get_dirty(&s->vram_mem, page24, TARGET_PAGE_SIZE * 4,
-                                   DIRTY_MEMORY_VGA);
-    ret |= memory_region_get_dirty(&s->vram_mem, cpage, TARGET_PAGE_SIZE * 4,
-                                   DIRTY_MEMORY_VGA);
+    ret = memory_region_get_dirty(&s->vram_mem, addr, len, DIRTY_MEMORY_VGA);
+
+    if (s->depth == 24) {
+        ret |= memory_region_get_dirty(&s->vram_mem,
+                                       s->vram24_offset + addr * 4, len * 4,
+                                       DIRTY_MEMORY_VGA);
+        ret |= memory_region_get_dirty(&s->vram_mem,
+                                       s->cplane_offset + addr * 4, len * 4,
+                                       DIRTY_MEMORY_VGA);
+    }
+
    return ret;
 }

-static inline void tcx24_reset_dirty(TCXState *ts, ram_addr_t page_min,
-                               ram_addr_t page_max, ram_addr_t page24,
-                              ram_addr_t cpage)
+static void tcx_reset_dirty(TCXState *s, ram_addr_t addr, int len)
 {
-    memory_region_reset_dirty(&ts->vram_mem,
-                              page_min,
-                              (page_max - page_min) + TARGET_PAGE_SIZE,
-                              DIRTY_MEMORY_VGA);
-    memory_region_reset_dirty(&ts->vram_mem,
-                              page24 + page_min * 4,
-                              (page_max - page_min) * 4 + TARGET_PAGE_SIZE,
-                              DIRTY_MEMORY_VGA);
-    memory_region_reset_dirty(&ts->vram_mem,
-                              cpage + page_min * 4,
-                              (page_max - page_min) * 4 + TARGET_PAGE_SIZE,
-                              DIRTY_MEMORY_VGA);
+    memory_region_reset_dirty(&s->vram_mem, addr, len, DIRTY_MEMORY_VGA);
+
+    if (s->depth == 24) {
+        memory_region_reset_dirty(&s->vram_mem, s->vram24_offset + addr * 4,
+                                  len * 4, DIRTY_MEMORY_VGA);
+        memory_region_reset_dirty(&s->vram_mem, s->cplane_offset + addr * 4,
+                                  len * 4, DIRTY_MEMORY_VGA);
+    }
 }

 static void update_palette_entries(TCXState *s, int start, int end)
@@ -136,27 +140,14 @@ static void update_palette_entries(TCXState *s, int start, int end)
    int i;

    for (i = start; i < end; i++) {
-        switch (surface_bits_per_pixel(surface)) {
-        default:
-        case 8:
-            s->palette[i] = rgb_to_pixel8(s->r[i], s->g[i], s->b[i]);
-            break;
-        case 15:
-            s->palette[i] = rgb_to_pixel15(s->r[i], s->g[i], s->b[i]);
-            break;
-        case 16:
-            s->palette[i] = rgb_to_pixel16(s->r[i], s->g[i], s->b[i]);
-            break;
-        case 32:
-            if (is_surface_bgr(surface)) {
-                s->palette[i] = rgb_to_pixel32bgr(s->r[i], s->g[i], s->b[i]);
-            } else {
-                s->palette[i] = rgb_to_pixel32(s->r[i], s->g[i], s->b[i]);
-            }
-            break;
+        if (is_surface_bgr(surface)) {
+            s->palette[i] = rgb_to_pixel32bgr(s->r[i], s->g[i], s->b[i]);
+        } else {
+            s->palette[i] = rgb_to_pixel32(s->r[i], s->g[i], s->b[i]);
        }
+        break;
    }
-    tcx_set_dirty(s);
+    tcx_set_dirty(s, 0, memory_region_size(&s->vram_mem));
 }

 static void tcx_draw_line32(TCXState *s1, uint8_t *d,
@@ -172,31 +163,6 @@ static void tcx_draw_line32(TCXState *s1, uint8_t *d,
    }
 }

-static void tcx_draw_line16(TCXState *s1, uint8_t *d,
-                            const uint8_t *s, int width)
-{
-    int x;
-    uint8_t val;
-    uint16_t *p = (uint16_t *)d;
-
-    for (x = 0; x < width; x++) {
-        val = *s++;
-        *p++ = s1->palette[val];
-    }
-}
-
-static void tcx_draw_line8(TCXState *s1, uint8_t *d,
-                           const uint8_t *s, int width)
-{
-    int x;
-    uint8_t val;
-
-    for(x = 0; x < width; x++) {
-        val = *s++;
-        *d++ = s1->palette[val];
-    }
-}
-
 static void tcx_draw_cursor32(TCXState *s1, uint8_t *d,
                              int y, int width)
 {
@@ -223,57 +189,6 @@ static void tcx_draw_cursor32(TCXState *s1, uint8_t *d,
    }
 }

-static void tcx_draw_cursor16(TCXState *s1, uint8_t *d,
-                              int y, int width)
-{
-    int x, len;
-    uint32_t mask, bits;
-    uint16_t *p = (uint16_t *)d;
-
-    y = y - s1->cursy;
-    mask = s1->cursmask[y];
-    bits = s1->cursbits[y];
-    len = MIN(width - s1->cursx, 32);
-    p = &p[s1->cursx];
-    for (x = 0; x < len; x++) {
-        if (mask & 0x80000000) {
-            if (bits & 0x80000000) {
-                *p = s1->palette[259];
-            } else {
-                *p = s1->palette[258];
-            }
-        }
-        p++;
-        mask <<= 1;
-        bits <<= 1;
-    }
-}
-
-static void tcx_draw_cursor8(TCXState *s1, uint8_t *d,
-                              int y, int width)
-{
-    int x, len;
-    uint32_t mask, bits;
-
-    y = y - s1->cursy;
-    mask = s1->cursmask[y];
-    bits = s1->cursbits[y];
-    len = MIN(width - s1->cursx, 32);
-    d = &d[s1->cursx];
-    for (x = 0; x < len; x++) {
-        if (mask & 0x80000000) {
-            if (bits & 0x80000000) {
-                *d = s1->palette[259];
-            } else {
-                *d = s1->palette[258];
-            }
-        }
-        d++;
-        mask <<= 1;
-        bits <<= 1;
-    }
-}
-
 /*
  XXX Could be much more optimal:
  * detect if line/page/whole screen is in 24 bit mode
@@ -322,10 +237,8 @@ static void tcx_update_display(void *opaque)
    ram_addr_t page, page_min, page_max;
    int y, y_start, dd, ds;
    uint8_t *d, *s;
-    void (*f)(TCXState *s1, uint8_t *dst, const uint8_t *src, int width);
-    void (*fc)(TCXState *s1, uint8_t *dst, int y, int width);

-    if (surface_bits_per_pixel(surface) == 0) {
+    if (surface_bits_per_pixel(surface) != 32) {
        return;
    }

@@ -338,29 +251,9 @@ static void tcx_update_display(void *opaque)
    dd = surface_stride(surface);
    ds = 1024;

-    switch (surface_bits_per_pixel(surface)) {
-    case 32:
-        f = tcx_draw_line32;
-        fc = tcx_draw_cursor32;
-        break;
-    case 15:
-    case 16:
-        f = tcx_draw_line16;
-        fc = tcx_draw_cursor16;
-        break;
-    default:
-    case 8:
-        f = tcx_draw_line8;
-        fc = tcx_draw_cursor8;
-        break;
-    case 0:
-        return;
-    }
-
    memory_region_sync_dirty_bitmap(&ts->vram_mem);
-    for (y = 0; y < ts->height; page += TARGET_PAGE_SIZE) {
-        if (memory_region_get_dirty(&ts->vram_mem, page, TARGET_PAGE_SIZE,
-                                    DIRTY_MEMORY_VGA)) {
+    for (y = 0; y < ts->height; y++, page += ds) {
+        if (tcx_check_dirty(ts, page, ds)) {
            if (y_start < 0)
                y_start = y;
            if (page < page_min)
@@ -368,37 +261,10 @@ static void tcx_update_display(void *opaque)
            if (page > page_max)
                page_max = page;

-            f(ts, d, s, ts->width);
+            tcx_draw_line32(ts, d, s, ts->width);
            if (y >= ts->cursy && y < ts->cursy + 32 && ts->cursx < ts->width) {
-                fc(ts, d, y, ts->width);
+                tcx_draw_cursor32(ts, d, y, ts->width);
            }
-            d += dd;
-            s += ds;
-            y++;
-
-            f(ts, d, s, ts->width);
-            if (y >= ts->cursy && y < ts->cursy + 32 && ts->cursx < ts->width) {
-                fc(ts, d, y, ts->width);
-            }
-            d += dd;
-            s += ds;
-            y++;
-
-            f(ts, d, s, ts->width);
-            if (y >= ts->cursy && y < ts->cursy + 32 && ts->cursx < ts->width) {
-                fc(ts, d, y, ts->width);
-            }
-            d += dd;
-            s += ds;
-            y++;
-
-            f(ts, d, s, ts->width);
-            if (y >= ts->cursy && y < ts->cursy + 32 && ts->cursx < ts->width) {
-                fc(ts, d, y, ts->width);
-            }
-            d += dd;
-            s += ds;
-            y++;
        } else {
            if (y_start >= 0) {
                /* flush to display */
@@ -406,10 +272,9 @@ static void tcx_update_display(void *opaque)
                               ts->width, y - y_start);
                y_start = -1;
            }
-            d += dd * 4;
-            s += ds * 4;
-            y += 4;
        }
+        s += ds;
+        d += dd;
    }
    if (y_start >= 0) {
        /* flush to display */
@@ -418,10 +283,7 @@ static void tcx_update_display(void *opaque)
    }
    /* reset modified pages */
    if (page_max >= page_min) {
-        memory_region_reset_dirty(&ts->vram_mem,
-                                  page_min,
-                                  (page_max - page_min) + TARGET_PAGE_SIZE,
-                                  DIRTY_MEMORY_VGA);
+        tcx_reset_dirty(ts, page_min, page_max - page_min);
    }
 }

@@ -429,7 +291,7 @@ static void tcx24_update_display(void *opaque)
 {
    TCXState *ts = opaque;
    DisplaySurface *surface = qemu_console_surface(ts->con);
-    ram_addr_t page, page_min, page_max, cpage, page24;
+    ram_addr_t page, page_min, page_max;
    int y, y_start, dd, ds;
    uint8_t *d, *s;
    uint32_t *cptr, *s24;
@@ -439,8 +301,6 @@ static void tcx24_update_display(void *opaque)
    }

    page = 0;
-    page24 = ts->vram24_offset;
-    cpage = ts->cplane_offset;
    y_start = -1;
    page_min = -1;
    page_max = 0;
@@ -452,9 +312,8 @@ static void tcx24_update_display(void *opaque)
    ds = 1024;

    memory_region_sync_dirty_bitmap(&ts->vram_mem);
-    for (y = 0; y < ts->height; page += TARGET_PAGE_SIZE,
-            page24 += TARGET_PAGE_SIZE, cpage += TARGET_PAGE_SIZE) {
-        if (tcx24_check_dirty(ts, page, page24, cpage)) {
+    for (y = 0; y < ts->height; y++, page += ds) {
+        if (tcx_check_dirty(ts, page, ds)) {
            if (y_start < 0)
                y_start = y;
            if (page < page_min)
@@ -465,38 +324,6 @@ static void tcx24_update_display(void *opaque)
            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
                tcx_draw_cursor32(ts, d, y, ts->width);
            }
-            d += dd;
-            s += ds;
-            cptr += ds;
-            s24 += ds;
-            y++;
-            tcx24_draw_line32(ts, d, s, ts->width, cptr, s24);
-            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
-                tcx_draw_cursor32(ts, d, y, ts->width);
-            }
-            d += dd;
-            s += ds;
-            cptr += ds;
-            s24 += ds;
-            y++;
-            tcx24_draw_line32(ts, d, s, ts->width, cptr, s24);
-            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
-                tcx_draw_cursor32(ts, d, y, ts->width);
-            }
-            d += dd;
-            s += ds;
-            cptr += ds;
-            s24 += ds;
-            y++;
-            tcx24_draw_line32(ts, d, s, ts->width, cptr, s24);
-            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
-                tcx_draw_cursor32(ts, d, y, ts->width);
-            }
-            d += dd;
-            s += ds;
-            cptr += ds;
-            s24 += ds;
-            y++;
        } else {
            if (y_start >= 0) {
                /* flush to display */
@@ -504,12 +331,11 @@ static void tcx24_update_display(void *opaque)
                               ts->width, y - y_start);
                y_start = -1;
            }
-            d += dd * 4;
-            s += ds * 4;
-            cptr += ds * 4;
-            s24 += ds * 4;
-            y += 4;
        }
+        d += dd;
+        s += ds;
+        cptr += ds;
+        s24 += ds;
    }
    if (y_start >= 0) {
        /* flush to display */
@@ -518,7 +344,7 @@ static void tcx24_update_display(void *opaque)
    }
    /* reset modified pages */
    if (page_max >= page_min) {
-        tcx24_reset_dirty(ts, page_min, page_max, page24, cpage);
+        tcx_reset_dirty(ts, page_min, page_max - page_min);
    }
 }

@@ -526,7 +352,7 @@ static void tcx_invalidate_display(void *opaque)
 {
    TCXState *s = opaque;

-    tcx_set_dirty(s);
+    tcx_set_dirty(s, 0, memory_region_size(&s->vram_mem));
    qemu_console_resize(s->con, s->width, s->height);
 }

@@ -534,7 +360,7 @@ static void tcx24_invalidate_display(void *opaque)
 {
    TCXState *s = opaque;

-    tcx_set_dirty(s);
+    tcx_set_dirty(s, 0, memory_region_size(&s->vram_mem));
    qemu_console_resize(s->con, s->width, s->height);
 }

@@ -543,7 +369,7 @@ static int vmstate_tcx_post_load(void *opaque, int version_id)
    TCXState *s = opaque;

    update_palette_entries(s, 0, 256);
-    tcx_set_dirty(s);
+    tcx_set_dirty(s, 0, memory_region_size(&s->vram_mem));
    return 0;
 }

@@ -699,7 +525,7 @@ static void tcx_stip_writel(void *opaque, hwaddr addr,
                val <<= 1;
            }
        }
-        memory_region_set_dirty(&s->vram_mem, addr, 32);
+        tcx_set_dirty(s, addr, 32);
    }
 }

@@ -732,7 +558,7 @@ static void tcx_rstip_writel(void *opaque, hwaddr addr,
                val <<= 1;
            }
        }
-        memory_region_set_dirty(&s->vram_mem, addr, 32);
+        tcx_set_dirty(s, addr, 32);
    }
 }

@@ -790,7 +616,7 @@ static void tcx_blit_writel(void *opaque, hwaddr addr,
                memcpy(&s->vram24[addr], &s->vram24[adsr], len * 4);
            }
        }
-        memory_region_set_dirty(&s->vram_mem, addr, len);
+        tcx_set_dirty(s, addr, len);
    }
 }

@@ -824,7 +650,7 @@ static void tcx_rblit_writel(void *opaque, hwaddr addr,
                memcpy(&s->cplane[addr], &s->cplane[adsr], len * 4);
            }
        }
-        memory_region_set_dirty(&s->vram_mem, addr, len);
+        tcx_set_dirty(s, addr, len);
    }
 }

@@ -861,7 +687,7 @@ static void tcx_invalidate_cursor_position(TCXState *s)
    start = ymin * 1024;
    end   = ymax * 1024;

-    memory_region_set_dirty(&s->vram_mem, start, end-start);
+    tcx_set_dirty(s, start, end - start);
 }

 static uint64_t tcx_thc_readl(void *opaque, hwaddr addr,
@@ -1017,8 +843,7 @@ static void tcx_realizefn(DeviceState *dev, Error **errp)
    vmstate_register_ram_global(&s->rom);
    fcode_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, TCX_ROM_FILE);
    if (fcode_filename) {
-        ret = load_image_targphys(fcode_filename, s->prom_addr,
-                                  FCODE_MAX_ROM_SIZE);
+        ret = load_image_mr(fcode_filename, &s->rom);
        g_free(fcode_filename);
        if (ret < 0 || ret > FCODE_MAX_ROM_SIZE) {
            error_report("tcx: could not load prom '%s'", TCX_ROM_FILE);
@@ -1076,7 +901,6 @@ static Property tcx_properties[] = {
    DEFINE_PROP_UINT16("width",    TCXState, width,     -1),
    DEFINE_PROP_UINT16("height",   TCXState, height,    -1),
    DEFINE_PROP_UINT16("depth",    TCXState, depth,     -1),
-    DEFINE_PROP_UINT64("prom_addr", TCXState, prom_addr, -1),
    DEFINE_PROP_END_OF_LIST(),
 };

--- a/hw/display/vga.c
+++ b/hw/display/vga.c
@@ -1434,6 +1434,14 @@ void vga_invalidate_scanlines(VGACommonState *s, int y1, int y2)
    }
 }

+static bool vga_scanline_invalidated(VGACommonState *s, int y)
+{
+    if (y >= VGA_MAX_HEIGHT) {
+        return false;
+    }
+    return s->invalidated_y_table[y >> 5] & (1 << (y & 0x1f));
+}
+
 void vga_sync_dirty_bitmap(VGACommonState *s)
 {
    memory_region_sync_dirty_bitmap(&s->vram);
@@ -1457,7 +1465,8 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
    DisplaySurface *surface = qemu_console_surface(s->con);
    int y1, y, update, linesize, y_start, double_scan, mask, depth;
    int width, height, shift_control, line_offset, bwidth, bits;
-    ram_addr_t page0, page1, page_min, page_max;
+    ram_addr_t page0, page1;
+    DirtyBitmapSnapshot *snap = NULL;
    int disp_width, multi_scan, multi_run;
    uint8_t *d;
    uint32_t v, addr1, addr;
@@ -1472,9 +1481,6 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)

    full_update |= update_basic_params(s);

-    if (!full_update)
-        vga_sync_dirty_bitmap(s);
-
    s->get_resolution(s, &width, &height);
    disp_width = width;

@@ -1617,11 +1623,17 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
    addr1 = (s->start_addr * 4);
    bwidth = (width * bits + 7) / 8;
    y_start = -1;
-    page_min = -1;
-    page_max = 0;
    d = surface_data(surface);
    linesize = surface_stride(surface);
    y1 = 0;
+
+    if (!full_update) {
+        vga_sync_dirty_bitmap(s);
+        snap = memory_region_snapshot_and_clear_dirty(&s->vram, addr1,
+                                                      bwidth * height,
+                                                      DIRTY_MEMORY_VGA);
+    }
+
    for(y = 0; y < height; y++) {
        addr = addr1;
        if (!(s->cr[VGA_CRTC_MODE] & 1)) {
@@ -1636,17 +1648,17 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
        update = full_update;
        page0 = addr;
        page1 = addr + bwidth - 1;
-        update |= memory_region_get_dirty(&s->vram, page0, page1 - page0,
-                                          DIRTY_MEMORY_VGA);
-        /* explicit invalidation for the hardware cursor */
-        update |= (s->invalidated_y_table[y >> 5] >> (y & 0x1f)) & 1;
+        if (full_update) {
+            update = 1;
+        } else {
+            update = memory_region_snapshot_get_dirty(&s->vram, snap,
+                                                      page0, page1 - page0);
+        }
+        /* explicit invalidation for the hardware cursor (cirrus only) */
+        update |= vga_scanline_invalidated(s, y);
        if (update) {
            if (y_start < 0)
                y_start = y;
-            if (page0 < page_min)
-                page_min = page0;
-            if (page1 > page_max)
-                page_max = page1;
            if (!(is_buffer_shared(surface))) {
                vga_draw_line(s, d, s->vram_ptr + addr, width);
                if (s->cursor_draw_line)
@@ -1679,14 +1691,8 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
        dpy_gfx_update(s->con, 0, y_start,
                       disp_width, y - y_start);
    }
-    /* reset modified pages */
-    if (page_max >= page_min) {
-        memory_region_reset_dirty(&s->vram,
-                                  page_min,
-                                  page_max - page_min,
-                                  DIRTY_MEMORY_VGA);
-    }
-    memset(s->invalidated_y_table, 0, ((height + 31) >> 5) * 4);
+    g_free(snap);
+    memset(s->invalidated_y_table, 0, sizeof(s->invalidated_y_table));
 }

 static void vga_draw_blank(VGACommonState *s, int full_update)
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -258,41 +258,22 @@ void virtio_gpu_get_display_info(VirtIOGPU *g,
 static pixman_format_code_t get_pixman_format(uint32_t virtio_gpu_format)
 {
    switch (virtio_gpu_format) {
-#ifdef HOST_WORDS_BIGENDIAN
    case VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM:
-        return PIXMAN_b8g8r8x8;
+        return PIXMAN_BE_b8g8r8x8;
    case VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM:
-        return PIXMAN_b8g8r8a8;
+        return PIXMAN_BE_b8g8r8a8;
    case VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM:
-        return PIXMAN_x8r8g8b8;
+        return PIXMAN_BE_x8r8g8b8;
    case VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM:
-        return PIXMAN_a8r8g8b8;
+        return PIXMAN_BE_a8r8g8b8;
    case VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM:
-        return PIXMAN_r8g8b8x8;
+        return PIXMAN_BE_r8g8b8x8;
    case VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM:
-        return PIXMAN_r8g8b8a8;
+        return PIXMAN_BE_r8g8b8a8;
    case VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM:
-        return PIXMAN_x8b8g8r8;
+        return PIXMAN_BE_x8b8g8r8;
    case VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM:
-        return PIXMAN_a8b8g8r8;
-#else
-    case VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM:
-        return PIXMAN_x8r8g8b8;
-    case VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM:
-        return PIXMAN_a8r8g8b8;
-    case VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM:
-        return PIXMAN_b8g8r8x8;
-    case VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM:
-        return PIXMAN_b8g8r8a8;
-    case VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM:
-        return PIXMAN_x8b8g8r8;
-    case VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM:
-        return PIXMAN_a8b8g8r8;
-    case VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM:
-        return PIXMAN_r8g8b8x8;
-    case VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM:
-        return PIXMAN_r8g8b8a8;
-#endif
+        return PIXMAN_BE_a8b8g8r8;
    default:
        return 0;
    }
@@ -1170,8 +1151,8 @@ static void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)
    virtio_init(VIRTIO_DEVICE(g), "virtio-gpu", VIRTIO_ID_GPU,
                g->config_size);

-    g->req_state[0].width = 1024;
-    g->req_state[0].height = 768;
+    g->req_state[0].width = g->conf.xres;
+    g->req_state[0].height = g->conf.yres;

    if (virtio_gpu_virgl_enabled(g->conf)) {
        /* use larger control queue in 3d mode */
@@ -1291,6 +1272,8 @@ static Property virtio_gpu_properties[] = {
    DEFINE_PROP_BIT("stats", VirtIOGPU, conf.flags,
                    VIRTIO_GPU_FLAG_STATS_ENABLED, false),
 #endif
+    DEFINE_PROP_UINT32("xres", VirtIOGPU, conf.xres, 1024),
+    DEFINE_PROP_UINT32("yres", VirtIOGPU, conf.yres, 768),
    DEFINE_PROP_END_OF_LIST(),
 };

--- a/hw/display/vmware_vga.c
+++ b/hw/display/vmware_vga.c
@@ -1118,9 +1118,9 @@ static void vmsvga_update_display(void *opaque)
 {
    struct vmsvga_state_s *s = opaque;
    DisplaySurface *surface;
-    bool dirty = false;

-    if (!s->enable) {
+    if (!s->enable || !s->config) {
+        /* in standard vga mode */
        s->vga.hw_ops->gfx_update(&s->vga);
        return;
    }
@@ -1131,26 +1131,11 @@ static void vmsvga_update_display(void *opaque)
    vmsvga_fifo_run(s);
    vmsvga_update_rect_flush(s);

-    /*
-     * Is it more efficient to look at vram VGA-dirty bits or wait
-     * for the driver to issue SVGA_CMD_UPDATE?
-     */
-    if (memory_region_is_logging(&s->vga.vram, DIRTY_MEMORY_VGA)) {
-        vga_sync_dirty_bitmap(&s->vga);
-        dirty = memory_region_get_dirty(&s->vga.vram, 0,
-            surface_stride(surface) * surface_height(surface),
-            DIRTY_MEMORY_VGA);
-    }
-    if (s->invalidated || dirty) {
+    if (s->invalidated) {
        s->invalidated = 0;
        dpy_gfx_update(s->vga.con, 0, 0,
                   surface_width(surface), surface_height(surface));
    }
-    if (dirty) {
-        memory_region_reset_dirty(&s->vga.vram, 0,
-            surface_stride(surface) * surface_height(surface),
-            DIRTY_MEMORY_VGA);
-    }
 }

 static void vmsvga_reset(DeviceState *dev)
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -595,6 +595,22 @@ static inline uint32_t vtd_get_agaw_from_context_entry(VTDContextEntry *ce)
    return 30 + (ce->hi & VTD_CONTEXT_ENTRY_AW) * 9;
 }

+static inline uint64_t vtd_iova_limit(VTDContextEntry *ce)
+{
+    uint32_t ce_agaw = vtd_get_agaw_from_context_entry(ce);
+    return 1ULL << MIN(ce_agaw, VTD_MGAW);
+}
+
+/* Return true if IOVA passes range check, otherwise false. */
+static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce)
+{
+    /*
+     * Check if @iova is above 2^X-1, where X is the minimum of MGAW
+     * in CAP_REG and AW in context-entry.
+     */
+    return !(iova & ~(vtd_iova_limit(ce) - 1));
+}
+
 static const uint64_t vtd_paging_entry_rsvd_field[] = {
    [0] = ~0ULL,
    /* For not large page */
@@ -630,13 +646,9 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
    uint32_t level = vtd_get_level_from_context_entry(ce);
    uint32_t offset;
    uint64_t slpte;
-    uint32_t ce_agaw = vtd_get_agaw_from_context_entry(ce);
    uint64_t access_right_check;

-    /* Check if @iova is above 2^X-1, where X is the minimum of MGAW
-     * in CAP_REG and AW in context-entry.
-     */
-    if (iova & ~((1ULL << MIN(ce_agaw, VTD_MGAW)) - 1)) {
+    if (!vtd_iova_range_check(iova, ce)) {
        VTD_DPRINTF(GENERAL, "error: iova 0x%"PRIx64 " exceeds limits", iova);
        return -VTD_FR_ADDR_BEYOND_MGAW;
    }
@@ -684,6 +696,135 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
    }
 }

+typedef int (*vtd_page_walk_hook)(IOMMUTLBEntry *entry, void *private);
+
+/**
+ * vtd_page_walk_level - walk over specific level for IOVA range
+ *
+ * @addr: base GPA addr to start the walk
+ * @start: IOVA range start address
+ * @end: IOVA range end address (start <= addr < end)
+ * @hook_fn: hook func to be called when detected page
+ * @private: private data to be passed into hook func
+ * @read: whether parent level has read permission
+ * @write: whether parent level has write permission
+ * @notify_unmap: whether we should notify invalid entries
+ */
+static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
+                               uint64_t end, vtd_page_walk_hook hook_fn,
+                               void *private, uint32_t level,
+                               bool read, bool write, bool notify_unmap)
+{
+    bool read_cur, write_cur, entry_valid;
+    uint32_t offset;
+    uint64_t slpte;
+    uint64_t subpage_size, subpage_mask;
+    IOMMUTLBEntry entry;
+    uint64_t iova = start;
+    uint64_t iova_next;
+    int ret = 0;
+
+    trace_vtd_page_walk_level(addr, level, start, end);
+
+    subpage_size = 1ULL << vtd_slpt_level_shift(level);
+    subpage_mask = vtd_slpt_level_page_mask(level);
+
+    while (iova < end) {
+        iova_next = (iova & subpage_mask) + subpage_size;
+
+        offset = vtd_iova_level_offset(iova, level);
+        slpte = vtd_get_slpte(addr, offset);
+
+        if (slpte == (uint64_t)-1) {
+            trace_vtd_page_walk_skip_read(iova, iova_next);
+            goto next;
+        }
+
+        if (vtd_slpte_nonzero_rsvd(slpte, level)) {
+            trace_vtd_page_walk_skip_reserve(iova, iova_next);
+            goto next;
+        }
+
+        /* Permissions are stacked with parents' */
+        read_cur = read && (slpte & VTD_SL_R);
+        write_cur = write && (slpte & VTD_SL_W);
+
+        /*
+         * As long as we have either read/write permission, this is a
+         * valid entry. The rule works for both page entries and page
+         * table entries.
+         */
+        entry_valid = read_cur | write_cur;
+
+        if (vtd_is_last_slpte(slpte, level)) {
+            entry.target_as = &address_space_memory;
+            entry.iova = iova & subpage_mask;
+            /* NOTE: this is only meaningful if entry_valid == true */
+            entry.translated_addr = vtd_get_slpte_addr(slpte);
+            entry.addr_mask = ~subpage_mask;
+            entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur);
+            if (!entry_valid && !notify_unmap) {
+                trace_vtd_page_walk_skip_perm(iova, iova_next);
+                goto next;
+            }
+            trace_vtd_page_walk_one(level, entry.iova, entry.translated_addr,
+                                    entry.addr_mask, entry.perm);
+            if (hook_fn) {
+                ret = hook_fn(&entry, private);
+                if (ret < 0) {
+                    return ret;
+                }
+            }
+        } else {
+            if (!entry_valid) {
+                trace_vtd_page_walk_skip_perm(iova, iova_next);
+                goto next;
+            }
+            ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte), iova,
+                                      MIN(iova_next, end), hook_fn, private,
+                                      level - 1, read_cur, write_cur,
+                                      notify_unmap);
+            if (ret < 0) {
+                return ret;
+            }
+        }
+
+next:
+        iova = iova_next;
+    }
+
+    return 0;
+}
+
+/**
+ * vtd_page_walk - walk specific IOVA range, and call the hook
+ *
+ * @ce: context entry to walk upon
+ * @start: IOVA address to start the walk
+ * @end: IOVA range end address (start <= addr < end)
+ * @hook_fn: the hook that to be called for each detected area
+ * @private: private data for the hook function
+ */
+static int vtd_page_walk(VTDContextEntry *ce, uint64_t start, uint64_t end,
+                         vtd_page_walk_hook hook_fn, void *private,
+                         bool notify_unmap)
+{
+    dma_addr_t addr = vtd_get_slpt_base_from_context(ce);
+    uint32_t level = vtd_get_level_from_context_entry(ce);
+
+    if (!vtd_iova_range_check(start, ce)) {
+        return -VTD_FR_ADDR_BEYOND_MGAW;
+    }
+
+    if (!vtd_iova_range_check(end, ce)) {
+        /* Fix end so that it reaches the maximum */
+        end = vtd_iova_limit(ce);
+    }
+
+    return vtd_page_walk_level(addr, start, end, hook_fn, private,
+                               level, true, true, notify_unmap);
+}
+
 /* Map a device to its corresponding domain (context-entry) */
 static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
                                    uint8_t devfn, VTDContextEntry *ce)
@@ -898,6 +1039,15 @@ static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s)
                s->intr_root, s->intr_size);
 }

+static void vtd_iommu_replay_all(IntelIOMMUState *s)
+{
+    IntelIOMMUNotifierNode *node;
+
+    QLIST_FOREACH(node, &s->notifiers_list, next) {
+        memory_region_iommu_replay_all(&node->vtd_as->iommu);
+    }
+}
+
 static void vtd_context_global_invalidate(IntelIOMMUState *s)
 {
    trace_vtd_inv_desc_cc_global();
@@ -905,6 +1055,14 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s)
    if (s->context_cache_gen == VTD_CONTEXT_CACHE_GEN_MAX) {
        vtd_reset_context_cache(s);
    }
+    /*
+     * From VT-d spec 6.5.2.1, a global context entry invalidation
+     * should be followed by a IOTLB global invalidation, so we should
+     * be safe even without this. Hoewever, let's replay the region as
+     * well to be safer, and go back here when we need finer tunes for
+     * VT-d emulation codes.
+     */
+    vtd_iommu_replay_all(s);
 }


@@ -971,6 +1129,16 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
                trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it),
                                             VTD_PCI_FUNC(devfn_it));
                vtd_as->context_cache_entry.context_cache_gen = 0;
+                /*
+                 * So a device is moving out of (or moving into) a
+                 * domain, a replay() suites here to notify all the
+                 * IOMMU_NOTIFIER_MAP registers about this change.
+                 * This won't bring bad even if we have no such
+                 * notifier registered - the IOMMU notification
+                 * framework will skip MAP notifications if that
+                 * happened.
+                 */
+                memory_region_iommu_replay_all(&vtd_as->iommu);
            }
        }
    }
@@ -1012,12 +1180,53 @@ static void vtd_iotlb_global_invalidate(IntelIOMMUState *s)
 {
    trace_vtd_iotlb_reset("global invalidation recved");
    vtd_reset_iotlb(s);
+    vtd_iommu_replay_all(s);
 }

 static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
 {
+    IntelIOMMUNotifierNode *node;
+    VTDContextEntry ce;
+    VTDAddressSpace *vtd_as;
+
    g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain,
                                &domain_id);
+
+    QLIST_FOREACH(node, &s->notifiers_list, next) {
+        vtd_as = node->vtd_as;
+        if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
+                                      vtd_as->devfn, &ce) &&
+            domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) {
+            memory_region_iommu_replay_all(&vtd_as->iommu);
+        }
+    }
+}
+
+static int vtd_page_invalidate_notify_hook(IOMMUTLBEntry *entry,
+                                           void *private)
+{
+    memory_region_notify_iommu((MemoryRegion *)private, *entry);
+    return 0;
+}
+
+static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
+                                           uint16_t domain_id, hwaddr addr,
+                                           uint8_t am)
+{
+    IntelIOMMUNotifierNode *node;
+    VTDContextEntry ce;
+    int ret;
+
+    QLIST_FOREACH(node, &(s->notifiers_list), next) {
+        VTDAddressSpace *vtd_as = node->vtd_as;
+        ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
+                                       vtd_as->devfn, &ce);
+        if (!ret && domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) {
+            vtd_page_walk(&ce, addr, addr + (1 << am) * VTD_PAGE_SIZE,
+                          vtd_page_invalidate_notify_hook,
+                          (void *)&vtd_as->iommu, true);
+        }
+    }
 }

 static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
@@ -1030,6 +1239,7 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
    info.addr = addr;
    info.mask = ~((1 << am) - 1);
    g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
+    vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am);
 }

 /* Flush IOTLB
@@ -1151,9 +1361,49 @@ static void vtd_handle_gcmd_sirtp(IntelIOMMUState *s)
    vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRTPS);
 }

+static void vtd_switch_address_space(VTDAddressSpace *as)
+{
+    assert(as);
+
+    trace_vtd_switch_address_space(pci_bus_num(as->bus),
+                                   VTD_PCI_SLOT(as->devfn),
+                                   VTD_PCI_FUNC(as->devfn),
+                                   as->iommu_state->dmar_enabled);
+
+    /* Turn off first then on the other */
+    if (as->iommu_state->dmar_enabled) {
+        memory_region_set_enabled(&as->sys_alias, false);
+        memory_region_set_enabled(&as->iommu, true);
+    } else {
+        memory_region_set_enabled(&as->iommu, false);
+        memory_region_set_enabled(&as->sys_alias, true);
+    }
+}
+
+static void vtd_switch_address_space_all(IntelIOMMUState *s)
+{
+    GHashTableIter iter;
+    VTDBus *vtd_bus;
+    int i;
+
+    g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
+    while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
+        for (i = 0; i < X86_IOMMU_PCI_DEVFN_MAX; i++) {
+            if (!vtd_bus->dev_as[i]) {
+                continue;
+            }
+            vtd_switch_address_space(vtd_bus->dev_as[i]);
+        }
+    }
+}
+
 /* Handle Translation Enable/Disable */
 static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en)
 {
+    if (s->dmar_enabled == en) {
+        return;
+    }
+
    VTD_DPRINTF(CSR, "Translation Enable %s", (en ? "on" : "off"));

    if (en) {
@@ -1168,6 +1418,8 @@ static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en)
        /* Ok - report back to driver */
        vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_TES, 0);
    }
+
+    vtd_switch_address_space_all(s);
 }

 /* Handle Interrupt Remap Enable/Disable */
@@ -1457,7 +1709,7 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
    entry.iova = addr;
    entry.perm = IOMMU_NONE;
    entry.translated_addr = 0;
-    memory_region_notify_iommu(entry.target_as->root, entry);
+    memory_region_notify_iommu(&vtd_dev_as->iommu, entry);

 done:
    return true;
@@ -2005,15 +2257,33 @@ static void vtd_iommu_notify_flag_changed(MemoryRegion *iommu,
                                          IOMMUNotifierFlag new)
 {
    VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
+    IntelIOMMUState *s = vtd_as->iommu_state;
+    IntelIOMMUNotifierNode *node = NULL;
+    IntelIOMMUNotifierNode *next_node = NULL;

-    if (new & IOMMU_NOTIFIER_MAP) {
-        error_report("Device at bus %s addr %02x.%d requires iommu "
-                     "notifier which is currently not supported by "
-                     "intel-iommu emulation",
-                     vtd_as->bus->qbus.name, PCI_SLOT(vtd_as->devfn),
-                     PCI_FUNC(vtd_as->devfn));
+    if (!s->caching_mode && new & IOMMU_NOTIFIER_MAP) {
+        error_report("We need to set cache_mode=1 for intel-iommu to enable "
+                     "device assignment with IOMMU protection.");
        exit(1);
    }
+
+    if (old == IOMMU_NOTIFIER_NONE) {
+        node = g_malloc0(sizeof(*node));
+        node->vtd_as = vtd_as;
+        QLIST_INSERT_HEAD(&s->notifiers_list, node, next);
+        return;
+    }
+
+    /* update notifier node with new flags */
+    QLIST_FOREACH_SAFE(node, &s->notifiers_list, next, next_node) {
+        if (node->vtd_as == vtd_as) {
+            if (new == IOMMU_NOTIFIER_NONE) {
+                QLIST_REMOVE(node, next);
+                g_free(node);
+            }
+            return;
+        }
+    }
 }

 static const VMStateDescription vtd_vmstate = {
@@ -2389,19 +2659,150 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
        vtd_dev_as->devfn = (uint8_t)devfn;
        vtd_dev_as->iommu_state = s;
        vtd_dev_as->context_cache_entry.context_cache_gen = 0;
+
+        /*
+         * Memory region relationships looks like (Address range shows
+         * only lower 32 bits to make it short in length...):
+         *
+         * |-----------------+-------------------+----------|
+         * | Name            | Address range     | Priority |
+         * |-----------------+-------------------+----------+
+         * | vtd_root        | 00000000-ffffffff |        0 |
+         * |  intel_iommu    | 00000000-ffffffff |        1 |
+         * |  vtd_sys_alias  | 00000000-ffffffff |        1 |
+         * |  intel_iommu_ir | fee00000-feefffff |       64 |
+         * |-----------------+-------------------+----------|
+         *
+         * We enable/disable DMAR by switching enablement for
+         * vtd_sys_alias and intel_iommu regions. IR region is always
+         * enabled.
+         */
        memory_region_init_iommu(&vtd_dev_as->iommu, OBJECT(s),
-                                 &s->iommu_ops, "intel_iommu", UINT64_MAX);
+                                 &s->iommu_ops, "intel_iommu_dmar",
+                                 UINT64_MAX);
+        memory_region_init_alias(&vtd_dev_as->sys_alias, OBJECT(s),
+                                 "vtd_sys_alias", get_system_memory(),
+                                 0, memory_region_size(get_system_memory()));
        memory_region_init_io(&vtd_dev_as->iommu_ir, OBJECT(s),
                              &vtd_mem_ir_ops, s, "intel_iommu_ir",
                              VTD_INTERRUPT_ADDR_SIZE);
-        memory_region_add_subregion(&vtd_dev_as->iommu, VTD_INTERRUPT_ADDR_FIRST,
-                                    &vtd_dev_as->iommu_ir);
-        address_space_init(&vtd_dev_as->as,
-                           &vtd_dev_as->iommu, name);
+        memory_region_init(&vtd_dev_as->root, OBJECT(s),
+                           "vtd_root", UINT64_MAX);
+        memory_region_add_subregion_overlap(&vtd_dev_as->root,
+                                            VTD_INTERRUPT_ADDR_FIRST,
+                                            &vtd_dev_as->iommu_ir, 64);
+        address_space_init(&vtd_dev_as->as, &vtd_dev_as->root, name);
+        memory_region_add_subregion_overlap(&vtd_dev_as->root, 0,
+                                            &vtd_dev_as->sys_alias, 1);
+        memory_region_add_subregion_overlap(&vtd_dev_as->root, 0,
+                                            &vtd_dev_as->iommu, 1);
+        vtd_switch_address_space(vtd_dev_as);
    }
    return vtd_dev_as;
 }

+/* Unmap the whole range in the notifier's scope. */
+static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
+{
+    IOMMUTLBEntry entry;
+    hwaddr size;
+    hwaddr start = n->start;
+    hwaddr end = n->end;
+
+    /*
+     * Note: all the codes in this function has a assumption that IOVA
+     * bits are no more than VTD_MGAW bits (which is restricted by
+     * VT-d spec), otherwise we need to consider overflow of 64 bits.
+     */
+
+    if (end > VTD_ADDRESS_SIZE) {
+        /*
+         * Don't need to unmap regions that is bigger than the whole
+         * VT-d supported address space size
+         */
+        end = VTD_ADDRESS_SIZE;
+    }
+
+    assert(start <= end);
+    size = end - start;
+
+    if (ctpop64(size) != 1) {
+        /*
+         * This size cannot format a correct mask. Let's enlarge it to
+         * suite the minimum available mask.
+         */
+        int n = 64 - clz64(size);
+        if (n > VTD_MGAW) {
+            /* should not happen, but in case it happens, limit it */
+            n = VTD_MGAW;
+        }
+        size = 1ULL << n;
+    }
+
+    entry.target_as = &address_space_memory;
+    /* Adjust iova for the size */
+    entry.iova = n->start & ~(size - 1);
+    /* This field is meaningless for unmap */
+    entry.translated_addr = 0;
+    entry.perm = IOMMU_NONE;
+    entry.addr_mask = size - 1;
+
+    trace_vtd_as_unmap_whole(pci_bus_num(as->bus),
+                             VTD_PCI_SLOT(as->devfn),
+                             VTD_PCI_FUNC(as->devfn),
+                             entry.iova, size);
+
+    memory_region_notify_one(n, &entry);
+}
+
+static void vtd_address_space_unmap_all(IntelIOMMUState *s)
+{
+    IntelIOMMUNotifierNode *node;
+    VTDAddressSpace *vtd_as;
+    IOMMUNotifier *n;
+
+    QLIST_FOREACH(node, &s->notifiers_list, next) {
+        vtd_as = node->vtd_as;
+        IOMMU_NOTIFIER_FOREACH(n, &vtd_as->iommu) {
+            vtd_address_space_unmap(vtd_as, n);
+        }
+    }
+}
+
+static int vtd_replay_hook(IOMMUTLBEntry *entry, void *private)
+{
+    memory_region_notify_one((IOMMUNotifier *)private, entry);
+    return 0;
+}
+
+static void vtd_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n)
+{
+    VTDAddressSpace *vtd_as = container_of(mr, VTDAddressSpace, iommu);
+    IntelIOMMUState *s = vtd_as->iommu_state;
+    uint8_t bus_n = pci_bus_num(vtd_as->bus);
+    VTDContextEntry ce;
+
+    /*
+     * The replay can be triggered by either a invalidation or a newly
+     * created entry. No matter what, we release existing mappings
+     * (it means flushing caches for UNMAP-only registers).
+     */
+    vtd_address_space_unmap(vtd_as, n);
+
+    if (vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce) == 0) {
+        trace_vtd_replay_ce_valid(bus_n, PCI_SLOT(vtd_as->devfn),
+                                  PCI_FUNC(vtd_as->devfn),
+                                  VTD_CONTEXT_ENTRY_DID(ce.hi),
+                                  ce.hi, ce.lo);
+        vtd_page_walk(&ce, 0, ~0ULL, vtd_replay_hook, (void *)n, false);
+    } else {
+        trace_vtd_replay_ce_invalid(bus_n, PCI_SLOT(vtd_as->devfn),
+                                    PCI_FUNC(vtd_as->devfn));
+    }
+
+    return;
+}
+
 /* Do the initialization. It will also be called when reset, so pay
 * attention when adding new initialization stuff.
 */
@@ -2416,6 +2817,7 @@ static void vtd_init(IntelIOMMUState *s)

    s->iommu_ops.translate = vtd_iommu_translate;
    s->iommu_ops.notify_flag_changed = vtd_iommu_notify_flag_changed;
+    s->iommu_ops.replay = vtd_iommu_replay;
    s->root = 0;
    s->root_extended = false;
    s->dmar_enabled = false;
@@ -2511,6 +2913,11 @@ static void vtd_reset(DeviceState *dev)

    VTD_DPRINTF(GENERAL, "");
    vtd_init(s);
+
+    /*
+     * When device reset, throw away all mappings and external caches
+     */
+    vtd_address_space_unmap_all(s);
 }

 static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
@@ -2574,6 +2981,7 @@ static void vtd_realize(DeviceState *dev, Error **errp)
        return;
    }

+    QLIST_INIT(&s->notifiers_list);
    memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
    memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
                          "intel_iommu", DMAR_REG_SIZE);
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -197,6 +197,7 @@
 #define VTD_DOMAIN_ID_MASK          ((1UL << VTD_DOMAIN_ID_SHIFT) - 1)
 #define VTD_CAP_ND                  (((VTD_DOMAIN_ID_SHIFT - 4) / 2) & 7ULL)
 #define VTD_MGAW                    39  /* Maximum Guest Address Width */
+#define VTD_ADDRESS_SIZE            (1ULL << VTD_MGAW)
 #define VTD_CAP_MGAW                (((VTD_MGAW - 1) & 0x3fULL) << 16)
 #define VTD_MAMV                    18ULL
 #define VTD_CAP_MAMV                (VTD_MAMV << 48)
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1104,9 +1104,7 @@ static void pc_new_cpu(const char *typename, int64_t apic_id, Error **errp)
    object_property_set_bool(cpu, true, "realized", &local_err);

    object_unref(cpu);
-    if (local_err) {
-        error_propagate(errp, local_err);
-    }
+    error_propagate(errp, local_err);
 }

 void pc_hot_add_cpu(const int64_t id, Error **errp)
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -4,7 +4,6 @@
 x86_iommu_iec_notify(bool global, uint32_t index, uint32_t mask) "Notify IEC invalidation: global=%d index=%" PRIu32 " mask=%" PRIu32

 # hw/i386/intel_iommu.c
-vtd_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)"
 vtd_inv_desc(const char *type, uint64_t hi, uint64_t lo) "invalidate desc type %s high 0x%"PRIx64" low 0x%"PRIx64
 vtd_inv_desc_invalid(uint64_t hi, uint64_t lo) "invalid inv desc hi 0x%"PRIx64" lo 0x%"PRIx64
 vtd_inv_desc_cc_domain(uint16_t domain) "context invalidate domain 0x%"PRIx16
@@ -30,6 +29,15 @@ vtd_iotlb_cc_hit(uint8_t bus, uint8_t devfn, uint64_t high, uint64_t low, uint32
 vtd_iotlb_cc_update(uint8_t bus, uint8_t devfn, uint64_t high, uint64_t low, uint32_t gen1, uint32_t gen2) "IOTLB context update bus 0x%"PRIx8" devfn 0x%"PRIx8" high 0x%"PRIx64" low 0x%"PRIx64" gen %"PRIu32" -> gen %"PRIu32
 vtd_iotlb_reset(const char *reason) "IOTLB reset (reason: %s)"
 vtd_fault_disabled(void) "Fault processing disabled for context entry"
+vtd_replay_ce_valid(uint8_t bus, uint8_t dev, uint8_t fn, uint16_t domain, uint64_t hi, uint64_t lo) "replay valid context device %02"PRIx8":%02"PRIx8".%02"PRIx8" domain 0x%"PRIx16" hi 0x%"PRIx64" lo 0x%"PRIx64
+vtd_replay_ce_invalid(uint8_t bus, uint8_t dev, uint8_t fn) "replay invalid context device %02"PRIx8":%02"PRIx8".%02"PRIx8
+vtd_page_walk_level(uint64_t addr, uint32_t level, uint64_t start, uint64_t end) "walk (base=0x%"PRIx64", level=%"PRIu32") iova range 0x%"PRIx64" - 0x%"PRIx64
+vtd_page_walk_one(uint32_t level, uint64_t iova, uint64_t gpa, uint64_t mask, int perm) "detected page level 0x%"PRIx32" iova 0x%"PRIx64" -> gpa 0x%"PRIx64" mask 0x%"PRIx64" perm %d"
+vtd_page_walk_skip_read(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to unable to read"
+vtd_page_walk_skip_perm(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to perm empty"
+vtd_page_walk_skip_reserve(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to rsrv set"
+vtd_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)"
+vtd_as_unmap_whole(uint8_t bus, uint8_t slot, uint8_t fn, uint64_t iova, uint64_t size) "Device %02x:%02x.%x start 0x%"PRIx64" size 0x%"PRIx64

 # hw/i386/amd_iommu.c
 amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" +  offset 0x%"PRIx32
--- a/hw/i386/xen/Makefile.objs
+++ b/hw/i386/xen/Makefile.objs
@@ -1 +1 @@
-obj-y += xen_platform.o xen_apic.o xen_pvdevice.o
+obj-y += xen_platform.o xen_apic.o xen_pvdevice.o xen-hvm.o xen-mapcache.o
--- a/hw/i386/xen/trace-events
+++ b/hw/i386/xen/trace-events
@@ -4,3 +4,20 @@ xen_platform_log(char *s) "xen platform: %s"
 # hw/i386/xen/xen_pvdevice.c
 xen_pv_mmio_read(uint64_t addr) "WARNING: read from Xen PV Device MMIO space (address %"PRIx64")"
 xen_pv_mmio_write(uint64_t addr) "WARNING: write to Xen PV Device MMIO space (address %"PRIx64")"
+
+# xen-hvm.c
+xen_ram_alloc(unsigned long ram_addr, unsigned long size) "requested: %#lx, size %#lx"
+xen_client_set_memory(uint64_t start_addr, unsigned long size, bool log_dirty) "%#"PRIx64" size %#lx, log_dirty %i"
+handle_ioreq(void *req, uint32_t type, uint32_t dir, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p type=%d dir=%d df=%d ptr=%d port=%#"PRIx64" data=%#"PRIx64" count=%d size=%d"
+handle_ioreq_read(void *req, uint32_t type, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p read type=%d df=%d ptr=%d port=%#"PRIx64" data=%#"PRIx64" count=%d size=%d"
+handle_ioreq_write(void *req, uint32_t type, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p write type=%d df=%d ptr=%d port=%#"PRIx64" data=%#"PRIx64" count=%d size=%d"
+cpu_ioreq_pio(void *req, uint32_t dir, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p pio dir=%d df=%d ptr=%d port=%#"PRIx64" data=%#"PRIx64" count=%d size=%d"
+cpu_ioreq_pio_read_reg(void *req, uint64_t data, uint64_t addr, uint32_t size) "I/O=%p pio read reg data=%#"PRIx64" port=%#"PRIx64" size=%d"
+cpu_ioreq_pio_write_reg(void *req, uint64_t data, uint64_t addr, uint32_t size) "I/O=%p pio write reg data=%#"PRIx64" port=%#"PRIx64" size=%d"
+cpu_ioreq_move(void *req, uint32_t dir, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p copy dir=%d df=%d ptr=%d port=%#"PRIx64" data=%#"PRIx64" count=%d size=%d"
+
+# xen-mapcache.c
+xen_map_cache(uint64_t phys_addr) "want %#"PRIx64
+xen_remap_bucket(uint64_t index) "index %#"PRIx64
+xen_map_cache_return(void* ptr) "%p"
+
--- a/hw/i386/xen/xen-hvm.c
+++ b/hw/i386/xen/xen-hvm.c
@@ -22,7 +22,7 @@
 #include "qemu/error-report.h"
 #include "qemu/range.h"
 #include "sysemu/xen-mapcache.h"
-#include "trace-root.h"
+#include "trace.h"
 #include "exec/address-spaces.h"

 #include <xen/hvm/ioreq.h>
@@ -125,8 +125,8 @@ int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)

 void xen_piix3_set_irq(void *opaque, int irq_num, int level)
 {
-    xc_hvm_set_pci_intx_level(xen_xc, xen_domid, 0, 0, irq_num >> 2,
-                              irq_num & 3, level);
+    xen_set_pci_intx_level(xen_domid, 0, 0, irq_num >> 2,
+                           irq_num & 3, level);
 }

 void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len)
@@ -141,7 +141,7 @@ void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len)
        }
        v &= 0xf;
        if (((address + i) >= 0x60) && ((address + i) <= 0x63)) {
-            xc_hvm_set_pci_link_route(xen_xc, xen_domid, address + i - 0x60, v);
+            xen_set_pci_link_route(xen_domid, address + i - 0x60, v);
        }
    }
 }
@@ -156,7 +156,7 @@ int xen_is_pirq_msi(uint32_t msi_data)

 void xen_hvm_inject_msi(uint64_t addr, uint32_t data)
 {
-    xc_hvm_inject_msi(xen_xc, xen_domid, addr, data);
+    xen_inject_msi(xen_domid, addr, data);
 }

 static void xen_suspend_notifier(Notifier *notifier, void *data)
@@ -168,7 +168,7 @@ static void xen_suspend_notifier(Notifier *notifier, void *data)

 static void xen_set_irq(void *opaque, int irq, int level)
 {
-    xc_hvm_set_isa_irq_level(xen_xc, xen_domid, irq, level);
+    xen_set_isa_irq_level(xen_domid, irq, level);
 }

 qemu_irq *xen_interrupt_controller_init(void)
@@ -454,10 +454,10 @@ static void xen_set_memory(struct MemoryListener *listener,
        return;
    } else {
        if (add) {
-            xen_map_memory_section(xen_xc, xen_domid, state->ioservid,
+            xen_map_memory_section(xen_domid, state->ioservid,
                                   section);
        } else {
-            xen_unmap_memory_section(xen_xc, xen_domid, state->ioservid,
+            xen_unmap_memory_section(xen_domid, state->ioservid,
                                     section);
        }
    }
@@ -481,10 +481,10 @@ static void xen_set_memory(struct MemoryListener *listener,
                               section->mr, section->offset_within_region);
        } else {
            mem_type = HVMMEM_ram_ro;
-            if (xc_hvm_set_mem_type(xen_xc, xen_domid, mem_type,
-                                    start_addr >> TARGET_PAGE_BITS,
-                                    size >> TARGET_PAGE_BITS)) {
-                DPRINTF("xc_hvm_set_mem_type error, addr: "TARGET_FMT_plx"\n",
+            if (xen_set_mem_type(xen_domid, mem_type,
+                                 start_addr >> TARGET_PAGE_BITS,
+                                 size >> TARGET_PAGE_BITS)) {
+                DPRINTF("xen_set_mem_type error, addr: "TARGET_FMT_plx"\n",
                        start_addr);
            }
        }
@@ -521,7 +521,7 @@ static void xen_io_add(MemoryListener *listener,

    memory_region_ref(mr);

-    xen_map_io_section(xen_xc, xen_domid, state->ioservid, section);
+    xen_map_io_section(xen_domid, state->ioservid, section);
 }

 static void xen_io_del(MemoryListener *listener,
@@ -534,7 +534,7 @@ static void xen_io_del(MemoryListener *listener,
        return;
    }

-    xen_unmap_io_section(xen_xc, xen_domid, state->ioservid, section);
+    xen_unmap_io_section(xen_domid, state->ioservid, section);

    memory_region_unref(mr);
 }
@@ -547,7 +547,7 @@ static void xen_device_realize(DeviceListener *listener,
    if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
        PCIDevice *pci_dev = PCI_DEVICE(dev);

-        xen_map_pcidev(xen_xc, xen_domid, state->ioservid, pci_dev);
+        xen_map_pcidev(xen_domid, state->ioservid, pci_dev);
    }
 }

@@ -559,7 +559,7 @@ static void xen_device_unrealize(DeviceListener *listener,
    if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
        PCIDevice *pci_dev = PCI_DEVICE(dev);

-        xen_unmap_pcidev(xen_xc, xen_domid, state->ioservid, pci_dev);
+        xen_unmap_pcidev(xen_domid, state->ioservid, pci_dev);
    }
 }

@@ -586,9 +586,8 @@ static void xen_sync_dirty_bitmap(XenIOState *state,
        return;
    }

-    rc = xc_hvm_track_dirty_vram(xen_xc, xen_domid,
-                                 start_addr >> TARGET_PAGE_BITS, npages,
-                                 bitmap);
+    rc = xen_track_dirty_vram(xen_domid, start_addr >> TARGET_PAGE_BITS,
+                              npages, bitmap);
    if (rc < 0) {
 #ifndef ENODATA
 #define ENODATA  ENOENT
@@ -634,7 +633,7 @@ static void xen_log_stop(MemoryListener *listener, MemoryRegionSection *section,
    if (old & ~new & (1 << DIRTY_MEMORY_VGA)) {
        state->log_for_dirtybit = NULL;
        /* Disable dirty bit tracking */
-        xc_hvm_track_dirty_vram(xen_xc, xen_domid, 0, 0, NULL);
+        xen_track_dirty_vram(xen_domid, 0, 0, NULL);
    }
 }

@@ -1139,7 +1138,7 @@ static void xen_hvm_change_state_handler(void *opaque, int running,
        xen_main_loop_prepare(state);
    }

-    xen_set_ioreq_server_state(xen_xc, xen_domid,
+    xen_set_ioreq_server_state(xen_domid,
                               state->ioservid,
                               (rstate == RUN_STATE_RUNNING));
 }
@@ -1227,7 +1226,15 @@ void xen_hvm_init(PCMachineState *pcms, MemoryRegion **ram_memory)
        goto err;
    }

-    xen_create_ioreq_server(xen_xc, xen_domid, &state->ioservid);
+    if (xen_domid_restrict) {
+        rc = xen_restrict(xen_domid);
+        if (rc < 0) {
+            error_report("failed to restrict: error %d", errno);
+            goto err;
+        }
+    }
+
+    xen_create_ioreq_server(xen_domid, &state->ioservid);

    state->exit.notify = xen_exit_notifier;
    qemu_add_exit_notifier(&state->exit);
@@ -1238,7 +1245,7 @@ void xen_hvm_init(PCMachineState *pcms, MemoryRegion **ram_memory)
    state->wakeup.notify = xen_wakeup_notifier;
    qemu_register_wakeup_notifier(&state->wakeup);

-    rc = xen_get_ioreq_server_info(xen_xc, xen_domid, state->ioservid,
+    rc = xen_get_ioreq_server_info(xen_domid, state->ioservid,
                                   &ioreq_pfn, &bufioreq_pfn,
                                   &bufioreq_evtchn);
    if (rc < 0) {
@@ -1288,7 +1295,7 @@ void xen_hvm_init(PCMachineState *pcms, MemoryRegion **ram_memory)
    /* Note: cpus is empty at this point in init */
    state->cpu_by_vcpu_id = g_malloc0(max_cpus * sizeof(CPUState *));

-    rc = xen_set_ioreq_server_state(xen_xc, xen_domid, state->ioservid, true);
+    rc = xen_set_ioreq_server_state(xen_domid, state->ioservid, true);
    if (rc < 0) {
        error_report("failed to enable ioreq server info: error %d handle=%p",
                     errno, xen_xc);
@@ -1391,7 +1398,7 @@ void xen_shutdown_fatal_error(const char *fmt, ...)
    qemu_system_shutdown_request();
 }

-void xen_modified_memory(ram_addr_t start, ram_addr_t length)
+void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length)
 {
    if (unlikely(xen_in_migration)) {
        int rc;
@@ -1403,7 +1410,7 @@ void xen_modified_memory(ram_addr_t start, ram_addr_t length)
        start_pfn = start >> TARGET_PAGE_BITS;
        nb_pages = ((start + length + TARGET_PAGE_SIZE - 1) >> TARGET_PAGE_BITS)
            - start_pfn;
-        rc = xc_hvm_modified_memory(xen_xc, xen_domid, start_pfn, nb_pages);
+        rc = xen_modified_memory(xen_domid, start_pfn, nb_pages);
        if (rc) {
            fprintf(stderr,
                    "%s failed for "RAM_ADDR_FMT" ("RAM_ADDR_FMT"): %i, %s\n",
--- a/hw/i386/xen/xen-mapcache.c
+++ b/hw/i386/xen/xen-mapcache.c
@@ -19,7 +19,7 @@
 #include <xen/hvm/params.h>

 #include "sysemu/xen-mapcache.h"
-#include "trace-root.h"
+#include "trace.h"


 //#define MAPCACHE_DEBUG
--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .8.92
 .9.50