Update version for v2.1.1 release

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
target-i386: Support migratable=no properly
2014-09-10 14:30:45 -05:00 · 2014-09-10 09:30:58 -05:00 · 2014-09-10 09:30:58 -05:00 · 2014-09-10 09:30:58 -05:00 · 2014-09-10 09:30:58 -05:00 · 2014-09-10 09:30:58 -05:00
349 changed files with 10564 additions and 5025 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -26,6 +26,7 @@
 /qapi-generated
 /qapi-types.[ch]
 /qapi-visit.[ch]
+/qapi-event.[ch]
 /qmp-commands.h
 /qmp-marshal.c
 /qemu-doc.html
--- a/3
+++ b/3
@@ -563,6 +563,7 @@ Devices
 -------
 IDE
 M: Kevin Wolf <kwolf@redhat.com>
+M: Stefan Hajnoczi <stefanha@redhat.com>
 S: Odd Fixes
 F: include/hw/ide.h
 F: hw/ide/
@@ -853,7 +854,7 @@ S: Odd Fixes
 F: scripts/checkpatch.pl

 Seccomp
-M: Eduardo Otubo <otubo@linux.vnet.ibm.com>
+M: Eduardo Otubo <eduardo.otubo@profitbricks.com>
 S: Supported
 F: qemu-seccomp.c
 F: include/sysemu/seccomp.h
--- a/5
+++ b/5
@@ -248,7 +248,7 @@ $(SRC_PATH)/qga/qapi-schema.json $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py)

 qapi-modules = $(SRC_PATH)/qapi-schema.json $(SRC_PATH)/qapi/common.json \
               $(SRC_PATH)/qapi/block.json $(SRC_PATH)/qapi/block-core.json \
-               $(SRC_PATH)/qapi-event.json
+               $(SRC_PATH)/qapi/event.json

 qapi-types.c qapi-types.h :\
 $(qapi-modules) $(SRC_PATH)/scripts/qapi-types.py $(qapi-py)
@@ -344,7 +344,8 @@ multiboot.bin linuxboot.bin kvmvapic.bin \
 s390-zipl.rom \
 s390-ccw.img \
 spapr-rtas.bin slof.bin \
-palcode-clipper
+palcode-clipper \
+u-boot.e500
 else
 BLOBS=
 endif
--- a/Makefile.target
+++ b/Makefile.target
@@ -163,10 +163,6 @@ dummy := $(call unnest-vars,.., \
 all-obj-y += $(common-obj-y)
 all-obj-$(CONFIG_SOFTMMU) += $(block-obj-y)

-ifndef CONFIG_HAIKU
-LIBS+=-lm
-endif
-
 # build either PROG or PROGW
 $(QEMU_PROG_BUILD): $(all-obj-y) ../libqemuutil.a ../libqemustub.a
 	$(call LINK,$^)
--- a/2
+++ b/2
@@ -1 +1 @@
-2.0.50
+2.1.1
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -125,7 +125,7 @@ static bool aio_dispatch(AioContext *ctx)
    bool progress = false;

    /*
-     * We have to walk very carefully in case qemu_aio_set_fd_handler is
+     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
    node = QLIST_FIRST(&ctx->aio_handlers);
@@ -175,27 +175,56 @@ static bool aio_dispatch(AioContext *ctx)
 bool aio_poll(AioContext *ctx, bool blocking)
 {
    AioHandler *node;
+    bool was_dispatching;
    int ret;
    bool progress;

+    was_dispatching = ctx->dispatching;
    progress = false;

+    /* aio_notify can avoid the expensive event_notifier_set if
+     * everything (file descriptors, bottom halves, timers) will
+     * be re-evaluated before the next blocking poll().  This happens
+     * in two cases:
+     *
+     * 1) when aio_poll is called with blocking == false
+     *
+     * 2) when we are called after poll().  If we are called before
+     *    poll(), bottom halves will not be re-evaluated and we need
+     *    aio_notify() if blocking == true.
+     *
+     * The first aio_dispatch() only does something when AioContext is
+     * running as a GSource, and in that case aio_poll is used only
+     * with blocking == false, so this optimization is already quite
+     * effective.  However, the code is ugly and should be restructured
+     * to have a single aio_dispatch() call.  To do this, we need to
+     * reorganize aio_poll into a prepare/poll/dispatch model like
+     * glib's.
+     *
+     * If we're in a nested event loop, ctx->dispatching might be true.
+     * In that case we can restore it just before returning, but we
+     * have to clear it now.
+     */
+    aio_set_dispatching(ctx, !blocking);
+
    /*
     * If there are callbacks left that have been queued, we need to call them.
     * Do not call select in this case, because it is possible that the caller
-     * does not need a complete flush (as is the case for qemu_aio_wait loops).
+     * does not need a complete flush (as is the case for aio_poll loops).
     */
    if (aio_bh_poll(ctx)) {
        blocking = false;
        progress = true;
    }

+    /* Re-evaluate condition (1) above.  */
+    aio_set_dispatching(ctx, !blocking);
    if (aio_dispatch(ctx)) {
        progress = true;
    }

    if (progress && !blocking) {
-        return true;
+        goto out;
    }

    ctx->walking_handlers++;
@@ -234,9 +263,12 @@ bool aio_poll(AioContext *ctx, bool blocking)
    }

    /* Run dispatch even if there were no readable fds to run timers */
+    aio_set_dispatching(ctx, true);
    if (aio_dispatch(ctx)) {
        progress = true;
    }

+out:
+    aio_set_dispatching(ctx, was_dispatching);
    return progress;
 }
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -102,7 +102,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
    /*
     * If there are callbacks left that have been queued, we need to call then.
     * Do not call select in this case, because it is possible that the caller
-     * does not need a complete flush (as is the case for qemu_aio_wait loops).
+     * does not need a complete flush (as is the case for aio_poll loops).
     */
    if (aio_bh_poll(ctx)) {
        blocking = false;
@@ -115,7 +115,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
    /*
     * Then dispatch any pending callbacks from the GSource.
     *
-     * We have to walk very carefully in case qemu_aio_set_fd_handler is
+     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
    node = QLIST_FIRST(&ctx->aio_handlers);
@@ -177,7 +177,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
        blocking = false;

        /* we have to walk very carefully in case
-         * qemu_aio_set_fd_handler is called while we're walking */
+         * aio_set_fd_handler is called while we're walking */
        node = QLIST_FIRST(&ctx->aio_handlers);
        while (node) {
            AioHandler *tmp;
--- a/async.c
+++ b/async.c
@@ -26,6 +26,7 @@
 #include "block/aio.h"
 #include "block/thread-pool.h"
 #include "qemu/main-loop.h"
+#include "qemu/atomic.h"

 /***********************************************************/
 /* bottom halves (can be seen as timers which expire ASAP) */
@@ -247,9 +248,25 @@ ThreadPool *aio_get_thread_pool(AioContext *ctx)
    return ctx->thread_pool;
 }

+void aio_set_dispatching(AioContext *ctx, bool dispatching)
+{
+    ctx->dispatching = dispatching;
+    if (!dispatching) {
+        /* Write ctx->dispatching before reading e.g. bh->scheduled.
+         * Optimization: this is only needed when we're entering the "unsafe"
+         * phase where other threads must call event_notifier_set.
+         */
+        smp_mb();
+    }
+}
+
 void aio_notify(AioContext *ctx)
 {
-    event_notifier_set(&ctx->notifier);
+    /* Write e.g. bh->scheduled before reading ctx->dispatching.  */
+    smp_mb();
+    if (!ctx->dispatching) {
+        event_notifier_set(&ctx->notifier);
+    }
 }

 static void aio_timerlist_notify(void *opaque)
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -304,7 +304,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
        /* ensure policy won't be ignored in case memory is preallocated
         * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
         * this doesn't catch hugepage case. */
-        unsigned flags = MPOL_MF_STRICT;
+        unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;

        /* check for invalid host-nodes and policies and give more verbose
         * error messages than mbind(). */
--- a/block-migration.c
+++ b/block-migration.c
@@ -861,7 +861,7 @@ static bool block_is_active(void *opaque)
    return block_mig_state.blk_enable == 1;
 }

-SaveVMHandlers savevm_block_handlers = {
+static SaveVMHandlers savevm_block_handlers = {
    .set_params = block_set_params,
    .save_live_setup = block_save_setup,
    .save_live_iterate = block_save_iterate,
--- a/block.c
+++ b/block.c
@@ -471,7 +471,7 @@ int bdrv_create(BlockDriver *drv, const char* filename,
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        while (cco.ret == NOT_DONE) {
-            qemu_aio_wait();
+            aio_poll(qemu_get_aio_context(), true);
        }
    }

@@ -508,19 +508,24 @@ int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
    return ret;
 }

-int bdrv_refresh_limits(BlockDriverState *bs)
+void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
 {
    BlockDriver *drv = bs->drv;
+    Error *local_err = NULL;

    memset(&bs->bl, 0, sizeof(bs->bl));

    if (!drv) {
-        return 0;
+        return;
    }

    /* Take some limits from the children as a default */
    if (bs->file) {
-        bdrv_refresh_limits(bs->file);
+        bdrv_refresh_limits(bs->file, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
        bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
        bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
    } else {
@@ -528,7 +533,11 @@ int bdrv_refresh_limits(BlockDriverState *bs)
    }

    if (bs->backing_hd) {
-        bdrv_refresh_limits(bs->backing_hd);
+        bdrv_refresh_limits(bs->backing_hd, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
        bs->bl.opt_transfer_length =
            MAX(bs->bl.opt_transfer_length,
                bs->backing_hd->bl.opt_transfer_length);
@@ -539,10 +548,8 @@ int bdrv_refresh_limits(BlockDriverState *bs)

    /* Then let the driver override it */
    if (drv->bdrv_refresh_limits) {
-        return drv->bdrv_refresh_limits(bs);
+        drv->bdrv_refresh_limits(bs, errp);
    }
-
-    return 0;
 }

 /*
@@ -831,7 +838,7 @@ static int bdrv_open_flags(BlockDriverState *bs, int flags)
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
-    open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
+    open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);

    /*
     * Snapshots should be writable.
@@ -928,6 +935,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
    bs->zero_beyond_eof = true;
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);
+    bs->growable = !!(flags & BDRV_O_PROTOCOL);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        error_setg(errp,
@@ -992,7 +1000,13 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
        goto free_and_fail;
    }

-    bdrv_refresh_limits(bs);
+    bdrv_refresh_limits(bs, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
+        goto free_and_fail;
+    }
+
    assert(bdrv_opt_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bs->sg);
    return 0;
@@ -1005,94 +1019,124 @@ free_and_fail:
    return ret;
 }

-/*
- * Opens a file using a protocol (file, host_device, nbd, ...)
- *
- * options is an indirect pointer to a QDict of options to pass to the block
- * drivers, or pointer to NULL for an empty set of options. If this function
- * takes ownership of the QDict reference, it will set *options to NULL;
- * otherwise, it will contain unused/unrecognized options after this function
- * returns. Then, the caller is responsible for freeing it. If it intends to
- * reuse the QDict, QINCREF() should be called beforehand.
- */
-static int bdrv_file_open(BlockDriverState *bs, const char *filename,
-                          QDict **options, int flags, Error **errp)
+static QDict *parse_json_filename(const char *filename, Error **errp)
 {
-    BlockDriver *drv;
-    const char *drvname;
-    bool parse_filename = false;
-    Error *local_err = NULL;
+    QObject *options_obj;
+    QDict *options;
    int ret;

+    ret = strstart(filename, "json:", &filename);
+    assert(ret);
+
+    options_obj = qobject_from_json(filename);
+    if (!options_obj) {
+        error_setg(errp, "Could not parse the JSON options");
+        return NULL;
+    }
+
+    if (qobject_type(options_obj) != QTYPE_QDICT) {
+        qobject_decref(options_obj);
+        error_setg(errp, "Invalid JSON object given");
+        return NULL;
+    }
+
+    options = qobject_to_qdict(options_obj);
+    qdict_flatten(options);
+
+    return options;
+}
+
+/*
+ * Fills in default options for opening images and converts the legacy
+ * filename/flags pair to option QDict entries.
+ */
+static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
+                             BlockDriver *drv, Error **errp)
+{
+    const char *filename = *pfilename;
+    const char *drvname;
+    bool protocol = flags & BDRV_O_PROTOCOL;
+    bool parse_filename = false;
+    Error *local_err = NULL;
+
+    /* Parse json: pseudo-protocol */
+    if (filename && g_str_has_prefix(filename, "json:")) {
+        QDict *json_options = parse_json_filename(filename, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return -EINVAL;
+        }
+
+        /* Options given in the filename have lower priority than options
+         * specified directly */
+        qdict_join(*options, json_options, false);
+        QDECREF(json_options);
+        *pfilename = filename = NULL;
+    }
+
    /* Fetch the file name from the options QDict if necessary */
-    if (!filename) {
-        filename = qdict_get_try_str(*options, "filename");
-    } else if (filename && !qdict_haskey(*options, "filename")) {
-        qdict_put(*options, "filename", qstring_from_str(filename));
-        parse_filename = true;
-    } else {
-        error_setg(errp, "Can't specify 'file' and 'filename' options at the "
-                   "same time");
-        ret = -EINVAL;
-        goto fail;
+    if (protocol && filename) {
+        if (!qdict_haskey(*options, "filename")) {
+            qdict_put(*options, "filename", qstring_from_str(filename));
+            parse_filename = true;
+        } else {
+            error_setg(errp, "Can't specify 'file' and 'filename' options at "
+                             "the same time");
+            return -EINVAL;
+        }
    }

    /* Find the right block driver */
+    filename = qdict_get_try_str(*options, "filename");
    drvname = qdict_get_try_str(*options, "driver");
-    if (drvname) {
-        drv = bdrv_find_format(drvname);
-        if (!drv) {
-            error_setg(errp, "Unknown driver '%s'", drvname);
-        }
-        qdict_del(*options, "driver");
-    } else if (filename) {
-        drv = bdrv_find_protocol(filename, parse_filename);
-        if (!drv) {
-            error_setg(errp, "Unknown protocol");
+
+    if (drv) {
+        if (drvname) {
+            error_setg(errp, "Driver specified twice");
+            return -EINVAL;
        }
+        drvname = drv->format_name;
+        qdict_put(*options, "driver", qstring_from_str(drvname));
    } else {
-        error_setg(errp, "Must specify either driver or file");
-        drv = NULL;
+        if (!drvname && protocol) {
+            if (filename) {
+                drv = bdrv_find_protocol(filename, parse_filename);
+                if (!drv) {
+                    error_setg(errp, "Unknown protocol");
+                    return -EINVAL;
+                }
+
+                drvname = drv->format_name;
+                qdict_put(*options, "driver", qstring_from_str(drvname));
+            } else {
+                error_setg(errp, "Must specify either driver or file");
+                return -EINVAL;
+            }
+        } else if (drvname) {
+            drv = bdrv_find_format(drvname);
+            if (!drv) {
+                error_setg(errp, "Unknown driver '%s'", drvname);
+                return -ENOENT;
+            }
+        }
    }

-    if (!drv) {
-        /* errp has been set already */
-        ret = -ENOENT;
-        goto fail;
-    }
+    assert(drv || !protocol);

-    /* Parse the filename and open it */
-    if (drv->bdrv_parse_filename && parse_filename) {
+    /* Driver-specific filename parsing */
+    if (drv && drv->bdrv_parse_filename && parse_filename) {
        drv->bdrv_parse_filename(filename, *options, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
-            ret = -EINVAL;
-            goto fail;
+            return -EINVAL;
        }

        if (!drv->bdrv_needs_filename) {
            qdict_del(*options, "filename");
-        } else {
-            filename = qdict_get_str(*options, "filename");
        }
    }

-    if (!drv->bdrv_file_open) {
-        ret = bdrv_open(&bs, filename, NULL, *options, flags, drv, &local_err);
-        *options = NULL;
-    } else {
-        ret = bdrv_open_common(bs, NULL, *options, flags, drv, &local_err);
-    }
-    if (ret < 0) {
-        error_propagate(errp, local_err);
-        goto fail;
-    }
-
-    bs->growable = 1;
    return 0;
-
-fail:
-    return ret;
 }

 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
@@ -1123,7 +1167,7 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
    bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT,
                    bs->backing_blocker);
 out:
-    bdrv_refresh_limits(bs);
+    bdrv_refresh_limits(bs, NULL);
 }

 /*
@@ -1162,6 +1206,13 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
        bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX);
    }

+    if (!bs->drv || !bs->drv->supports_backing) {
+        ret = -EINVAL;
+        error_setg(errp, "Driver doesn't support backing files");
+        QDECREF(options);
+        goto free_exit;
+    }
+
    backing_hd = bdrv_new("", errp);

    if (bs->backing_format[0] != '\0') {
@@ -1240,7 +1291,7 @@ done:
    return ret;
 }

-void bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
+int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
 {
    /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
    char *tmp_filename = g_malloc0(PATH_MAX + 1);
@@ -1258,6 +1309,7 @@ void bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
    /* Get the required size from the image */
    total_size = bdrv_getlength(bs);
    if (total_size < 0) {
+        ret = total_size;
        error_setg_errno(errp, -total_size, "Could not get image size");
        goto out;
    }
@@ -1304,33 +1356,7 @@ void bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)

 out:
    g_free(tmp_filename);
-}
-
-static QDict *parse_json_filename(const char *filename, Error **errp)
-{
-    QObject *options_obj;
-    QDict *options;
-    int ret;
-
-    ret = strstart(filename, "json:", &filename);
-    assert(ret);
-
-    options_obj = qobject_from_json(filename);
-    if (!options_obj) {
-        error_setg(errp, "Could not parse the JSON options");
-        return NULL;
-    }
-
-    if (qobject_type(options_obj) != QTYPE_QDICT) {
-        qobject_decref(options_obj);
-        error_setg(errp, "Invalid JSON object given");
-        return NULL;
-    }
-
-    options = qobject_to_qdict(options_obj);
-    qdict_flatten(options);
-
-    return options;
+    return ret;
 }

 /*
@@ -1396,77 +1422,62 @@ int bdrv_open(BlockDriverState **pbs, const char *filename,
        options = qdict_new();
    }

-    if (filename && g_str_has_prefix(filename, "json:")) {
-        QDict *json_options = parse_json_filename(filename, &local_err);
-        if (local_err) {
-            ret = -EINVAL;
-            goto fail;
-        }
-
-        /* Options given in the filename have lower priority than options
-         * specified directly */
-        qdict_join(options, json_options, false);
-        QDECREF(json_options);
-        filename = NULL;
-    }
-
-    bs->options = options;
-    options = qdict_clone_shallow(options);
-
-    if (flags & BDRV_O_PROTOCOL) {
-        assert(!drv);
-        ret = bdrv_file_open(bs, filename, &options, flags & ~BDRV_O_PROTOCOL,
-                             &local_err);
-        if (!ret) {
-            drv = bs->drv;
-            goto done;
-        } else if (bs->drv) {
-            goto close_and_fail;
-        } else {
-            goto fail;
-        }
-    }
-
-    /* Open image file without format layer */
-    if (flags & BDRV_O_RDWR) {
-        flags |= BDRV_O_ALLOW_RDWR;
-    }
-    if (flags & BDRV_O_SNAPSHOT) {
-        snapshot_flags = bdrv_temp_snapshot_flags(flags);
-        flags = bdrv_backing_flags(flags);
-    }
-
-    assert(file == NULL);
-    ret = bdrv_open_image(&file, filename, options, "file",
-                          bdrv_inherited_flags(flags),
-                          true, &local_err);
-    if (ret < 0) {
+    ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
+    if (local_err) {
        goto fail;
    }

    /* Find the right image format driver */
+    drv = NULL;
    drvname = qdict_get_try_str(options, "driver");
    if (drvname) {
        drv = bdrv_find_format(drvname);
        qdict_del(options, "driver");
        if (!drv) {
-            error_setg(errp, "Invalid driver: '%s'", drvname);
+            error_setg(errp, "Unknown driver: '%s'", drvname);
            ret = -EINVAL;
            goto fail;
        }
    }

-    if (!drv) {
-        if (file) {
-            ret = find_image_format(file, filename, &drv, &local_err);
-        } else {
-            error_setg(errp, "Must specify either driver or file");
-            ret = -EINVAL;
+    assert(drvname || !(flags & BDRV_O_PROTOCOL));
+    if (drv && !drv->bdrv_file_open) {
+        /* If the user explicitly wants a format driver here, we'll need to add
+         * another layer for the protocol in bs->file */
+        flags &= ~BDRV_O_PROTOCOL;
+    }
+
+    bs->options = options;
+    options = qdict_clone_shallow(options);
+
+    /* Open image file without format layer */
+    if ((flags & BDRV_O_PROTOCOL) == 0) {
+        if (flags & BDRV_O_RDWR) {
+            flags |= BDRV_O_ALLOW_RDWR;
+        }
+        if (flags & BDRV_O_SNAPSHOT) {
+            snapshot_flags = bdrv_temp_snapshot_flags(flags);
+            flags = bdrv_backing_flags(flags);
+        }
+
+        assert(file == NULL);
+        ret = bdrv_open_image(&file, filename, options, "file",
+                              bdrv_inherited_flags(flags),
+                              true, &local_err);
+        if (ret < 0) {
            goto fail;
        }
    }

-    if (!drv) {
+    /* Image format probing */
+    if (!drv && file) {
+        ret = find_image_format(file, filename, &drv, &local_err);
+        if (ret < 0) {
+            goto fail;
+        }
+    } else if (!drv) {
+        error_setg(errp, "Must specify either driver or file");
+        ret = -EINVAL;
        goto fail;
    }

@@ -1495,15 +1506,12 @@ int bdrv_open(BlockDriverState **pbs, const char *filename,
    /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
     * temporary snapshot afterwards. */
    if (snapshot_flags) {
-        bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
+        ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
        if (local_err) {
-            error_propagate(errp, local_err);
            goto close_and_fail;
        }
    }

-
-done:
    /* Check if any unknown options were used */
    if (options && (qdict_size(options) != 0)) {
        const QDictEntry *entry = qdict_first(options);
@@ -1783,7 +1791,7 @@ void bdrv_reopen_commit(BDRVReopenState *reopen_state)
                                              BDRV_O_CACHE_WB);
    reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);

-    bdrv_refresh_limits(reopen_state->bs);
+    bdrv_refresh_limits(reopen_state->bs, NULL);
 }

 /*
@@ -1910,6 +1918,7 @@ void bdrv_drain_all(void)
            bool bs_busy;

            aio_context_acquire(aio_context);
+            bdrv_flush_io_queue(bs);
            bdrv_start_throttled_reqs(bs);
            bs_busy = bdrv_requests_pending(bs);
            bs_busy |= aio_poll(aio_context, bs_busy);
@@ -2513,32 +2522,23 @@ int bdrv_change_backing_file(BlockDriverState *bs,
 *
 * Returns NULL if bs is not found in active's image chain,
 * or if active == bs.
+ *
+ * Returns the bottommost base image if bs == NULL.
 */
 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
                                    BlockDriverState *bs)
 {
-    BlockDriverState *overlay = NULL;
-    BlockDriverState *intermediate;
-
-    assert(active != NULL);
-    assert(bs != NULL);
-
-    /* if bs is the same as active, then by definition it has no overlay
-     */
-    if (active == bs) {
-        return NULL;
+    while (active && bs != active->backing_hd) {
+        active = active->backing_hd;
    }

-    intermediate = active;
-    while (intermediate->backing_hd) {
-        if (intermediate->backing_hd == bs) {
-            overlay = intermediate;
-            break;
-        }
-        intermediate = intermediate->backing_hd;
-    }
+    return active;
+}

-    return overlay;
+/* Given a BDS, searches for the base layer. */
+BlockDriverState *bdrv_find_base(BlockDriverState *bs)
+{
+    return bdrv_find_overlay(bs, NULL);
 }

 typedef struct BlkIntermediateStates {
@@ -2569,12 +2569,15 @@ typedef struct BlkIntermediateStates {
 *
 * base <- active
 *
+ * If backing_file_str is non-NULL, it will be used when modifying top's
+ * overlay image metadata.
+ *
 * Error conditions:
 *  if active == top, that is considered an error
 *
 */
 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
-                           BlockDriverState *base)
+                           BlockDriverState *base, const char *backing_file_str)
 {
    BlockDriverState *intermediate;
    BlockDriverState *base_bs = NULL;
@@ -2626,7 +2629,8 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
    }

    /* success - we can delete the intermediate states, and link top->base */
-    ret = bdrv_change_backing_file(new_top_bs, base_bs->filename,
+    backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
+    ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
                                   base_bs->drv ? base_bs->drv->format_name : "");
    if (ret) {
        goto exit;
@@ -3019,6 +3023,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,

    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+    assert(!qiov || bytes == qiov->size);

    /* Handle Copy on Read and associated serialisation */
    if (flags & BDRV_REQ_COPY_ON_READ) {
@@ -3063,8 +3068,20 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
        max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
                                  align >> BDRV_SECTOR_BITS);
        if (max_nb_sectors > 0) {
-            ret = drv->bdrv_co_readv(bs, sector_num,
-                                     MIN(nb_sectors, max_nb_sectors), qiov);
+            QEMUIOVector local_qiov;
+            size_t local_sectors;
+
+            max_nb_sectors = MIN(max_nb_sectors, SIZE_MAX / BDRV_SECTOR_BITS);
+            local_sectors = MIN(max_nb_sectors, nb_sectors);
+
+            qemu_iovec_init(&local_qiov, qiov->niov);
+            qemu_iovec_concat(&local_qiov, qiov, 0,
+                              local_sectors * BDRV_SECTOR_SIZE);
+
+            ret = drv->bdrv_co_readv(bs, sector_num, local_sectors,
+                                     &local_qiov);
+
+            qemu_iovec_destroy(&local_qiov);
        } else {
            ret = 0;
        }
@@ -3276,6 +3293,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,

    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+    assert(!qiov || bytes == qiov->size);

    waited = wait_serialising_requests(req);
    assert(!waited || !req->serialising);
@@ -3489,9 +3507,7 @@ int bdrv_truncate(BlockDriverState *bs, int64_t offset)
        return -ENOTSUP;
    if (bs->read_only)
        return -EACCES;
-    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_RESIZE, NULL)) {
-        return -EBUSY;
-    }
+
    ret = drv->bdrv_truncate(bs, offset);
    if (ret == 0) {
        ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
@@ -3790,6 +3806,17 @@ BlockDriverState *bdrv_lookup_bs(const char *device,
    return NULL;
 }

+/* If 'base' is in the same chain as 'top', return true. Otherwise,
+ * return false.  If either argument is NULL, return false. */
+bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
+{
+    while (top && top != base) {
+        top = top->backing_hd;
+    }
+
+    return top != NULL;
+}
+
 BlockDriverState *bdrv_next(BlockDriverState *bs)
 {
    if (!bs) {
@@ -4040,7 +4067,7 @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
    if (ret < 0) {
        return ret;
    }
-    return (ret & BDRV_BLOCK_ALLOCATED);
+    return !!(ret & BDRV_BLOCK_ALLOCATED);
 }

 /*
@@ -4333,22 +4360,6 @@ int bdrv_get_backing_file_depth(BlockDriverState *bs)
    return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
 }

-BlockDriverState *bdrv_find_base(BlockDriverState *bs)
-{
-    BlockDriverState *curr_bs = NULL;
-
-    if (!bs) {
-        return NULL;
-    }
-
-    curr_bs = bs;
-
-    while (curr_bs->backing_hd) {
-        curr_bs = curr_bs->backing_hd;
-    }
-    return curr_bs;
-}
-
 /**************************************************************/
 /* async I/Os */

@@ -5774,3 +5785,58 @@ bool bdrv_is_first_non_filter(BlockDriverState *candidate)

    return false;
 }
+
+BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
+{
+    BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
+    if (!to_replace_bs) {
+        error_setg(errp, "Node name '%s' not found", node_name);
+        return NULL;
+    }
+
+    if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
+        return NULL;
+    }
+
+    /* We don't want arbitrary node of the BDS chain to be replaced only the top
+     * most non filter in order to prevent data corruption.
+     * Another benefit is that this tests exclude backing files which are
+     * blocked by the backing blockers.
+     */
+    if (!bdrv_is_first_non_filter(to_replace_bs)) {
+        error_setg(errp, "Only top most non filter can be replaced");
+        return NULL;
+    }
+
+    return to_replace_bs;
+}
+
+void bdrv_io_plug(BlockDriverState *bs)
+{
+    BlockDriver *drv = bs->drv;
+    if (drv && drv->bdrv_io_plug) {
+        drv->bdrv_io_plug(bs);
+    } else if (bs->file) {
+        bdrv_io_plug(bs->file);
+    }
+}
+
+void bdrv_io_unplug(BlockDriverState *bs)
+{
+    BlockDriver *drv = bs->drv;
+    if (drv && drv->bdrv_io_unplug) {
+        drv->bdrv_io_unplug(bs);
+    } else if (bs->file) {
+        bdrv_io_unplug(bs->file);
+    }
+}
+
+void bdrv_flush_io_queue(BlockDriverState *bs)
+{
+    BlockDriver *drv = bs->drv;
+    if (drv && drv->bdrv_flush_io_queue) {
+        drv->bdrv_flush_io_queue(bs);
+    } else if (bs->file) {
+        bdrv_flush_io_queue(bs->file);
+    }
+}
--- a/block/backup.c
+++ b/block/backup.c
@@ -307,7 +307,7 @@ static void coroutine_fn backup_run(void *opaque)
                                BACKUP_SECTORS_PER_CLUSTER - i, &n);
                    i += n;

-                    if (alloced == 1) {
+                    if (alloced == 1 || n == 0) {
                        break;
                    }
                }
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -449,6 +449,10 @@ static void error_callback_bh(void *opaque)
 static void blkdebug_aio_cancel(BlockDriverAIOCB *blockacb)
 {
    BlkdebugAIOCB *acb = container_of(blockacb, BlkdebugAIOCB, common);
+    if (acb->bh) {
+        qemu_bh_delete(acb->bh);
+        acb->bh = NULL;
+    }
    qemu_aio_release(acb);
 }

--- a/block/commit.c
+++ b/block/commit.c
@@ -37,6 +37,7 @@ typedef struct CommitBlockJob {
    BlockdevOnError on_error;
    int base_flags;
    int orig_overlay_flags;
+    char *backing_file_str;
 } CommitBlockJob;

 static int coroutine_fn commit_populate(BlockDriverState *bs,
@@ -141,7 +142,7 @@ wait:

    if (!block_job_is_cancelled(&s->common) && sector_num == end) {
        /* success */
-        ret = bdrv_drop_intermediate(active, top, base);
+        ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str);
    }

 exit_free_buf:
@@ -158,7 +159,7 @@ exit_restore_reopen:
    if (overlay_bs && s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) {
        bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
    }
-
+    g_free(s->backing_file_str);
    block_job_completed(&s->common, ret);
 }

@@ -182,7 +183,7 @@ static const BlockJobDriver commit_job_driver = {
 void commit_start(BlockDriverState *bs, BlockDriverState *base,
                  BlockDriverState *top, int64_t speed,
                  BlockdevOnError on_error, BlockDriverCompletionFunc *cb,
-                  void *opaque, Error **errp)
+                  void *opaque, const char *backing_file_str, Error **errp)
 {
    CommitBlockJob *s;
    BlockReopenQueue *reopen_queue = NULL;
@@ -244,6 +245,8 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
    s->base_flags          = orig_base_flags;
    s->orig_overlay_flags  = orig_overlay_flags;

+    s->backing_file_str = g_strdup(backing_file_str);
+
    s->on_error = on_error;
    s->common.co = qemu_coroutine_create(commit_run);

--- a/block/cow.c
+++ b/block/cow.c
@@ -332,7 +332,7 @@ static int cow_create(const char *filename, QemuOpts *opts, Error **errp)
    char *image_filename = NULL;
    Error *local_err = NULL;
    int ret;
-    BlockDriverState *cow_bs;
+    BlockDriverState *cow_bs = NULL;

    /* Read out options */
    image_sectors = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / 512;
@@ -344,7 +344,6 @@ static int cow_create(const char *filename, QemuOpts *opts, Error **errp)
        goto exit;
    }

-    cow_bs = NULL;
    ret = bdrv_open(&cow_bs, filename, NULL, NULL,
                    BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err);
    if (ret < 0) {
@@ -383,7 +382,9 @@ static int cow_create(const char *filename, QemuOpts *opts, Error **errp)

 exit:
    g_free(image_filename);
-    bdrv_unref(cow_bs);
+    if (cow_bs) {
+        bdrv_unref(cow_bs);
+    }
    return ret;
 }

@@ -414,6 +415,7 @@ static BlockDriver bdrv_cow = {
    .bdrv_close     = cow_close,
    .bdrv_create    = cow_create,
    .bdrv_has_zero_init     = bdrv_has_zero_init_1,
+    .supports_backing       = true,

    .bdrv_read              = cow_co_read,
    .bdrv_write             = cow_co_write,
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1450,7 +1450,7 @@ static void iscsi_close(BlockDriverState *bs)
    memset(iscsilun, 0, sizeof(IscsiLun));
 }

-static int iscsi_refresh_limits(BlockDriverState *bs)
+static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
 {
    IscsiLun *iscsilun = bs->opaque;

@@ -1475,7 +1475,6 @@ static int iscsi_refresh_limits(BlockDriverState *bs)
    }
    bs->bl.opt_transfer_length = sector_lun2qemu(iscsilun->bl.opt_xfer_len,
                                                 iscsilun);
-    return 0;
 }

 /* Since iscsi_open() ignores bdrv_flags, there is nothing to do here in
@@ -1510,7 +1509,8 @@ static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
    if (iscsilun->allocationmap != NULL) {
        g_free(iscsilun->allocationmap);
        iscsilun->allocationmap =
-            bitmap_new(DIV_ROUND_UP(bs->total_sectors,
+            bitmap_new(DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks,
+                                                    iscsilun),
                                    iscsilun->cluster_sectors));
    }

--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -25,6 +25,8 @@
 */
 #define MAX_EVENTS 128

+#define MAX_QUEUED_IO  128
+
 struct qemu_laiocb {
    BlockDriverAIOCB common;
    struct qemu_laio_state *ctx;
@@ -36,9 +38,19 @@ struct qemu_laiocb {
    QLIST_ENTRY(qemu_laiocb) node;
 };

+typedef struct {
+    struct iocb *iocbs[MAX_QUEUED_IO];
+    int plugged;
+    unsigned int size;
+    unsigned int idx;
+} LaioQueue;
+
 struct qemu_laio_state {
    io_context_t ctx;
    EventNotifier e;
+
+    /* io queue for submit at batch */
+    LaioQueue io_q;
 };

 static inline ssize_t io_event_ret(struct io_event *ev)
@@ -135,6 +147,79 @@ static const AIOCBInfo laio_aiocb_info = {
    .cancel             = laio_cancel,
 };

+static void ioq_init(LaioQueue *io_q)
+{
+    io_q->size = MAX_QUEUED_IO;
+    io_q->idx = 0;
+    io_q->plugged = 0;
+}
+
+static int ioq_submit(struct qemu_laio_state *s)
+{
+    int ret, i = 0;
+    int len = s->io_q.idx;
+
+    do {
+        ret = io_submit(s->ctx, len, s->io_q.iocbs);
+    } while (i++ < 3 && ret == -EAGAIN);
+
+    /* empty io queue */
+    s->io_q.idx = 0;
+
+    if (ret < 0) {
+        i = 0;
+    } else {
+        i = ret;
+    }
+
+    for (; i < len; i++) {
+        struct qemu_laiocb *laiocb =
+            container_of(s->io_q.iocbs[i], struct qemu_laiocb, iocb);
+
+        laiocb->ret = (ret < 0) ? ret : -EIO;
+        qemu_laio_process_completion(s, laiocb);
+    }
+    return ret;
+}
+
+static void ioq_enqueue(struct qemu_laio_state *s, struct iocb *iocb)
+{
+    unsigned int idx = s->io_q.idx;
+
+    s->io_q.iocbs[idx++] = iocb;
+    s->io_q.idx = idx;
+
+    /* submit immediately if queue is full */
+    if (idx == s->io_q.size) {
+        ioq_submit(s);
+    }
+}
+
+void laio_io_plug(BlockDriverState *bs, void *aio_ctx)
+{
+    struct qemu_laio_state *s = aio_ctx;
+
+    s->io_q.plugged++;
+}
+
+int laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug)
+{
+    struct qemu_laio_state *s = aio_ctx;
+    int ret = 0;
+
+    assert(s->io_q.plugged > 0 || !unplug);
+
+    if (unplug && --s->io_q.plugged > 0) {
+        return 0;
+    }
+
+    if (s->io_q.idx > 0) {
+        ret = ioq_submit(s);
+    }
+
+    return ret;
+}
+
 BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque, int type)
@@ -168,8 +253,13 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
    }
    io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));

-    if (io_submit(s->ctx, 1, &iocbs) < 0)
-        goto out_free_aiocb;
+    if (!s->io_q.plugged) {
+        if (io_submit(s->ctx, 1, &iocbs) < 0) {
+            goto out_free_aiocb;
+        }
+    } else {
+        ioq_enqueue(s, iocbs);
+    }
    return &laiocb->common;

 out_free_aiocb:
@@ -204,6 +294,8 @@ void *laio_init(void)
        goto out_close_efd;
    }

+    ioq_init(&s->io_q);
+
    return s;

 out_close_efd:
@@ -218,5 +310,10 @@ void laio_cleanup(void *s_)
    struct qemu_laio_state *s = s_;

    event_notifier_cleanup(&s->e);
+
+    if (io_destroy(s->ctx) != 0) {
+        fprintf(stderr, "%s: destroy AIO context %p failed\n",
+                        __func__, &s->ctx);
+    }
    g_free(s);
 }
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -32,6 +32,12 @@ typedef struct MirrorBlockJob {
    RateLimit limit;
    BlockDriverState *target;
    BlockDriverState *base;
+    /* The name of the graph node to replace */
+    char *replaces;
+    /* The BDS to replace */
+    BlockDriverState *to_replace;
+    /* Used to block operations on the drive-mirror-replace target */
+    Error *replace_blocker;
    bool is_none_mode;
    BlockdevOnError on_source_error, on_target_error;
    bool synced;
@@ -259,9 +265,11 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
    next_sector = sector_num;
    while (nb_chunks-- > 0) {
        MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
+        size_t remaining = (nb_sectors * BDRV_SECTOR_SIZE) - op->qiov.size;
+
        QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
        s->buf_free_count--;
-        qemu_iovec_add(&op->qiov, buf, s->granularity);
+        qemu_iovec_add(&op->qiov, buf, MIN(s->granularity, remaining));

        /* Advance the HBitmapIter in parallel, so that we do not examine
         * the same sector twice.
@@ -324,9 +332,18 @@ static void coroutine_fn mirror_run(void *opaque)
    }

    s->common.len = bdrv_getlength(bs);
-    if (s->common.len <= 0) {
+    if (s->common.len < 0) {
        ret = s->common.len;
        goto immediate_exit;
+    } else if (s->common.len == 0) {
+        /* Report BLOCK_JOB_READY and wait for complete. */
+        block_job_event_ready(&s->common);
+        s->synced = true;
+        while (!block_job_is_cancelled(&s->common) && !s->should_complete) {
+            block_job_yield(&s->common);
+        }
+        s->common.cancelled = false;
+        goto immediate_exit;
    }

    length = DIV_ROUND_UP(s->common.len, s->granularity);
@@ -491,10 +508,14 @@ immediate_exit:
    bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
    bdrv_iostatus_disable(s->target);
    if (s->should_complete && ret == 0) {
-        if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
-            bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
+        BlockDriverState *to_replace = s->common.bs;
+        if (s->to_replace) {
+            to_replace = s->to_replace;
        }
-        bdrv_swap(s->target, s->common.bs);
+        if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
+            bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
+        }
+        bdrv_swap(s->target, to_replace);
        if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
            /* drop the bs loop chain formed by the swap: break the loop then
             * trigger the unref from the top one */
@@ -503,6 +524,12 @@ immediate_exit:
            bdrv_unref(p);
        }
    }
+    if (s->to_replace) {
+        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
+        error_free(s->replace_blocker);
+        bdrv_unref(s->to_replace);
+    }
+    g_free(s->replaces);
    bdrv_unref(s->target);
    block_job_completed(&s->common, ret);
 }
@@ -541,6 +568,20 @@ static void mirror_complete(BlockJob *job, Error **errp)
        return;
    }

+    /* check the target bs is not blocked and block all operations on it */
+    if (s->replaces) {
+        s->to_replace = check_to_replace_node(s->replaces, &local_err);
+        if (!s->to_replace) {
+            error_propagate(errp, local_err);
+            return;
+        }
+
+        error_setg(&s->replace_blocker,
+                   "block device is in use by block-job-complete");
+        bdrv_op_block_all(s->to_replace, s->replace_blocker);
+        bdrv_ref(s->to_replace);
+    }
+
    s->should_complete = true;
    block_job_resume(job);
 }
@@ -563,14 +604,15 @@ static const BlockJobDriver commit_active_job_driver = {
 };

 static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
-                            int64_t speed, int64_t granularity,
-                            int64_t buf_size,
-                            BlockdevOnError on_source_error,
-                            BlockdevOnError on_target_error,
-                            BlockDriverCompletionFunc *cb,
-                            void *opaque, Error **errp,
-                            const BlockJobDriver *driver,
-                            bool is_none_mode, BlockDriverState *base)
+                             const char *replaces,
+                             int64_t speed, int64_t granularity,
+                             int64_t buf_size,
+                             BlockdevOnError on_source_error,
+                             BlockdevOnError on_target_error,
+                             BlockDriverCompletionFunc *cb,
+                             void *opaque, Error **errp,
+                             const BlockJobDriver *driver,
+                             bool is_none_mode, BlockDriverState *base)
 {
    MirrorBlockJob *s;

@@ -601,6 +643,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
        return;
    }

+    s->replaces = g_strdup(replaces);
    s->on_source_error = on_source_error;
    s->on_target_error = on_target_error;
    s->target = target;
@@ -622,6 +665,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
 }

 void mirror_start(BlockDriverState *bs, BlockDriverState *target,
+                  const char *replaces,
                  int64_t speed, int64_t granularity, int64_t buf_size,
                  MirrorSyncMode mode, BlockdevOnError on_source_error,
                  BlockdevOnError on_target_error,
@@ -633,7 +677,8 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,

    is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
    base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
-    mirror_start_job(bs, target, speed, granularity, buf_size,
+    mirror_start_job(bs, target, replaces,
+                     speed, granularity, buf_size,
                     on_source_error, on_target_error, cb, opaque, errp,
                     &mirror_job_driver, is_none_mode, base);
 }
@@ -681,7 +726,7 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
    }

    bdrv_ref(base);
-    mirror_start_job(bs, base, speed, 0, 0,
+    mirror_start_job(bs, base, NULL, speed, 0, 0,
                     on_error, on_error, cb, opaque, &local_err,
                     &commit_active_job_driver, false, base);
    if (local_err) {
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -304,17 +304,27 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename,

    qp = query_params_parse(uri->query);
    for (i = 0; i < qp->n; i++) {
+        unsigned long long val;
        if (!qp->p[i].value) {
            error_setg(errp, "Value for NFS parameter expected: %s",
                       qp->p[i].name);
            goto fail;
        }
-        if (!strncmp(qp->p[i].name, "uid", 3)) {
-            nfs_set_uid(client->context, atoi(qp->p[i].value));
-        } else if (!strncmp(qp->p[i].name, "gid", 3)) {
-            nfs_set_gid(client->context, atoi(qp->p[i].value));
-        } else if (!strncmp(qp->p[i].name, "tcp-syncnt", 10)) {
-            nfs_set_tcp_syncnt(client->context, atoi(qp->p[i].value));
+        if (parse_uint_full(qp->p[i].value, &val, 0)) {
+            error_setg(errp, "Illegal value for NFS parameter: %s",
+                       qp->p[i].name);
+            goto fail;
+        }
+        if (!strcmp(qp->p[i].name, "uid")) {
+            nfs_set_uid(client->context, val);
+        } else if (!strcmp(qp->p[i].name, "gid")) {
+            nfs_set_gid(client->context, val);
+        } else if (!strcmp(qp->p[i].name, "tcp-syncnt")) {
+            nfs_set_tcp_syncnt(client->context, val);
+#ifdef LIBNFS_FEATURE_READAHEAD
+        } else if (!strcmp(qp->p[i].name, "readahead")) {
+            nfs_set_readahead(client->context, val);
+#endif
        } else {
            error_setg(errp, "Unknown NFS parameter name: %s",
                       qp->p[i].name);
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -293,7 +293,7 @@ void bdrv_query_info(BlockDriverState *bs,
    qapi_free_BlockInfo(info);
 }

-BlockStats *bdrv_query_stats(const BlockDriverState *bs)
+static BlockStats *bdrv_query_stats(const BlockDriverState *bs)
 {
    BlockStats *s;

@@ -360,7 +360,11 @@ BlockStatsList *qmp_query_blockstats(Error **errp)

     while ((bs = bdrv_next(bs))) {
        BlockStatsList *info = g_malloc0(sizeof(*info));
+        AioContext *ctx = bdrv_get_aio_context(bs);
+
+        aio_context_acquire(ctx);
        info->value = bdrv_query_stats(bs);
+        aio_context_release(ctx);

        *p_next = info;
        p_next = &info->next;
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -941,6 +941,7 @@ static BlockDriver bdrv_qcow = {
    .bdrv_reopen_prepare    = qcow_reopen_prepare,
    .bdrv_create            = qcow_create,
    .bdrv_has_zero_init     = bdrv_has_zero_init_1,
+    .supports_backing       = true,

    .bdrv_co_readv          = qcow_co_readv,
    .bdrv_co_writev         = qcow_co_writev,
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -210,20 +210,31 @@ static void GCC_FMT_ATTR(3, 4) report_unsupported(BlockDriverState *bs,
 static void report_unsupported_feature(BlockDriverState *bs,
    Error **errp, Qcow2Feature *table, uint64_t mask)
 {
+    char *features = g_strdup("");
+    char *old;
+
    while (table && table->name[0] != '\0') {
        if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) {
-            if (mask & (1 << table->bit)) {
-                report_unsupported(bs, errp, "%.46s", table->name);
-                mask &= ~(1 << table->bit);
+            if (mask & (1ULL << table->bit)) {
+                old = features;
+                features = g_strdup_printf("%s%s%.46s", old, *old ? ", " : "",
+                                           table->name);
+                g_free(old);
+                mask &= ~(1ULL << table->bit);
            }
        }
        table++;
    }

    if (mask) {
-        report_unsupported(bs, errp, "Unknown incompatible feature: %" PRIx64,
-                           mask);
+        old = features;
+        features = g_strdup_printf("%s%sUnknown incompatible feature: %" PRIx64,
+                                   old, *old ? ", " : "", mask);
+        g_free(old);
    }
+
+    report_unsupported(bs, errp, "%s", features);
+    g_free(features);
 }

 /*
@@ -855,13 +866,11 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
    return ret;
 }

-static int qcow2_refresh_limits(BlockDriverState *bs)
+static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
 {
    BDRVQcowState *s = bs->opaque;

    bs->bl.write_zeroes_alignment = s->cluster_sectors;
-
-    return 0;
 }

 static int qcow2_set_key(BlockDriverState *bs, const char *key)
@@ -1020,11 +1029,20 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
                n1 = qcow2_backing_read1(bs->backing_hd, &hd_qiov,
                    sector_num, cur_nr_sectors);
                if (n1 > 0) {
+                    QEMUIOVector local_qiov;
+
+                    qemu_iovec_init(&local_qiov, hd_qiov.niov);
+                    qemu_iovec_concat(&local_qiov, &hd_qiov, 0,
+                                      n1 * BDRV_SECTOR_SIZE);
+
                    BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
                    qemu_co_mutex_unlock(&s->lock);
                    ret = bdrv_co_readv(bs->backing_hd, sector_num,
-                                        n1, &hd_qiov);
+                                        n1, &local_qiov);
                    qemu_co_mutex_lock(&s->lock);
+
+                    qemu_iovec_destroy(&local_qiov);
+
                    if (ret < 0) {
                        goto fail;
                    }
@@ -2418,6 +2436,7 @@ static BlockDriver bdrv_qcow2 = {
    .bdrv_save_vmstate    = qcow2_save_vmstate,
    .bdrv_load_vmstate    = qcow2_load_vmstate,

+    .supports_backing           = true,
    .bdrv_change_backing_file   = qcow2_change_backing_file,

    .bdrv_refresh_limits        = qcow2_refresh_limits,
--- a/block/qed.c
+++ b/block/qed.c
@@ -528,13 +528,11 @@ out:
    return ret;
 }

-static int bdrv_qed_refresh_limits(BlockDriverState *bs)
+static void bdrv_qed_refresh_limits(BlockDriverState *bs, Error **errp)
 {
    BDRVQEDState *s = bs->opaque;

    bs->bl.write_zeroes_alignment = s->header.cluster_size >> BDRV_SECTOR_BITS;
-
-    return 0;
 }

 /* We have nothing to do for QED reopen, stubs just return
@@ -567,7 +565,7 @@ static void bdrv_qed_close(BlockDriverState *bs)
 static int qed_create(const char *filename, uint32_t cluster_size,
                      uint64_t image_size, uint32_t table_size,
                      const char *backing_file, const char *backing_fmt,
-                      Error **errp)
+                      QemuOpts *opts, Error **errp)
 {
    QEDHeader header = {
        .magic = QED_MAGIC,
@@ -586,7 +584,7 @@ static int qed_create(const char *filename, uint32_t cluster_size,
    int ret = 0;
    BlockDriverState *bs;

-    ret = bdrv_create_file(filename, NULL, &local_err);
+    ret = bdrv_create_file(filename, opts, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
        return ret;
@@ -682,7 +680,7 @@ static int bdrv_qed_create(const char *filename, QemuOpts *opts, Error **errp)
    }

    ret = qed_create(filename, cluster_size, image_size, table_size,
-                     backing_file, backing_fmt, errp);
+                     backing_file, backing_fmt, opts, errp);

 finish:
    g_free(backing_file);
@@ -761,17 +759,19 @@ static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
 /**
 * Read from the backing file or zero-fill if no backing file
 *
- * @s:          QED state
- * @pos:        Byte position in device
- * @qiov:       Destination I/O vector
- * @cb:         Completion function
- * @opaque:     User data for completion function
+ * @s:              QED state
+ * @pos:            Byte position in device
+ * @qiov:           Destination I/O vector
+ * @backing_qiov:   Possibly shortened copy of qiov, to be allocated here
+ * @cb:             Completion function
+ * @opaque:         User data for completion function
 *
 * This function reads qiov->size bytes starting at pos from the backing file.
 * If there is no backing file then zeroes are read.
 */
 static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
                                  QEMUIOVector *qiov,
+                                  QEMUIOVector **backing_qiov,
                                  BlockDriverCompletionFunc *cb, void *opaque)
 {
    uint64_t backing_length = 0;
@@ -804,15 +804,21 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
    /* If the read straddles the end of the backing file, shorten it */
    size = MIN((uint64_t)backing_length - pos, qiov->size);

+    assert(*backing_qiov == NULL);
+    *backing_qiov = g_new(QEMUIOVector, 1);
+    qemu_iovec_init(*backing_qiov, qiov->niov);
+    qemu_iovec_concat(*backing_qiov, qiov, 0, size);
+
    BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
    bdrv_aio_readv(s->bs->backing_hd, pos / BDRV_SECTOR_SIZE,
-                   qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
+                   *backing_qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
 }

 typedef struct {
    GenericCB gencb;
    BDRVQEDState *s;
    QEMUIOVector qiov;
+    QEMUIOVector *backing_qiov;
    struct iovec iov;
    uint64_t offset;
 } CopyFromBackingFileCB;
@@ -829,6 +835,12 @@ static void qed_copy_from_backing_file_write(void *opaque, int ret)
    CopyFromBackingFileCB *copy_cb = opaque;
    BDRVQEDState *s = copy_cb->s;

+    if (copy_cb->backing_qiov) {
+        qemu_iovec_destroy(copy_cb->backing_qiov);
+        g_free(copy_cb->backing_qiov);
+        copy_cb->backing_qiov = NULL;
+    }
+
    if (ret) {
        qed_copy_from_backing_file_cb(copy_cb, ret);
        return;
@@ -866,11 +878,12 @@ static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos,
    copy_cb = gencb_alloc(sizeof(*copy_cb), cb, opaque);
    copy_cb->s = s;
    copy_cb->offset = offset;
+    copy_cb->backing_qiov = NULL;
    copy_cb->iov.iov_base = qemu_blockalign(s->bs, len);
    copy_cb->iov.iov_len = len;
    qemu_iovec_init_external(&copy_cb->qiov, &copy_cb->iov, 1);

-    qed_read_backing_file(s, pos, &copy_cb->qiov,
+    qed_read_backing_file(s, pos, &copy_cb->qiov, &copy_cb->backing_qiov,
                          qed_copy_from_backing_file_write, copy_cb);
 }

@@ -1313,7 +1326,7 @@ static void qed_aio_read_data(void *opaque, int ret,
        return;
    } else if (ret != QED_CLUSTER_FOUND) {
        qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
-                              qed_aio_next_io, acb);
+                              &acb->backing_qiov, qed_aio_next_io, acb);
        return;
    }

@@ -1339,6 +1352,12 @@ static void qed_aio_next_io(void *opaque, int ret)

    trace_qed_aio_next_io(s, acb, ret, acb->cur_pos + acb->cur_qiov.size);

+    if (acb->backing_qiov) {
+        qemu_iovec_destroy(acb->backing_qiov);
+        g_free(acb->backing_qiov);
+        acb->backing_qiov = NULL;
+    }
+
    /* Handle I/O error */
    if (ret) {
        qed_aio_complete(acb, ret);
@@ -1378,6 +1397,7 @@ static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs,
    acb->qiov_offset = 0;
    acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
    acb->end_pos = acb->cur_pos + nb_sectors * BDRV_SECTOR_SIZE;
+    acb->backing_qiov = NULL;
    acb->request.l2_table = NULL;
    qemu_iovec_init(&acb->cur_qiov, qiov->niov);

@@ -1652,6 +1672,7 @@ static BlockDriver bdrv_qed = {
    .format_name              = "qed",
    .instance_size            = sizeof(BDRVQEDState),
    .create_opts              = &qed_create_opts,
+    .supports_backing         = true,

    .bdrv_probe               = bdrv_qed_probe,
    .bdrv_rebind              = bdrv_qed_rebind,
--- a/block/qed.h
+++ b/block/qed.h
@@ -142,6 +142,7 @@ typedef struct QEDAIOCB {

    /* Current cluster scatter-gather list */
    QEMUIOVector cur_qiov;
+    QEMUIOVector *backing_qiov;
    uint64_t cur_pos;               /* position on block device, in bytes */
    uint64_t cur_cluster;           /* cluster offset in image file */
    unsigned int cur_nclusters;     /* number of clusters being accessed */
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -23,6 +23,7 @@

 #define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold"
 #define QUORUM_OPT_BLKVERIFY      "blkverify"
+#define QUORUM_OPT_REWRITE        "rewrite-corrupted"

 /* This union holds a vote hash value */
 typedef union QuorumVoteValue {
@@ -70,6 +71,9 @@ typedef struct BDRVQuorumState {
                            * It is useful to debug other block drivers by
                            * comparing them with a reference one.
                            */
+    bool rewrite_corrupted;/* true if the driver must rewrite-on-read corrupted
+                            * block if Quorum is reached.
+                            */
 } BDRVQuorumState;

 typedef struct QuorumAIOCB QuorumAIOCB;
@@ -105,13 +109,17 @@ struct QuorumAIOCB {
    int count;                  /* number of completed AIOCB */
    int success_count;          /* number of successfully completed AIOCB */

+    int rewrite_count;          /* number of replica to rewrite: count down to
+                                 * zero once writes are fired
+                                 */
+
    QuorumVotes votes;

    bool is_read;
    int vote_ret;
 };

-static void quorum_vote(QuorumAIOCB *acb);
+static bool quorum_vote(QuorumAIOCB *acb);

 static void quorum_aio_cancel(BlockDriverAIOCB *blockacb)
 {
@@ -183,6 +191,7 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
    acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
    acb->count = 0;
    acb->success_count = 0;
+    acb->rewrite_count = 0;
    acb->votes.compare = quorum_sha256_compare;
    QLIST_INIT(&acb->votes.vote_list);
    acb->is_read = false;
@@ -232,11 +241,27 @@ static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
    return false;
 }

+static void quorum_rewrite_aio_cb(void *opaque, int ret)
+{
+    QuorumAIOCB *acb = opaque;
+
+    /* one less rewrite to do */
+    acb->rewrite_count--;
+
+    /* wait until all rewrite callbacks have completed */
+    if (acb->rewrite_count) {
+        return;
+    }
+
+    quorum_aio_finalize(acb);
+}
+
 static void quorum_aio_cb(void *opaque, int ret)
 {
    QuorumChildRequest *sacb = opaque;
    QuorumAIOCB *acb = sacb->parent;
    BDRVQuorumState *s = acb->common.bs->opaque;
+    bool rewrite = false;

    sacb->ret = ret;
    acb->count++;
@@ -253,12 +278,15 @@ static void quorum_aio_cb(void *opaque, int ret)

    /* Do the vote on read */
    if (acb->is_read) {
-        quorum_vote(acb);
+        rewrite = quorum_vote(acb);
    } else {
        quorum_has_too_much_io_failed(acb);
    }

-    quorum_aio_finalize(acb);
+    /* if no rewrite is done the code will finish right away */
+    if (!rewrite) {
+        quorum_aio_finalize(acb);
+    }
 }

 static void quorum_report_bad_versions(BDRVQuorumState *s,
@@ -278,6 +306,43 @@ static void quorum_report_bad_versions(BDRVQuorumState *s,
    }
 }

+static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
+                                        QuorumVoteValue *value)
+{
+    QuorumVoteVersion *version;
+    QuorumVoteItem *item;
+    int count = 0;
+
+    /* first count the number of bad versions: done first to avoid concurrency
+     * issues.
+     */
+    QLIST_FOREACH(version, &acb->votes.vote_list, next) {
+        if (acb->votes.compare(&version->value, value)) {
+            continue;
+        }
+        QLIST_FOREACH(item, &version->items, next) {
+            count++;
+        }
+    }
+
+    /* quorum_rewrite_aio_cb will count down this to zero */
+    acb->rewrite_count = count;
+
+    /* now fire the correcting rewrites */
+    QLIST_FOREACH(version, &acb->votes.vote_list, next) {
+        if (acb->votes.compare(&version->value, value)) {
+            continue;
+        }
+        QLIST_FOREACH(item, &version->items, next) {
+            bdrv_aio_writev(s->bs[item->index], acb->sector_num, acb->qiov,
+                            acb->nb_sectors, quorum_rewrite_aio_cb, acb);
+        }
+    }
+
+    /* return true if any rewrite is done else false */
+    return count;
+}
+
 static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
 {
    int i;
@@ -468,16 +533,17 @@ static int quorum_vote_error(QuorumAIOCB *acb)
    return ret;
 }

-static void quorum_vote(QuorumAIOCB *acb)
+static bool quorum_vote(QuorumAIOCB *acb)
 {
    bool quorum = true;
+    bool rewrite = false;
    int i, j, ret;
    QuorumVoteValue hash;
    BDRVQuorumState *s = acb->common.bs->opaque;
    QuorumVoteVersion *winner;

    if (quorum_has_too_much_io_failed(acb)) {
-        return;
+        return false;
    }

    /* get the index of the first successful read */
@@ -505,7 +571,7 @@ static void quorum_vote(QuorumAIOCB *acb)
    /* Every successful read agrees */
    if (quorum) {
        quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov);
-        return;
+        return false;
    }

    /* compute hashes for each successful read, also store indexes */
@@ -538,9 +604,15 @@ static void quorum_vote(QuorumAIOCB *acb)
    /* some versions are bad print them */
    quorum_report_bad_versions(s, acb, &winner->value);

+    /* corruption correction is enabled */
+    if (s->rewrite_corrupted) {
+        rewrite = quorum_rewrite_bad_versions(s, acb, &winner->value);
+    }
+
 free_exit:
    /* free lists */
    quorum_free_vote_list(&acb->votes);
+    return rewrite;
 }

 static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
@@ -705,6 +777,11 @@ static QemuOptsList quorum_runtime_opts = {
            .type = QEMU_OPT_BOOL,
            .help = "Trigger block verify mode if set",
        },
+        {
+            .name = QUORUM_OPT_REWRITE,
+            .type = QEMU_OPT_BOOL,
+            .help = "Rewrite corrupted block on read quorum",
+        },
        { /* end of list */ }
    },
 };
@@ -766,6 +843,14 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
                "and using two files with vote_threshold=2\n");
    }

+    s->rewrite_corrupted = qemu_opt_get_bool(opts, QUORUM_OPT_REWRITE, false);
+    if (s->rewrite_corrupted && s->is_blkverify) {
+        error_setg(&local_err,
+                   "rewrite-corrupted=on cannot be used with blkverify=on");
+        ret = -EINVAL;
+        goto exit;
+    }
+
    /* allocate the children BlockDriverState array */
    s->bs = g_new0(BlockDriverState *, s->num_children);
    opened = g_new0(bool, s->num_children);
--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -40,6 +40,8 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
        BlockDriverCompletionFunc *cb, void *opaque, int type);
 void laio_detach_aio_context(void *s, AioContext *old_context);
 void laio_attach_aio_context(void *s, AioContext *new_context);
+void laio_io_plug(BlockDriverState *bs, void *aio_ctx);
+int laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug);
 #endif

 #ifdef _WIN32
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -55,6 +55,9 @@
 #include <linux/cdrom.h>
 #include <linux/fd.h>
 #include <linux/fs.h>
+#ifndef FS_NOCOW_FL
+#define FS_NOCOW_FL                     0x00800000 /* Do not cow file */
+#endif
 #endif
 #ifdef CONFIG_FIEMAP
 #include <linux/fiemap.h>
@@ -218,7 +221,7 @@ static int raw_normalize_devicepath(const char **filename)
 }
 #endif

-static void raw_probe_alignment(BlockDriverState *bs)
+static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
 {
    BDRVRawState *s = bs->opaque;
    char *buf;
@@ -237,24 +240,24 @@ static void raw_probe_alignment(BlockDriverState *bs)
    s->buf_align = 0;

 #ifdef BLKSSZGET
-    if (ioctl(s->fd, BLKSSZGET, &sector_size) >= 0) {
+    if (ioctl(fd, BLKSSZGET, &sector_size) >= 0) {
        bs->request_alignment = sector_size;
    }
 #endif
 #ifdef DKIOCGETBLOCKSIZE
-    if (ioctl(s->fd, DKIOCGETBLOCKSIZE, &sector_size) >= 0) {
+    if (ioctl(fd, DKIOCGETBLOCKSIZE, &sector_size) >= 0) {
        bs->request_alignment = sector_size;
    }
 #endif
 #ifdef DIOCGSECTORSIZE
-    if (ioctl(s->fd, DIOCGSECTORSIZE, &sector_size) >= 0) {
+    if (ioctl(fd, DIOCGSECTORSIZE, &sector_size) >= 0) {
        bs->request_alignment = sector_size;
    }
 #endif
 #ifdef CONFIG_XFS
    if (s->is_xfs) {
        struct dioattr da;
-        if (xfsctl(NULL, s->fd, XFS_IOC_DIOINFO, &da) >= 0) {
+        if (xfsctl(NULL, fd, XFS_IOC_DIOINFO, &da) >= 0) {
            bs->request_alignment = da.d_miniosz;
            /* The kernel returns wrong information for d_mem */
            /* s->buf_align = da.d_mem; */
@@ -267,7 +270,7 @@ static void raw_probe_alignment(BlockDriverState *bs)
        size_t align;
        buf = qemu_memalign(MAX_BLOCKSIZE, 2 * MAX_BLOCKSIZE);
        for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
-            if (pread(s->fd, buf + align, MAX_BLOCKSIZE, 0) >= 0) {
+            if (pread(fd, buf + align, MAX_BLOCKSIZE, 0) >= 0) {
                s->buf_align = align;
                break;
            }
@@ -279,13 +282,18 @@ static void raw_probe_alignment(BlockDriverState *bs)
        size_t align;
        buf = qemu_memalign(s->buf_align, MAX_BLOCKSIZE);
        for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
-            if (pread(s->fd, buf, align, 0) >= 0) {
+            if (pread(fd, buf, align, 0) >= 0) {
                bs->request_alignment = align;
                break;
            }
        }
        qemu_vfree(buf);
    }
+
+    if (!s->buf_align || !bs->request_alignment) {
+        error_setg(errp, "Could not find working O_DIRECT alignment. "
+                         "Try cache.direct=off.");
+    }
 }

 static void raw_parse_flags(int bdrv_flags, int *open_flags)
@@ -502,6 +510,7 @@ static int raw_reopen_prepare(BDRVReopenState *state,
    BDRVRawState *s;
    BDRVRawReopenState *raw_s;
    int ret = 0;
+    Error *local_err = NULL;

    assert(state != NULL);
    assert(state->bs != NULL);
@@ -574,6 +583,19 @@ static int raw_reopen_prepare(BDRVReopenState *state,
            ret = -1;
        }
    }
+
+    /* Fail already reopen_prepare() if we can't get a working O_DIRECT
+     * alignment with the new fd. */
+    if (raw_s->fd != -1) {
+        raw_probe_alignment(state->bs, raw_s->fd, &local_err);
+        if (local_err) {
+            qemu_close(raw_s->fd);
+            raw_s->fd = -1;
+            error_propagate(errp, local_err);
+            ret = -EINVAL;
+        }
+    }
+
    return ret;
 }

@@ -612,14 +634,12 @@ static void raw_reopen_abort(BDRVReopenState *state)
    state->opaque = NULL;
 }

-static int raw_refresh_limits(BlockDriverState *bs)
+static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
 {
    BDRVRawState *s = bs->opaque;

-    raw_probe_alignment(bs);
+    raw_probe_alignment(bs, s->fd, errp);
    bs->bl.opt_mem_alignment = s->buf_align;
-
-    return 0;
 }

 static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
@@ -727,6 +747,15 @@ static ssize_t handle_aiocb_rw_linear(RawPosixAIOData *aiocb, char *buf)
        }
        if (len == -1 && errno == EINTR) {
            continue;
+        } else if (len == -1 && errno == EINVAL &&
+                   (aiocb->bs->open_flags & BDRV_O_NOCACHE) &&
+                   !(aiocb->aio_type & QEMU_AIO_WRITE) &&
+                   offset > 0) {
+            /* O_DIRECT pread() may fail with EINVAL when offset is unaligned
+             * after a short read.  Assume that O_DIRECT short reads only occur
+             * at EOF.  Therefore this is a short read, not an I/O error.
+             */
+            break;
        } else if (len == -1) {
            offset = -errno;
            break;
@@ -787,6 +816,7 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
            memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
            p += aiocb->aio_iov[i].iov_len;
        }
+        assert(p - buf == aiocb->aio_nbytes);
    }

    nbytes = handle_aiocb_rw_linear(aiocb, buf);
@@ -801,9 +831,11 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
                copy = aiocb->aio_iov[i].iov_len;
            }
            memcpy(aiocb->aio_iov[i].iov_base, p, copy);
+            assert(count >= copy);
            p     += copy;
            count -= copy;
        }
+        assert(count == 0);
    }
    qemu_vfree(buf);

@@ -990,12 +1022,14 @@ static int paio_submit_co(BlockDriverState *bs, int fd,
    acb->aio_type = type;
    acb->aio_fildes = fd;

+    acb->aio_nbytes = nb_sectors * BDRV_SECTOR_SIZE;
+    acb->aio_offset = sector_num * BDRV_SECTOR_SIZE;
+
    if (qiov) {
        acb->aio_iov = qiov->iov;
        acb->aio_niov = qiov->niov;
+        assert(qiov->size == acb->aio_nbytes);
    }
-    acb->aio_nbytes = nb_sectors * 512;
-    acb->aio_offset = sector_num * 512;

    trace_paio_submit_co(sector_num, nb_sectors, type);
    pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
@@ -1013,12 +1047,14 @@ static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
    acb->aio_type = type;
    acb->aio_fildes = fd;

+    acb->aio_nbytes = nb_sectors * BDRV_SECTOR_SIZE;
+    acb->aio_offset = sector_num * BDRV_SECTOR_SIZE;
+
    if (qiov) {
        acb->aio_iov = qiov->iov;
        acb->aio_niov = qiov->niov;
+        assert(qiov->size == acb->aio_nbytes);
    }
-    acb->aio_nbytes = nb_sectors * 512;
-    acb->aio_offset = sector_num * 512;

    trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
    pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
@@ -1054,6 +1090,36 @@ static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
                       cb, opaque, type);
 }

+static void raw_aio_plug(BlockDriverState *bs)
+{
+#ifdef CONFIG_LINUX_AIO
+    BDRVRawState *s = bs->opaque;
+    if (s->use_aio) {
+        laio_io_plug(bs, s->aio_ctx);
+    }
+#endif
+}
+
+static void raw_aio_unplug(BlockDriverState *bs)
+{
+#ifdef CONFIG_LINUX_AIO
+    BDRVRawState *s = bs->opaque;
+    if (s->use_aio) {
+        laio_io_unplug(bs, s->aio_ctx, true);
+    }
+#endif
+}
+
+static void raw_aio_flush_io_queue(BlockDriverState *bs)
+{
+#ifdef CONFIG_LINUX_AIO
+    BDRVRawState *s = bs->opaque;
+    if (s->use_aio) {
+        laio_io_unplug(bs, s->aio_ctx, false);
+    }
+#endif
+}
+
 static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
@@ -1130,12 +1196,12 @@ static int64_t raw_getlength(BlockDriverState *bs)
    struct stat st;

    if (fstat(fd, &st))
-        return -1;
+        return -errno;
    if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
        struct disklabel dl;

        if (ioctl(fd, DIOCGDINFO, &dl))
-            return -1;
+            return -errno;
        return (uint64_t)dl.d_secsize *
            dl.d_partitions[DISKPART(st.st_rdev)].p_size;
    } else
@@ -1149,7 +1215,7 @@ static int64_t raw_getlength(BlockDriverState *bs)
    struct stat st;

    if (fstat(fd, &st))
-        return -1;
+        return -errno;
    if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
        struct dkwedge_info dkw;

@@ -1159,7 +1225,7 @@ static int64_t raw_getlength(BlockDriverState *bs)
            struct disklabel dl;

            if (ioctl(fd, DIOCGDINFO, &dl))
-                return -1;
+                return -errno;
            return (uint64_t)dl.d_secsize *
                dl.d_partitions[DISKPART(st.st_rdev)].p_size;
        }
@@ -1172,6 +1238,7 @@ static int64_t raw_getlength(BlockDriverState *bs)
    BDRVRawState *s = bs->opaque;
    struct dk_minfo minfo;
    int ret;
+    int64_t size;

    ret = fd_open(bs);
    if (ret < 0) {
@@ -1190,7 +1257,11 @@ static int64_t raw_getlength(BlockDriverState *bs)
     * There are reports that lseek on some devices fails, but
     * irc discussion said that contingency on contingency was overkill.
     */
-    return lseek(s->fd, 0, SEEK_END);
+    size = lseek(s->fd, 0, SEEK_END);
+    if (size < 0) {
+        return -errno;
+    }
+    return size;
 }
 #elif defined(CONFIG_BSD)
 static int64_t raw_getlength(BlockDriverState *bs)
@@ -1228,6 +1299,9 @@ again:
        size = LLONG_MAX;
 #else
        size = lseek(fd, 0LL, SEEK_END);
+        if (size < 0) {
+            return -errno;
+        }
 #endif
 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
        switch(s->type) {
@@ -1244,6 +1318,9 @@ again:
 #endif
    } else {
        size = lseek(fd, 0, SEEK_END);
+        if (size < 0) {
+            return -errno;
+        }
    }
    return size;
 }
@@ -1252,13 +1329,18 @@ static int64_t raw_getlength(BlockDriverState *bs)
 {
    BDRVRawState *s = bs->opaque;
    int ret;
+    int64_t size;

    ret = fd_open(bs);
    if (ret < 0) {
        return ret;
    }

-    return lseek(s->fd, 0, SEEK_END);
+    size = lseek(s->fd, 0, SEEK_END);
+    if (size < 0) {
+        return -errno;
+    }
+    return size;
 }
 #endif

@@ -1278,12 +1360,14 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
    int fd;
    int result = 0;
    int64_t total_size = 0;
+    bool nocow = false;

    strstart(filename, "file:", &filename);

    /* Read out options */
    total_size =
        qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / BDRV_SECTOR_SIZE;
+    nocow = qemu_opt_get_bool(opts, BLOCK_OPT_NOCOW, false);

    fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
                   0644);
@@ -1291,6 +1375,21 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
        result = -errno;
        error_setg_errno(errp, -result, "Could not create file");
    } else {
+        if (nocow) {
+#ifdef __linux__
+            /* Set NOCOW flag to solve performance issue on fs like btrfs.
+             * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value
+             * will be ignored since any failure of this operation should not
+             * block the left work.
+             */
+            int attr;
+            if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) {
+                attr |= FS_NOCOW_FL;
+                ioctl(fd, FS_IOC_SETFLAGS, &attr);
+            }
+#endif
+        }
+
        if (ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
            result = -errno;
            error_setg_errno(errp, -result, "Could not resize file");
@@ -1477,6 +1576,11 @@ static QemuOptsList raw_create_opts = {
            .type = QEMU_OPT_SIZE,
            .help = "Virtual disk size"
        },
+        {
+            .name = BLOCK_OPT_NOCOW,
+            .type = QEMU_OPT_BOOL,
+            .help = "Turn off copy-on-write (valid only on btrfs)"
+        },
        { /* end of list */ }
    }
 };
@@ -1503,6 +1607,9 @@ static BlockDriver bdrv_file = {
    .bdrv_aio_flush = raw_aio_flush,
    .bdrv_aio_discard = raw_aio_discard,
    .bdrv_refresh_limits = raw_refresh_limits,
+    .bdrv_io_plug = raw_aio_plug,
+    .bdrv_io_unplug = raw_aio_unplug,
+    .bdrv_flush_io_queue = raw_aio_flush_io_queue,

    .bdrv_truncate = raw_truncate,
    .bdrv_getlength = raw_getlength,
@@ -1902,6 +2009,9 @@ static BlockDriver bdrv_host_device = {
    .bdrv_aio_flush	= raw_aio_flush,
    .bdrv_aio_discard   = hdev_aio_discard,
    .bdrv_refresh_limits = raw_refresh_limits,
+    .bdrv_io_plug = raw_aio_plug,
+    .bdrv_io_unplug = raw_aio_unplug,
+    .bdrv_flush_io_queue = raw_aio_flush_io_queue,

    .bdrv_truncate      = raw_truncate,
    .bdrv_getlength	= raw_getlength,
@@ -2047,6 +2157,9 @@ static BlockDriver bdrv_host_floppy = {
    .bdrv_aio_writev    = raw_aio_writev,
    .bdrv_aio_flush	= raw_aio_flush,
    .bdrv_refresh_limits = raw_refresh_limits,
+    .bdrv_io_plug = raw_aio_plug,
+    .bdrv_io_unplug = raw_aio_unplug,
+    .bdrv_flush_io_queue = raw_aio_flush_io_queue,

    .bdrv_truncate      = raw_truncate,
    .bdrv_getlength      = raw_getlength,
@@ -2175,6 +2288,9 @@ static BlockDriver bdrv_host_cdrom = {
    .bdrv_aio_writev    = raw_aio_writev,
    .bdrv_aio_flush	= raw_aio_flush,
    .bdrv_refresh_limits = raw_refresh_limits,
+    .bdrv_io_plug = raw_aio_plug,
+    .bdrv_io_unplug = raw_aio_unplug,
+    .bdrv_flush_io_queue = raw_aio_flush_io_queue,

    .bdrv_truncate      = raw_truncate,
    .bdrv_getlength      = raw_getlength,
@@ -2309,6 +2425,9 @@ static BlockDriver bdrv_host_cdrom = {
    .bdrv_aio_writev    = raw_aio_writev,
    .bdrv_aio_flush	= raw_aio_flush,
    .bdrv_refresh_limits = raw_refresh_limits,
+    .bdrv_io_plug = raw_aio_plug,
+    .bdrv_io_unplug = raw_aio_unplug,
+    .bdrv_flush_io_queue = raw_aio_flush_io_queue,

    .bdrv_truncate      = raw_truncate,
    .bdrv_getlength      = raw_getlength,
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -94,10 +94,9 @@ static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
    return bdrv_get_info(bs->file, bdi);
 }

-static int raw_refresh_limits(BlockDriverState *bs)
+static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
 {
    bs->bl = bs->file->bl;
-    return 0;
 }

 static int raw_truncate(BlockDriverState *bs, int64_t offset)
--- a/block/stream.c
+++ b/block/stream.c
@@ -32,7 +32,7 @@ typedef struct StreamBlockJob {
    RateLimit limit;
    BlockDriverState *base;
    BlockdevOnError on_error;
-    char backing_file_id[1024];
+    char *backing_file_str;
 } StreamBlockJob;

 static int coroutine_fn stream_populate(BlockDriverState *bs,
@@ -76,7 +76,7 @@ static void close_unused_images(BlockDriverState *top, BlockDriverState *base,
        bdrv_unref(unused);
    }

-    bdrv_refresh_limits(top);
+    bdrv_refresh_limits(top, NULL);
 }

 static void coroutine_fn stream_run(void *opaque)
@@ -186,7 +186,7 @@ wait:
    if (!block_job_is_cancelled(&s->common) && sector_num == end && ret == 0) {
        const char *base_id = NULL, *base_fmt = NULL;
        if (base) {
-            base_id = s->backing_file_id;
+            base_id = s->backing_file_str;
            if (base->drv) {
                base_fmt = base->drv->format_name;
            }
@@ -196,6 +196,7 @@ wait:
    }

    qemu_vfree(buf);
+    g_free(s->backing_file_str);
    block_job_completed(&s->common, ret);
 }

@@ -217,7 +218,7 @@ static const BlockJobDriver stream_job_driver = {
 };

 void stream_start(BlockDriverState *bs, BlockDriverState *base,
-                  const char *base_id, int64_t speed,
+                  const char *backing_file_str, int64_t speed,
                  BlockdevOnError on_error,
                  BlockDriverCompletionFunc *cb,
                  void *opaque, Error **errp)
@@ -237,9 +238,7 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base,
    }

    s->base = base;
-    if (base_id) {
-        pstrcpy(s->backing_file_id, sizeof(s->backing_file_id), base_id);
-    }
+    s->backing_file_str = g_strdup(backing_file_str);

    s->on_error = on_error;
    s->common.co = qemu_coroutine_create(stream_run);
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -53,6 +53,13 @@
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "migration/migration.h"
+#ifdef __linux__
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#ifndef FS_NOCOW_FL
+#define FS_NOCOW_FL                     0x00800000 /* Do not cow file */
+#endif
+#endif

 #if defined(CONFIG_UUID)
 #include <uuid/uuid.h>
@@ -683,6 +690,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
    VdiHeader header;
    size_t i;
    size_t bmap_size;
+    bool nocow = false;

    logout("\n");

@@ -699,6 +707,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
        image_type = VDI_TYPE_STATIC;
    }
 #endif
+    nocow = qemu_opt_get_bool_del(opts, BLOCK_OPT_NOCOW, false);

    if (bytes > VDI_DISK_SIZE_MAX) {
        result = -ENOTSUP;
@@ -716,6 +725,21 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
        goto exit;
    }

+    if (nocow) {
+#ifdef __linux__
+        /* Set NOCOW flag to solve performance issue on fs like btrfs.
+         * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value will
+         * be ignored since any failure of this operation should not block the
+         * left work.
+         */
+        int attr;
+        if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) {
+            attr |= FS_NOCOW_FL;
+            ioctl(fd, FS_IOC_SETFLAGS, &attr);
+        }
+#endif
+    }
+
    /* We need enough blocks to store the given disk size,
       so always round up. */
    blocks = (bytes + block_size - 1) / block_size;
@@ -818,6 +842,11 @@ static QemuOptsList vdi_create_opts = {
            .def_value_str = "off"
        },
 #endif
+        {
+            .name = BLOCK_OPT_NOCOW,
+            .type = QEMU_OPT_BOOL,
+            .help = "Turn off copy-on-write (valid only on btrfs)"
+        },
        /* TODO: An additional option to set UUID values might be useful. */
        { /* end of list */ }
    }
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -938,7 +938,7 @@ fail:
 }


-static int vmdk_refresh_limits(BlockDriverState *bs)
+static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp)
 {
    BDRVVmdkState *s = bs->opaque;
    int i;
@@ -950,8 +950,6 @@ static int vmdk_refresh_limits(BlockDriverState *bs)
                    s->extents[i].cluster_sectors);
        }
    }
-
-    return 0;
 }

 static int get_whole_cluster(BlockDriverState *bs,
@@ -1529,7 +1527,7 @@ static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs,

 static int vmdk_create_extent(const char *filename, int64_t filesize,
                              bool flat, bool compress, bool zeroed_grain,
-                              Error **errp)
+                              QemuOpts *opts, Error **errp)
 {
    int ret, i;
    BlockDriverState *bs = NULL;
@@ -1539,7 +1537,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
    uint32_t *gd_buf = NULL;
    int gd_buf_size;

-    ret = bdrv_create_file(filename, NULL, &local_err);
+    ret = bdrv_create_file(filename, opts, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
        goto exit;
@@ -1845,7 +1843,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
                path, desc_filename);

        if (vmdk_create_extent(ext_filename, size,
-                               flat, compress, zeroed_grain, errp)) {
+                               flat, compress, zeroed_grain, opts, errp)) {
            ret = -EINVAL;
            goto exit;
        }
@@ -2180,6 +2178,7 @@ static BlockDriver bdrv_vmdk = {
    .bdrv_detach_aio_context      = vmdk_detach_aio_context,
    .bdrv_attach_aio_context      = vmdk_attach_aio_context,

+    .supports_backing             = true,
    .create_opts                  = &vmdk_create_opts,
 };

--- a/block/vpc.c
+++ b/block/vpc.c
@@ -29,6 +29,13 @@
 #if defined(CONFIG_UUID)
 #include <uuid/uuid.h>
 #endif
+#ifdef __linux__
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#ifndef FS_NOCOW_FL
+#define FS_NOCOW_FL                     0x00800000 /* Do not cow file */
+#endif
+#endif

 /**************************************************************/

@@ -751,6 +758,7 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
    int64_t total_size;
    int disk_type;
    int ret = -EIO;
+    bool nocow = false;

    /* Read out options */
    total_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
@@ -767,6 +775,7 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
    } else {
        disk_type = VHD_DYNAMIC;
    }
+    nocow = qemu_opt_get_bool_del(opts, BLOCK_OPT_NOCOW, false);

    /* Create the file */
    fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
@@ -775,6 +784,21 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
        goto out;
    }

+    if (nocow) {
+#ifdef __linux__
+        /* Set NOCOW flag to solve performance issue on fs like btrfs.
+         * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value will
+         * be ignored since any failure of this operation should not block the
+         * left work.
+         */
+        int attr;
+        if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) {
+            attr |= FS_NOCOW_FL;
+            ioctl(fd, FS_IOC_SETFLAGS, &attr);
+        }
+#endif
+    }
+
    /*
     * Calculate matching total_size and geometry. Increase the number of
     * sectors requested until we get enough (or fail). This ensures that
@@ -884,6 +908,11 @@ static QemuOptsList vpc_create_opts = {
                "Type of virtual hard disk format. Supported formats are "
                "{dynamic (default) | fixed} "
        },
+        {
+            .name = BLOCK_OPT_NOCOW,
+            .type = QEMU_OPT_BOOL,
+            .help = "Turn off copy-on-write (valid only on btrfs)"
+        },
        { /* end of list */ }
    }
 };
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -28,6 +28,7 @@ static void nbd_accept(void *opaque)

    int fd = accept(server_fd, (struct sockaddr *)&addr, &addr_len);
    if (fd >= 0 && !nbd_client_new(NULL, fd, nbd_client_put)) {
+        shutdown(fd, 2);
        close(fd);
    }
 }
@@ -91,6 +92,10 @@ void qmp_nbd_server_add(const char *device, bool has_writable, bool writable,
        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
        return;
    }
+    if (!bdrv_is_inserted(bs)) {
+        error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
+        return;
+    }

    if (!has_writable) {
        writable = false;
--- a/blockdev.c
+++ b/blockdev.c
@@ -1819,6 +1819,11 @@ void qmp_block_resize(bool has_device, const char *device,
        return;
    }

+    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_RESIZE, NULL)) {
+        error_set(errp, QERR_DEVICE_IN_USE, device);
+        return;
+    }
+
    /* complete all in-flight operations before resizing the device */
    bdrv_drain_all();

@@ -1866,14 +1871,17 @@ static void block_job_cb(void *opaque, int ret)
    bdrv_put_ref_bh_schedule(bs);
 }

-void qmp_block_stream(const char *device, bool has_base,
-                      const char *base, bool has_speed, int64_t speed,
+void qmp_block_stream(const char *device,
+                      bool has_base, const char *base,
+                      bool has_backing_file, const char *backing_file,
+                      bool has_speed, int64_t speed,
                      bool has_on_error, BlockdevOnError on_error,
                      Error **errp)
 {
    BlockDriverState *bs;
    BlockDriverState *base_bs = NULL;
    Error *local_err = NULL;
+    const char *base_name = NULL;

    if (!has_on_error) {
        on_error = BLOCKDEV_ON_ERROR_REPORT;
@@ -1889,15 +1897,27 @@ void qmp_block_stream(const char *device, bool has_base,
        return;
    }

-    if (base) {
+    if (has_base) {
        base_bs = bdrv_find_backing_image(bs, base);
        if (base_bs == NULL) {
            error_set(errp, QERR_BASE_NOT_FOUND, base);
            return;
        }
+        base_name = base;
    }

-    stream_start(bs, base_bs, base, has_speed ? speed : 0,
+    /* if we are streaming the entire chain, the result will have no backing
+     * file, and specifying one is therefore an error */
+    if (base_bs == NULL && has_backing_file) {
+        error_setg(errp, "backing file specified, but streaming the "
+                         "entire chain");
+        return;
+    }
+
+    /* backing_file string overrides base bs filename */
+    base_name = has_backing_file ? backing_file : base_name;
+
+    stream_start(bs, base_bs, base_name, has_speed ? speed : 0,
                 on_error, block_job_cb, bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
@@ -1908,7 +1928,9 @@ void qmp_block_stream(const char *device, bool has_base,
 }

 void qmp_block_commit(const char *device,
-                      bool has_base, const char *base, const char *top,
+                      bool has_base, const char *base,
+                      bool has_top, const char *top,
+                      bool has_backing_file, const char *backing_file,
                      bool has_speed, int64_t speed,
                      Error **errp)
 {
@@ -1927,6 +1949,11 @@ void qmp_block_commit(const char *device,
    /* drain all i/o before commits */
    bdrv_drain_all();

+    /* Important Note:
+     *  libvirt relies on the DeviceNotFound error class in order to probe for
+     *  live commit feature versions; for this to work, we must make sure to
+     *  perform the device lookup before any generic errors that may occur in a
+     *  scenario in which all optional arguments are omitted. */
    bs = bdrv_find(device);
    if (!bs) {
        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
@@ -1940,7 +1967,7 @@ void qmp_block_commit(const char *device,
    /* default top_bs is the active layer */
    top_bs = bs;

-    if (top) {
+    if (has_top && top) {
        if (strcmp(bs->filename, top) != 0) {
            top_bs = bdrv_find_backing_image(bs, top);
        }
@@ -1962,12 +1989,23 @@ void qmp_block_commit(const char *device,
        return;
    }

+    /* Do not allow attempts to commit an image into itself */
+    if (top_bs == base_bs) {
+        error_setg(errp, "cannot commit an image into itself");
+        return;
+    }
+
    if (top_bs == bs) {
+        if (has_backing_file) {
+            error_setg(errp, "'backing-file' specified,"
+                             " but 'top' is the active layer");
+            return;
+        }
        commit_active_start(bs, base_bs, speed, on_error, block_job_cb,
                            bs, &local_err);
    } else {
        commit_start(bs, base_bs, top_bs, speed, on_error, block_job_cb, bs,
-                    &local_err);
+                     has_backing_file ? backing_file : NULL, &local_err);
    }
    if (local_err != NULL) {
        error_propagate(errp, local_err);
@@ -2094,6 +2132,8 @@ BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp)

 void qmp_drive_mirror(const char *device, const char *target,
                      bool has_format, const char *format,
+                      bool has_node_name, const char *node_name,
+                      bool has_replaces, const char *replaces,
                      enum MirrorSyncMode sync,
                      bool has_mode, enum NewImageMode mode,
                      bool has_speed, int64_t speed,
@@ -2107,6 +2147,7 @@ void qmp_drive_mirror(const char *device, const char *target,
    BlockDriverState *source, *target_bs;
    BlockDriver *drv = NULL;
    Error *local_err = NULL;
+    QDict *options = NULL;
    int flags;
    int64_t size;
    int ret;
@@ -2180,6 +2221,29 @@ void qmp_drive_mirror(const char *device, const char *target,
        return;
    }

+    if (has_replaces) {
+        BlockDriverState *to_replace_bs;
+
+        if (!has_node_name) {
+            error_setg(errp, "a node-name must be provided when replacing a"
+                             " named node of the graph");
+            return;
+        }
+
+        to_replace_bs = check_to_replace_node(replaces, &local_err);
+
+        if (!to_replace_bs) {
+            error_propagate(errp, local_err);
+            return;
+        }
+
+        if (size != bdrv_getlength(to_replace_bs)) {
+            error_setg(errp, "cannot replace image with a mirror image of "
+                             "different size");
+            return;
+        }
+    }
+
    if ((sync == MIRROR_SYNC_MODE_FULL || !source)
        && mode != NEW_IMAGE_MODE_EXISTING)
    {
@@ -2208,18 +2272,28 @@ void qmp_drive_mirror(const char *device, const char *target,
        return;
    }

+    if (has_node_name) {
+        options = qdict_new();
+        qdict_put(options, "node-name", qstring_from_str(node_name));
+    }
+
    /* Mirroring takes care of copy-on-write using the source's backing
     * file.
     */
    target_bs = NULL;
-    ret = bdrv_open(&target_bs, target, NULL, NULL, flags | BDRV_O_NO_BACKING,
-                    drv, &local_err);
+    ret = bdrv_open(&target_bs, target, NULL, options,
+                    flags | BDRV_O_NO_BACKING, drv, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
        return;
    }

-    mirror_start(bs, target_bs, speed, granularity, buf_size, sync,
+    /* pass the node name to replace to mirror start since it's loose coupling
+     * and will allow to check whether the node still exist at mirror completion
+     */
+    mirror_start(bs, target_bs,
+                 has_replaces ? replaces : NULL,
+                 speed, granularity, buf_size, sync,
                 on_source_error, on_target_error,
                 block_job_cb, bs, &local_err);
    if (local_err != NULL) {
@@ -2314,6 +2388,85 @@ void qmp_block_job_complete(const char *device, Error **errp)
    block_job_complete(job, errp);
 }

+void qmp_change_backing_file(const char *device,
+                             const char *image_node_name,
+                             const char *backing_file,
+                             Error **errp)
+{
+    BlockDriverState *bs = NULL;
+    BlockDriverState *image_bs = NULL;
+    Error *local_err = NULL;
+    bool ro;
+    int open_flags;
+    int ret;
+
+    /* find the top layer BDS of the chain */
+    bs = bdrv_find(device);
+    if (!bs) {
+        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+        return;
+    }
+
+    image_bs = bdrv_lookup_bs(NULL, image_node_name, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    if (!image_bs) {
+        error_setg(errp, "image file not found");
+        return;
+    }
+
+    if (bdrv_find_base(image_bs) == image_bs) {
+        error_setg(errp, "not allowing backing file change on an image "
+                         "without a backing file");
+        return;
+    }
+
+    /* even though we are not necessarily operating on bs, we need it to
+     * determine if block ops are currently prohibited on the chain */
+    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_CHANGE, errp)) {
+        return;
+    }
+
+    /* final sanity check */
+    if (!bdrv_chain_contains(bs, image_bs)) {
+        error_setg(errp, "'%s' and image file are not in the same chain",
+                   device);
+        return;
+    }
+
+    /* if not r/w, reopen to make r/w */
+    open_flags = image_bs->open_flags;
+    ro = bdrv_is_read_only(image_bs);
+
+    if (ro) {
+        bdrv_reopen(image_bs, open_flags | BDRV_O_RDWR, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+
+    ret = bdrv_change_backing_file(image_bs, backing_file,
+                               image_bs->drv ? image_bs->drv->format_name : "");
+
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Could not change backing file to '%s'",
+                         backing_file);
+        /* don't exit here, so we can try to restore open flags if
+         * appropriate */
+    }
+
+    if (ro) {
+        bdrv_reopen(image_bs, open_flags, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err); /* will preserve prior errp */
+        }
+    }
+}
+
 void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
 {
    QmpOutputVisitor *ov = qmp_output_visitor_new();
--- a/blockjob.c
+++ b/blockjob.c
@@ -187,7 +187,7 @@ int block_job_cancel_sync(BlockJob *job)
    job->opaque = &data;
    block_job_cancel(job);
    while (data.ret == -EINPROGRESS) {
-        qemu_aio_wait();
+        aio_poll(bdrv_get_aio_context(bs), true);
    }
    return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret;
 }
@@ -210,6 +210,20 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns)
    job->busy = true;
 }

+void block_job_yield(BlockJob *job)
+{
+    assert(job->busy);
+
+    /* Check cancellation *before* setting busy = false, too!  */
+    if (block_job_is_cancelled(job)) {
+        return;
+    }
+
+    job->busy = false;
+    qemu_coroutine_yield();
+    job->busy = true;
+}
+
 BlockJobInfo *block_job_query(BlockJob *job)
 {
    BlockJobInfo *info = g_new0(BlockJobInfo, 1);
@@ -256,7 +270,11 @@ void block_job_event_completed(BlockJob *job, const char *msg)

 void block_job_event_ready(BlockJob *job)
 {
-    qapi_event_send_block_job_ready(bdrv_get_device_name(job->bs), &error_abort);
+    qapi_event_send_block_job_ready(job->driver->job_type,
+                                    bdrv_get_device_name(job->bs),
+                                    job->len,
+                                    job->offset,
+                                    job->speed, &error_abort);
 }

 BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
@@ -282,7 +300,7 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
    default:
        abort();
    }
-    qapi_event_send_block_job_error(bdrv_get_device_name(bs),
+    qapi_event_send_block_job_error(bdrv_get_device_name(job->bs),
                                    is_read ? IO_OPERATION_TYPE_READ :
                                    IO_OPERATION_TYPE_WRITE,
                                    action, &error_abort);
--- a/40
+++ b/40
@@ -1489,8 +1489,9 @@ for flag in $gcc_flags; do
    fi
 done

-if test "$stack_protector" != "no" ; then
+if test "$stack_protector" != "no"; then
  gcc_flags="-fstack-protector-strong -fstack-protector-all"
+  sp_on=0
  for flag in $gcc_flags; do
    # We need to check both a compile and a link, since some compiler
    # setups fail only on a .c->.o compile and some only at link time
@@ -1498,9 +1499,15 @@ if test "$stack_protector" != "no" ; then
       compile_prog "-Werror $flag" ""; then
      QEMU_CFLAGS="$QEMU_CFLAGS $flag"
      LIBTOOLFLAGS="$LIBTOOLFLAGS -Wc,$flag"
+      sp_on=1
      break
    fi
  done
+  if test "$stack_protector" = yes; then
+    if test $sp_on = 0; then
+      error_exit "Stack protector not supported"
+    fi
+  fi
 fi

 # Workaround for http://gcc.gnu.org/PR55489.  Happens with -fPIE/-fPIC and
@@ -1711,6 +1718,20 @@ else
    echo big/little test failed
 fi

+##########################################
+# L2TPV3 probe
+
+cat > $TMPC <<EOF
+#include <sys/socket.h>
+#include <linux/ip.h>
+int main(void) { return sizeof(struct mmsghdr); }
+EOF
+if compile_prog "" "" ; then
+  l2tpv3=yes
+else
+  l2tpv3=no
+fi
+
 ##########################################
 # pkg-config probe

@@ -3453,7 +3474,7 @@ fi
 # Do we need libm
 cat > $TMPC << EOF
 #include <math.h>
-int main(void) { return isnan(sin(0.0)); }
+int main(int argc, char **argv) { return isnan(sin((double)argc)); }
 EOF
 if compile_prog "" "" ; then
  :
@@ -4343,6 +4364,9 @@ fi
 if test "$netmap" = "yes" ; then
  echo "CONFIG_NETMAP=y" >> $config_host_mak
 fi
+if test "$l2tpv3" = "yes" ; then
+  echo "CONFIG_L2TPV3=y" >> $config_host_mak
+fi
 if test "$cap_ng" = "yes" ; then
  echo "CONFIG_LIBCAP=y" >> $config_host_mak
 fi
@@ -5273,6 +5297,18 @@ if test "$docs" = "yes" ; then
  mkdir -p QMP
 fi

+# set up qemu-iotests in this build directory
+iotests_common_env="tests/qemu-iotests/common.env"
+iotests_check="tests/qemu-iotests/check"
+
+echo "# Automatically generated by configure - do not modify" > "$iotests_common_env"
+echo >> "$iotests_common_env"
+echo "export PYTHON='$python'" >> "$iotests_common_env"
+
+if [ ! -e "$iotests_check" ]; then
+    symlink "$source_path/$iotests_check" "$iotests_check"
+fi
+
 # Save the configure command line for later reuse.
 cat <<EOD >config.status
 #!/bin/sh
--- a/coroutine-win32.c
+++ b/coroutine-win32.c
@@ -36,8 +36,17 @@ typedef struct
 static __thread CoroutineWin32 leader;
 static __thread Coroutine *current;

-CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
-                                      CoroutineAction action)
+/* This function is marked noinline to prevent GCC from inlining it
+ * into coroutine_trampoline(). If we allow it to do that then it
+ * hoists the code to get the address of the TLS variable "current"
+ * out of the while() loop. This is an invalid transformation because
+ * the SwitchToFiber() call may be called when running thread A but
+ * return in thread B, and so we might be in a different thread
+ * context each time round the loop.
+ */
+CoroutineAction __attribute__((noinline))
+qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+                      CoroutineAction action)
 {
    CoroutineWin32 *from = DO_UPCAST(CoroutineWin32, base, from_);
    CoroutineWin32 *to = DO_UPCAST(CoroutineWin32, base, to_);
--- a/disas/libvixl/Makefile.objs
+++ b/disas/libvixl/Makefile.objs
@@ -3,6 +3,6 @@ libvixl_OBJS = utils.o \
               a64/decoder-a64.o \
               a64/disasm-a64.o

-$(addprefix $(obj)/,$(libvixl_OBJS)): QEMU_CFLAGS += -I$(SRC_PATH)/disas/libvixl
+$(addprefix $(obj)/,$(libvixl_OBJS)): QEMU_CFLAGS := -I$(SRC_PATH)/disas/libvixl $(QEMU_CFLAGS)

 common-obj-$(CONFIG_ARM_A64_DIS) += $(libvixl_OBJS)
--- a/disas/libvixl/README
+++ b/disas/libvixl/README
@@ -2,7 +2,7 @@
 The code in this directory is a subset of libvixl:
 https://github.com/armvixl/vixl
 (specifically, it is the set of files needed for disassembly only,
-taken from libvixl 1.1).
+taken from libvixl 1.4).
 Bugfixes should preferably be sent upstream initially.

 The disassembler does not currently support the entire A64 instruction
--- a/disas/libvixl/a64/disasm-a64.cc
+++ b/disas/libvixl/a64/disasm-a64.cc
@@ -1369,7 +1369,7 @@ int Disassembler::SubstituteImmediateField(Instruction* instr,
        VIXL_ASSERT(format[5] == 'L');
        AppendToOutput("#0x%" PRIx64, instr->ImmMoveWide());
        if (instr->ShiftMoveWide() > 0) {
-          AppendToOutput(", lsl #%d", 16 * instr->ShiftMoveWide());
+          AppendToOutput(", lsl #%" PRId64, 16 * instr->ShiftMoveWide());
        }
      }
      return 8;
@@ -1418,7 +1418,7 @@ int Disassembler::SubstituteImmediateField(Instruction* instr,
    }
    case 'F': {  // IFPSingle, IFPDouble or IFPFBits.
      if (format[3] == 'F') {  // IFPFbits.
-        AppendToOutput("#%d", 64 - instr->FPScale());
+        AppendToOutput("#%" PRId64, 64 - instr->FPScale());
        return 8;
      } else {
        AppendToOutput("#0x%" PRIx64 " (%.4f)", instr->ImmFP(),
@@ -1439,23 +1439,23 @@ int Disassembler::SubstituteImmediateField(Instruction* instr,
      return 5;
    }
    case 'P': {  // IP - Conditional compare.
-      AppendToOutput("#%d", instr->ImmCondCmp());
+      AppendToOutput("#%" PRId64, instr->ImmCondCmp());
      return 2;
    }
    case 'B': {  // Bitfields.
      return SubstituteBitfieldImmediateField(instr, format);
    }
    case 'E': {  // IExtract.
-      AppendToOutput("#%d", instr->ImmS());
+      AppendToOutput("#%" PRId64, instr->ImmS());
      return 8;
    }
    case 'S': {  // IS - Test and branch bit.
-      AppendToOutput("#%d", (instr->ImmTestBranchBit5() << 5) |
-                            instr->ImmTestBranchBit40());
+      AppendToOutput("#%" PRId64, (instr->ImmTestBranchBit5() << 5) |
+                                  instr->ImmTestBranchBit40());
      return 2;
    }
    case 'D': {  // IDebug - HLT and BRK instructions.
-      AppendToOutput("#0x%x", instr->ImmException());
+      AppendToOutput("#0x%" PRIx64, instr->ImmException());
      return 6;
    }
    default: {
@@ -1626,12 +1626,12 @@ int Disassembler::SubstituteExtendField(Instruction* instr,
      (((instr->ExtendMode() == UXTW) && (instr->SixtyFourBits() == 0)) ||
       (instr->ExtendMode() == UXTX))) {
    if (instr->ImmExtendShift() > 0) {
-      AppendToOutput(", lsl #%d", instr->ImmExtendShift());
+      AppendToOutput(", lsl #%" PRId64, instr->ImmExtendShift());
    }
  } else {
    AppendToOutput(", %s", extend_mode[instr->ExtendMode()]);
    if (instr->ImmExtendShift() > 0) {
-      AppendToOutput(" #%d", instr->ImmExtendShift());
+      AppendToOutput(" #%" PRId64, instr->ImmExtendShift());
    }
  }
  return 3;
@@ -1660,7 +1660,7 @@ int Disassembler::SubstituteLSRegOffsetField(Instruction* instr,
  if (!((ext == UXTX) && (shift == 0))) {
    AppendToOutput(", %s", extend_mode[ext]);
    if (shift != 0) {
-      AppendToOutput(" #%d", instr->SizeLS());
+      AppendToOutput(" #%" PRId64, instr->SizeLS());
    }
  }
  return 9;
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -170,6 +170,10 @@ static void dma_bdrv_cb(void *opaque, int ret)
        return;
    }

+    if (dbs->iov.size & ~BDRV_SECTOR_MASK) {
+        qemu_iovec_discard_back(&dbs->iov, dbs->iov.size & ~BDRV_SECTOR_MASK);
+    }
+
    dbs->acb = dbs->io_func(dbs->bs, dbs->sector_num, &dbs->iov,
                            dbs->iov.size / 512, dma_bdrv_cb, dbs);
    assert(dbs->acb);
--- a/docs/aio_notify.promela
+++ b/docs/aio_notify.promela
@@ -0,0 +1,104 @@
+/*
+ * This model describes the interaction between aio_set_dispatching()
+ * and aio_notify().
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This file is in the public domain.  If you really want a license,
+ * the WTFPL will do.
+ *
+ * To simulate it:
+ *     spin -p docs/aio_notify.promela
+ *
+ * To verify it:
+ *     spin -a docs/aio_notify.promela
+ *     gcc -O2 pan.c
+ *     ./a.out -a
+ */
+
+#define MAX   4
+#define LAST  (1 << (MAX - 1))
+#define FINAL ((LAST << 1) - 1)
+
+bool dispatching;
+bool event;
+
+int req, done;
+
+active proctype waiter()
+{
+     int fetch, blocking;
+
+     do
+        :: done != FINAL -> {
+            // Computing "blocking" is separate from execution of the
+            // "bottom half"
+            blocking = (req == 0);
+
+            // This is our "bottom half"
+            atomic { fetch = req; req = 0; }
+            done = done | fetch;
+
+            // Wait for a nudge from the other side
+            do
+                :: event == 1 -> { event = 0; break; }
+                :: !blocking  -> break;
+            od;
+
+            dispatching = 1;
+
+            // If you are simulating this model, you may want to add
+            // something like this here:
+            //
+            //      int foo; foo++; foo++; foo++;
+            //
+            // This only wastes some time and makes it more likely
+            // that the notifier process hits the "fast path".
+
+            dispatching = 0;
+        }
+        :: else -> break;
+    od
+}
+
+active proctype notifier()
+{
+    int next = 1;
+    int sets = 0;
+
+    do
+        :: next <= LAST -> {
+            // generate a request
+            req = req | next;
+            next = next << 1;
+
+            // aio_notify
+            if
+                :: dispatching == 0 -> sets++; event = 1;
+                :: else             -> skip;
+            fi;
+
+            // Test both synchronous and asynchronous delivery
+            if
+                :: 1 -> do
+                            :: req == 0 -> break;
+                        od;
+                :: 1 -> skip;
+            fi;
+        }
+        :: else -> break;
+    od;
+    printf("Skipped %d event_notifier_set\n", MAX - sets);
+}
+
+#define p (done == FINAL)
+
+never  {
+    do
+        :: 1                      // after an arbitrarily long prefix
+        :: p -> break             // p becomes true
+    od;
+    do
+        :: !p -> accept: break    // it then must remains true forever after
+    od
+}
--- a/docs/qapi-code-gen.txt
+++ b/docs/qapi-code-gen.txt
@@ -218,10 +218,10 @@ An example command is:
 === Events ===

 Events are defined with the keyword 'event'.  When 'data' is also specified,
-additional info will be carried on.  Finally there will be C API generated
-in qapi-event.h; when called by QEMU code, a message with timestamp will
-be emitted on the wire.  If timestamp is -1, it means failure to retrieve host
-time.
+additional info will be included in the event.  Finally there will be C API
+generated in qapi-event.h; when called by QEMU code, a message with timestamp
+will be emitted on the wire.  If timestamp is -1, it means failure to retrieve
+host time.

 An example event is:

--- a/docs/qmp/qmp-events.txt
+++ b/docs/qmp/qmp-events.txt
@@ -0,0 +1,627 @@
+                   QEMU Machine Protocol Events
+                   ============================
+
+ACPI_DEVICE_OST
+---------------
+
+Emitted when guest executes ACPI _OST method.
+
+ - data: ACPIOSTInfo type as described in qapi-schema.json
+
+{ "event": "ACPI_DEVICE_OST",
+     "data": { "device": "d1", "slot": "0", "slot-type": "DIMM", "source": 1, "status": 0 } }
+
+BALLOON_CHANGE
+--------------
+
+Emitted when the guest changes the actual BALLOON level. This
+value is equivalent to the 'actual' field return by the
+'query-balloon' command
+
+Data:
+
+- "actual": actual level of the guest memory balloon in bytes (json-number)
+
+Example:
+
+{ "event": "BALLOON_CHANGE",
+    "data": { "actual": 944766976 },
+    "timestamp": { "seconds": 1267020223, "microseconds": 435656 } }
+
+BLOCK_IMAGE_CORRUPTED
+---------------------
+
+Emitted when a disk image is being marked corrupt.
+
+Data:
+
+- "device": Device name (json-string)
+- "msg":    Informative message (e.g., reason for the corruption) (json-string)
+- "offset": If the corruption resulted from an image access, this is the access
+            offset into the image (json-int)
+- "size":   If the corruption resulted from an image access, this is the access
+            size (json-int)
+
+Example:
+
+{ "event": "BLOCK_IMAGE_CORRUPTED",
+    "data": { "device": "ide0-hd0",
+        "msg": "Prevented active L1 table overwrite", "offset": 196608,
+        "size": 65536 },
+    "timestamp": { "seconds": 1378126126, "microseconds": 966463 } }
+
+BLOCK_IO_ERROR
+--------------
+
+Emitted when a disk I/O error occurs.
+
+Data:
+
+- "device": device name (json-string)
+- "operation": I/O operation (json-string, "read" or "write")
+- "action": action that has been taken, it's one of the following (json-string):
+    "ignore": error has been ignored
+    "report": error has been reported to the device
+    "stop": the VM is going to stop because of the error
+
+Example:
+
+{ "event": "BLOCK_IO_ERROR",
+    "data": { "device": "ide0-hd1",
+              "operation": "write",
+              "action": "stop" },
+    "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+
+Note: If action is "stop", a STOP event will eventually follow the
+BLOCK_IO_ERROR event.
+
+BLOCK_JOB_CANCELLED
+-------------------
+
+Emitted when a block job has been cancelled.
+
+Data:
+
+- "type":     Job type (json-string; "stream" for image streaming
+                                     "commit" for block commit)
+- "device":   Device name (json-string)
+- "len":      Maximum progress value (json-int)
+- "offset":   Current progress value (json-int)
+              On success this is equal to len.
+              On failure this is less than len.
+- "speed":    Rate limit, bytes per second (json-int)
+
+Example:
+
+{ "event": "BLOCK_JOB_CANCELLED",
+     "data": { "type": "stream", "device": "virtio-disk0",
+               "len": 10737418240, "offset": 134217728,
+               "speed": 0 },
+     "timestamp": { "seconds": 1267061043, "microseconds": 959568 } }
+
+BLOCK_JOB_COMPLETED
+-------------------
+
+Emitted when a block job has completed.
+
+Data:
+
+- "type":     Job type (json-string; "stream" for image streaming
+                                     "commit" for block commit)
+- "device":   Device name (json-string)
+- "len":      Maximum progress value (json-int)
+- "offset":   Current progress value (json-int)
+              On success this is equal to len.
+              On failure this is less than len.
+- "speed":    Rate limit, bytes per second (json-int)
+- "error":    Error message (json-string, optional)
+              Only present on failure.  This field contains a human-readable
+              error message.  There are no semantics other than that streaming
+              has failed and clients should not try to interpret the error
+              string.
+
+Example:
+
+{ "event": "BLOCK_JOB_COMPLETED",
+     "data": { "type": "stream", "device": "virtio-disk0",
+               "len": 10737418240, "offset": 10737418240,
+               "speed": 0 },
+     "timestamp": { "seconds": 1267061043, "microseconds": 959568 } }
+
+BLOCK_JOB_ERROR
+---------------
+
+Emitted when a block job encounters an error.
+
+Data:
+
+- "device": device name (json-string)
+- "operation": I/O operation (json-string, "read" or "write")
+- "action": action that has been taken, it's one of the following (json-string):
+    "ignore": error has been ignored, the job may fail later
+    "report": error will be reported and the job canceled
+    "stop": error caused job to be paused
+
+Example:
+
+{ "event": "BLOCK_JOB_ERROR",
+    "data": { "device": "ide0-hd1",
+              "operation": "write",
+              "action": "stop" },
+    "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+
+BLOCK_JOB_READY
+---------------
+
+Emitted when a block job is ready to complete.
+
+Data:
+
+- "type":     Job type (json-string; "stream" for image streaming
+                                     "commit" for block commit)
+- "device":   Device name (json-string)
+- "len":      Maximum progress value (json-int)
+- "offset":   Current progress value (json-int)
+              On success this is equal to len.
+              On failure this is less than len.
+- "speed":    Rate limit, bytes per second (json-int)
+
+Example:
+
+{ "event": "BLOCK_JOB_READY",
+    "data": { "device": "drive0", "type": "mirror", "speed": 0,
+              "len": 2097152, "offset": 2097152 }
+    "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+
+Note: The "ready to complete" status is always reset by a BLOCK_JOB_ERROR
+event.
+
+DEVICE_DELETED
+--------------
+
+Emitted whenever the device removal completion is acknowledged
+by the guest.
+At this point, it's safe to reuse the specified device ID.
+Device removal can be initiated by the guest or by HMP/QMP commands.
+
+Data:
+
+- "device": device name (json-string, optional)
+- "path": device path (json-string)
+
+{ "event": "DEVICE_DELETED",
+  "data": { "device": "virtio-net-pci-0",
+            "path": "/machine/peripheral/virtio-net-pci-0" },
+  "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+
+DEVICE_TRAY_MOVED
+-----------------
+
+It's emitted whenever the tray of a removable device is moved by the guest
+or by HMP/QMP commands.
+
+Data:
+
+- "device": device name (json-string)
+- "tray-open": true if the tray has been opened or false if it has been closed
+               (json-bool)
+
+{ "event": "DEVICE_TRAY_MOVED",
+  "data": { "device": "ide1-cd0",
+            "tray-open": true
+  },
+  "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+
+GUEST_PANICKED
+--------------
+
+Emitted when guest OS panic is detected.
+
+Data:
+
+- "action": Action that has been taken (json-string, currently always "pause").
+
+Example:
+
+{ "event": "GUEST_PANICKED",
+     "data": { "action": "pause" } }
+
+NIC_RX_FILTER_CHANGED
+---------------------
+
+The event is emitted once until the query command is executed,
+the first event will always be emitted.
+
+Data:
+
+- "name": net client name (json-string)
+- "path": device path (json-string)
+
+{ "event": "NIC_RX_FILTER_CHANGED",
+  "data": { "name": "vnet0",
+            "path": "/machine/peripheral/vnet0/virtio-backend" },
+  "timestamp": { "seconds": 1368697518, "microseconds": 326866 } }
+}
+
+POWERDOWN
+---------
+
+Emitted when the Virtual Machine is powered down through the power
+control system, such as via ACPI.
+
+Data: None.
+
+Example:
+
+{ "event": "POWERDOWN",
+    "timestamp": { "seconds": 1267040730, "microseconds": 682951 } }
+
+QUORUM_FAILURE
+--------------
+
+Emitted by the Quorum block driver if it fails to establish a quorum.
+
+Data:
+
+- "reference":     device name if defined else node name.
+- "sector-num":    Number of the first sector of the failed read operation.
+- "sectors-count": Failed read operation sector count.
+
+Example:
+
+{ "event": "QUORUM_FAILURE",
+     "data": { "reference": "usr1", "sector-num": 345435, "sectors-count": 5 },
+     "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
+
+QUORUM_REPORT_BAD
+-----------------
+
+Emitted to report a corruption of a Quorum file.
+
+Data:
+
+- "error":         Error message (json-string, optional)
+                   Only present on failure.  This field contains a human-readable
+                   error message.  There are no semantics other than that the
+                   block layer reported an error and clients should not try to
+                   interpret the error string.
+- "node-name":     The graph node name of the block driver state.
+- "sector-num":    Number of the first sector of the failed read operation.
+- "sectors-count": Failed read operation sector count.
+
+Example:
+
+{ "event": "QUORUM_REPORT_BAD",
+     "data": { "node-name": "1.raw", "sector-num": 345435, "sectors-count": 5 },
+     "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
+
+RESET
+-----
+
+Emitted when the Virtual Machine is reset.
+
+Data: None.
+
+Example:
+
+{ "event": "RESET",
+    "timestamp": { "seconds": 1267041653, "microseconds": 9518 } }
+
+RESUME
+------
+
+Emitted when the Virtual Machine resumes execution.
+
+Data: None.
+
+Example:
+
+{ "event": "RESUME",
+    "timestamp": { "seconds": 1271770767, "microseconds": 582542 } }
+
+RTC_CHANGE
+----------
+
+Emitted when the guest changes the RTC time.
+
+Data:
+
+- "offset": Offset between base RTC clock (as specified by -rtc base), and
+new RTC clock value (json-number)
+
+Example:
+
+{ "event": "RTC_CHANGE",
+    "data": { "offset": 78 },
+    "timestamp": { "seconds": 1267020223, "microseconds": 435656 } }
+
+SHUTDOWN
+--------
+
+Emitted when the Virtual Machine has shut down, indicating that qemu
+is about to exit.
+
+Data: None.
+
+Example:
+
+{ "event": "SHUTDOWN",
+    "timestamp": { "seconds": 1267040730, "microseconds": 682951 } }
+
+Note: If the command-line option "-no-shutdown" has been specified, a STOP
+event will eventually follow the SHUTDOWN event.
+
+SPICE_CONNECTED
+---------------
+
+Emitted when a SPICE client connects.
+
+Data:
+
+- "server": Server information (json-object)
+  - "host": IP address (json-string)
+  - "port": port number (json-string)
+  - "family": address family (json-string, "ipv4" or "ipv6")
+- "client": Client information (json-object)
+  - "host": IP address (json-string)
+  - "port": port number (json-string)
+  - "family": address family (json-string, "ipv4" or "ipv6")
+
+Example:
+
+{ "timestamp": {"seconds": 1290688046, "microseconds": 388707},
+  "event": "SPICE_CONNECTED",
+  "data": {
+    "server": { "port": "5920", "family": "ipv4", "host": "127.0.0.1"},
+    "client": {"port": "52873", "family": "ipv4", "host": "127.0.0.1"}
+}}
+
+SPICE_DISCONNECTED
+------------------
+
+Emitted when a SPICE client disconnects.
+
+Data:
+
+- "server": Server information (json-object)
+  - "host": IP address (json-string)
+  - "port": port number (json-string)
+  - "family": address family (json-string, "ipv4" or "ipv6")
+- "client": Client information (json-object)
+  - "host": IP address (json-string)
+  - "port": port number (json-string)
+  - "family": address family (json-string, "ipv4" or "ipv6")
+
+Example:
+
+{ "timestamp": {"seconds": 1290688046, "microseconds": 388707},
+  "event": "SPICE_DISCONNECTED",
+  "data": {
+    "server": { "port": "5920", "family": "ipv4", "host": "127.0.0.1"},
+    "client": {"port": "52873", "family": "ipv4", "host": "127.0.0.1"}
+}}
+
+SPICE_INITIALIZED
+-----------------
+
+Emitted after initial handshake and authentication takes place (if any)
+and the SPICE channel is up and running
+
+Data:
+
+- "server": Server information (json-object)
+  - "host": IP address (json-string)
+  - "port": port number (json-string)
+  - "family": address family (json-string, "ipv4" or "ipv6")
+  - "auth": authentication method (json-string, optional)
+- "client": Client information (json-object)
+  - "host": IP address (json-string)
+  - "port": port number (json-string)
+  - "family": address family (json-string, "ipv4" or "ipv6")
+  - "connection-id": spice connection id.  All channels with the same id
+                     belong to the same spice session (json-int)
+  - "channel-type": channel type.  "1" is the main control channel, filter for
+                    this one if you want track spice sessions only (json-int)
+  - "channel-id": channel id.  Usually "0", might be different needed when
+                  multiple channels of the same type exist, such as multiple
+                  display channels in a multihead setup (json-int)
+  - "tls": whevener the channel is encrypted (json-bool)
+
+Example:
+
+{ "timestamp": {"seconds": 1290688046, "microseconds": 417172},
+  "event": "SPICE_INITIALIZED",
+  "data": {"server": {"auth": "spice", "port": "5921",
+                      "family": "ipv4", "host": "127.0.0.1"},
+           "client": {"port": "49004", "family": "ipv4", "channel-type": 3,
+                      "connection-id": 1804289383, "host": "127.0.0.1",
+                      "channel-id": 0, "tls": true}
+}}
+
+SPICE_MIGRATE_COMPLETED
+-----------------------
+
+Emitted when SPICE migration has completed
+
+Data: None.
+
+Example:
+
+{ "timestamp": {"seconds": 1290688046, "microseconds": 417172},
+  "event": "SPICE_MIGRATE_COMPLETED" }
+
+
+STOP
+----
+
+Emitted when the Virtual Machine is stopped.
+
+Data: None.
+
+Example:
+
+{ "event": "STOP",
+    "timestamp": { "seconds": 1267041730, "microseconds": 281295 } }
+
+SUSPEND
+-------
+
+Emitted when guest enters S3 state.
+
+Data: None.
+
+Example:
+
+{ "event": "SUSPEND",
+     "timestamp": { "seconds": 1344456160, "microseconds": 309119 } }
+
+SUSPEND_DISK
+------------
+
+Emitted when the guest makes a request to enter S4 state.
+
+Data: None.
+
+Example:
+
+{ "event": "SUSPEND_DISK",
+     "timestamp": { "seconds": 1344456160, "microseconds": 309119 } }
+
+Note: QEMU shuts down when entering S4 state.
+
+VNC_CONNECTED
+-------------
+
+Emitted when a VNC client establishes a connection.
+
+Data:
+
+- "server": Server information (json-object)
+  - "host": IP address (json-string)
+  - "service": port number (json-string)
+  - "family": address family (json-string, "ipv4" or "ipv6")
+  - "auth": authentication method (json-string, optional)
+- "client": Client information (json-object)
+  - "host": IP address (json-string)
+  - "service": port number (json-string)
+  - "family": address family (json-string, "ipv4" or "ipv6")
+
+Example:
+
+{ "event": "VNC_CONNECTED",
+    "data": {
+        "server": { "auth": "sasl", "family": "ipv4",
+                    "service": "5901", "host": "0.0.0.0" },
+        "client": { "family": "ipv4", "service": "58425",
+                    "host": "127.0.0.1" } },
+    "timestamp": { "seconds": 1262976601, "microseconds": 975795 } }
+
+
+Note: This event is emitted before any authentication takes place, thus
+the authentication ID is not provided.
+
+VNC_DISCONNECTED
+----------------
+
+Emitted when the connection is closed.
+
+Data:
+
+- "server": Server information (json-object)
+  - "host": IP address (json-string)
+  - "service": port number (json-string)
+  - "family": address family (json-string, "ipv4" or "ipv6")
+  - "auth": authentication method (json-string, optional)
+- "client": Client information (json-object)
+  - "host": IP address (json-string)
+  - "service": port number (json-string)
+  - "family": address family (json-string, "ipv4" or "ipv6")
+  - "x509_dname": TLS dname (json-string, optional)
+  - "sasl_username": SASL username (json-string, optional)
+
+Example:
+
+{ "event": "VNC_DISCONNECTED",
+    "data": {
+        "server": { "auth": "sasl", "family": "ipv4",
+                    "service": "5901", "host": "0.0.0.0" },
+        "client": { "family": "ipv4", "service": "58425",
+                    "host": "127.0.0.1", "sasl_username": "luiz" } },
+    "timestamp": { "seconds": 1262976601, "microseconds": 975795 } }
+
+VNC_INITIALIZED
+---------------
+
+Emitted after authentication takes place (if any) and the VNC session is
+made active.
+
+Data:
+
+- "server": Server information (json-object)
+  - "host": IP address (json-string)
+  - "service": port number (json-string)
+  - "family": address family (json-string, "ipv4" or "ipv6")
+  - "auth": authentication method (json-string, optional)
+- "client": Client information (json-object)
+  - "host": IP address (json-string)
+  - "service": port number (json-string)
+  - "family": address family (json-string, "ipv4" or "ipv6")
+  - "x509_dname": TLS dname (json-string, optional)
+  - "sasl_username": SASL username (json-string, optional)
+
+Example:
+
+{ "event": "VNC_INITIALIZED",
+    "data": {
+        "server": { "auth": "sasl", "family": "ipv4",
+                    "service": "5901", "host": "0.0.0.0"},
+        "client": { "family": "ipv4", "service": "46089",
+                    "host": "127.0.0.1", "sasl_username": "luiz" } },
+        "timestamp": { "seconds": 1263475302, "microseconds": 150772 } }
+
+VSERPORT_CHANGE
+---------------
+
+Emitted when the guest opens or closes a virtio-serial port.
+
+Data:
+
+- "id": device identifier of the virtio-serial port (json-string)
+- "open": true if the guest has opened the virtio-serial port (json-bool)
+
+Example:
+
+{ "event": "VSERPORT_CHANGE",
+    "data": { "id": "channel0", "open": true },
+    "timestamp": { "seconds": 1401385907, "microseconds": 422329 } }
+
+WAKEUP
+------
+
+Emitted when the guest has woken up from S3 and is running.
+
+Data: None.
+
+Example:
+
+{ "event": "WAKEUP",
+     "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
+
+WATCHDOG
+--------
+
+Emitted when the watchdog device's timer is expired.
+
+Data:
+
+- "action": Action that has been taken, it's one of the following (json-string):
+            "reset", "shutdown", "poweroff", "pause", "debug", or "none"
+
+Example:
+
+{ "event": "WATCHDOG",
+     "data": { "action": "reset" },
+     "timestamp": { "seconds": 1267061043, "microseconds": 959568 } }
+
+Note: If action is "reset", "shutdown", or "pause" the WATCHDOG event is
+followed respectively by the RESET, SHUTDOWN, or STOP events.
--- a/docs/specs/vhost-user.txt
+++ b/docs/specs/vhost-user.txt
@@ -78,14 +78,14 @@ Depending on the request type, payload can be:
   Padding: 32-bit

   A region is:
-   ---------------------------------------
-   | guest address | size | user address |
-   ---------------------------------------
+   -----------------------------------------------------
+   | guest address | size | user address | mmap offset |
+   -----------------------------------------------------

   Guest address: a 64-bit guest address of the region
   Size: a 64-bit size
   User address: a 64-bit user address
-
+   mmap offset: 64-bit offset where region starts in the mapped memory

 In QEMU the vhost-user message is implemented with the following struct:

@@ -132,7 +132,7 @@ Message types

 * VHOST_USER_GET_FEATURES

-      Id: 2
+      Id: 1
      Equivalent ioctl: VHOST_GET_FEATURES
      Master payload: N/A
      Slave payload: u64
@@ -141,7 +141,7 @@ Message types

 * VHOST_USER_SET_FEATURES

-      Id: 3
+      Id: 2
      Ioctl: VHOST_SET_FEATURES
      Master payload: u64

@@ -149,7 +149,7 @@ Message types

 * VHOST_USER_SET_OWNER

-      Id: 4
+      Id: 3
      Equivalent ioctl: VHOST_SET_OWNER
      Master payload: N/A

@@ -159,7 +159,7 @@ Message types

 * VHOST_USER_RESET_OWNER

-      Id: 5
+      Id: 4
      Equivalent ioctl: VHOST_RESET_OWNER
      Master payload: N/A

@@ -168,7 +168,7 @@ Message types

 * VHOST_USER_SET_MEM_TABLE

-      Id: 6
+      Id: 5
      Equivalent ioctl: VHOST_SET_MEM_TABLE
      Master payload: memory regions description

@@ -179,7 +179,7 @@ Message types

 * VHOST_USER_SET_LOG_BASE

-      Id: 7
+      Id: 6
      Equivalent ioctl: VHOST_SET_LOG_BASE
      Master payload: u64

@@ -187,7 +187,7 @@ Message types

 * VHOST_USER_SET_LOG_FD

-      Id: 8
+      Id: 7
      Equivalent ioctl: VHOST_SET_LOG_FD
      Master payload: N/A

@@ -195,7 +195,7 @@ Message types

 * VHOST_USER_SET_VRING_NUM

-      Id: 9
+      Id: 8
      Equivalent ioctl: VHOST_SET_VRING_NUM
      Master payload: vring state description

@@ -203,7 +203,7 @@ Message types

 * VHOST_USER_SET_VRING_ADDR

-      Id: 10
+      Id: 9
      Equivalent ioctl: VHOST_SET_VRING_ADDR
      Master payload: vring address description
      Slave payload: N/A
@@ -212,7 +212,7 @@ Message types

 * VHOST_USER_SET_VRING_BASE

-      Id: 11
+      Id: 10
      Equivalent ioctl: VHOST_SET_VRING_BASE
      Master payload: vring state description

@@ -220,7 +220,7 @@ Message types

 * VHOST_USER_GET_VRING_BASE

-      Id: 12
+      Id: 11
      Equivalent ioctl: VHOST_USER_GET_VRING_BASE
      Master payload: vring state description
      Slave payload: vring state description
@@ -229,7 +229,7 @@ Message types

 * VHOST_USER_SET_VRING_KICK

-      Id: 13
+      Id: 12
      Equivalent ioctl: VHOST_SET_VRING_KICK
      Master payload: u64

@@ -242,7 +242,7 @@ Message types

 * VHOST_USER_SET_VRING_CALL

-      Id: 14
+      Id: 13
      Equivalent ioctl: VHOST_SET_VRING_CALL
      Master payload: u64

@@ -255,7 +255,7 @@ Message types

 * VHOST_USER_SET_VRING_ERR

-      Id: 15
+      Id: 14
      Equivalent ioctl: VHOST_SET_VRING_ERR
      Master payload: u64

--- a/exec.c
+++ b/exec.c
@@ -430,15 +430,50 @@ static int cpu_common_post_load(void *opaque, int version_id)
    return 0;
 }

+static int cpu_common_pre_load(void *opaque)
+{
+    CPUState *cpu = opaque;
+
+    cpu->exception_index = 0;
+
+    return 0;
+}
+
+static bool cpu_common_exception_index_needed(void *opaque)
+{
+    CPUState *cpu = opaque;
+
+    return cpu->exception_index != 0;
+}
+
+static const VMStateDescription vmstate_cpu_common_exception_index = {
+    .name = "cpu_common/exception_index",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_INT32(exception_index, CPUState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
+    .pre_load = cpu_common_pre_load,
    .post_load = cpu_common_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
+    },
+    .subsections = (VMStateSubsection[]) {
+        {
+            .vmsd = &vmstate_cpu_common_exception_index,
+            .needed = cpu_common_exception_index_needed,
+        } , {
+            /* empty */
+        }
    }
 };

@@ -883,7 +918,7 @@ static void phys_section_destroy(MemoryRegion *mr)

    if (mr->subpage) {
        subpage_t *subpage = container_of(mr, subpage_t, iomem);
-        memory_region_destroy(&subpage->iomem);
+        object_unref(OBJECT(&subpage->iomem));
        g_free(subpage);
    }
 }
@@ -1456,6 +1491,13 @@ int qemu_get_ram_fd(ram_addr_t addr)
    return block->fd;
 }

+void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
+{
+    RAMBlock *block = qemu_get_ram_block(addr);
+
+    return block->host;
+}
+
 /* Return a host pointer to ram allocated with qemu_ram_alloc.
   With the exception of the softmmu code in this file, this should
   only be used for local memory (e.g. video ram) that the device owns,
@@ -1561,8 +1603,7 @@ static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
    default:
        abort();
    }
-    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
-    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
+    cpu_physical_memory_set_dirty_range_nocode(ram_addr, size);
    /* we remove the notdirty callback only if the code has been
       flushed */
    if (!cpu_physical_memory_is_clean(ram_addr)) {
@@ -1761,7 +1802,7 @@ static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
    mmio->as = as;
    mmio->base = base;
    memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
-                          "subpage", TARGET_PAGE_SIZE);
+                          NULL, TARGET_PAGE_SIZE);
    mmio->iomem.subpage = true;
 #if defined(DEBUG_SUBPAGE)
    printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
@@ -1794,13 +1835,13 @@ MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)

 static void io_mem_init(void)
 {
-    memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
+    memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
    memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
-                          "unassigned", UINT64_MAX);
+                          NULL, UINT64_MAX);
    memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
-                          "notdirty", UINT64_MAX);
+                          NULL, UINT64_MAX);
    memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
-                          "watch", UINT64_MAX);
+                          NULL, UINT64_MAX);
 }

 static void mem_begin(MemoryListener *listener)
@@ -1971,8 +2012,7 @@ static void invalidate_and_set_dirty(hwaddr addr,
        /* invalidate code */
        tb_invalidate_phys_page_range(addr, addr + length, 0);
        /* set dirty bit */
-        cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
-        cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
+        cpu_physical_memory_set_dirty_range_nocode(addr, length);
    }
    xen_modified_memory(addr, length);
 }
@@ -2328,15 +2368,7 @@ void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
        mr = qemu_ram_addr_from_host(buffer, &addr1);
        assert(mr != NULL);
        if (is_write) {
-            while (access_len) {
-                unsigned l;
-                l = TARGET_PAGE_SIZE;
-                if (l > access_len)
-                    l = access_len;
-                invalidate_and_set_dirty(addr1, l);
-                addr1 += l;
-                access_len -= l;
-            }
+            invalidate_and_set_dirty(addr1, access_len);
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
@@ -2574,9 +2606,7 @@ void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
                /* invalidate code */
                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
                /* set dirty bit */
-                cpu_physical_memory_set_dirty_flag(addr1,
-                                                   DIRTY_MEMORY_MIGRATION);
-                cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
+                cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
            }
        }
    }
@@ -2752,14 +2782,12 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
 }
 #endif

-#if !defined(CONFIG_USER_ONLY)
-
 /*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
-bool virtio_is_big_endian(void);
-bool virtio_is_big_endian(void)
+bool target_words_bigendian(void);
+bool target_words_bigendian(void)
 {
 #if defined(TARGET_WORDS_BIGENDIAN)
    return true;
@@ -2768,8 +2796,6 @@ bool virtio_is_big_endian(void)
 #endif
 }

-#endif
-
 #ifndef CONFIG_USER_ONLY
 bool cpu_physical_memory_is_io(hwaddr phys_addr)
 {
--- a/hmp.c
+++ b/hmp.c
@@ -933,6 +933,7 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict)
    }

    qmp_drive_mirror(device, filename, !!format, format,
+                     false, NULL, false, NULL,
                     full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
                     true, mode, false, 0, false, 0, false, 0,
                     false, 0, false, 0, &err);
@@ -1175,7 +1176,7 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict)
    const char *base = qdict_get_try_str(qdict, "base");
    int64_t speed = qdict_get_try_int(qdict, "speed", 0);

-    qmp_block_stream(device, base != NULL, base,
+    qmp_block_stream(device, base != NULL, base, false, NULL,
                     qdict_haskey(qdict, "speed"), speed,
                     true, BLOCKDEV_ON_ERROR_REPORT, &error);

--- a/hw/9pfs/virtio-9p-device.c
+++ b/hw/9pfs/virtio-9p-device.c
@@ -19,6 +19,7 @@
 #include "fsdev/qemu-fsdev.h"
 #include "virtio-9p-xattr.h"
 #include "virtio-9p-coth.h"
+#include "hw/virtio/virtio-access.h"

 static uint32_t virtio_9p_get_features(VirtIODevice *vdev, uint32_t features)
 {
@@ -34,7 +35,7 @@ static void virtio_9p_get_config(VirtIODevice *vdev, uint8_t *config)

    len = strlen(s->tag);
    cfg = g_malloc0(sizeof(struct virtio_9p_config) + len);
-    stw_p(&cfg->tag_len, len);
+    virtio_stw_p(vdev, &cfg->tag_len, len);
    /* We don't copy the terminating null to config space */
    memcpy(cfg->tag, s->tag, len);
    memcpy(config, cfg, s->config_size);
--- a/hw/acpi/ich9.c
+++ b/hw/acpi/ich9.c
@@ -232,11 +232,11 @@ void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm,

    acpi_gpe_init(&pm->acpi_regs, ICH9_PMIO_GPE0_LEN);
    memory_region_init_io(&pm->io_gpe, OBJECT(lpc_pci), &ich9_gpe_ops, pm,
-                          "apci-gpe0", ICH9_PMIO_GPE0_LEN);
+                          "acpi-gpe0", ICH9_PMIO_GPE0_LEN);
    memory_region_add_subregion(&pm->io, ICH9_PMIO_GPE0_STS, &pm->io_gpe);

    memory_region_init_io(&pm->io_smi, OBJECT(lpc_pci), &ich9_smi_ops, pm,
-                          "apci-smi", 8);
+                          "acpi-smi", 8);
    memory_region_add_subregion(&pm->io, ICH9_PMIO_SMI_EN, &pm->io_smi);

    pm->irq = sci_irq;
--- a/hw/acpi/memory_hotplug.c
+++ b/hw/acpi/memory_hotplug.c
@@ -159,7 +159,7 @@ void acpi_memory_hotplug_init(MemoryRegion *as, Object *owner,

    state->devs = g_malloc0(sizeof(*state->devs) * state->dev_count);
    memory_region_init_io(&state->io, owner, &acpi_memory_hotplug_ops, state,
-                          "apci-mem-hotplug", ACPI_MEMORY_HOTPLUG_IO_LEN);
+                          "acpi-mem-hotplug", ACPI_MEMORY_HOTPLUG_IO_LEN);
    memory_region_add_subregion(as, ACPI_MEMORY_HOTPLUG_BASE, &state->io);
 }

--- a/hw/acpi/pcihp.c
+++ b/hw/acpi/pcihp.c
@@ -231,7 +231,7 @@ static uint64_t pci_read(void *opaque, hwaddr addr, unsigned int size)
    uint32_t val = 0;
    int bsel = s->hotplug_select;

-    if (bsel < 0 || bsel > ACPI_PCIHP_MAX_HOTPLUG_BUS) {
+    if (bsel < 0 || bsel >= ACPI_PCIHP_MAX_HOTPLUG_BUS) {
        return 0;
    }

--- a/hw/arm/omap1.c
+++ b/hw/arm/omap1.c
@@ -172,7 +172,7 @@ static void omap_timer_clk_update(void *opaque, int line, int on)
 static void omap_timer_clk_setup(struct omap_mpu_timer_s *timer)
 {
    omap_clk_adduser(timer->clk,
-                    qemu_allocate_irqs(omap_timer_clk_update, timer, 1)[0]);
+                    qemu_allocate_irq(omap_timer_clk_update, timer, 0));
    timer->rate = omap_clk_getrate(timer->clk);
 }

@@ -2098,7 +2098,7 @@ static struct omap_mpuio_s *omap_mpuio_init(MemoryRegion *memory,
                          "omap-mpuio", 0x800);
    memory_region_add_subregion(memory, base, &s->iomem);

-    omap_clk_adduser(clk, qemu_allocate_irqs(omap_mpuio_onoff, s, 1)[0]);
+    omap_clk_adduser(clk, qemu_allocate_irq(omap_mpuio_onoff, s, 0));

    return s;
 }
@@ -2401,7 +2401,7 @@ static struct omap_pwl_s *omap_pwl_init(MemoryRegion *system_memory,
                          "omap-pwl", 0x800);
    memory_region_add_subregion(system_memory, base, &s->iomem);

-    omap_clk_adduser(clk, qemu_allocate_irqs(omap_pwl_clk_update, s, 1)[0]);
+    omap_clk_adduser(clk, qemu_allocate_irq(omap_pwl_clk_update, s, 0));
    return s;
 }

@@ -3485,8 +3485,8 @@ static void omap_mcbsp_i2s_start(void *opaque, int line, int level)
 void omap_mcbsp_i2s_attach(struct omap_mcbsp_s *s, I2SCodec *slave)
 {
    s->codec = slave;
-    slave->rx_swallow = qemu_allocate_irqs(omap_mcbsp_i2s_swallow, s, 1)[0];
-    slave->tx_start = qemu_allocate_irqs(omap_mcbsp_i2s_start, s, 1)[0];
+    slave->rx_swallow = qemu_allocate_irq(omap_mcbsp_i2s_swallow, s, 0);
+    slave->tx_start = qemu_allocate_irq(omap_mcbsp_i2s_start, s, 0);
 }

 /* LED Pulse Generators */
@@ -3634,7 +3634,7 @@ static struct omap_lpg_s *omap_lpg_init(MemoryRegion *system_memory,
    memory_region_init_io(&s->iomem, NULL, &omap_lpg_ops, s, "omap-lpg", 0x800);
    memory_region_add_subregion(system_memory, base, &s->iomem);

-    omap_clk_adduser(clk, qemu_allocate_irqs(omap_lpg_clk_update, s, 1)[0]);
+    omap_clk_adduser(clk, qemu_allocate_irq(omap_lpg_clk_update, s, 0));

    return s;
 }
@@ -3848,7 +3848,7 @@ struct omap_mpu_state_s *omap310_mpu_init(MemoryRegion *system_memory,
    s->sdram_size = sdram_size;
    s->sram_size = OMAP15XX_SRAM_SIZE;

-    s->wakeup = qemu_allocate_irqs(omap_mpu_wakeup, s, 1)[0];
+    s->wakeup = qemu_allocate_irq(omap_mpu_wakeup, s, 0);

    /* Clocks */
    omap_clk_init(s);
--- a/hw/arm/omap2.c
+++ b/hw/arm/omap2.c
@@ -2260,7 +2260,7 @@ struct omap_mpu_state_s *omap2420_mpu_init(MemoryRegion *sysmem,
    s->sdram_size = sdram_size;
    s->sram_size = OMAP242X_SRAM_SIZE;

-    s->wakeup = qemu_allocate_irqs(omap_mpu_wakeup, s, 1)[0];
+    s->wakeup = qemu_allocate_irq(omap_mpu_wakeup, s, 0);

    /* Clocks */
    omap_clk_init(s);
--- a/hw/arm/pxa2xx.c
+++ b/hw/arm/pxa2xx.c
@@ -2052,7 +2052,7 @@ PXA2xxState *pxa270_init(MemoryRegion *address_space,
        fprintf(stderr, "Unable to find CPU definition\n");
        exit(1);
    }
-    s->reset = qemu_allocate_irqs(pxa2xx_reset, s, 1)[0];
+    s->reset = qemu_allocate_irq(pxa2xx_reset, s, 0);

    /* SDRAM & Internal Memory Storage */
    memory_region_init_ram(&s->sdram, NULL, "pxa270.sdram", sdram_size);
@@ -2183,7 +2183,7 @@ PXA2xxState *pxa255_init(MemoryRegion *address_space, unsigned int sdram_size)
        fprintf(stderr, "Unable to find CPU definition\n");
        exit(1);
    }
-    s->reset = qemu_allocate_irqs(pxa2xx_reset, s, 1)[0];
+    s->reset = qemu_allocate_irq(pxa2xx_reset, s, 0);

    /* SDRAM & Internal Memory Storage */
    memory_region_init_ram(&s->sdram, NULL, "pxa255.sdram", sdram_size);
--- a/hw/arm/pxa2xx_gpio.c
+++ b/hw/arm/pxa2xx_gpio.c
@@ -36,7 +36,6 @@ struct PXA2xxGPIOInfo {
    uint32_t rising[PXA2XX_GPIO_BANKS];
    uint32_t falling[PXA2XX_GPIO_BANKS];
    uint32_t status[PXA2XX_GPIO_BANKS];
-    uint32_t gpsr[PXA2XX_GPIO_BANKS];
    uint32_t gafr[PXA2XX_GPIO_BANKS * 2];

    uint32_t prev_level[PXA2XX_GPIO_BANKS];
@@ -162,14 +161,14 @@ static uint64_t pxa2xx_gpio_read(void *opaque, hwaddr offset,
        return s->dir[bank];

    case GPSR:		/* GPIO Pin-Output Set registers */
-        printf("%s: Read from a write-only register " REG_FMT "\n",
-                        __FUNCTION__, offset);
-        return s->gpsr[bank];	/* Return last written value.  */
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "pxa2xx GPIO: read from write only register GPSR\n");
+        return 0;

    case GPCR:		/* GPIO Pin-Output Clear registers */
-        printf("%s: Read from a write-only register " REG_FMT "\n",
-                        __FUNCTION__, offset);
-        return 31337;		/* Specified as unpredictable in the docs.  */
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "pxa2xx GPIO: read from write only register GPCR\n");
+        return 0;

    case GRER:		/* GPIO Rising-Edge Detect Enable registers */
        return s->rising[bank];
@@ -217,7 +216,6 @@ static void pxa2xx_gpio_write(void *opaque, hwaddr offset,
    case GPSR:		/* GPIO Pin-Output Set registers */
        s->olevel[bank] |= value;
        pxa2xx_gpio_handler_update(s);
-        s->gpsr[bank] = value;
        break;

    case GPCR:		/* GPIO Pin-Output Clear registers */
@@ -314,7 +312,6 @@ static const VMStateDescription vmstate_pxa2xx_gpio_regs = {
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
-        VMSTATE_INT32(lines, PXA2xxGPIOInfo),
        VMSTATE_UINT32_ARRAY(ilevel, PXA2xxGPIOInfo, PXA2XX_GPIO_BANKS),
        VMSTATE_UINT32_ARRAY(olevel, PXA2xxGPIOInfo, PXA2XX_GPIO_BANKS),
        VMSTATE_UINT32_ARRAY(dir, PXA2xxGPIOInfo, PXA2XX_GPIO_BANKS),
@@ -322,6 +319,7 @@ static const VMStateDescription vmstate_pxa2xx_gpio_regs = {
        VMSTATE_UINT32_ARRAY(falling, PXA2xxGPIOInfo, PXA2XX_GPIO_BANKS),
        VMSTATE_UINT32_ARRAY(status, PXA2xxGPIOInfo, PXA2XX_GPIO_BANKS),
        VMSTATE_UINT32_ARRAY(gafr, PXA2xxGPIOInfo, PXA2XX_GPIO_BANKS * 2),
+        VMSTATE_UINT32_ARRAY(prev_level, PXA2xxGPIOInfo, PXA2XX_GPIO_BANKS),
        VMSTATE_END_OF_LIST(),
    },
 };
@@ -340,6 +338,7 @@ static void pxa2xx_gpio_class_init(ObjectClass *klass, void *data)
    k->init = pxa2xx_gpio_initfn;
    dc->desc = "PXA2xx GPIO controller";
    dc->props = pxa2xx_gpio_properties;
+    dc->vmsd = &vmstate_pxa2xx_gpio_regs;
 }

 static const TypeInfo pxa2xx_gpio_info = {
--- a/hw/arm/spitz.c
+++ b/hw/arm/spitz.c
@@ -752,7 +752,7 @@ static void spitz_i2c_setup(PXA2xxState *cpu)

    spitz_wm8750_addr(wm, 0, 0);
    qdev_connect_gpio_out(cpu->gpio, SPITZ_GPIO_WM,
-                    qemu_allocate_irqs(spitz_wm8750_addr, wm, 1)[0]);
+                          qemu_allocate_irq(spitz_wm8750_addr, wm, 0));
    /* .. and to the sound interface.  */
    cpu->i2s->opaque = wm;
    cpu->i2s->codec_out = wm8750_dac_dat;
@@ -858,7 +858,7 @@ static void spitz_gpio_setup(PXA2xxState *cpu, int slots)
     * wouldn't guarantee that a guest ever exits the loop.
     */
    spitz_hsync = 0;
-    lcd_hsync = qemu_allocate_irqs(spitz_lcd_hsync_handler, cpu, 1)[0];
+    lcd_hsync = qemu_allocate_irq(spitz_lcd_hsync_handler, cpu, 0);
    pxa2xx_gpio_read_notifier(cpu->gpio, lcd_hsync);
    pxa2xx_lcd_vsync_notifier(cpu->lcd, lcd_hsync);

--- a/hw/arm/strongarm.c
+++ b/hw/arm/strongarm.c
@@ -480,7 +480,6 @@ struct StrongARMGPIOInfo {
    uint32_t rising;
    uint32_t falling;
    uint32_t status;
-    uint32_t gpsr;
    uint32_t gafr;

    uint32_t prev_level;
@@ -544,14 +543,14 @@ static uint64_t strongarm_gpio_read(void *opaque, hwaddr offset,
        return s->dir;

    case GPSR:        /* GPIO Pin-Output Set registers */
-        DPRINTF("%s: Read from a write-only register 0x" TARGET_FMT_plx "\n",
-                        __func__, offset);
-        return s->gpsr;    /* Return last written value.  */
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "strongarm GPIO: read from write only register GPSR\n");
+        return 0;

    case GPCR:        /* GPIO Pin-Output Clear registers */
-        DPRINTF("%s: Read from a write-only register 0x" TARGET_FMT_plx "\n",
-                        __func__, offset);
-        return 31337;        /* Specified as unpredictable in the docs.  */
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "strongarm GPIO: read from write only register GPCR\n");
+        return 0;

    case GRER:        /* GPIO Rising-Edge Detect Enable registers */
        return s->rising;
@@ -590,7 +589,6 @@ static void strongarm_gpio_write(void *opaque, hwaddr offset,
    case GPSR:        /* GPIO Pin-Output Set registers */
        s->olevel |= value;
        strongarm_gpio_handler_update(s);
-        s->gpsr = value;
        break;

    case GPCR:        /* GPIO Pin-Output Clear registers */
@@ -676,6 +674,7 @@ static const VMStateDescription vmstate_strongarm_gpio_regs = {
        VMSTATE_UINT32(falling, StrongARMGPIOInfo),
        VMSTATE_UINT32(status, StrongARMGPIOInfo),
        VMSTATE_UINT32(gafr, StrongARMGPIOInfo),
+        VMSTATE_UINT32(prev_level, StrongARMGPIOInfo),
        VMSTATE_END_OF_LIST(),
    },
 };
@@ -687,6 +686,7 @@ static void strongarm_gpio_class_init(ObjectClass *klass, void *data)

    k->init = strongarm_gpio_initfn;
    dc->desc = "StrongARM GPIO controller";
+    dc->vmsd = &vmstate_strongarm_gpio_regs;
 }

 static const TypeInfo strongarm_gpio_info = {
@@ -846,6 +846,7 @@ static const VMStateDescription vmstate_strongarm_ppc_regs = {
        VMSTATE_UINT32(ppar, StrongARMPPCInfo),
        VMSTATE_UINT32(psdr, StrongARMPPCInfo),
        VMSTATE_UINT32(ppfr, StrongARMPPCInfo),
+        VMSTATE_UINT32(prev_level, StrongARMPPCInfo),
        VMSTATE_END_OF_LIST(),
    },
 };
@@ -857,6 +858,7 @@ static void strongarm_ppc_class_init(ObjectClass *klass, void *data)

    k->init = strongarm_ppc_init;
    dc->desc = "StrongARM PPC controller";
+    dc->vmsd = &vmstate_strongarm_ppc_regs;
 }

 static const TypeInfo strongarm_ppc_info = {
--- a/hw/arm/vexpress.c
+++ b/hw/arm/vexpress.c
@@ -84,6 +84,7 @@ enum {
 };

 static hwaddr motherboard_legacy_map[] = {
+    [VE_NORFLASHALIAS] = 0,
    /* CS7: 0x10000000 .. 0x10020000 */
    [VE_SYSREGS] = 0x10000000,
    [VE_SP810] = 0x10001000,
@@ -114,7 +115,6 @@ static hwaddr motherboard_legacy_map[] = {
    [VE_VIDEORAM] = 0x4c000000,
    [VE_ETHERNET] = 0x4e000000,
    [VE_USB] = 0x4f000000,
-    [VE_NORFLASHALIAS] = -1, /* not present */
 };

 static hwaddr motherboard_aseries_map[] = {
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -65,6 +65,7 @@ enum {
    VIRT_GIC_CPU,
    VIRT_UART,
    VIRT_MMIO,
+    VIRT_RTC,
 };

 typedef struct MemMapEntry {
@@ -92,6 +93,8 @@ typedef struct VirtBoardInfo {
 * high memory region beyond 4GB).
 * This represents a compromise between how much RAM can be given to
 * a 32 bit VM and leaving space for expansion and in particular for PCI.
+ * Note that devices should generally be placed at multiples of 0x10000,
+ * to accommodate guests using 64K pages.
 */
 static const MemMapEntry a15memmap[] = {
    /* Space up to 0x8000000 is reserved for a boot ROM */
@@ -101,6 +104,7 @@ static const MemMapEntry a15memmap[] = {
    [VIRT_GIC_DIST] = { 0x8000000, 0x10000 },
    [VIRT_GIC_CPU] = { 0x8010000, 0x10000 },
    [VIRT_UART] = { 0x9000000, 0x1000 },
+    [VIRT_RTC] = { 0x9010000, 0x1000 },
    [VIRT_MMIO] = { 0xa000000, 0x200 },
    /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */
    /* 0x10000000 .. 0x40000000 reserved for PCI */
@@ -109,6 +113,7 @@ static const MemMapEntry a15memmap[] = {

 static const int a15irqmap[] = {
    [VIRT_UART] = 1,
+    [VIRT_RTC] = 2,
    [VIRT_MMIO] = 16, /* ...to 16 + NUM_VIRTIO_TRANSPORTS - 1 */
 };

@@ -189,20 +194,41 @@ static void fdt_add_psci_node(const VirtBoardInfo *vbi)

    /* No PSCI for TCG yet */
    if (kvm_enabled()) {
+        uint32_t cpu_suspend_fn;
+        uint32_t cpu_off_fn;
+        uint32_t cpu_on_fn;
+        uint32_t migrate_fn;
+
        qemu_fdt_add_subnode(fdt, "/psci");
        if (armcpu->psci_version == 2) {
            const char comp[] = "arm,psci-0.2\0arm,psci";
            qemu_fdt_setprop(fdt, "/psci", "compatible", comp, sizeof(comp));
+
+            cpu_off_fn = QEMU_PSCI_0_2_FN_CPU_OFF;
+            if (arm_feature(&armcpu->env, ARM_FEATURE_AARCH64)) {
+                cpu_suspend_fn = QEMU_PSCI_0_2_FN64_CPU_SUSPEND;
+                cpu_on_fn = QEMU_PSCI_0_2_FN64_CPU_ON;
+                migrate_fn = QEMU_PSCI_0_2_FN64_MIGRATE;
+            } else {
+                cpu_suspend_fn = QEMU_PSCI_0_2_FN_CPU_SUSPEND;
+                cpu_on_fn = QEMU_PSCI_0_2_FN_CPU_ON;
+                migrate_fn = QEMU_PSCI_0_2_FN_MIGRATE;
+            }
        } else {
            qemu_fdt_setprop_string(fdt, "/psci", "compatible", "arm,psci");
+
+            cpu_suspend_fn = QEMU_PSCI_0_1_FN_CPU_SUSPEND;
+            cpu_off_fn = QEMU_PSCI_0_1_FN_CPU_OFF;
+            cpu_on_fn = QEMU_PSCI_0_1_FN_CPU_ON;
+            migrate_fn = QEMU_PSCI_0_1_FN_MIGRATE;
        }

        qemu_fdt_setprop_string(fdt, "/psci", "method", "hvc");
-        qemu_fdt_setprop_cell(fdt, "/psci", "cpu_suspend",
-                                  PSCI_FN_CPU_SUSPEND);
-        qemu_fdt_setprop_cell(fdt, "/psci", "cpu_off", PSCI_FN_CPU_OFF);
-        qemu_fdt_setprop_cell(fdt, "/psci", "cpu_on", PSCI_FN_CPU_ON);
-        qemu_fdt_setprop_cell(fdt, "/psci", "migrate", PSCI_FN_MIGRATE);
+
+        qemu_fdt_setprop_cell(fdt, "/psci", "cpu_suspend", cpu_suspend_fn);
+        qemu_fdt_setprop_cell(fdt, "/psci", "cpu_off", cpu_off_fn);
+        qemu_fdt_setprop_cell(fdt, "/psci", "cpu_on", cpu_on_fn);
+        qemu_fdt_setprop_cell(fdt, "/psci", "migrate", migrate_fn);
    }
 }

@@ -353,6 +379,29 @@ static void create_uart(const VirtBoardInfo *vbi, qemu_irq *pic)
    g_free(nodename);
 }

+static void create_rtc(const VirtBoardInfo *vbi, qemu_irq *pic)
+{
+    char *nodename;
+    hwaddr base = vbi->memmap[VIRT_RTC].base;
+    hwaddr size = vbi->memmap[VIRT_RTC].size;
+    int irq = vbi->irqmap[VIRT_RTC];
+    const char compat[] = "arm,pl031\0arm,primecell";
+
+    sysbus_create_simple("pl031", base, pic[irq]);
+
+    nodename = g_strdup_printf("/pl031@%" PRIx64, base);
+    qemu_fdt_add_subnode(vbi->fdt, nodename);
+    qemu_fdt_setprop(vbi->fdt, nodename, "compatible", compat, sizeof(compat));
+    qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+                                 2, base, 2, size);
+    qemu_fdt_setprop_cells(vbi->fdt, nodename, "interrupts",
+                           GIC_FDT_IRQ_TYPE_SPI, irq,
+                           GIC_FDT_IRQ_FLAGS_EDGE_LO_HI);
+    qemu_fdt_setprop_cell(vbi->fdt, nodename, "clocks", vbi->clock_phandle);
+    qemu_fdt_setprop_string(vbi->fdt, nodename, "clock-names", "apb_pclk");
+    g_free(nodename);
+}
+
 static void create_virtio_devices(const VirtBoardInfo *vbi, qemu_irq *pic)
 {
    int i;
@@ -469,6 +518,8 @@ static void machvirt_init(MachineState *machine)

    create_uart(vbi, pic);

+    create_rtc(vbi, pic);
+
    /* Create mmio transports, so the user can create virtio backends
     * (which will be automatically plugged in to the transports). If
     * no backend is created the transport will just sit harmlessly idle.
--- a/hw/arm/z2.c
+++ b/hw/arm/z2.c
@@ -363,7 +363,7 @@ static void z2_init(MachineState *machine)
    wm8750_data_req_set(wm, mpu->i2s->data_req, mpu->i2s);

    qdev_connect_gpio_out(mpu->gpio, Z2_GPIO_LCD_CS,
-        qemu_allocate_irqs(z2_lcd_cs, z2_lcd, 1)[0]);
+                          qemu_allocate_irq(z2_lcd_cs, z2_lcd, 0));

    z2_binfo.kernel_filename = kernel_filename;
    z2_binfo.kernel_cmdline = kernel_cmdline;
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -24,16 +24,6 @@
 #include "hw/virtio/virtio-bus.h"
 #include "qom/object_interfaces.h"

-typedef struct {
-    VirtIOBlockDataPlane *s;
-    QEMUIOVector *inhdr;            /* iovecs for virtio_blk_inhdr */
-    VirtQueueElement *elem;         /* saved data from the virtqueue */
-    QEMUIOVector qiov;              /* original request iovecs */
-    struct iovec bounce_iov;        /* used if guest buffers are unaligned */
-    QEMUIOVector bounce_qiov;       /* bounce buffer iovecs */
-    bool read;                      /* read or write? */
-} VirtIOBlockRequest;
-
 struct VirtIOBlockDataPlane {
    bool started;
    bool starting;
@@ -44,6 +34,7 @@ struct VirtIOBlockDataPlane {
    VirtIODevice *vdev;
    Vring vring;                    /* virtqueue vring */
    EventNotifier *guest_notifier;  /* irq */
+    QEMUBH *bh;                     /* bh for guest notification */

    /* Note that these EventNotifiers are assigned by value.  This is
     * fine as long as you do not call event_notifier_cleanup on them
@@ -57,6 +48,8 @@ struct VirtIOBlockDataPlane {

    /* Operation blocker on BDS */
    Error *blocker;
+    void (*saved_complete_request)(struct VirtIOBlockReq *req,
+                                   unsigned char status);
 };

 /* Raise an interrupt to signal guest, if necessary */
@@ -69,248 +62,65 @@ static void notify_guest(VirtIOBlockDataPlane *s)
    event_notifier_set(s->guest_notifier);
 }

-static void complete_rdwr(void *opaque, int ret)
+static void notify_guest_bh(void *opaque)
 {
-    VirtIOBlockRequest *req = opaque;
-    struct virtio_blk_inhdr hdr;
-    int len;
+    VirtIOBlockDataPlane *s = opaque;

-    if (likely(ret == 0)) {
-        hdr.status = VIRTIO_BLK_S_OK;
-        len = req->qiov.size;
-    } else {
-        hdr.status = VIRTIO_BLK_S_IOERR;
-        len = 0;
-    }
-
-    trace_virtio_blk_data_plane_complete_request(req->s, req->elem->index, ret);
-
-    if (req->read && req->bounce_iov.iov_base) {
-        qemu_iovec_from_buf(&req->qiov, 0, req->bounce_iov.iov_base, len);
-    }
-
-    if (req->bounce_iov.iov_base) {
-        qemu_vfree(req->bounce_iov.iov_base);
-    }
-
-    qemu_iovec_from_buf(req->inhdr, 0, &hdr, sizeof(hdr));
-    qemu_iovec_destroy(req->inhdr);
-    g_slice_free(QEMUIOVector, req->inhdr);
-
-    /* According to the virtio specification len should be the number of bytes
-     * written to, but for virtio-blk it seems to be the number of bytes
-     * transferred plus the status bytes.
-     */
-    vring_push(&req->s->vring, req->elem, len + sizeof(hdr));
-    notify_guest(req->s);
-    g_slice_free(VirtIOBlockRequest, req);
-}
-
-static void complete_request_early(VirtIOBlockDataPlane *s, VirtQueueElement *elem,
-                                   QEMUIOVector *inhdr, unsigned char status)
-{
-    struct virtio_blk_inhdr hdr = {
-        .status = status,
-    };
-
-    qemu_iovec_from_buf(inhdr, 0, &hdr, sizeof(hdr));
-    qemu_iovec_destroy(inhdr);
-    g_slice_free(QEMUIOVector, inhdr);
-
-    vring_push(&s->vring, elem, sizeof(hdr));
    notify_guest(s);
 }

-/* Get disk serial number */
-static void do_get_id_cmd(VirtIOBlockDataPlane *s,
-                          struct iovec *iov, unsigned int iov_cnt,
-                          VirtQueueElement *elem, QEMUIOVector *inhdr)
+static void complete_request_vring(VirtIOBlockReq *req, unsigned char status)
 {
-    char id[VIRTIO_BLK_ID_BYTES];
+    VirtIOBlockDataPlane *s = req->dev->dataplane;
+    stb_p(&req->in->status, status);

-    /* Serial number not NUL-terminated when longer than buffer */
-    strncpy(id, s->blk->serial ? s->blk->serial : "", sizeof(id));
-    iov_from_buf(iov, iov_cnt, 0, id, sizeof(id));
-    complete_request_early(s, elem, inhdr, VIRTIO_BLK_S_OK);
-}
+    vring_push(&req->dev->dataplane->vring, &req->elem,
+               req->qiov.size + sizeof(*req->in));

-static void do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read,
-                        struct iovec *iov, unsigned iov_cnt,
-                        int64_t sector_num, VirtQueueElement *elem,
-                        QEMUIOVector *inhdr)
-{
-    VirtIOBlockRequest *req = g_slice_new0(VirtIOBlockRequest);
-    QEMUIOVector *qiov;
-    int nb_sectors;
-
-    /* Fill in virtio block metadata needed for completion */
-    req->s = s;
-    req->elem = elem;
-    req->inhdr = inhdr;
-    req->read = read;
-    qemu_iovec_init_external(&req->qiov, iov, iov_cnt);
-
-    qiov = &req->qiov;
-
-    if (!bdrv_qiov_is_aligned(s->blk->conf.bs, qiov)) {
-        void *bounce_buffer = qemu_blockalign(s->blk->conf.bs, qiov->size);
-
-        /* Populate bounce buffer with data for writes */
-        if (!read) {
-            qemu_iovec_to_buf(qiov, 0, bounce_buffer, qiov->size);
-        }
-
-        /* Redirect I/O to aligned bounce buffer */
-        req->bounce_iov.iov_base = bounce_buffer;
-        req->bounce_iov.iov_len = qiov->size;
-        qemu_iovec_init_external(&req->bounce_qiov, &req->bounce_iov, 1);
-        qiov = &req->bounce_qiov;
-    }
-
-    nb_sectors = qiov->size / BDRV_SECTOR_SIZE;
-
-    if (read) {
-        bdrv_aio_readv(s->blk->conf.bs, sector_num, qiov, nb_sectors,
-                       complete_rdwr, req);
-    } else {
-        bdrv_aio_writev(s->blk->conf.bs, sector_num, qiov, nb_sectors,
-                        complete_rdwr, req);
-    }
-}
-
-static void complete_flush(void *opaque, int ret)
-{
-    VirtIOBlockRequest *req = opaque;
-    unsigned char status;
-
-    if (ret == 0) {
-        status = VIRTIO_BLK_S_OK;
-    } else {
-        status = VIRTIO_BLK_S_IOERR;
-    }
-
-    complete_request_early(req->s, req->elem, req->inhdr, status);
-    g_slice_free(VirtIOBlockRequest, req);
-}
-
-static void do_flush_cmd(VirtIOBlockDataPlane *s, VirtQueueElement *elem,
-                         QEMUIOVector *inhdr)
-{
-    VirtIOBlockRequest *req = g_slice_new(VirtIOBlockRequest);
-    req->s = s;
-    req->elem = elem;
-    req->inhdr = inhdr;
-
-    bdrv_aio_flush(s->blk->conf.bs, complete_flush, req);
-}
-
-static void do_scsi_cmd(VirtIOBlockDataPlane *s, VirtQueueElement *elem,
-                        QEMUIOVector *inhdr)
-{
-    int status;
-
-    status = virtio_blk_handle_scsi_req(VIRTIO_BLK(s->vdev), elem);
-    complete_request_early(s, elem, inhdr, status);
-}
-
-static int process_request(VirtIOBlockDataPlane *s, VirtQueueElement *elem)
-{
-    struct iovec *iov = elem->out_sg;
-    struct iovec *in_iov = elem->in_sg;
-    unsigned out_num = elem->out_num;
-    unsigned in_num = elem->in_num;
-    struct virtio_blk_outhdr outhdr;
-    QEMUIOVector *inhdr;
-    size_t in_size;
-
-    /* Copy in outhdr */
-    if (unlikely(iov_to_buf(iov, out_num, 0, &outhdr,
-                            sizeof(outhdr)) != sizeof(outhdr))) {
-        error_report("virtio-blk request outhdr too short");
-        return -EFAULT;
-    }
-    iov_discard_front(&iov, &out_num, sizeof(outhdr));
-
-    /* Grab inhdr for later */
-    in_size = iov_size(in_iov, in_num);
-    if (in_size < sizeof(struct virtio_blk_inhdr)) {
-        error_report("virtio_blk request inhdr too short");
-        return -EFAULT;
-    }
-    inhdr = g_slice_new(QEMUIOVector);
-    qemu_iovec_init(inhdr, 1);
-    qemu_iovec_concat_iov(inhdr, in_iov, in_num,
-            in_size - sizeof(struct virtio_blk_inhdr),
-            sizeof(struct virtio_blk_inhdr));
-    iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr));
-
-    /* TODO Linux sets the barrier bit even when not advertised! */
-    outhdr.type &= ~VIRTIO_BLK_T_BARRIER;
-
-    switch (outhdr.type) {
-    case VIRTIO_BLK_T_IN:
-        do_rdwr_cmd(s, true, in_iov, in_num,
-                    outhdr.sector * 512 / BDRV_SECTOR_SIZE,
-                    elem, inhdr);
-        return 0;
-
-    case VIRTIO_BLK_T_OUT:
-        do_rdwr_cmd(s, false, iov, out_num,
-                    outhdr.sector * 512 / BDRV_SECTOR_SIZE,
-                    elem, inhdr);
-        return 0;
-
-    case VIRTIO_BLK_T_SCSI_CMD:
-        do_scsi_cmd(s, elem, inhdr);
-        return 0;
-
-    case VIRTIO_BLK_T_FLUSH:
-        do_flush_cmd(s, elem, inhdr);
-        return 0;
-
-    case VIRTIO_BLK_T_GET_ID:
-        do_get_id_cmd(s, in_iov, in_num, elem, inhdr);
-        return 0;
-
-    default:
-        error_report("virtio-blk unsupported request type %#x", outhdr.type);
-        qemu_iovec_destroy(inhdr);
-        g_slice_free(QEMUIOVector, inhdr);
-        return -EFAULT;
-    }
+    /* Suppress notification to guest by BH and its scheduled
+     * flag because requests are completed as a batch after io
+     * plug & unplug is introduced, and the BH can still be
+     * executed in dataplane aio context even after it is
+     * stopped, so needn't worry about notification loss with BH.
+     */
+    qemu_bh_schedule(s->bh);
 }

 static void handle_notify(EventNotifier *e)
 {
    VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane,
                                           host_notifier);
-
-    VirtQueueElement *elem;
-    int ret;
+    VirtIOBlock *vblk = VIRTIO_BLK(s->vdev);

    event_notifier_test_and_clear(&s->host_notifier);
+    bdrv_io_plug(s->blk->conf.bs);
    for (;;) {
+        MultiReqBuffer mrb = {
+            .num_writes = 0,
+        };
+        int ret;
+
        /* Disable guest->host notifies to avoid unnecessary vmexits */
        vring_disable_notification(s->vdev, &s->vring);

        for (;;) {
-            ret = vring_pop(s->vdev, &s->vring, &elem);
+            VirtIOBlockReq *req = virtio_blk_alloc_request(vblk);
+
+            ret = vring_pop(s->vdev, &s->vring, &req->elem);
            if (ret < 0) {
-                assert(elem == NULL);
+                virtio_blk_free_request(req);
                break; /* no more requests */
            }

-            trace_virtio_blk_data_plane_process_request(s, elem->out_num,
-                                                        elem->in_num, elem->index);
+            trace_virtio_blk_data_plane_process_request(s, req->elem.out_num,
+                                                        req->elem.in_num,
+                                                        req->elem.index);

-            if (process_request(s, elem) < 0) {
-                vring_set_broken(&s->vring);
-                vring_free_element(elem);
-                ret = -EFAULT;
-                break;
-            }
+            virtio_blk_handle_request(req, &mrb);
        }

+        virtio_submit_multiwrite(s->blk->conf.bs, &mrb);
+
        if (likely(ret == -EAGAIN)) { /* vring emptied */
            /* Re-enable guest->host notifies and stop processing the vring.
             * But if the guest has snuck in more descriptors, keep processing.
@@ -322,6 +132,7 @@ static void handle_notify(EventNotifier *e)
            break;
        }
    }
+    bdrv_io_unplug(s->blk->conf.bs);
 }

 /* Context: QEMU global mutex held */
@@ -331,10 +142,20 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
 {
    VirtIOBlockDataPlane *s;
    Error *local_err = NULL;
+    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
+    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    *dataplane = NULL;

-    if (!blk->data_plane) {
+    if (!blk->data_plane && !blk->iothread) {
+        return;
+    }
+
+    /* Don't try if transport does not support notifiers. */
+    if (!k->set_guest_notifiers || !k->set_host_notifier) {
+        error_setg(errp,
+                   "device is incompatible with x-data-plane "
+                   "(transport does not support notifiers)");
        return;
    }

@@ -367,6 +188,7 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
        s->iothread = &s->internal_iothread_obj;
    }
    s->ctx = iothread_get_aio_context(s->iothread);
+    s->bh = aio_bh_new(s->ctx, notify_guest_bh, s);

    error_setg(&s->blocker, "block device is in use by data plane");
    bdrv_op_block_all(blk->conf.bs, s->blocker);
@@ -385,6 +207,7 @@ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s)
    bdrv_op_unblock_all(s->blk->conf.bs, s->blocker);
    error_free(s->blocker);
    object_unref(OBJECT(s->iothread));
+    qemu_bh_delete(s->bh);
    g_free(s);
 }

@@ -393,6 +216,7 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
 {
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s->vdev)));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+    VirtIOBlock *vblk = VIRTIO_BLK(s->vdev);
    VirtQueue *vq;

    if (s->started) {
@@ -426,6 +250,9 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
    }
    s->host_notifier = *virtio_queue_get_host_notifier(vq);

+    s->saved_complete_request = vblk->complete_request;
+    vblk->complete_request = complete_request_vring;
+
    s->starting = false;
    s->started = true;
    trace_virtio_blk_data_plane_start(s);
@@ -446,10 +273,12 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s)
 {
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s->vdev)));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+    VirtIOBlock *vblk = VIRTIO_BLK(s->vdev);
    if (!s->started || s->stopping) {
        return;
    }
    s->stopping = true;
+    vblk->complete_request = s->saved_complete_request;
    trace_virtio_blk_data_plane_stop(s);

    aio_context_acquire(s->ctx);
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -12,6 +12,7 @@
 */

 #include "qemu-common.h"
+#include "qemu/iov.h"
 #include "qemu/error-report.h"
 #include "trace.h"
 #include "hw/block/block.h"
@@ -26,19 +27,26 @@
 # include <scsi/sg.h>
 #endif
 #include "hw/virtio/virtio-bus.h"
+#include "hw/virtio/virtio-access.h"

-typedef struct VirtIOBlockReq
+VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
 {
-    VirtIOBlock *dev;
-    VirtQueueElement elem;
-    struct virtio_blk_inhdr *in;
-    struct virtio_blk_outhdr *out;
-    QEMUIOVector qiov;
-    struct VirtIOBlockReq *next;
-    BlockAcctCookie acct;
-} VirtIOBlockReq;
+    VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq);
+    req->dev = s;
+    req->qiov.size = 0;
+    req->next = NULL;
+    return req;
+}

-static void virtio_blk_req_complete(VirtIOBlockReq *req, int status)
+void virtio_blk_free_request(VirtIOBlockReq *req)
+{
+    if (req) {
+        g_slice_free(VirtIOBlockReq, req);
+    }
+}
+
+static void virtio_blk_complete_request(VirtIOBlockReq *req,
+                                        unsigned char status)
 {
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);
@@ -50,6 +58,11 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, int status)
    virtio_notify(vdev, s->vq);
 }

+static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status)
+{
+    req->dev->complete_request(req, status);
+}
+
 static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
    bool is_read)
 {
@@ -62,7 +75,7 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
    } else if (action == BLOCK_ERROR_ACTION_REPORT) {
        virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
        bdrv_acct_done(s->bs, &req->acct);
-        g_free(req);
+        virtio_blk_free_request(req);
    }

    bdrv_error_action(s->bs, action, is_read, error);
@@ -76,14 +89,15 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
    trace_virtio_blk_rw_complete(req, ret);

    if (ret) {
-        bool is_read = !(ldl_p(&req->out->type) & VIRTIO_BLK_T_OUT);
+        int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
+        bool is_read = !(p & VIRTIO_BLK_T_OUT);
        if (virtio_blk_handle_rw_error(req, -ret, is_read))
            return;
    }

    virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
    bdrv_acct_done(req->dev->bs, &req->acct);
-    g_free(req);
+    virtio_blk_free_request(req);
 }

 static void virtio_blk_flush_complete(void *opaque, int ret)
@@ -98,27 +112,16 @@ static void virtio_blk_flush_complete(void *opaque, int ret)

    virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
    bdrv_acct_done(req->dev->bs, &req->acct);
-    g_free(req);
-}
-
-static VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
-{
-    VirtIOBlockReq *req = g_malloc(sizeof(*req));
-    req->dev = s;
-    req->qiov.size = 0;
-    req->next = NULL;
-    return req;
+    virtio_blk_free_request(req);
 }

 static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s)
 {
    VirtIOBlockReq *req = virtio_blk_alloc_request(s);

-    if (req != NULL) {
-        if (!virtqueue_pop(s->vq, &req->elem)) {
-            g_free(req);
-            return NULL;
-        }
+    if (!virtqueue_pop(s->vq, &req->elem)) {
+        virtio_blk_free_request(req);
+        return NULL;
    }

    return req;
@@ -129,6 +132,8 @@ int virtio_blk_handle_scsi_req(VirtIOBlock *blk,
 {
    int status = VIRTIO_BLK_S_OK;
    struct virtio_scsi_inhdr *scsi = NULL;
+    VirtIODevice *vdev = VIRTIO_DEVICE(blk);
+
 #ifdef __linux__
    int i;
    struct sg_io_hdr hdr;
@@ -223,12 +228,12 @@ int virtio_blk_handle_scsi_req(VirtIOBlock *blk,
        hdr.status = CHECK_CONDITION;
    }

-    stl_p(&scsi->errors,
-          hdr.status | (hdr.msg_status << 8) |
-          (hdr.host_status << 16) | (hdr.driver_status << 24));
-    stl_p(&scsi->residual, hdr.resid);
-    stl_p(&scsi->sense_len, hdr.sb_len_wr);
-    stl_p(&scsi->data_len, hdr.dxfer_len);
+    virtio_stl_p(vdev, &scsi->errors,
+                 hdr.status | (hdr.msg_status << 8) |
+                 (hdr.host_status << 16) | (hdr.driver_status << 24));
+    virtio_stl_p(vdev, &scsi->residual, hdr.resid);
+    virtio_stl_p(vdev, &scsi->sense_len, hdr.sb_len_wr);
+    virtio_stl_p(vdev, &scsi->data_len, hdr.dxfer_len);

    return status;
 #else
@@ -238,7 +243,7 @@ int virtio_blk_handle_scsi_req(VirtIOBlock *blk,
 fail:
    /* Just put anything nonzero so that the ioctl fails in the guest.  */
    if (scsi) {
-        stl_p(&scsi->errors, 255);
+        virtio_stl_p(vdev, &scsi->errors, 255);
    }
    return status;
 }
@@ -249,15 +254,10 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)

    status = virtio_blk_handle_scsi_req(req->dev, &req->elem);
    virtio_blk_req_complete(req, status);
-    g_free(req);
+    virtio_blk_free_request(req);
 }

-typedef struct MultiReqBuffer {
-    BlockRequest        blkreq[32];
-    unsigned int        num_writes;
-} MultiReqBuffer;
-
-static void virtio_submit_multiwrite(BlockDriverState *bs, MultiReqBuffer *mrb)
+void virtio_submit_multiwrite(BlockDriverState *bs, MultiReqBuffer *mrb)
 {
    int i, ret;

@@ -288,26 +288,42 @@ static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
    bdrv_aio_flush(req->dev->bs, virtio_blk_flush_complete, req);
 }

+static bool virtio_blk_sect_range_ok(VirtIOBlock *dev,
+                                     uint64_t sector, size_t size)
+{
+    uint64_t nb_sectors = size >> BDRV_SECTOR_BITS;
+    uint64_t total_sectors;
+
+    if (sector & dev->sector_mask) {
+        return false;
+    }
+    if (size % dev->conf->logical_block_size) {
+        return false;
+    }
+    bdrv_get_geometry(dev->bs, &total_sectors);
+    if (sector > total_sectors || nb_sectors > total_sectors - sector) {
+        return false;
+    }
+    return true;
+}
+
 static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb)
 {
    BlockRequest *blkreq;
    uint64_t sector;

-    sector = ldq_p(&req->out->sector);
-
-    bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_WRITE);
+    sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &req->out.sector);

    trace_virtio_blk_handle_write(req, sector, req->qiov.size / 512);

-    if (sector & req->dev->sector_mask) {
-        virtio_blk_rw_complete(req, -EIO);
-        return;
-    }
-    if (req->qiov.size % req->dev->conf->logical_block_size) {
-        virtio_blk_rw_complete(req, -EIO);
+    if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) {
+        virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
+        virtio_blk_free_request(req);
        return;
    }

+    bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_WRITE);
+
    if (mrb->num_writes == 32) {
        virtio_submit_multiwrite(req->dev->bs, mrb);
    }
@@ -327,45 +343,55 @@ static void virtio_blk_handle_read(VirtIOBlockReq *req)
 {
    uint64_t sector;

-    sector = ldq_p(&req->out->sector);
-
-    bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_READ);
+    sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &req->out.sector);

    trace_virtio_blk_handle_read(req, sector, req->qiov.size / 512);

-    if (sector & req->dev->sector_mask) {
-        virtio_blk_rw_complete(req, -EIO);
-        return;
-    }
-    if (req->qiov.size % req->dev->conf->logical_block_size) {
-        virtio_blk_rw_complete(req, -EIO);
+    if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) {
+        virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
+        virtio_blk_free_request(req);
        return;
    }
+
+    bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_READ);
    bdrv_aio_readv(req->dev->bs, sector, &req->qiov,
                   req->qiov.size / BDRV_SECTOR_SIZE,
                   virtio_blk_rw_complete, req);
 }

-static void virtio_blk_handle_request(VirtIOBlockReq *req,
-    MultiReqBuffer *mrb)
+void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
 {
    uint32_t type;
+    struct iovec *in_iov = req->elem.in_sg;
+    struct iovec *iov = req->elem.out_sg;
+    unsigned in_num = req->elem.in_num;
+    unsigned out_num = req->elem.out_num;

    if (req->elem.out_num < 1 || req->elem.in_num < 1) {
        error_report("virtio-blk missing headers");
        exit(1);
    }

-    if (req->elem.out_sg[0].iov_len < sizeof(*req->out) ||
-        req->elem.in_sg[req->elem.in_num - 1].iov_len < sizeof(*req->in)) {
-        error_report("virtio-blk header not in correct element");
+    if (unlikely(iov_to_buf(iov, out_num, 0, &req->out,
+                            sizeof(req->out)) != sizeof(req->out))) {
+        error_report("virtio-blk request outhdr too short");
        exit(1);
    }

-    req->out = (void *)req->elem.out_sg[0].iov_base;
-    req->in = (void *)req->elem.in_sg[req->elem.in_num - 1].iov_base;
+    iov_discard_front(&iov, &out_num, sizeof(req->out));

-    type = ldl_p(&req->out->type);
+    if (in_num < 1 ||
+        in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
+        error_report("virtio-blk request inhdr too short");
+        exit(1);
+    }
+
+    req->in = (void *)in_iov[in_num - 1].iov_base
+              + in_iov[in_num - 1].iov_len
+              - sizeof(struct virtio_blk_inhdr);
+    iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr));
+
+    type = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);

    if (type & VIRTIO_BLK_T_FLUSH) {
        virtio_blk_handle_flush(req, mrb);
@@ -382,7 +408,7 @@ static void virtio_blk_handle_request(VirtIOBlockReq *req,
                s->blk.serial ? s->blk.serial : "",
                MIN(req->elem.in_sg[0].iov_len, VIRTIO_BLK_ID_BYTES));
        virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
-        g_free(req);
+        virtio_blk_free_request(req);
    } else if (type & VIRTIO_BLK_T_OUT) {
        qemu_iovec_init_external(&req->qiov, &req->elem.out_sg[1],
                                 req->elem.out_num - 1);
@@ -394,7 +420,7 @@ static void virtio_blk_handle_request(VirtIOBlockReq *req,
        virtio_blk_handle_read(req);
    } else {
        virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
-        g_free(req);
+        virtio_blk_free_request(req);
    }
 }

@@ -443,8 +469,9 @@ static void virtio_blk_dma_restart_bh(void *opaque)
    s->rq = NULL;

    while (req) {
+        VirtIOBlockReq *next = req->next;
        virtio_blk_handle_request(req, &mrb);
-        req = req->next;
+        req = next;
    }

    virtio_submit_multiwrite(s->bs, &mrb);
@@ -460,7 +487,8 @@ static void virtio_blk_dma_restart_cb(void *opaque, int running,
    }

    if (!s->bh) {
-        s->bh = qemu_bh_new(virtio_blk_dma_restart_bh, s);
+        s->bh = aio_bh_new(bdrv_get_aio_context(s->blk.conf.bs),
+                           virtio_blk_dma_restart_bh, s);
        qemu_bh_schedule(s->bh);
    }
 }
@@ -494,12 +522,12 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)

    bdrv_get_geometry(s->bs, &capacity);
    memset(&blkcfg, 0, sizeof(blkcfg));
-    stq_p(&blkcfg.capacity, capacity);
-    stl_p(&blkcfg.seg_max, 128 - 2);
-    stw_p(&blkcfg.cylinders, s->conf->cyls);
-    stl_p(&blkcfg.blk_size, blk_size);
-    stw_p(&blkcfg.min_io_size, s->conf->min_io_size / blk_size);
-    stw_p(&blkcfg.opt_io_size, s->conf->opt_io_size / blk_size);
+    virtio_stq_p(vdev, &blkcfg.capacity, capacity);
+    virtio_stl_p(vdev, &blkcfg.seg_max, 128 - 2);
+    virtio_stw_p(vdev, &blkcfg.cylinders, s->conf->cyls);
+    virtio_stl_p(vdev, &blkcfg.blk_size, blk_size);
+    virtio_stw_p(vdev, &blkcfg.min_io_size, s->conf->min_io_size / blk_size);
+    virtio_stw_p(vdev, &blkcfg.opt_io_size, s->conf->opt_io_size / blk_size);
    blkcfg.heads = s->conf->heads;
    /*
     * We must ensure that the block device capacity is a multiple of
@@ -601,15 +629,20 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)

 static void virtio_blk_save(QEMUFile *f, void *opaque)
 {
-    VirtIOBlock *s = opaque;
-    VirtIODevice *vdev = VIRTIO_DEVICE(s);
-    VirtIOBlockReq *req = s->rq;
+    VirtIODevice *vdev = VIRTIO_DEVICE(opaque);

    virtio_save(vdev, f);
+}
    
+static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f)
+{
+    VirtIOBlock *s = VIRTIO_BLK(vdev);
+    VirtIOBlockReq *req = s->rq;
+
    while (req) {
        qemu_put_sbyte(f, 1);
-        qemu_put_buffer(f, (unsigned char*)&req->elem, sizeof(req->elem));
+        qemu_put_buffer(f, (unsigned char *)&req->elem,
+                        sizeof(VirtQueueElement));
        req = req->next;
    }
    qemu_put_sbyte(f, 0);
@@ -619,19 +652,22 @@ static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id)
 {
    VirtIOBlock *s = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);
-    int ret;

    if (version_id != 2)
        return -EINVAL;

-    ret = virtio_load(vdev, f);
-    if (ret) {
-        return ret;
-    }
+    return virtio_load(vdev, f, version_id);
+}
+
+static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f,
+                                  int version_id)
+{
+    VirtIOBlock *s = VIRTIO_BLK(vdev);

    while (qemu_get_sbyte(f)) {
        VirtIOBlockReq *req = virtio_blk_alloc_request(s);
-        qemu_get_buffer(f, (unsigned char*)&req->elem, sizeof(req->elem));
+        qemu_get_buffer(f, (unsigned char *)&req->elem,
+                        sizeof(VirtQueueElement));
        req->next = s->rq;
        s->rq = req;

@@ -655,12 +691,6 @@ static const BlockDevOps virtio_block_ops = {
    .resize_cb = virtio_blk_resize,
 };

-void virtio_blk_set_conf(DeviceState *dev, VirtIOBlkConf *blk)
-{
-    VirtIOBlock *s = VIRTIO_BLK(dev);
-    memcpy(&(s->blk), blk, sizeof(struct VirtIOBlkConf));
-}
-
 #ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
 /* Disable dataplane thread during live migration since it does not
 * update the dirty memory bitmap yet.
@@ -729,6 +759,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
    s->sector_mask = (s->conf->logical_block_size / BDRV_SECTOR_SIZE) - 1;

    s->vq = virtio_add_queue(vdev, 128, virtio_blk_handle_output);
+    s->complete_request = virtio_blk_complete_request;
 #ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
    virtio_blk_data_plane_create(vdev, blk, &s->dataplane, &err);
    if (err != NULL) {
@@ -767,8 +798,27 @@ static void virtio_blk_device_unrealize(DeviceState *dev, Error **errp)
    virtio_cleanup(vdev);
 }

+static void virtio_blk_instance_init(Object *obj)
+{
+    VirtIOBlock *s = VIRTIO_BLK(obj);
+
+    object_property_add_link(obj, "iothread", TYPE_IOTHREAD,
+                             (Object **)&s->blk.iothread,
+                             qdev_prop_allow_set_link_before_realize,
+                             OBJ_PROP_LINK_UNREF_ON_RELEASE, NULL);
+}
+
 static Property virtio_blk_properties[] = {
-    DEFINE_VIRTIO_BLK_PROPERTIES(VirtIOBlock, blk),
+    DEFINE_BLOCK_PROPERTIES(VirtIOBlock, blk.conf),
+    DEFINE_BLOCK_CHS_PROPERTIES(VirtIOBlock, blk.conf),
+    DEFINE_PROP_STRING("serial", VirtIOBlock, blk.serial),
+    DEFINE_PROP_BIT("config-wce", VirtIOBlock, blk.config_wce, 0, true),
+#ifdef __linux__
+    DEFINE_PROP_BIT("scsi", VirtIOBlock, blk.scsi, 0, true),
+#endif
+#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
+    DEFINE_PROP_BIT("x-data-plane", VirtIOBlock, blk.data_plane, 0, false),
+#endif
    DEFINE_PROP_END_OF_LIST(),
 };

@@ -786,12 +836,15 @@ static void virtio_blk_class_init(ObjectClass *klass, void *data)
    vdc->get_features = virtio_blk_get_features;
    vdc->set_status = virtio_blk_set_status;
    vdc->reset = virtio_blk_reset;
+    vdc->save = virtio_blk_save_device;
+    vdc->load = virtio_blk_load_device;
 }

 static const TypeInfo virtio_device_info = {
    .name = TYPE_VIRTIO_BLK,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIOBlock),
+    .instance_init = virtio_blk_instance_init,
    .class_init = virtio_blk_class_init,
 };

--- a/hw/char/cadence_uart.c
+++ b/hw/char/cadence_uart.c
@@ -175,8 +175,10 @@ static void uart_send_breaks(UartState *s)
 {
    int break_enabled = 1;

-    qemu_chr_fe_ioctl(s->chr, CHR_IOCTL_SERIAL_SET_BREAK,
-                               &break_enabled);
+    if (s->chr) {
+        qemu_chr_fe_ioctl(s->chr, CHR_IOCTL_SERIAL_SET_BREAK,
+                                   &break_enabled);
+    }
 }

 static void uart_parameters_setup(UartState *s)
@@ -227,7 +229,9 @@ static void uart_parameters_setup(UartState *s)

    packet_size += ssp.data_bits + ssp.stop_bits;
    s->char_tx_time = (get_ticks_per_sec() / ssp.speed) * packet_size;
-    qemu_chr_fe_ioctl(s->chr, CHR_IOCTL_SERIAL_SET_PARAMS, &ssp);
+    if (s->chr) {
+        qemu_chr_fe_ioctl(s->chr, CHR_IOCTL_SERIAL_SET_PARAMS, &ssp);
+    }
 }

 static int uart_can_receive(void *opaque)
@@ -295,6 +299,7 @@ static gboolean cadence_uart_xmit(GIOChannel *chan, GIOCondition cond,
    /* instant drain the fifo when there's no back-end */
    if (!s->chr) {
        s->tx_count = 0;
+        return FALSE;
    }

    if (!s->tx_count) {
@@ -306,7 +311,8 @@ static gboolean cadence_uart_xmit(GIOChannel *chan, GIOCondition cond,
    memmove(s->tx_fifo, s->tx_fifo + ret, s->tx_count);

    if (s->tx_count) {
-        int r = qemu_chr_fe_add_watch(s->chr, G_IO_OUT, cadence_uart_xmit, s);
+        int r = qemu_chr_fe_add_watch(s->chr, G_IO_OUT|G_IO_HUP,
+                                      cadence_uart_xmit, s);
        assert(r);
    }

@@ -374,7 +380,9 @@ static void uart_read_rx_fifo(UartState *s, uint32_t *c)
        *c = s->rx_fifo[rx_rpos];
        s->rx_count--;

-        qemu_chr_accept_input(s->chr);
+        if (s->chr) {
+            qemu_chr_accept_input(s->chr);
+        }
    } else {
        *c = 0;
    }
--- a/hw/char/serial-pci.c
+++ b/hw/char/serial-pci.c
@@ -148,11 +148,12 @@ static void multi_serial_pci_exit(PCIDevice *dev)
    for (i = 0; i < pci->ports; i++) {
        s = pci->state + i;
        serial_exit_core(s);
+        memory_region_del_subregion(&pci->iobar, &s->io);
        memory_region_destroy(&s->io);
        g_free(pci->name[i]);
    }
    memory_region_destroy(&pci->iobar);
-    qemu_free_irqs(pci->irqs);
+    qemu_free_irqs(pci->irqs, pci->ports);
 }

 static const VMStateDescription vmstate_pci_serial = {
--- a/hw/char/serial.c
+++ b/hw/char/serial.c
@@ -223,37 +223,42 @@ static gboolean serial_xmit(GIOChannel *chan, GIOCondition cond, void *opaque)
 {
    SerialState *s = opaque;

-    if (s->tsr_retry <= 0) {
-        if (s->fcr & UART_FCR_FE) {
-            if (fifo8_is_empty(&s->xmit_fifo)) {
+    do {
+        if (s->tsr_retry <= 0) {
+            if (s->fcr & UART_FCR_FE) {
+                if (fifo8_is_empty(&s->xmit_fifo)) {
+                    return FALSE;
+                }
+                s->tsr = fifo8_pop(&s->xmit_fifo);
+                if (!s->xmit_fifo.num) {
+                    s->lsr |= UART_LSR_THRE;
+                }
+            } else if ((s->lsr & UART_LSR_THRE)) {
+                return FALSE;
+            } else {
+                s->tsr = s->thr;
+                s->lsr |= UART_LSR_THRE;
+                s->lsr &= ~UART_LSR_TEMT;
+            }
+        }
+
+        if (s->mcr & UART_MCR_LOOP) {
+            /* in loopback mode, say that we just received a char */
+            serial_receive1(s, &s->tsr, 1);
+        } else if (qemu_chr_fe_write(s->chr, &s->tsr, 1) != 1) {
+            if (s->tsr_retry >= 0 && s->tsr_retry < MAX_XMIT_RETRY &&
+                qemu_chr_fe_add_watch(s->chr, G_IO_OUT|G_IO_HUP,
+                                      serial_xmit, s) > 0) {
+                s->tsr_retry++;
                return FALSE;
            }
-            s->tsr = fifo8_pop(&s->xmit_fifo);
-            if (!s->xmit_fifo.num) {
-                s->lsr |= UART_LSR_THRE;
-            }
-        } else if ((s->lsr & UART_LSR_THRE)) {
-            return FALSE;
+            s->tsr_retry = 0;
        } else {
-            s->tsr = s->thr;
-            s->lsr |= UART_LSR_THRE;
-            s->lsr &= ~UART_LSR_TEMT;
+            s->tsr_retry = 0;
        }
-    }
-
-    if (s->mcr & UART_MCR_LOOP) {
-        /* in loopback mode, say that we just received a char */
-        serial_receive1(s, &s->tsr, 1);
-    } else if (qemu_chr_fe_write(s->chr, &s->tsr, 1) != 1) {
-        if (s->tsr_retry >= 0 && s->tsr_retry < MAX_XMIT_RETRY &&
-            qemu_chr_fe_add_watch(s->chr, G_IO_OUT, serial_xmit, s) > 0) {
-            s->tsr_retry++;
-            return FALSE;
-        }
-        s->tsr_retry = 0;
-    } else {
-        s->tsr_retry = 0;
-    }
+        /* Transmit another byte if it is already available. It is only
+           possible when FIFO is enabled and not empty. */
+    } while ((s->fcr & UART_FCR_FE) && !fifo8_is_empty(&s->xmit_fifo));

    s->last_xmit_ts = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);

@@ -293,7 +298,9 @@ static void serial_ioport_write(void *opaque, hwaddr addr, uint64_t val,
            s->thr_ipending = 0;
            s->lsr &= ~UART_LSR_THRE;
            serial_update_irq(s);
-            serial_xmit(NULL, G_IO_OUT, s);
+            if (s->tsr_retry <= 0) {
+                serial_xmit(NULL, G_IO_OUT, s);
+            }
        }
        break;
    case 1:
--- a/hw/char/virtio-console.c
+++ b/hw/char/virtio-console.c
@@ -14,6 +14,7 @@
 #include "qemu/error-report.h"
 #include "trace.h"
 #include "hw/virtio/virtio-serial.h"
+#include "qapi-event.h"

 #define TYPE_VIRTIO_CONSOLE_SERIAL_PORT "virtserialport"
 #define VIRTIO_CONSOLE(obj) \
@@ -69,7 +70,8 @@ static ssize_t flush_buf(VirtIOSerialPort *port,
        if (!k->is_console) {
            virtio_serial_throttle_port(port, true);
            if (!vcon->watch) {
-                vcon->watch = qemu_chr_fe_add_watch(vcon->chr, G_IO_OUT,
+                vcon->watch = qemu_chr_fe_add_watch(vcon->chr,
+                                                    G_IO_OUT|G_IO_HUP,
                                                    chr_write_unblocked, vcon);
            }
        }
@@ -81,11 +83,16 @@ static ssize_t flush_buf(VirtIOSerialPort *port,
 static void set_guest_connected(VirtIOSerialPort *port, int guest_connected)
 {
    VirtConsole *vcon = VIRTIO_CONSOLE(port);
+    DeviceState *dev = DEVICE(port);

-    if (!vcon->chr) {
-        return;
+    if (vcon->chr) {
+        qemu_chr_fe_set_open(vcon->chr, guest_connected);
+    }
+
+    if (dev->id) {
+        qapi_event_send_vserport_change(dev->id, guest_connected,
+                                        &error_abort);
    }
-    qemu_chr_fe_set_open(vcon->chr, guest_connected);
 }

 /* Readiness of the guest to accept data on a port */
--- a/hw/char/virtio-serial-bus.c
+++ b/hw/char/virtio-serial-bus.c
@@ -24,6 +24,7 @@
 #include "hw/sysbus.h"
 #include "trace.h"
 #include "hw/virtio/virtio-serial.h"
+#include "hw/virtio/virtio-access.h"

 static VirtIOSerialPort *find_port_by_id(VirtIOSerial *vser, uint32_t id)
 {
@@ -183,11 +184,12 @@ static size_t send_control_msg(VirtIOSerial *vser, void *buf, size_t len)
 static size_t send_control_event(VirtIOSerial *vser, uint32_t port_id,
                                 uint16_t event, uint16_t value)
 {
+    VirtIODevice *vdev = VIRTIO_DEVICE(vser);
    struct virtio_console_control cpkt;

-    stl_p(&cpkt.id, port_id);
-    stw_p(&cpkt.event, event);
-    stw_p(&cpkt.value, value);
+    virtio_stl_p(vdev, &cpkt.id, port_id);
+    virtio_stw_p(vdev, &cpkt.event, event);
+    virtio_stw_p(vdev, &cpkt.value, value);

    trace_virtio_serial_send_control_event(port_id, event, value);
    return send_control_msg(vser, &cpkt, sizeof(cpkt));
@@ -278,6 +280,7 @@ void virtio_serial_throttle_port(VirtIOSerialPort *port, bool throttle)
 /* Guest wants to notify us of some event */
 static void handle_control_message(VirtIOSerial *vser, void *buf, size_t len)
 {
+    VirtIODevice *vdev = VIRTIO_DEVICE(vser);
    struct VirtIOSerialPort *port;
    VirtIOSerialPortClass *vsc;
    struct virtio_console_control cpkt, *gcpkt;
@@ -291,8 +294,8 @@ static void handle_control_message(VirtIOSerial *vser, void *buf, size_t len)
        return;
    }

-    cpkt.event = lduw_p(&gcpkt->event);
-    cpkt.value = lduw_p(&gcpkt->value);
+    cpkt.event = virtio_lduw_p(vdev, &gcpkt->event);
+    cpkt.value = virtio_lduw_p(vdev, &gcpkt->value);

    trace_virtio_serial_handle_control_message(cpkt.event, cpkt.value);

@@ -312,10 +315,10 @@ static void handle_control_message(VirtIOSerial *vser, void *buf, size_t len)
        return;
    }

-    port = find_port_by_id(vser, ldl_p(&gcpkt->id));
+    port = find_port_by_id(vser, virtio_ldl_p(vdev, &gcpkt->id));
    if (!port) {
        error_report("virtio-serial-bus: Unexpected port id %u for device %s",
-                     ldl_p(&gcpkt->id), vser->bus.qbus.name);
+                     virtio_ldl_p(vdev, &gcpkt->id), vser->bus.qbus.name);
        return;
    }

@@ -342,9 +345,9 @@ static void handle_control_message(VirtIOSerial *vser, void *buf, size_t len)
        }

        if (port->name) {
-            stl_p(&cpkt.id, port->id);
-            stw_p(&cpkt.event, VIRTIO_CONSOLE_PORT_NAME);
-            stw_p(&cpkt.value, 1);
+            virtio_stl_p(vdev, &cpkt.id, port->id);
+            virtio_stw_p(vdev, &cpkt.event, VIRTIO_CONSOLE_PORT_NAME);
+            virtio_stw_p(vdev, &cpkt.value, 1);

            buffer_len = sizeof(cpkt) + strlen(port->name) + 1;
            buffer = g_malloc(buffer_len);
@@ -510,18 +513,25 @@ static void vser_reset(VirtIODevice *vdev)

    vser = VIRTIO_SERIAL(vdev);
    guest_reset(vser);
+
+    /* In case we have switched endianness */
+    vser->config.max_nr_ports =
+        virtio_tswap32(vdev, vser->serial.max_virtserial_ports);
 }

 static void virtio_serial_save(QEMUFile *f, void *opaque)
 {
-    VirtIOSerial *s = VIRTIO_SERIAL(opaque);
+    /* The virtio device */
+    virtio_save(VIRTIO_DEVICE(opaque), f);
+}
+
+static void virtio_serial_save_device(VirtIODevice *vdev, QEMUFile *f)
+{
+    VirtIOSerial *s = VIRTIO_SERIAL(vdev);
    VirtIOSerialPort *port;
    uint32_t nr_active_ports;
    unsigned int i, max_nr_ports;

-    /* The virtio device */
-    virtio_save(VIRTIO_DEVICE(s), f);
-
    /* The config space */
    qemu_put_be16s(f, &s->config.cols);
    qemu_put_be16s(f, &s->config.rows);
@@ -529,7 +539,7 @@ static void virtio_serial_save(QEMUFile *f, void *opaque)
    qemu_put_be32s(f, &s->config.max_nr_ports);

    /* The ports map */
-    max_nr_ports = tswap32(s->config.max_nr_ports);
+    max_nr_ports = virtio_tswap32(vdev, s->config.max_nr_ports);
    for (i = 0; i < (max_nr_ports + 31) / 32; i++) {
        qemu_put_be32s(f, &s->ports_map[i]);
    }
@@ -659,36 +669,39 @@ static int fetch_active_ports_list(QEMUFile *f, int version_id,

 static int virtio_serial_load(QEMUFile *f, void *opaque, int version_id)
 {
-    VirtIOSerial *s = VIRTIO_SERIAL(opaque);
-    uint32_t max_nr_ports, nr_active_ports, ports_map;
-    unsigned int i;
-    int ret;
-
    if (version_id > 3) {
        return -EINVAL;
    }

    /* The virtio device */
-    ret = virtio_load(VIRTIO_DEVICE(s), f);
-    if (ret) {
-        return ret;
-    }
+    return virtio_load(VIRTIO_DEVICE(opaque), f, version_id);
+}
+
+static int virtio_serial_load_device(VirtIODevice *vdev, QEMUFile *f,
+                                     int version_id)
+{
+    VirtIOSerial *s = VIRTIO_SERIAL(vdev);
+    uint32_t max_nr_ports, nr_active_ports, ports_map;
+    unsigned int i;
+    int ret;
+    uint32_t tmp;

    if (version_id < 2) {
        return 0;
    }

-    /* The config space */
-    qemu_get_be16s(f, &s->config.cols);
-    qemu_get_be16s(f, &s->config.rows);
-
-    qemu_get_be32s(f, &max_nr_ports);
-    tswap32s(&max_nr_ports);
-    if (max_nr_ports > tswap32(s->config.max_nr_ports)) {
-        /* Source could have had more ports than us. Fail migration. */
-        return -EINVAL;
-    }
+    /* Unused */
+    qemu_get_be16s(f, (uint16_t *) &tmp);
+    qemu_get_be16s(f, (uint16_t *) &tmp);
+    qemu_get_be32s(f, &tmp);

+    /* Note: this is the only location where we use tswap32() instead of
+     * virtio_tswap32() because:
+     * - virtio_tswap32() only makes sense when the device is fully restored
+     * - the target endianness that was used to populate s->config is
+     *   necessarly the default one
+     */
+    max_nr_ports = tswap32(s->config.max_nr_ports);
    for (i = 0; i < (max_nr_ports + 31) / 32; i++) {
        qemu_get_be32s(f, &ports_map);

@@ -751,9 +764,10 @@ static void virtser_bus_dev_print(Monitor *mon, DeviceState *qdev, int indent)
 /* This function is only used if a port id is not provided by the user */
 static uint32_t find_free_port_id(VirtIOSerial *vser)
 {
+    VirtIODevice *vdev = VIRTIO_DEVICE(vser);
    unsigned int i, max_nr_ports;

-    max_nr_ports = tswap32(vser->config.max_nr_ports);
+    max_nr_ports = virtio_tswap32(vdev, vser->config.max_nr_ports);
    for (i = 0; i < (max_nr_ports + 31) / 32; i++) {
        uint32_t map, bit;

@@ -783,10 +797,18 @@ static void add_port(VirtIOSerial *vser, uint32_t port_id)
 static void remove_port(VirtIOSerial *vser, uint32_t port_id)
 {
    VirtIOSerialPort *port;
-    unsigned int i;

-    i = port_id / 32;
-    vser->ports_map[i] &= ~(1U << (port_id % 32));
+    /*
+     * Don't mark port 0 removed -- we explicitly reserve it for
+     * backward compat with older guests, ensure a virtconsole device
+     * unplug retains the reservation.
+     */
+    if (port_id) {
+        unsigned int i;
+
+        i = port_id / 32;
+        vser->ports_map[i] &= ~(1U << (port_id % 32));
+    }

    port = find_port_by_id(vser, port_id);
    /*
@@ -806,6 +828,7 @@ static void virtser_port_device_realize(DeviceState *dev, Error **errp)
    VirtIOSerialPort *port = VIRTIO_SERIAL_PORT(dev);
    VirtIOSerialPortClass *vsc = VIRTIO_SERIAL_PORT_GET_CLASS(port);
    VirtIOSerialBus *bus = VIRTIO_SERIAL_BUS(qdev_get_parent_bus(dev));
+    VirtIODevice *vdev = VIRTIO_DEVICE(bus->vser);
    int max_nr_ports;
    bool plugging_port0;
    Error *err = NULL;
@@ -841,7 +864,7 @@ static void virtser_port_device_realize(DeviceState *dev, Error **errp)
        }
    }

-    max_nr_ports = tswap32(port->vser->config.max_nr_ports);
+    max_nr_ports = virtio_tswap32(vdev, port->vser->config.max_nr_ports);
    if (port->id >= max_nr_ports) {
        error_setg(errp, "virtio-serial-bus: Out-of-range port id specified, "
                         "max. allowed: %u", max_nr_ports - 1);
@@ -863,7 +886,7 @@ static void virtser_port_device_realize(DeviceState *dev, Error **errp)
    add_port(port->vser, port->id);

    /* Send an update to the guest about this new port added */
-    virtio_notify_config(VIRTIO_DEVICE(port->vser));
+    virtio_notify_config(vdev);
 }

 static void virtser_port_device_unrealize(DeviceState *dev, Error **errp)
@@ -942,7 +965,8 @@ static void virtio_serial_device_realize(DeviceState *dev, Error **errp)
        vser->ovqs[i] = virtio_add_queue(vdev, 128, handle_output);
    }

-    vser->config.max_nr_ports = tswap32(vser->serial.max_virtserial_ports);
+    vser->config.max_nr_ports =
+        virtio_tswap32(vdev, vser->serial.max_virtserial_ports);
    vser->ports_map = g_malloc0(((vser->serial.max_virtserial_ports + 31) / 32)
        * sizeof(vser->ports_map[0]));
    /*
@@ -1019,6 +1043,8 @@ static void virtio_serial_class_init(ObjectClass *klass, void *data)
    vdc->get_config = get_config;
    vdc->set_status = set_status;
    vdc->reset = vser_reset;
+    vdc->save = virtio_serial_save_device;
+    vdc->load = virtio_serial_load_device;
 }

 static const TypeInfo virtio_device_info = {
--- a/hw/core/irq.c
+++ b/hw/core/irq.c
@@ -23,8 +23,13 @@
 */
 #include "qemu-common.h"
 #include "hw/irq.h"
+#include "qom/object.h"
+
+#define IRQ(obj) OBJECT_CHECK(struct IRQState, (obj), TYPE_IRQ)

 struct IRQState {
+    Object parent_obj;
+
    qemu_irq_handler handler;
    void *opaque;
    int n;
@@ -42,23 +47,14 @@ qemu_irq *qemu_extend_irqs(qemu_irq *old, int n_old, qemu_irq_handler handler,
                           void *opaque, int n)
 {
    qemu_irq *s;
-    struct IRQState *p;
    int i;

    if (!old) {
        n_old = 0;
    }
    s = old ? g_renew(qemu_irq, old, n + n_old) : g_new(qemu_irq, n);
-    p = old ? g_renew(struct IRQState, s[0], n + n_old) :
-                g_new(struct IRQState, n);
-    for (i = 0; i < n + n_old; i++) {
-        if (i >= n_old) {
-            p->handler = handler;
-            p->opaque = opaque;
-            p->n = i;
-        }
-        s[i] = p;
-        p++;
+    for (i = n_old; i < n + n_old; i++) {
+        s[i] = qemu_allocate_irq(handler, opaque, i);
    }
    return s;
 }
@@ -72,7 +68,7 @@ qemu_irq qemu_allocate_irq(qemu_irq_handler handler, void *opaque, int n)
 {
    struct IRQState *irq;

-    irq = g_new(struct IRQState, 1);
+    irq = IRQ(object_new(TYPE_IRQ));
    irq->handler = handler;
    irq->opaque = opaque;
    irq->n = n;
@@ -80,15 +76,18 @@ qemu_irq qemu_allocate_irq(qemu_irq_handler handler, void *opaque, int n)
    return irq;
 }

-void qemu_free_irqs(qemu_irq *s)
+void qemu_free_irqs(qemu_irq *s, int n)
 {
-    g_free(s[0]);
+    int i;
+    for (i = 0; i < n; i++) {
+        qemu_free_irq(s[i]);
+    }
    g_free(s);
 }

 void qemu_free_irq(qemu_irq irq)
 {
-    g_free(irq);
+    object_unref(OBJECT(irq));
 }

 static void qemu_notirq(void *opaque, int line, int level)
@@ -102,7 +101,7 @@ qemu_irq qemu_irq_invert(qemu_irq irq)
 {
    /* The default state for IRQs is low, so raise the output now.  */
    qemu_irq_raise(irq);
-    return qemu_allocate_irqs(qemu_notirq, irq, 1)[0];
+    return qemu_allocate_irq(qemu_notirq, irq, 0);
 }

 static void qemu_splitirq(void *opaque, int line, int level)
@@ -117,7 +116,7 @@ qemu_irq qemu_irq_split(qemu_irq irq1, qemu_irq irq2)
    qemu_irq *s = g_malloc0(2 * sizeof(qemu_irq));
    s[0] = irq1;
    s[1] = irq2;
-    return qemu_allocate_irqs(qemu_splitirq, s, 1)[0];
+    return qemu_allocate_irq(qemu_splitirq, s, 0);
 }

 static void proxy_irq_handler(void *opaque, int n, int level)
@@ -150,3 +149,16 @@ void qemu_irq_intercept_out(qemu_irq **gpio_out, qemu_irq_handler handler, int n
    qemu_irq *old_irqs = *gpio_out;
    *gpio_out = qemu_allocate_irqs(handler, old_irqs, n);
 }
+
+static const TypeInfo irq_type_info = {
+   .name = TYPE_IRQ,
+   .parent = TYPE_OBJECT,
+   .instance_size = sizeof(struct IRQState),
+};
+
+static void irq_register_types(void)
+{
+    type_register_static(&irq_type_info);
+}
+
+type_init(irq_register_types)
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -239,11 +239,11 @@ static void machine_initfn(Object *obj)
 {
    object_property_add_str(obj, "accel",
                            machine_get_accel, machine_set_accel, NULL);
-    object_property_add_bool(obj, "kernel_irqchip",
+    object_property_add_bool(obj, "kernel-irqchip",
                             machine_get_kernel_irqchip,
                             machine_set_kernel_irqchip,
                             NULL);
-    object_property_add(obj, "kvm_shadow_mem", "int",
+    object_property_add(obj, "kvm-shadow-mem", "int",
                        machine_get_kvm_shadow_mem,
                        machine_set_kvm_shadow_mem,
                        NULL, NULL, NULL);
@@ -257,11 +257,11 @@ static void machine_initfn(Object *obj)
                            machine_get_dtb, machine_set_dtb, NULL);
    object_property_add_str(obj, "dumpdtb",
                            machine_get_dumpdtb, machine_set_dumpdtb, NULL);
-    object_property_add(obj, "phandle_start", "int",
+    object_property_add(obj, "phandle-start", "int",
                        machine_get_phandle_start,
                        machine_set_phandle_start,
                        NULL, NULL, NULL);
-    object_property_add_str(obj, "dt_compatible",
+    object_property_add_str(obj, "dt-compatible",
                            machine_get_dt_compatible,
                            machine_set_dt_compatible,
                            NULL);
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -180,7 +180,6 @@ PropertyInfo qdev_prop_chr = {
 static int parse_netdev(DeviceState *dev, const char *str, void **ptr)
 {
    NICPeers *peers_ptr = (NICPeers *)ptr;
-    NICConf *conf = container_of(peers_ptr, NICConf, peers);
    NetClientState **ncs = peers_ptr->ncs;
    NetClientState *peers[MAX_QUEUE_NUM];
    int queues, i = 0;
@@ -219,7 +218,7 @@ static int parse_netdev(DeviceState *dev, const char *str, void **ptr)
        ncs[i]->queue_index = i;
    }

-    conf->queues = queues;
+    peers_ptr->queues = queues;

    return 0;

@@ -386,56 +385,6 @@ void qdev_set_nic_properties(DeviceState *dev, NICInfo *nd)
    nd->instantiated = 1;
 }

-/* --- iothread --- */
-
-static char *print_iothread(void *ptr)
-{
-    return iothread_get_id(ptr);
-}
-
-static int parse_iothread(DeviceState *dev, const char *str, void **ptr)
-{
-    IOThread *iothread;
-
-    iothread = iothread_find(str);
-    if (!iothread) {
-        return -ENOENT;
-    }
-    object_ref(OBJECT(iothread));
-    *ptr = iothread;
-    return 0;
-}
-
-static void get_iothread(Object *obj, struct Visitor *v, void *opaque,
-                         const char *name, Error **errp)
-{
-    get_pointer(obj, v, opaque, print_iothread, name, errp);
-}
-
-static void set_iothread(Object *obj, struct Visitor *v, void *opaque,
-                         const char *name, Error **errp)
-{
-    set_pointer(obj, v, opaque, parse_iothread, name, errp);
-}
-
-static void release_iothread(Object *obj, const char *name, void *opaque)
-{
-    DeviceState *dev = DEVICE(obj);
-    Property *prop = opaque;
-    IOThread **ptr = qdev_get_prop_ptr(dev, prop);
-
-    if (*ptr) {
-        object_unref(OBJECT(*ptr));
-    }
-}
-
-PropertyInfo qdev_prop_iothread = {
-    .name = "iothread",
-    .get = get_iothread,
-    .set = set_iothread,
-    .release = release_iothread,
-};
-
 static int qdev_add_one_global(QemuOpts *opts, void *opaque)
 {
    GlobalProperty *g;
@@ -445,7 +394,8 @@ static int qdev_add_one_global(QemuOpts *opts, void *opaque)
    g->driver   = qemu_opt_get(opts, "driver");
    g->property = qemu_opt_get(opts, "property");
    g->value    = qemu_opt_get(opts, "value");
-    oc = object_class_by_name(g->driver);
+    oc = object_class_dynamic_cast(object_class_by_name(g->driver),
+                                   TYPE_DEVICE);
    if (oc) {
        DeviceClass *dc = DEVICE_CLASS(oc);

--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -780,6 +780,27 @@ void qdev_property_add_static(DeviceState *dev, Property *prop,
    }
 }

+/* @qdev_alias_all_properties - Add alias properties to the source object for
+ * all qdev properties on the target DeviceState.
+ */
+void qdev_alias_all_properties(DeviceState *target, Object *source)
+{
+    ObjectClass *class;
+    Property *prop;
+
+    class = object_get_class(OBJECT(target));
+    do {
+        DeviceClass *dc = DEVICE_CLASS(class);
+
+        for (prop = dc->props; prop && prop->name; prop++) {
+            object_property_add_alias(source, prop->name,
+                                      OBJECT(target), prop->name,
+                                      &error_abort);
+        }
+        class = object_class_get_parent(class);
+    } while (class != object_class_by_name(TYPE_DEVICE));
+}
+
 static bool device_get_realized(Object *obj, Error **errp)
 {
    DeviceState *dev = DEVICE(obj);
@@ -848,6 +869,7 @@ static void device_set_realized(Object *obj, bool value, Error **errp)
        if (dev->hotplugged && local_err == NULL) {
            device_reset(dev);
        }
+        dev->pending_deleted_event = false;
    } else if (!value && dev->realized) {
        QLIST_FOREACH(bus, &dev->child_bus, sibling) {
            object_property_set_bool(OBJECT(bus), false, "realized",
@@ -862,6 +884,7 @@ static void device_set_realized(Object *obj, bool value, Error **errp)
        if (dc->unrealize && local_err == NULL) {
            dc->unrealize(dev, &local_err);
        }
+        dev->pending_deleted_event = true;
    }

    if (local_err != NULL) {
@@ -934,7 +957,13 @@ static void device_initfn(Object *obj)

 static void device_post_init(Object *obj)
 {
-    qdev_prop_set_globals(DEVICE(obj), &error_abort);
+    Error *err = NULL;
+    qdev_prop_set_globals(DEVICE(obj), &err);
+    if (err) {
+        qerror_report_err(err);
+        error_free(err);
+        exit(EXIT_FAILURE);
+    }
 }

 /* Unlink device from bus and free the structure.  */
@@ -949,7 +978,7 @@ static void device_finalize(Object *obj)

    QLIST_FOREACH_SAFE(ngl, &dev->gpios, node, next) {
        QLIST_REMOVE(ngl, node);
-        qemu_free_irqs(ngl->in);
+        qemu_free_irqs(ngl->in, ngl->num_in);
        g_free(ngl->name);
        g_free(ngl);
        /* ngl->out irqs are owned by the other end and should not be freed
@@ -972,7 +1001,6 @@ static void device_unparent(Object *obj)
 {
    DeviceState *dev = DEVICE(obj);
    BusState *bus;
-    bool have_realized = dev->realized;

    if (dev->realized) {
        object_property_set_bool(obj, false, "realized", NULL);
@@ -988,7 +1016,7 @@ static void device_unparent(Object *obj)
    }

    /* Only send event if the device had been completely realized */
-    if (have_realized) {
+    if (dev->pending_deleted_event) {
        gchar *path = object_get_canonical_path(OBJECT(dev));

        qapi_event_send_device_deleted(!!dev->id, dev->id, path, &error_abort);
--- a/hw/display/cirrus_vga.c
+++ b/hw/display/cirrus_vga.c
@@ -2059,7 +2059,7 @@ static void cirrus_vga_mem_write(void *opaque,
 	}
    } else {
 #ifdef DEBUG_CIRRUS
-        printf("cirrus: mem_writeb " TARGET_FMT_plx " value %02x\n", addr,
+        printf("cirrus: mem_writeb " TARGET_FMT_plx " value 0x%02" PRIu64 "\n", addr,
               mem_value);
 #endif
    }
@@ -2594,7 +2594,7 @@ static void cirrus_vga_ioport_write(void *opaque, hwaddr addr, uint64_t val,
 	break;
    case 0x3c5:
 #ifdef DEBUG_VGA_REG
-	printf("vga: write SR%x = 0x%02x\n", s->sr_index, val);
+	printf("vga: write SR%x = 0x%02" PRIu64 "\n", s->sr_index, val);
 #endif
 	cirrus_vga_write_sr(c, val);
        break;
@@ -2619,7 +2619,7 @@ static void cirrus_vga_ioport_write(void *opaque, hwaddr addr, uint64_t val,
 	break;
    case 0x3cf:
 #ifdef DEBUG_VGA_REG
-	printf("vga: write GR%x = 0x%02x\n", s->gr_index, val);
+	printf("vga: write GR%x = 0x%02" PRIu64 "\n", s->gr_index, val);
 #endif
 	cirrus_vga_write_gr(c, s->gr_index, val);
 	break;
@@ -2630,7 +2630,7 @@ static void cirrus_vga_ioport_write(void *opaque, hwaddr addr, uint64_t val,
    case 0x3b5:
    case 0x3d5:
 #ifdef DEBUG_VGA_REG
-	printf("vga: write CR%x = 0x%02x\n", s->cr_index, val);
+	printf("vga: write CR%x = 0x%02"PRIu64"\n", s->cr_index, val);
 #endif
 	cirrus_vga_write_cr(c, val);
 	break;
@@ -2911,6 +2911,14 @@ static void isa_cirrus_vga_realizefn(DeviceState *dev, Error **errp)
    ISACirrusVGAState *d = ISA_CIRRUS_VGA(dev);
    VGACommonState *s = &d->cirrus_vga.vga;

+    /* follow real hardware, cirrus card emulated has 4 MB video memory.
+       Also accept 8 MB/16 MB for backward compatibility. */
+    if (s->vram_size_mb != 4 && s->vram_size_mb != 8 &&
+        s->vram_size_mb != 16) {
+        error_setg(errp, "Invalid cirrus_vga ram size '%u'",
+                   s->vram_size_mb);
+        return;
+    }
    vga_common_init(s, OBJECT(dev), true);
    cirrus_init_common(&d->cirrus_vga, OBJECT(dev), CIRRUS_ID_CLGD5430, 0,
                       isa_address_space(isadev),
@@ -2957,6 +2965,14 @@ static int pci_cirrus_vga_initfn(PCIDevice *dev)
     PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
     int16_t device_id = pc->device_id;

+     /* follow real hardware, cirrus card emulated has 4 MB video memory.
+       Also accept 8 MB/16 MB for backward compatibility. */
+     if (s->vga.vram_size_mb != 4 && s->vga.vram_size_mb != 8 &&
+         s->vga.vram_size_mb != 16) {
+         error_report("Invalid cirrus_vga ram size '%u'",
+                      s->vga.vram_size_mb);
+         return -1;
+     }
     /* setup VGA */
     vga_common_init(&s->vga, OBJECT(dev), true);
     cirrus_init_common(s, OBJECT(dev), device_id, 1, pci_address_space(dev),
--- a/hw/display/cirrus_vga_rop.h
+++ b/hw/display/cirrus_vga_rop.h
@@ -52,8 +52,7 @@ glue(cirrus_bitblt_rop_fwd_, ROP_NAME)(CirrusVGAState *s,
    dstpitch -= bltwidth;
    srcpitch -= bltwidth;

-    if (dstpitch < 0 || srcpitch < 0) {
-        /* is 0 valid? srcpitch == 0 could be useful */
+    if (bltheight > 1 && (dstpitch < 0 || srcpitch < 0)) {
        return;
    }

--- a/hw/display/qxl-render.c
+++ b/hw/display/qxl-render.c
@@ -138,7 +138,9 @@ static void qxl_render_update_area_unlocked(PCIQXLDevice *qxl)
        if (qemu_spice_rect_is_empty(qxl->dirty+i)) {
            break;
        }
-        if (qxl->dirty[i].left > qxl->dirty[i].right ||
+        if (qxl->dirty[i].left < 0 ||
+            qxl->dirty[i].top < 0 ||
+            qxl->dirty[i].left > qxl->dirty[i].right ||
            qxl->dirty[i].top > qxl->dirty[i].bottom ||
            qxl->dirty[i].right > qxl->guest_primary.surface.width ||
            qxl->dirty[i].bottom > qxl->guest_primary.surface.height) {
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -2063,6 +2063,7 @@ static int qxl_init_primary(PCIDevice *dev)

    qxl->id = 0;
    qxl_init_ramsize(qxl);
+    vga->vbe_size = qxl->vgamem_size;
    vga->vram_size_mb = qxl->vga.vram_size >> 20;
    vga_common_init(vga, OBJECT(dev), true);
    vga_init(vga, OBJECT(dev),
--- a/hw/display/vga.c
+++ b/hw/display/vga.c
@@ -580,6 +580,93 @@ void vga_ioport_write(void *opaque, uint32_t addr, uint32_t val)
    }
 }

+/*
+ * Sanity check vbe register writes.
+ *
+ * As we don't have a way to signal errors to the guest in the bochs
+ * dispi interface we'll go adjust the registers to the closest valid
+ * value.
+ */
+static void vbe_fixup_regs(VGACommonState *s)
+{
+    uint16_t *r = s->vbe_regs;
+    uint32_t bits, linelength, maxy, offset;
+
+    if (!(r[VBE_DISPI_INDEX_ENABLE] & VBE_DISPI_ENABLED)) {
+        /* vbe is turned off -- nothing to do */
+        return;
+    }
+
+    /* check depth */
+    switch (r[VBE_DISPI_INDEX_BPP]) {
+    case 4:
+    case 8:
+    case 16:
+    case 24:
+    case 32:
+        bits = r[VBE_DISPI_INDEX_BPP];
+        break;
+    case 15:
+        bits = 16;
+        break;
+    default:
+        bits = r[VBE_DISPI_INDEX_BPP] = 8;
+        break;
+    }
+
+    /* check width */
+    r[VBE_DISPI_INDEX_XRES] &= ~7u;
+    if (r[VBE_DISPI_INDEX_XRES] == 0) {
+        r[VBE_DISPI_INDEX_XRES] = 8;
+    }
+    if (r[VBE_DISPI_INDEX_XRES] > VBE_DISPI_MAX_XRES) {
+        r[VBE_DISPI_INDEX_XRES] = VBE_DISPI_MAX_XRES;
+    }
+    r[VBE_DISPI_INDEX_VIRT_WIDTH] &= ~7u;
+    if (r[VBE_DISPI_INDEX_VIRT_WIDTH] > VBE_DISPI_MAX_XRES) {
+        r[VBE_DISPI_INDEX_VIRT_WIDTH] = VBE_DISPI_MAX_XRES;
+    }
+    if (r[VBE_DISPI_INDEX_VIRT_WIDTH] < r[VBE_DISPI_INDEX_XRES]) {
+        r[VBE_DISPI_INDEX_VIRT_WIDTH] = r[VBE_DISPI_INDEX_XRES];
+    }
+
+    /* check height */
+    linelength = r[VBE_DISPI_INDEX_VIRT_WIDTH] * bits / 8;
+    maxy = s->vbe_size / linelength;
+    if (r[VBE_DISPI_INDEX_YRES] == 0) {
+        r[VBE_DISPI_INDEX_YRES] = 1;
+    }
+    if (r[VBE_DISPI_INDEX_YRES] > VBE_DISPI_MAX_YRES) {
+        r[VBE_DISPI_INDEX_YRES] = VBE_DISPI_MAX_YRES;
+    }
+    if (r[VBE_DISPI_INDEX_YRES] > maxy) {
+        r[VBE_DISPI_INDEX_YRES] = maxy;
+    }
+
+    /* check offset */
+    if (r[VBE_DISPI_INDEX_X_OFFSET] > VBE_DISPI_MAX_XRES) {
+        r[VBE_DISPI_INDEX_X_OFFSET] = VBE_DISPI_MAX_XRES;
+    }
+    if (r[VBE_DISPI_INDEX_Y_OFFSET] > VBE_DISPI_MAX_YRES) {
+        r[VBE_DISPI_INDEX_Y_OFFSET] = VBE_DISPI_MAX_YRES;
+    }
+    offset = r[VBE_DISPI_INDEX_X_OFFSET] * bits / 8;
+    offset += r[VBE_DISPI_INDEX_Y_OFFSET] * linelength;
+    if (offset + r[VBE_DISPI_INDEX_YRES] * linelength > s->vbe_size) {
+        r[VBE_DISPI_INDEX_Y_OFFSET] = 0;
+        offset = r[VBE_DISPI_INDEX_X_OFFSET] * bits / 8;
+        if (offset + r[VBE_DISPI_INDEX_YRES] * linelength > s->vbe_size) {
+            r[VBE_DISPI_INDEX_X_OFFSET] = 0;
+            offset = 0;
+        }
+    }
+
+    /* update vga state */
+    r[VBE_DISPI_INDEX_VIRT_HEIGHT] = maxy;
+    s->vbe_line_offset = linelength;
+    s->vbe_start_addr  = offset / 4;
+}
+
 static uint32_t vbe_ioport_read_index(void *opaque, uint32_t addr)
 {
    VGACommonState *s = opaque;
@@ -614,7 +701,7 @@ uint32_t vbe_ioport_read_data(void *opaque, uint32_t addr)
            val = s->vbe_regs[s->vbe_index];
        }
    } else if (s->vbe_index == VBE_DISPI_INDEX_VIDEO_MEMORY_64K) {
-        val = s->vram_size / (64 * 1024);
+        val = s->vbe_size / (64 * 1024);
    } else {
        val = 0;
    }
@@ -649,22 +736,13 @@ void vbe_ioport_write_data(void *opaque, uint32_t addr, uint32_t val)
            }
            break;
        case VBE_DISPI_INDEX_XRES:
-            if ((val <= VBE_DISPI_MAX_XRES) && ((val & 7) == 0)) {
-                s->vbe_regs[s->vbe_index] = val;
-            }
-            break;
        case VBE_DISPI_INDEX_YRES:
-            if (val <= VBE_DISPI_MAX_YRES) {
-                s->vbe_regs[s->vbe_index] = val;
-            }
-            break;
        case VBE_DISPI_INDEX_BPP:
-            if (val == 0)
-                val = 8;
-            if (val == 4 || val == 8 || val == 15 ||
-                val == 16 || val == 24 || val == 32) {
-                s->vbe_regs[s->vbe_index] = val;
-            }
+        case VBE_DISPI_INDEX_VIRT_WIDTH:
+        case VBE_DISPI_INDEX_X_OFFSET:
+        case VBE_DISPI_INDEX_Y_OFFSET:
+            s->vbe_regs[s->vbe_index] = val;
+            vbe_fixup_regs(s);
            break;
        case VBE_DISPI_INDEX_BANK:
            if (s->vbe_regs[VBE_DISPI_INDEX_BPP] == 4) {
@@ -681,19 +759,11 @@ void vbe_ioport_write_data(void *opaque, uint32_t addr, uint32_t val)
                !(s->vbe_regs[VBE_DISPI_INDEX_ENABLE] & VBE_DISPI_ENABLED)) {
                int h, shift_control;

-                s->vbe_regs[VBE_DISPI_INDEX_VIRT_WIDTH] =
-                    s->vbe_regs[VBE_DISPI_INDEX_XRES];
-                s->vbe_regs[VBE_DISPI_INDEX_VIRT_HEIGHT] =
-                    s->vbe_regs[VBE_DISPI_INDEX_YRES];
+                s->vbe_regs[VBE_DISPI_INDEX_VIRT_WIDTH] = 0;
                s->vbe_regs[VBE_DISPI_INDEX_X_OFFSET] = 0;
                s->vbe_regs[VBE_DISPI_INDEX_Y_OFFSET] = 0;
-
-                if (s->vbe_regs[VBE_DISPI_INDEX_BPP] == 4)
-                    s->vbe_line_offset = s->vbe_regs[VBE_DISPI_INDEX_XRES] >> 1;
-                else
-                    s->vbe_line_offset = s->vbe_regs[VBE_DISPI_INDEX_XRES] *
-                        ((s->vbe_regs[VBE_DISPI_INDEX_BPP] + 7) >> 3);
-                s->vbe_start_addr = 0;
+                s->vbe_regs[VBE_DISPI_INDEX_ENABLE] |= VBE_DISPI_ENABLED;
+                vbe_fixup_regs(s);

                /* clear the screen (should be done in BIOS) */
                if (!(val & VBE_DISPI_NOCLEARMEM)) {
@@ -742,40 +812,6 @@ void vbe_ioport_write_data(void *opaque, uint32_t addr, uint32_t val)
            s->vbe_regs[s->vbe_index] = val;
            vga_update_memory_access(s);
            break;
-        case VBE_DISPI_INDEX_VIRT_WIDTH:
-            {
-                int w, h, line_offset;
-
-                if (val < s->vbe_regs[VBE_DISPI_INDEX_XRES])
-                    return;
-                w = val;
-                if (s->vbe_regs[VBE_DISPI_INDEX_BPP] == 4)
-                    line_offset = w >> 1;
-                else
-                    line_offset = w * ((s->vbe_regs[VBE_DISPI_INDEX_BPP] + 7) >> 3);
-                h = s->vram_size / line_offset;
-                /* XXX: support weird bochs semantics ? */
-                if (h < s->vbe_regs[VBE_DISPI_INDEX_YRES])
-                    return;
-                s->vbe_regs[VBE_DISPI_INDEX_VIRT_WIDTH] = w;
-                s->vbe_regs[VBE_DISPI_INDEX_VIRT_HEIGHT] = h;
-                s->vbe_line_offset = line_offset;
-            }
-            break;
-        case VBE_DISPI_INDEX_X_OFFSET:
-        case VBE_DISPI_INDEX_Y_OFFSET:
-            {
-                int x;
-                s->vbe_regs[s->vbe_index] = val;
-                s->vbe_start_addr = s->vbe_line_offset * s->vbe_regs[VBE_DISPI_INDEX_Y_OFFSET];
-                x = s->vbe_regs[VBE_DISPI_INDEX_X_OFFSET];
-                if (s->vbe_regs[VBE_DISPI_INDEX_BPP] == 4)
-                    s->vbe_start_addr += x >> 1;
-                else
-                    s->vbe_start_addr += x * ((s->vbe_regs[VBE_DISPI_INDEX_BPP] + 7) >> 3);
-                s->vbe_start_addr >>= 2;
-            }
-            break;
        default:
            break;
        }
@@ -2289,6 +2325,9 @@ void vga_common_init(VGACommonState *s, Object *obj, bool global_vmstate)
        s->vram_size <<= 1;
    }
    s->vram_size_mb = s->vram_size >> 20;
+    if (!s->vbe_size) {
+        s->vbe_size = s->vram_size;
+    }

    s->is_vbe_vmstate = 1;
    memory_region_init_ram(&s->vram, obj, "vga.vram", s->vram_size);
--- a/hw/display/vga_int.h
+++ b/hw/display/vga_int.h
@@ -93,6 +93,7 @@ typedef struct VGACommonState {
    MemoryRegion vram_vbe;
    uint32_t vram_size;
    uint32_t vram_size_mb; /* property */
+    uint32_t vbe_size;
    uint32_t latch;
    MemoryRegion *chain4_alias;
    uint8_t sr_index;
--- a/hw/display/xenfb.c
+++ b/hw/display/xenfb.c
@@ -93,10 +93,12 @@ struct XenFB {

 static int common_bind(struct common *c)
 {
-    int mfn;
+    uint64_t mfn;

-    if (xenstore_read_fe_int(&c->xendev, "page-ref", &mfn) == -1)
+    if (xenstore_read_fe_uint64(&c->xendev, "page-ref", &mfn) == -1)
 	return -1;
+    assert(mfn == (xen_pfn_t)mfn);
+
    if (xenstore_read_fe_int(&c->xendev, "event-channel", &c->xendev.remote_port) == -1)
 	return -1;

@@ -107,7 +109,7 @@ static int common_bind(struct common *c)
 	return -1;

    xen_be_bind_evtchn(&c->xendev);
-    xen_be_printf(&c->xendev, 1, "ring mfn %d, remote-port %d, local-port %d\n",
+    xen_be_printf(&c->xendev, 1, "ring mfn %"PRIx64", remote-port %d, local-port %d\n",
 		  mfn, c->xendev.remote_port, c->xendev.local_port);

    return 0;
@@ -409,7 +411,7 @@ static void input_event(struct XenDevice *xendev)

 /* -------------------------------------------------------------------- */

-static void xenfb_copy_mfns(int mode, int count, unsigned long *dst, void *src)
+static void xenfb_copy_mfns(int mode, int count, xen_pfn_t *dst, void *src)
 {
    uint32_t *src32 = src;
    uint64_t *src64 = src;
@@ -424,8 +426,8 @@ static int xenfb_map_fb(struct XenFB *xenfb)
    struct xenfb_page *page = xenfb->c.page;
    char *protocol = xenfb->c.xendev.protocol;
    int n_fbdirs;
-    unsigned long *pgmfns = NULL;
-    unsigned long *fbmfns = NULL;
+    xen_pfn_t *pgmfns = NULL;
+    xen_pfn_t *fbmfns = NULL;
    void *map, *pd;
    int mode, ret = -1;

@@ -483,8 +485,8 @@ static int xenfb_map_fb(struct XenFB *xenfb)
    n_fbdirs = xenfb->fbpages * mode / 8;
    n_fbdirs = (n_fbdirs + (XC_PAGE_SIZE - 1)) / XC_PAGE_SIZE;

-    pgmfns = g_malloc0(sizeof(unsigned long) * n_fbdirs);
-    fbmfns = g_malloc0(sizeof(unsigned long) * xenfb->fbpages);
+    pgmfns = g_malloc0(sizeof(xen_pfn_t) * n_fbdirs);
+    fbmfns = g_malloc0(sizeof(xen_pfn_t) * xenfb->fbpages);

    xenfb_copy_mfns(mode, n_fbdirs, pgmfns, pd);
    map = xc_map_foreign_pages(xen_xc, xenfb->c.xendev.dom,
--- a/hw/dma/omap_dma.c
+++ b/hw/dma/omap_dma.c
@@ -1660,7 +1660,7 @@ struct soc_dma_s *omap_dma_init(hwaddr base, qemu_irq *irqs,
    }

    omap_dma_setcaps(s);
-    omap_clk_adduser(s->clk, qemu_allocate_irqs(omap_dma_clk_update, s, 1)[0]);
+    omap_clk_adduser(s->clk, qemu_allocate_irq(omap_dma_clk_update, s, 0));
    omap_dma_reset(s->dma);
    omap_dma_clk_update(s, 0, 1);

@@ -2082,7 +2082,7 @@ struct soc_dma_s *omap_dma4_init(hwaddr base, qemu_irq *irqs,
    s->intr_update = omap_dma_interrupts_4_update;

    omap_dma_setcaps(s);
-    omap_clk_adduser(s->clk, qemu_allocate_irqs(omap_dma_clk_update, s, 1)[0]);
+    omap_clk_adduser(s->clk, qemu_allocate_irq(omap_dma_clk_update, s, 0));
    omap_dma_reset(s->dma);
    omap_dma_clk_update(s, 0, !!s->dma->freq);

--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -25,7 +25,9 @@
 #include <glib.h>
 #include "qemu-common.h"
 #include "qemu/bitmap.h"
+#include "qemu/osdep.h"
 #include "qemu/range.h"
+#include "qemu/error-report.h"
 #include "hw/pci/pci.h"
 #include "qom/cpu.h"
 #include "hw/i386/pc.h"
@@ -52,6 +54,16 @@
 #include "qapi/qmp/qint.h"
 #include "qom/qom-qobject.h"

+/* These are used to size the ACPI tables for -M pc-i440fx-1.7 and
+ * -M pc-i440fx-2.0.  Even if the actual amount of AML generated grows
+ * a little bit, there should be plenty of free space since the DSDT
+ * shrunk by ~1.5k between QEMU 2.0 and QEMU 2.1.
+ */
+#define ACPI_BUILD_LEGACY_CPU_AML_SIZE    97
+#define ACPI_BUILD_ALIGN_SIZE             0x1000
+
+#define ACPI_BUILD_TABLE_SIZE             0x20000
+
 typedef struct AcpiCpuInfo {
    DECLARE_BITMAP(found_cpus, ACPI_CPU_HOTPLUG_ID_LIMIT);
 } AcpiCpuInfo;
@@ -64,6 +76,7 @@ typedef struct AcpiMcfgInfo {
 typedef struct AcpiPmInfo {
    bool s3_disabled;
    bool s4_disabled;
+    bool pcihp_bridge_en;
    uint8_t s4_val;
    uint16_t sci_int;
    uint8_t acpi_enable_cmd;
@@ -85,6 +98,7 @@ typedef struct AcpiBuildPciBusHotplugState {
    GArray *device_table;
    GArray *notify_table;
    struct AcpiBuildPciBusHotplugState *parent;
+    bool pcihp_bridge_en;
 } AcpiBuildPciBusHotplugState;

 static void acpi_get_dsdt(AcpiMiscInfo *info)
@@ -188,6 +202,9 @@ static void acpi_get_pm_info(AcpiPmInfo *pm)
                                           NULL);
    pm->gpe0_blk_len = object_property_get_int(obj, ACPI_PM_PROP_GPE0_BLK_LEN,
                                               NULL);
+    pm->pcihp_bridge_en =
+        object_property_get_bool(obj, "acpi-pci-hotplug-with-bridge-support",
+                                 NULL);
 }

 static void acpi_get_misc_info(AcpiMiscInfo *info)
@@ -529,6 +546,12 @@ static void fadt_setup(AcpiFadtDescriptorRev1 *fadt, AcpiPmInfo *pm)
                              (1 << ACPI_FADT_F_SLP_BUTTON) |
                              (1 << ACPI_FADT_F_RTC_S4));
    fadt->flags |= cpu_to_le32(1 << ACPI_FADT_F_USE_PLATFORM_CLOCK);
+    /* APIC destination mode ("Flat Logical") has an upper limit of 8 CPUs
+     * For more than 8 CPUs, "Clustered Logical" mode has to be used
+     */
+    if (max_cpus > 8) {
+        fadt->flags |= cpu_to_le32(1 << ACPI_FADT_F_FORCE_APIC_CLUSTER_MODEL);
+    }
 }


@@ -768,11 +791,13 @@ static void acpi_set_pci_info(void)
 }

 static void build_pci_bus_state_init(AcpiBuildPciBusHotplugState *state,
-                                     AcpiBuildPciBusHotplugState *parent)
+                                     AcpiBuildPciBusHotplugState *parent,
+                                     bool pcihp_bridge_en)
 {
    state->parent = parent;
    state->device_table = build_alloc_array();
    state->notify_table = build_alloc_array();
+    state->pcihp_bridge_en = pcihp_bridge_en;
 }

 static void build_pci_bus_state_cleanup(AcpiBuildPciBusHotplugState *state)
@@ -786,7 +811,7 @@ static void *build_pci_bus_begin(PCIBus *bus, void *parent_state)
    AcpiBuildPciBusHotplugState *parent = parent_state;
    AcpiBuildPciBusHotplugState *child = g_malloc(sizeof *child);

-    build_pci_bus_state_init(child, parent);
+    build_pci_bus_state_init(child, parent, parent->pcihp_bridge_en);

    return child;
 }
@@ -807,6 +832,14 @@ static void build_pci_bus_end(PCIBus *bus, void *bus_state)
    GArray *method;
    bool bus_hotplug_support = false;

+    /*
+     * Skip bridge subtree creation if bridge hotplug is disabled
+     * to make acpi tables compatible with legacy machine types.
+     */
+    if (!child->pcihp_bridge_en && bus->parent_dev) {
+        return;
+    }
+
    if (bus->parent_dev) {
        op = 0x82; /* DeviceOp */
        build_append_nameseg(bus_table, "S%.02X_",
@@ -844,6 +877,7 @@ static void build_pci_bus_end(PCIBus *bus, void *bus_state)
        PCIDeviceClass *pc;
        PCIDevice *pdev = bus->devices[i];
        int slot = PCI_SLOT(i);
+        bool bridge_in_acpi;

        if (!pdev) {
            continue;
@@ -853,7 +887,13 @@ static void build_pci_bus_end(PCIBus *bus, void *bus_state)
        pc = PCI_DEVICE_GET_CLASS(pdev);
        dc = DEVICE_GET_CLASS(pdev);

-        if (pc->class_id == PCI_CLASS_BRIDGE_ISA || pc->is_bridge) {
+        /* When hotplug for bridges is enabled, bridges are
+         * described in ACPI separately (see build_pci_bus_end).
+         * In this case they aren't themselves hot-pluggable.
+         */
+        bridge_in_acpi = pc->is_bridge && child->pcihp_bridge_en;
+
+        if (pc->class_id == PCI_CLASS_BRIDGE_ISA || bridge_in_acpi) {
            set_bit(slot, slot_device_system);
        }

@@ -865,7 +905,7 @@ static void build_pci_bus_end(PCIBus *bus, void *bus_state)
            }
        }

-        if (!dc->hotpluggable || pc->is_bridge) {
+        if (!dc->hotpluggable || bridge_in_acpi) {
            clear_bit(slot, slot_hotplug_enable);
        }
    }
@@ -1130,7 +1170,7 @@ build_ssdt(GArray *table_data, GArray *linker,
                bus = PCI_HOST_BRIDGE(pci_host)->bus;
            }

-            build_pci_bus_state_init(&hotplug_state, NULL);
+            build_pci_bus_state_init(&hotplug_state, NULL, pm->pcihp_bridge_en);

            if (bus) {
                /* Scan all PCI buses. Generate tables to support hotplug. */
@@ -1359,7 +1399,7 @@ build_rsdp(GArray *rsdp_table, GArray *linker, unsigned rsdt)
 {
    AcpiRsdpDescriptor *rsdp = acpi_data_push(rsdp_table, sizeof *rsdp);

-    bios_linker_loader_alloc(linker, ACPI_BUILD_RSDP_FILE, 1,
+    bios_linker_loader_alloc(linker, ACPI_BUILD_RSDP_FILE, 16,
                             true /* fseg memory */);

    memcpy(&rsdp->signature, "RSD PTR ", 8);
@@ -1440,13 +1480,14 @@ static
 void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables)
 {
    GArray *table_offsets;
-    unsigned facs, dsdt, rsdt;
+    unsigned facs, ssdt, dsdt, rsdt;
    AcpiCpuInfo cpu;
    AcpiPmInfo pm;
    AcpiMiscInfo misc;
    AcpiMcfgInfo mcfg;
    PcPciInfo pci;
    uint8_t *u;
+    size_t aml_len = 0;

    acpi_get_cpu_info(&cpu);
    acpi_get_pm_info(&pm);
@@ -1474,13 +1515,20 @@ void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables)
    dsdt = tables->table_data->len;
    build_dsdt(tables->table_data, tables->linker, &misc);

+    /* Count the size of the DSDT and SSDT, we will need it for legacy
+     * sizing of ACPI tables.
+     */
+    aml_len += tables->table_data->len - dsdt;
+
    /* ACPI tables pointed to by RSDT */
    acpi_add_table(table_offsets, tables->table_data);
    build_fadt(tables->table_data, tables->linker, &pm, facs, dsdt);

+    ssdt = tables->table_data->len;
    acpi_add_table(table_offsets, tables->table_data);
    build_ssdt(tables->table_data, tables->linker, &cpu, &pm, &misc, &pci,
               guest_info);
+    aml_len += tables->table_data->len - ssdt;

    acpi_add_table(table_offsets, tables->table_data);
    build_madt(tables->table_data, tables->linker, &cpu, guest_info);
@@ -1513,14 +1561,53 @@ void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables)
    /* RSDP is in FSEG memory, so allocate it separately */
    build_rsdp(tables->rsdp, tables->linker, rsdt);

-    /* We'll expose it all to Guest so align size to reduce
+    /* We'll expose it all to Guest so we want to reduce
     * chance of size changes.
     * RSDP is small so it's easy to keep it immutable, no need to
     * bother with alignment.
+     *
+     * We used to align the tables to 4k, but of course this would
+     * too simple to be enough.  4k turned out to be too small an
+     * alignment very soon, and in fact it is almost impossible to
+     * keep the table size stable for all (max_cpus, max_memory_slots)
+     * combinations.  So the table size is always 64k for pc-i440fx-2.1
+     * and we give an error if the table grows beyond that limit.
+     *
+     * We still have the problem of migrating from "-M pc-i440fx-2.0".  For
+     * that, we exploit the fact that QEMU 2.1 generates _smaller_ tables
+     * than 2.0 and we can always pad the smaller tables with zeros.  We can
+     * then use the exact size of the 2.0 tables.
+     *
+     * All this is for PIIX4, since QEMU 2.0 didn't support Q35 migration.
     */
-    acpi_align_size(tables->table_data, 0x1000);
+    if (guest_info->legacy_acpi_table_size) {
+        /* Subtracting aml_len gives the size of fixed tables.  Then add the
+         * size of the PIIX4 DSDT/SSDT in QEMU 2.0.
+         */
+        int legacy_aml_len =
+            guest_info->legacy_acpi_table_size +
+            ACPI_BUILD_LEGACY_CPU_AML_SIZE * max_cpus;
+        int legacy_table_size =
+            ROUND_UP(tables->table_data->len - aml_len + legacy_aml_len,
+                     ACPI_BUILD_ALIGN_SIZE);
+        if (tables->table_data->len > legacy_table_size) {
+            /* Should happen only with PCI bridges and -M pc-i440fx-2.0.  */
+            error_report("Warning: migration may not work.");
+        }
+        g_array_set_size(tables->table_data, legacy_table_size);
+    } else {
+        /* Make sure we have a buffer in case we need to resize the tables. */
+        if (tables->table_data->len > ACPI_BUILD_TABLE_SIZE / 2) {
+            /* As of QEMU 2.1, this fires with 160 VCPUs and 255 memory slots.  */
+            error_report("Warning: ACPI tables are larger than 64k.");
+            error_report("Warning: migration may not work.");
+            error_report("Warning: please remove CPUs, NUMA nodes, "
+                         "memory slots or PCI bridges.");
+        }
+        acpi_align_size(tables->table_data, ACPI_BUILD_TABLE_SIZE);
+    }

-    acpi_align_size(tables->linker, 0x1000);
+    acpi_align_size(tables->linker, ACPI_BUILD_ALIGN_SIZE);

    /* Cleanup memory that's no longer used. */
    g_array_free(table_offsets, true);
--- a/hw/i386/acpi-dsdt.dsl
+++ b/hw/i386/acpi-dsdt.dsl
@@ -181,57 +181,45 @@ DefinitionBlock (

    Scope(\_SB) {
        Scope(PCI0) {
-            Name(_PRT, Package() {
-                /* PCI IRQ routing table, example from ACPI 2.0a specification,
-                   section 6.2.8.1 */
-                /* Note: we provide the same info as the PCI routing
-                   table of the Bochs BIOS */
+            Method (_PRT, 0) {
+                Store(Package(128) {}, Local0)
+                Store(Zero, Local1)
+                While(LLess(Local1, 128)) {
+                    // slot = pin >> 2
+                    Store(ShiftRight(Local1, 2), Local2)

-#define prt_slot(nr, lnk0, lnk1, lnk2, lnk3) \
-    Package() { nr##ffff, 0, lnk0, 0 }, \
-    Package() { nr##ffff, 1, lnk1, 0 }, \
-    Package() { nr##ffff, 2, lnk2, 0 }, \
-    Package() { nr##ffff, 3, lnk3, 0 }
+                    // lnk = (slot + pin) & 3
+                    Store(And(Add(Local1, Local2), 3), Local3)
+                    If (LEqual(Local3, 0)) {
+                        Store(Package(4) { Zero, Zero, LNKD, Zero }, Local4)
+                    }
+                    If (LEqual(Local3, 1)) {
+                        // device 1 is the power-management device, needs SCI
+                        If (LEqual(Local1, 4)) {
+                            Store(Package(4) { Zero, Zero, LNKS, Zero }, Local4)
+                        } Else {
+                            Store(Package(4) { Zero, Zero, LNKA, Zero }, Local4)
+                        }
+                    }
+                    If (LEqual(Local3, 2)) {
+                        Store(Package(4) { Zero, Zero, LNKB, Zero }, Local4)
+                    }
+                    If (LEqual(Local3, 3)) {
+                        Store(Package(4) { Zero, Zero, LNKC, Zero }, Local4)
+                    }

-#define prt_slot0(nr) prt_slot(nr, LNKD, LNKA, LNKB, LNKC)
-#define prt_slot1(nr) prt_slot(nr, LNKA, LNKB, LNKC, LNKD)
-#define prt_slot2(nr) prt_slot(nr, LNKB, LNKC, LNKD, LNKA)
-#define prt_slot3(nr) prt_slot(nr, LNKC, LNKD, LNKA, LNKB)
+                    // Complete the interrupt routing entry:
+                    //    Package(4) { 0x[slot]FFFF, [pin], [link], 0) }

-                prt_slot0(0x0000),
-                /* Device 1 is power mgmt device, and can only use irq 9 */
-                prt_slot(0x0001, LNKS, LNKB, LNKC, LNKD),
-                prt_slot2(0x0002),
-                prt_slot3(0x0003),
-                prt_slot0(0x0004),
-                prt_slot1(0x0005),
-                prt_slot2(0x0006),
-                prt_slot3(0x0007),
-                prt_slot0(0x0008),
-                prt_slot1(0x0009),
-                prt_slot2(0x000a),
-                prt_slot3(0x000b),
-                prt_slot0(0x000c),
-                prt_slot1(0x000d),
-                prt_slot2(0x000e),
-                prt_slot3(0x000f),
-                prt_slot0(0x0010),
-                prt_slot1(0x0011),
-                prt_slot2(0x0012),
-                prt_slot3(0x0013),
-                prt_slot0(0x0014),
-                prt_slot1(0x0015),
-                prt_slot2(0x0016),
-                prt_slot3(0x0017),
-                prt_slot0(0x0018),
-                prt_slot1(0x0019),
-                prt_slot2(0x001a),
-                prt_slot3(0x001b),
-                prt_slot0(0x001c),
-                prt_slot1(0x001d),
-                prt_slot2(0x001e),
-                prt_slot3(0x001f),
-            })
+                    Store(Or(ShiftLeft(Local2, 16), 0xFFFF), Index(Local4, 0))
+                    Store(And(Local1, 3),                    Index(Local4, 1))
+                    Store(Local4,                            Index(Local0, Local1))
+
+                    Increment(Local1)
+                }
+
+                Return(Local0)
+            }
        }

        Field(PCI0.ISA.P40C, ByteAcc, NoLock, Preserve) {
@@ -314,7 +302,7 @@ DefinitionBlock (
 /****************************************************************
 * General purpose events
 ****************************************************************/
-    External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD, MethodObj)
+    External(\_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD, MethodObj)

    Scope(\_GPE) {
        Name(_HID, "ACPI0006")
@@ -333,7 +321,7 @@ DefinitionBlock (
        }
        Method(_E03) {
            // Memory hotplug event
-            \_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD()
+            \_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD()
        }
        Method(_L04) {
        }
--- a/hw/i386/acpi-dsdt.hex.generated
+++ b/hw/i386/acpi-dsdt.hex.generated
--- a/hw/i386/kvm/clock.c
+++ b/hw/i386/kvm/clock.c
@@ -14,10 +14,8 @@
 */

 #include "qemu-common.h"
-#include "qemu/host-utils.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/kvm.h"
-#include "sysemu/cpus.h"
 #include "hw/sysbus.h"
 #include "hw/kvm/clock.h"

@@ -36,48 +34,6 @@ typedef struct KVMClockState {
    bool clock_valid;
 } KVMClockState;

-struct pvclock_vcpu_time_info {
-    uint32_t   version;
-    uint32_t   pad0;
-    uint64_t   tsc_timestamp;
-    uint64_t   system_time;
-    uint32_t   tsc_to_system_mul;
-    int8_t     tsc_shift;
-    uint8_t    flags;
-    uint8_t    pad[2];
-} __attribute__((__packed__)); /* 32 bytes */
-
-static uint64_t kvmclock_current_nsec(KVMClockState *s)
-{
-    CPUState *cpu = first_cpu;
-    CPUX86State *env = cpu->env_ptr;
-    hwaddr kvmclock_struct_pa = env->system_time_msr & ~1ULL;
-    uint64_t migration_tsc = env->tsc;
-    struct pvclock_vcpu_time_info time;
-    uint64_t delta;
-    uint64_t nsec_lo;
-    uint64_t nsec_hi;
-    uint64_t nsec;
-
-    if (!(env->system_time_msr & 1ULL)) {
-        /* KVM clock not active */
-        return 0;
-    }
-
-    cpu_physical_memory_read(kvmclock_struct_pa, &time, sizeof(time));
-
-    assert(time.tsc_timestamp <= migration_tsc);
-    delta = migration_tsc - time.tsc_timestamp;
-    if (time.tsc_shift < 0) {
-        delta >>= -time.tsc_shift;
-    } else {
-        delta <<= time.tsc_shift;
-    }
-
-    mulu64(&nsec_lo, &nsec_hi, delta, time.tsc_to_system_mul);
-    nsec = (nsec_lo >> 32) | (nsec_hi << 32);
-    return nsec + time.system_time;
-}

 static void kvmclock_vm_state_change(void *opaque, int running,
                                     RunState state)
@@ -89,15 +45,9 @@ static void kvmclock_vm_state_change(void *opaque, int running,

    if (running) {
        struct kvm_clock_data data;
-        uint64_t time_at_migration = kvmclock_current_nsec(s);

        s->clock_valid = false;

-	/* We can't rely on the migrated clock value, just discard it */
-	if (time_at_migration) {
-	        s->clock = time_at_migration;
-	}
-
        data.clock = s->clock;
        data.flags = 0;
        ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data);
@@ -125,8 +75,6 @@ static void kvmclock_vm_state_change(void *opaque, int running,
        if (s->clock_valid) {
            return;
        }
-
-        cpu_synchronize_all_states();
        ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
        if (ret < 0) {
            fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -73,7 +73,12 @@
 #endif

 /* Leave a chunk of memory at the top of RAM for the BIOS ACPI tables.  */
-#define ACPI_DATA_SIZE       0x10000
+unsigned acpi_data_size = 0x20000;
+void pc_set_legacy_acpi_data_size(void)
+{
+    acpi_data_size = 0x10000;
+}
+
 #define BIOS_CFG_IOPORT 0x510
 #define FW_CFG_ACPI_TABLES (FW_CFG_ARCH_LOCAL + 0)
 #define FW_CFG_SMBIOS_ENTRIES (FW_CFG_ARCH_LOCAL + 1)
@@ -811,8 +816,9 @@ static void load_linux(FWCfgState *fw_cfg,
        initrd_max = 0x37ffffff;
    }

-    if (initrd_max >= max_ram_size-ACPI_DATA_SIZE)
-    	initrd_max = max_ram_size-ACPI_DATA_SIZE-1;
+    if (initrd_max >= max_ram_size - acpi_data_size) {
+        initrd_max = max_ram_size - acpi_data_size - 1;
+    }

    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr);
    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline)+1);
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -61,6 +61,7 @@ static const int ide_irq[MAX_IDE_BUS] = { 14, 15 };

 static bool has_pci_info;
 static bool has_acpi_build = true;
+static int legacy_acpi_table_size;
 static bool smbios_defaults = true;
 static bool smbios_legacy_mode;
 /* Make sure that guest addresses aligned at 1Gbyte boundaries get mapped to
@@ -114,7 +115,7 @@ static void pc_init1(MachineState *machine,
        lowmem = 0xe0000000;
    }

-    /* Handle the machine opt max-ram-below-4g.  It is basicly doing
+    /* Handle the machine opt max-ram-below-4g.  It is basically doing
     * min(qemu limit, user limit).
     */
    if (lowmem > pc_machine->max_ram_below_4g) {
@@ -163,6 +164,7 @@ static void pc_init1(MachineState *machine,
    guest_info = pc_guest_info_init(below_4g_mem_size, above_4g_mem_size);

    guest_info->has_acpi_build = has_acpi_build;
+    guest_info->legacy_acpi_table_size = legacy_acpi_table_size;

    guest_info->has_pci_info = has_pci_info;
    guest_info->isapc_ram_fw = !pci_enabled;
@@ -297,8 +299,26 @@ static void pc_init_pci(MachineState *machine)

 static void pc_compat_2_0(MachineState *machine)
 {
+    /* This value depends on the actual DSDT and SSDT compiled into
+     * the source QEMU; unfortunately it depends on the binary and
+     * not on the machine type, so we cannot make pc-i440fx-1.7 work on
+     * both QEMU 1.7 and QEMU 2.0.
+     *
+     * Large variations cause migration to fail for more than one
+     * consecutive value of the "-smp" maxcpus option.
+     *
+     * For small variations of the kind caused by different iasl versions,
+     * the 4k rounding usually leaves slack.  However, there could be still
+     * one or two values that break.  For QEMU 1.7 and QEMU 2.0 the
+     * slack is only ~10 bytes before one "-smp maxcpus" value breaks!
+     *
+     * 6652 is valid for QEMU 2.0, the right value for pc-i440fx-1.7 on
+     * QEMU 1.7 it is 6414.  For RHEL/CentOS 7.0 it is 6418.
+     */
+    legacy_acpi_table_size = 6652;
    smbios_legacy_mode = true;
    has_reserved_memory = false;
+    pc_set_legacy_acpi_data_size();
 }

 static void pc_compat_1_7(MachineState *machine)
@@ -307,6 +327,7 @@ static void pc_compat_1_7(MachineState *machine)
    smbios_defaults = false;
    gigabyte_align = false;
    option_rom_has_mr = true;
+    legacy_acpi_table_size = 6414;
    x86_cpu_compat_disable_kvm_features(FEAT_1_ECX, CPUID_EXT_X2APIC);
 }

@@ -386,14 +407,10 @@ static void pc_init_pci_1_2(MachineState *machine)
    pc_init_pci(machine);
 }

-/* PC init function for pc-0.10 to pc-0.13, and reused by xenfv */
+/* PC init function for pc-0.10 to pc-0.13 */
 static void pc_init_pci_no_kvmclock(MachineState *machine)
 {
-    has_pci_info = false;
-    has_acpi_build = false;
-    smbios_defaults = false;
-    x86_cpu_compat_disable_kvm_features(FEAT_KVM, KVM_FEATURE_PV_EOI);
-    enable_compat_apic_id_mode();
+    pc_compat_1_2(machine);
    pc_init1(machine, 1, 0);
 }

@@ -402,6 +419,11 @@ static void pc_init_isa(MachineState *machine)
    has_pci_info = false;
    has_acpi_build = false;
    smbios_defaults = false;
+    gigabyte_align = false;
+    smbios_legacy_mode = true;
+    has_reserved_memory = false;
+    option_rom_has_mr = true;
+    rom_file_has_mr = false;
    if (!machine->cpu_model) {
        machine->cpu_model = "486";
    }
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -103,7 +103,7 @@ static void pc_q35_init(MachineState *machine)
        lowmem = 0xb0000000;
    }

-    /* Handle the machine opt max-ram-below-4g.  It is basicly doing
+    /* Handle the machine opt max-ram-below-4g.  It is basically doing
     * min(qemu limit, user limit).
     */
    if (lowmem > pc_machine->max_ram_below_4g) {
@@ -155,6 +155,11 @@ static void pc_q35_init(MachineState *machine)
    guest_info->has_acpi_build = has_acpi_build;
    guest_info->has_reserved_memory = has_reserved_memory;

+    /* Migration was not supported in 2.0 for Q35, so do not bother
+     * with this hack (see hw/i386/acpi-build.c).
+     */
+    guest_info->legacy_acpi_table_size = 0;
+
    if (smbios_defaults) {
        MachineClass *mc = MACHINE_GET_CLASS(machine);
        /* These values are guest ABI, do not change */
@@ -277,6 +282,7 @@ static void pc_compat_2_0(MachineState *machine)
 {
    smbios_legacy_mode = true;
    has_reserved_memory = false;
+    pc_set_legacy_acpi_data_size();
 }

 static void pc_compat_1_7(MachineState *machine)
@@ -361,7 +367,7 @@ static QEMUMachine pc_q35_machine_v2_0 = {
    .name = "pc-q35-2.0",
    .init = pc_q35_init_2_0,
    .compat_props = (GlobalProperty[]) {
-        PC_Q35_COMPAT_2_0,
+        PC_COMPAT_2_0,
        { /* end of list */ }
    },
 };
@@ -373,7 +379,7 @@ static QEMUMachine pc_q35_machine_v1_7 = {
    .name = "pc-q35-1.7",
    .init = pc_q35_init_1_7,
    .compat_props = (GlobalProperty[]) {
-        PC_Q35_COMPAT_1_7,
+        PC_COMPAT_1_7,
        { /* end of list */ }
    },
 };
@@ -385,7 +391,7 @@ static QEMUMachine pc_q35_machine_v1_6 = {
    .name = "pc-q35-1.6",
    .init = pc_q35_init_1_6,
    .compat_props = (GlobalProperty[]) {
-        PC_Q35_COMPAT_1_6,
+        PC_COMPAT_1_6,
        { /* end of list */ }
    },
 };
@@ -395,7 +401,7 @@ static QEMUMachine pc_q35_machine_v1_5 = {
    .name = "pc-q35-1.5",
    .init = pc_q35_init_1_5,
    .compat_props = (GlobalProperty[]) {
-        PC_Q35_COMPAT_1_5,
+        PC_COMPAT_1_5,
        { /* end of list */ }
    },
 };
@@ -409,7 +415,7 @@ static QEMUMachine pc_q35_machine_v1_4 = {
    .name = "pc-q35-1.4",
    .init = pc_q35_init_1_4,
    .compat_props = (GlobalProperty[]) {
-        PC_Q35_COMPAT_1_4,
+        PC_COMPAT_1_4,
        { /* end of list */ }
    },
 };
--- a/hw/i386/q35-acpi-dsdt.dsl
+++ b/hw/i386/q35-acpi-dsdt.dsl
@@ -410,7 +410,7 @@ DefinitionBlock (
 /****************************************************************
 * General purpose events
 ****************************************************************/
-    External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD, MethodObj)
+    External(\_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD, MethodObj)

    Scope(\_GPE) {
        Name(_HID, "ACPI0006")
@@ -425,7 +425,7 @@ DefinitionBlock (
        }
        Method(_E03) {
            // Memory hotplug event
-            \_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD()
+            \_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD()
        }
        Method(_L04) {
        }
--- a/hw/i386/ssdt-mem.dsl
+++ b/hw/i386/ssdt-mem.dsl
@@ -39,10 +39,10 @@ ACPI_EXTRACT_ALL_CODE ssdm_mem_aml
 DefinitionBlock ("ssdt-mem.aml", "SSDT", 0x02, "BXPC", "CSSDT", 0x1)
 {

-    External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_CRS_METHOD, MethodObj)
-    External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_STATUS_METHOD, MethodObj)
-    External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_OST_METHOD, MethodObj)
-    External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_PROXIMITY_METHOD, MethodObj)
+    External(\_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_CRS_METHOD, MethodObj)
+    External(\_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_STATUS_METHOD, MethodObj)
+    External(\_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_OST_METHOD, MethodObj)
+    External(\_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_PROXIMITY_METHOD, MethodObj)

    Scope(\_SB) {
 /*  v------------------ DO NOT EDIT ------------------v */
@@ -58,19 +58,19 @@ DefinitionBlock ("ssdt-mem.aml", "SSDT", 0x02, "BXPC", "CSSDT", 0x1)
            Name(_HID, EISAID("PNP0C80"))

            Method(_CRS, 0) {
-                Return(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_CRS_METHOD(_UID))
+                Return(\_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_CRS_METHOD(_UID))
            }

            Method(_STA, 0) {
-                Return(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_STATUS_METHOD(_UID))
+                Return(\_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_STATUS_METHOD(_UID))
            }

            Method(_PXM, 0) {
-                Return(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_PROXIMITY_METHOD(_UID))
+                Return(\_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_PROXIMITY_METHOD(_UID))
            }

            Method(_OST, 3) {
-                \_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_OST_METHOD(_UID, Arg0, Arg1, Arg2)
+                \_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_OST_METHOD(_UID, Arg0, Arg1, Arg2)
            }
        }
    }
--- a/hw/i386/ssdt-misc.dsl
+++ b/hw/i386/ssdt-misc.dsl
@@ -120,7 +120,7 @@ DefinitionBlock ("ssdt-misc.aml", "SSDT", 0x01, "BXPC", "BXSSDTSUSP", 0x1)

    External(MEMORY_SLOT_NOTIFY_METHOD, MethodObj)
    Scope(\_SB.PCI0) {
-        Device(MEMORY_HOPTLUG_DEVICE) {
+        Device(MEMORY_HOTPLUG_DEVICE) {
            Name(_HID, "PNP0A06")
            Name(_UID, "Memory hotplug resources")

--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -175,17 +175,18 @@ static void ahci_trigger_irq(AHCIState *s, AHCIDevice *d,
    ahci_check_irq(s);
 }

-static void map_page(uint8_t **ptr, uint64_t addr, uint32_t wanted)
+static void map_page(AddressSpace *as, uint8_t **ptr, uint64_t addr,
+                     uint32_t wanted)
 {
    hwaddr len = wanted;

    if (*ptr) {
-        cpu_physical_memory_unmap(*ptr, len, 1, len);
+        dma_memory_unmap(as, *ptr, len, DMA_DIRECTION_FROM_DEVICE, len);
    }

-    *ptr = cpu_physical_memory_map(addr, &len, 1);
+    *ptr = dma_memory_map(as, addr, &len, DMA_DIRECTION_FROM_DEVICE);
    if (len < wanted) {
-        cpu_physical_memory_unmap(*ptr, len, 1, len);
+        dma_memory_unmap(as, *ptr, len, DMA_DIRECTION_FROM_DEVICE, len);
        *ptr = NULL;
    }
 }
@@ -198,24 +199,24 @@ static void  ahci_port_write(AHCIState *s, int port, int offset, uint32_t val)
    switch (offset) {
        case PORT_LST_ADDR:
            pr->lst_addr = val;
-            map_page(&s->dev[port].lst,
+            map_page(s->as, &s->dev[port].lst,
                     ((uint64_t)pr->lst_addr_hi << 32) | pr->lst_addr, 1024);
            s->dev[port].cur_cmd = NULL;
            break;
        case PORT_LST_ADDR_HI:
            pr->lst_addr_hi = val;
-            map_page(&s->dev[port].lst,
+            map_page(s->as, &s->dev[port].lst,
                     ((uint64_t)pr->lst_addr_hi << 32) | pr->lst_addr, 1024);
            s->dev[port].cur_cmd = NULL;
            break;
        case PORT_FIS_ADDR:
            pr->fis_addr = val;
-            map_page(&s->dev[port].res_fis,
+            map_page(s->as, &s->dev[port].res_fis,
                     ((uint64_t)pr->fis_addr_hi << 32) | pr->fis_addr, 256);
            break;
        case PORT_FIS_ADDR_HI:
            pr->fis_addr_hi = val;
-            map_page(&s->dev[port].res_fis,
+            map_page(s->as, &s->dev[port].res_fis,
                     ((uint64_t)pr->fis_addr_hi << 32) | pr->fis_addr, 256);
            break;
        case PORT_IRQ_STAT:
@@ -639,6 +640,11 @@ static void ahci_write_fis_d2h(AHCIDevice *ad, uint8_t *cmd_fis)
    }
 }

+static int prdt_tbl_entry_size(const AHCI_SG *tbl)
+{
+    return (le32_to_cpu(tbl->flags_size) & AHCI_PRDT_SIZE_MASK) + 1;
+}
+
 static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist, int offset)
 {
    AHCICmdHdr *cmd = ad->cur_cmd;
@@ -681,7 +687,7 @@ static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist, int offset)
        sum = 0;
        for (i = 0; i < sglist_alloc_hint; i++) {
            /* flags_size is zero-based */
-            tbl_entry_size = (le32_to_cpu(tbl[i].flags_size) + 1);
+            tbl_entry_size = prdt_tbl_entry_size(&tbl[i]);
            if (offset <= (sum + tbl_entry_size)) {
                off_idx = i;
                off_pos = offset - sum;
@@ -700,12 +706,12 @@ static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist, int offset)
        qemu_sglist_init(sglist, qbus->parent, (sglist_alloc_hint - off_idx),
                         ad->hba->as);
        qemu_sglist_add(sglist, le64_to_cpu(tbl[off_idx].addr + off_pos),
-                        le32_to_cpu(tbl[off_idx].flags_size) + 1 - off_pos);
+                        prdt_tbl_entry_size(&tbl[off_idx]) - off_pos);

        for (i = off_idx + 1; i < sglist_alloc_hint; i++) {
            /* flags_size is zero-based */
            qemu_sglist_add(sglist, le64_to_cpu(tbl[i].addr),
-                            le32_to_cpu(tbl[i].flags_size) + 1);
+                            prdt_tbl_entry_size(&tbl[i]));
        }
    }

@@ -1260,9 +1266,9 @@ static int ahci_state_post_load(void *opaque, int version_id)
        ad = &s->dev[i];
        AHCIPortRegs *pr = &ad->port_regs;

-        map_page(&ad->lst,
+        map_page(s->as, &ad->lst,
                 ((uint64_t)pr->lst_addr_hi << 32) | pr->lst_addr, 1024);
-        map_page(&ad->res_fis,
+        map_page(s->as, &ad->res_fis,
                 ((uint64_t)pr->fis_addr_hi << 32) | pr->fis_addr, 256);
        /*
         * All pending i/o should be flushed out on a migrate. However,
--- a/hw/ide/ahci.h
+++ b/hw/ide/ahci.h
@@ -201,6 +201,8 @@

 #define AHCI_COMMAND_TABLE_ACMD            0x40

+#define AHCI_PRDT_SIZE_MASK                0x3fffff
+
 #define IDE_FEATURE_DMA                    1

 #define READ_FPDMA_QUEUED                  0x60
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -499,6 +499,18 @@ static void ide_rw_error(IDEState *s) {
    ide_set_irq(s->bus);
 }

+static bool ide_sect_range_ok(IDEState *s,
+                              uint64_t sector, uint64_t nb_sectors)
+{
+    uint64_t total_sectors;
+
+    bdrv_get_geometry(s->bs, &total_sectors);
+    if (sector > total_sectors || nb_sectors > total_sectors - sector) {
+        return false;
+    }
+    return true;
+}
+
 static void ide_sector_read_cb(void *opaque, int ret)
 {
    IDEState *s = opaque;
@@ -554,6 +566,11 @@ void ide_sector_read(IDEState *s)
    printf("sector=%" PRId64 "\n", sector_num);
 #endif

+    if (!ide_sect_range_ok(s, sector_num, n)) {
+        ide_rw_error(s);
+        return;
+    }
+
    s->iov.iov_base = s->io_buffer;
    s->iov.iov_len  = n * BDRV_SECTOR_SIZE;
    qemu_iovec_init_external(&s->qiov, &s->iov, 1);
@@ -671,6 +688,13 @@ void ide_dma_cb(void *opaque, int ret)
           sector_num, n, s->dma_cmd);
 #endif

+    if ((s->dma_cmd == IDE_DMA_READ || s->dma_cmd == IDE_DMA_WRITE) &&
+        !ide_sect_range_ok(s, sector_num, n)) {
+        dma_buf_commit(s);
+        ide_dma_error(s);
+        return;
+    }
+
    switch (s->dma_cmd) {
    case IDE_DMA_READ:
        s->bus->dma->aiocb = dma_bdrv_read(s->bs, &s->sg, sector_num,
@@ -790,6 +814,11 @@ void ide_sector_write(IDEState *s)
        n = s->req_nb_sectors;
    }

+    if (!ide_sect_range_ok(s, sector_num, n)) {
+        ide_rw_error(s);
+        return;
+    }
+
    s->iov.iov_base = s->io_buffer;
    s->iov.iov_len  = n * BDRV_SECTOR_SIZE;
    qemu_iovec_init_external(&s->qiov, &s->iov, 1);
--- a/hw/ide/microdrive.c
+++ b/hw/ide/microdrive.c
@@ -593,7 +593,7 @@ static void microdrive_realize(DeviceState *dev, Error **errp)
 {
    MicroDriveState *md = MICRODRIVE(dev);

-    ide_init2(&md->bus, qemu_allocate_irqs(md_set_irq, md, 1)[0]);
+    ide_init2(&md->bus, qemu_allocate_irq(md_set_irq, md, 0));
 }

 static void microdrive_init(Object *obj)
--- a/hw/input/hid.c
+++ b/hw/input/hid.c
@@ -124,7 +124,7 @@ static void hid_pointer_event(DeviceState *dev, QemuConsole *src,
        if (evt->rel->axis == INPUT_AXIS_X) {
            e->xdx += evt->rel->value;
        } else if (evt->rel->axis == INPUT_AXIS_Y) {
-            e->ydy -= evt->rel->value;
+            e->ydy += evt->rel->value;
        }
        break;

@@ -191,7 +191,7 @@ static void hid_pointer_sync(DeviceState *dev)
        if (hs->kind == HID_MOUSE) {
            prev->xdx += curr->xdx;
            curr->xdx = 0;
-            prev->ydy -= curr->ydy;
+            prev->ydy += curr->ydy;
            curr->ydy = 0;
        } else {
            prev->xdx = curr->xdx;
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -437,7 +437,7 @@ static void ics_set_irq(void *opaque, int srcno, int val)
 {
    ICSState *ics = (ICSState *)opaque;

-    if (ics->islsi[srcno]) {
+    if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_LSI) {
        set_irq_lsi(ics, srcno, val);
    } else {
        set_irq_msi(ics, srcno, val);
@@ -474,7 +474,7 @@ static void ics_write_xive(ICSState *ics, int nr, int server,

    trace_xics_ics_write_xive(nr, srcno, server, priority);

-    if (ics->islsi[srcno]) {
+    if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_LSI) {
        write_xive_lsi(ics, srcno);
    } else {
        write_xive_msi(ics, srcno);
@@ -496,7 +496,7 @@ static void ics_resend(ICSState *ics)

    for (i = 0; i < ics->nr_irqs; i++) {
        /* FIXME: filter by server#? */
-        if (ics->islsi[i]) {
+        if (ics->irqs[i].flags & XICS_FLAGS_IRQ_LSI) {
            resend_lsi(ics, i);
        } else {
            resend_msi(ics, i);
@@ -511,7 +511,7 @@ static void ics_eoi(ICSState *ics, int nr)

    trace_xics_ics_eoi(nr);

-    if (ics->islsi[srcno]) {
+    if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_LSI) {
        irq->status &= ~XICS_STATUS_SENT;
    }
 }
@@ -520,11 +520,18 @@ static void ics_reset(DeviceState *dev)
 {
    ICSState *ics = ICS(dev);
    int i;
+    uint8_t flags[ics->nr_irqs];
+
+    for (i = 0; i < ics->nr_irqs; i++) {
+        flags[i] = ics->irqs[i].flags;
+    }

    memset(ics->irqs, 0, sizeof(ICSIRQState) * ics->nr_irqs);
+
    for (i = 0; i < ics->nr_irqs; i++) {
        ics->irqs[i].priority = 0xff;
        ics->irqs[i].saved_priority = 0xff;
+        ics->irqs[i].flags = flags[i];
    }
 }

@@ -563,13 +570,14 @@ static int ics_dispatch_post_load(void *opaque, int version_id)

 static const VMStateDescription vmstate_ics_irq = {
    .name = "ics/irq",
-    .version_id = 1,
+    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(server, ICSIRQState),
        VMSTATE_UINT8(priority, ICSIRQState),
        VMSTATE_UINT8(saved_priority, ICSIRQState),
        VMSTATE_UINT8(status, ICSIRQState),
+        VMSTATE_UINT8(flags, ICSIRQState),
        VMSTATE_END_OF_LIST()
    },
 };
@@ -606,7 +614,6 @@ static void ics_realize(DeviceState *dev, Error **errp)
        return;
    }
    ics->irqs = g_malloc0(ics->nr_irqs * sizeof(ICSIRQState));
-    ics->islsi = g_malloc0(ics->nr_irqs * sizeof(bool));
    ics->qirqs = qemu_allocate_irqs(ics_set_irq, ics, ics->nr_irqs);
 }

@@ -633,21 +640,166 @@ static const TypeInfo ics_info = {
 /*
 * Exported functions
 */
+static int xics_find_source(XICSState *icp, int irq)
+{
+    int sources = 1;
+    int src;
+
+    /* FIXME: implement multiple sources */
+    for (src = 0; src < sources; ++src) {
+        ICSState *ics = &icp->ics[src];
+        if (ics_valid_irq(ics, irq)) {
+            return src;
+        }
+    }
+
+    return -1;
+}

 qemu_irq xics_get_qirq(XICSState *icp, int irq)
 {
-    if (!ics_valid_irq(icp->ics, irq)) {
-        return NULL;
+    int src = xics_find_source(icp, irq);
+
+    if (src >= 0) {
+        ICSState *ics = &icp->ics[src];
+        return ics->qirqs[irq - ics->offset];
    }

-    return icp->ics->qirqs[irq - icp->ics->offset];
+    return NULL;
+}
+
+static void ics_set_irq_type(ICSState *ics, int srcno, bool lsi)
+{
+    assert(!(ics->irqs[srcno].flags & XICS_FLAGS_IRQ_MASK));
+
+    ics->irqs[srcno].flags |=
+        lsi ? XICS_FLAGS_IRQ_LSI : XICS_FLAGS_IRQ_MSI;
 }

 void xics_set_irq_type(XICSState *icp, int irq, bool lsi)
 {
-    assert(ics_valid_irq(icp->ics, irq));
+    int src = xics_find_source(icp, irq);
+    ICSState *ics;

-    icp->ics->islsi[irq - icp->ics->offset] = lsi;
+    assert(src >= 0);
+
+    ics = &icp->ics[src];
+    ics_set_irq_type(ics, irq - ics->offset, lsi);
+}
+
+#define ICS_IRQ_FREE(ics, srcno)   \
+    (!((ics)->irqs[(srcno)].flags & (XICS_FLAGS_IRQ_MASK)))
+
+static int ics_find_free_block(ICSState *ics, int num, int alignnum)
+{
+    int first, i;
+
+    for (first = 0; first < ics->nr_irqs; first += alignnum) {
+        if (num > (ics->nr_irqs - first)) {
+            return -1;
+        }
+        for (i = first; i < first + num; ++i) {
+            if (!ICS_IRQ_FREE(ics, i)) {
+                break;
+            }
+        }
+        if (i == (first + num)) {
+            return first;
+        }
+    }
+
+    return -1;
+}
+
+int xics_alloc(XICSState *icp, int src, int irq_hint, bool lsi)
+{
+    ICSState *ics = &icp->ics[src];
+    int irq;
+
+    if (irq_hint) {
+        assert(src == xics_find_source(icp, irq_hint));
+        if (!ICS_IRQ_FREE(ics, irq_hint - ics->offset)) {
+            trace_xics_alloc_failed_hint(src, irq_hint);
+            return -1;
+        }
+        irq = irq_hint;
+    } else {
+        irq = ics_find_free_block(ics, 1, 1);
+        if (irq < 0) {
+            trace_xics_alloc_failed_no_left(src);
+            return -1;
+        }
+        irq += ics->offset;
+    }
+
+    ics_set_irq_type(ics, irq - ics->offset, lsi);
+    trace_xics_alloc(src, irq);
+
+    return irq;
+}
+
+/*
+ * Allocate block of consequtive IRQs, returns a number of the first.
+ * If align==true, aligns the first IRQ number to num.
+ */
+int xics_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align)
+{
+    int i, first = -1;
+    ICSState *ics = &icp->ics[src];
+
+    assert(src == 0);
+    /*
+     * MSIMesage::data is used for storing VIRQ so
+     * it has to be aligned to num to support multiple
+     * MSI vectors. MSI-X is not affected by this.
+     * The hint is used for the first IRQ, the rest should
+     * be allocated continuously.
+     */
+    if (align) {
+        assert((num == 1) || (num == 2) || (num == 4) ||
+               (num == 8) || (num == 16) || (num == 32));
+        first = ics_find_free_block(ics, num, num);
+    } else {
+        first = ics_find_free_block(ics, num, 1);
+    }
+
+    if (first >= 0) {
+        for (i = first; i < first + num; ++i) {
+            ics_set_irq_type(ics, i, lsi);
+        }
+    }
+    first += ics->offset;
+
+    trace_xics_alloc_block(src, first, num, lsi, align);
+
+    return first;
+}
+
+static void ics_free(ICSState *ics, int srcno, int num)
+{
+    int i;
+
+    for (i = srcno; i < srcno + num; ++i) {
+        if (ICS_IRQ_FREE(ics, i)) {
+            trace_xics_ics_free_warn(ics - ics->icp->ics, i + ics->offset);
+        }
+        memset(&ics->irqs[i], 0, sizeof(ICSIRQState));
+    }
+}
+
+void xics_free(XICSState *icp, int irq, int num)
+{
+    int src = xics_find_source(icp, irq);
+
+    if (src >= 0) {
+        ICSState *ics = &icp->ics[src];
+
+        /* FIXME: implement multiple sources */
+        assert(src == 0);
+
+        trace_xics_ics_free(ics - icp->ics, irq, num);
+        ics_free(ics, irq - ics->offset, num);
+    }
 }

 /*
@@ -866,10 +1018,10 @@ static void xics_realize(DeviceState *dev, Error **errp)
    }

    /* Registration of global state belongs into realize */
-    spapr_rtas_register("ibm,set-xive", rtas_set_xive);
-    spapr_rtas_register("ibm,get-xive", rtas_get_xive);
-    spapr_rtas_register("ibm,int-off", rtas_int_off);
-    spapr_rtas_register("ibm,int-on", rtas_int_on);
+    spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_set_xive);
+    spapr_rtas_register(RTAS_IBM_GET_XIVE, "ibm,get-xive", rtas_get_xive);
+    spapr_rtas_register(RTAS_IBM_INT_OFF, "ibm,int-off", rtas_int_off);
+    spapr_rtas_register(RTAS_IBM_INT_ON, "ibm,int-on", rtas_int_on);

    spapr_register_hypercall(H_CPPR, h_cppr);
    spapr_register_hypercall(H_IPI, h_ipi);
--- a/hw/intc/xics_kvm.c
+++ b/hw/intc/xics_kvm.c
@@ -38,10 +38,6 @@
 typedef struct KVMXICSState {
    XICSState parent_obj;

-    uint32_t set_xive_token;
-    uint32_t get_xive_token;
-    uint32_t int_off_token;
-    uint32_t int_on_token;
    int kernel_xics_fd;
 } KVMXICSState;

@@ -224,7 +220,7 @@ static int ics_set_kvm_state(ICSState *ics, int version_id)
            state |= KVM_XICS_MASKED;
        }

-        if (ics->islsi[i]) {
+        if (ics->irqs[i].flags & XICS_FLAGS_IRQ_LSI) {
            state |= KVM_XICS_LEVEL_SENSITIVE;
            if (irq->status & XICS_STATUS_ASSERTED) {
                state |= KVM_XICS_PENDING;
@@ -253,7 +249,7 @@ static void ics_kvm_set_irq(void *opaque, int srcno, int val)
    int rc;

    args.irq = srcno + ics->offset;
-    if (!ics->islsi[srcno]) {
+    if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_MSI) {
        if (!val) {
            return;
        }
@@ -271,11 +267,18 @@ static void ics_kvm_reset(DeviceState *dev)
 {
    ICSState *ics = ICS(dev);
    int i;
+    uint8_t flags[ics->nr_irqs];
+
+    for (i = 0; i < ics->nr_irqs; i++) {
+        flags[i] = ics->irqs[i].flags;
+    }

    memset(ics->irqs, 0, sizeof(ICSIRQState) * ics->nr_irqs);
+
    for (i = 0; i < ics->nr_irqs; i++) {
        ics->irqs[i].priority = 0xff;
        ics->irqs[i].saved_priority = 0xff;
+        ics->irqs[i].flags = flags[i];
    }

    ics_set_kvm_state(ics, 1);
@@ -290,7 +293,6 @@ static void ics_kvm_realize(DeviceState *dev, Error **errp)
        return;
    }
    ics->irqs = g_malloc0(ics->nr_irqs * sizeof(ICSIRQState));
-    ics->islsi = g_malloc0(ics->nr_irqs * sizeof(bool));
    ics->qirqs = qemu_allocate_irqs(ics_kvm_set_irq, ics, ics->nr_irqs);
 }

@@ -392,32 +394,30 @@ static void xics_kvm_realize(DeviceState *dev, Error **errp)
        goto fail;
    }

-    icpkvm->set_xive_token = spapr_rtas_register("ibm,set-xive", rtas_dummy);
-    icpkvm->get_xive_token = spapr_rtas_register("ibm,get-xive", rtas_dummy);
-    icpkvm->int_off_token = spapr_rtas_register("ibm,int-off", rtas_dummy);
-    icpkvm->int_on_token = spapr_rtas_register("ibm,int-on", rtas_dummy);
+    spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_dummy);
+    spapr_rtas_register(RTAS_IBM_GET_XIVE, "ibm,get-xive", rtas_dummy);
+    spapr_rtas_register(RTAS_IBM_INT_OFF, "ibm,int-off", rtas_dummy);
+    spapr_rtas_register(RTAS_IBM_INT_ON, "ibm,int-on", rtas_dummy);

-    rc = kvmppc_define_rtas_kernel_token(icpkvm->set_xive_token,
-                                         "ibm,set-xive");
+    rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_SET_XIVE, "ibm,set-xive");
    if (rc < 0) {
        error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,set-xive");
        goto fail;
    }

-    rc = kvmppc_define_rtas_kernel_token(icpkvm->get_xive_token,
-                                         "ibm,get-xive");
+    rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_GET_XIVE, "ibm,get-xive");
    if (rc < 0) {
        error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,get-xive");
        goto fail;
    }

-    rc = kvmppc_define_rtas_kernel_token(icpkvm->int_on_token, "ibm,int-on");
+    rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_INT_ON, "ibm,int-on");
    if (rc < 0) {
        error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,int-on");
        goto fail;
    }

-    rc = kvmppc_define_rtas_kernel_token(icpkvm->int_off_token, "ibm,int-off");
+    rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_INT_OFF, "ibm,int-off");
    if (rc < 0) {
        error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,int-off");
        goto fail;
--- a/hw/ipack/ipack.c
+++ b/hw/ipack/ipack.c
@@ -66,7 +66,7 @@ static void ipack_device_unrealize(DeviceState *dev, Error **errp)
        return;
    }

-    qemu_free_irqs(idev->irq);
+    qemu_free_irqs(idev->irq, 2);
 }

 static Property ipack_device_props[] = {
--- a/hw/mem/pc-dimm.c
+++ b/hw/mem/pc-dimm.c
@@ -146,7 +146,13 @@ uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
    uint64_t new_addr, ret = 0;
    uint64_t address_space_end = address_space_start + address_space_size;

-    assert(address_space_end > address_space_size);
+    if (!address_space_size) {
+        error_setg(errp, "memory hotplug is not enabled, "
+                         "please add maxmem option");
+        goto out;
+    }
+
+    assert(address_space_end > address_space_start);
    object_child_foreach(qdev_get_machine(), pc_dimm_built_list, &list);

    if (hint) {
@@ -246,6 +252,12 @@ static void pc_dimm_realize(DeviceState *dev, Error **errp)
        error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property is not set");
        return;
    }
+    if (dimm->node >= nb_numa_nodes) {
+        error_setg(errp, "'DIMM property " PC_DIMM_NODE_PROP " has value %"
+                   PRIu32 "' which exceeds the number of numa nodes: %d",
+                   dimm->node, nb_numa_nodes);
+        return;
+    }
 }

 static MemoryRegion *pc_dimm_get_memory_region(PCDIMMDevice *dimm)
--- a/hw/mips/mips_malta.c
+++ b/hw/mips/mips_malta.c
@@ -792,9 +792,23 @@ static int64_t load_kernel (void)
                loaderparams.kernel_filename);
        exit(1);
    }
+
+    /* Sanity check where the kernel has been linked */
    if (kvm_enabled()) {
+        if (kernel_entry & 0x80000000ll) {
+            error_report("KVM guest kernels must be linked in useg. "
+                         "Did you forget to enable CONFIG_KVM_GUEST?");
+            exit(1);
+        }
+
        xlate_to_kseg0 = cpu_mips_kvm_um_phys_to_kseg0;
    } else {
+        if (!(kernel_entry & 0x80000000ll)) {
+            error_report("KVM guest kernels aren't supported with TCG. "
+                         "Did you unintentionally enable CONFIG_KVM_GUEST?");
+            exit(1);
+        }
+
        xlate_to_kseg0 = cpu_mips_phys_to_kseg0;
    }

@@ -1028,7 +1042,7 @@ void mips_malta_init(MachineState *machine)
    fl_idx++;
    if (kernel_filename) {
        ram_low_size = MIN(ram_size, 256 << 20);
-        /* For KVM T&E we reserve 1MB of RAM for running bootloader */
+        /* For KVM we reserve 1MB of RAM for running bootloader */
        if (kvm_enabled()) {
            ram_low_size -= 0x100000;
            bootloader_run_addr = 0x40000000 + ram_low_size;
@@ -1052,10 +1066,10 @@ void mips_malta_init(MachineState *machine)
                             bootloader_run_addr, kernel_entry);
        }
    } else {
-        /* The flash region isn't executable from a KVM T&E guest */
+        /* The flash region isn't executable from a KVM guest */
        if (kvm_enabled()) {
            error_report("KVM enabled but no -kernel argument was specified. "
-                         "Booting from flash is not supported with KVM T&E.");
+                         "Booting from flash is not supported with KVM.");
            exit(1);
        }
        /* Load firmware from flash. */
--- a/hw/misc/cbus.c
+++ b/hw/misc/cbus.c
@@ -135,9 +135,9 @@ CBus *cbus_init(qemu_irq dat)
    CBusPriv *s = (CBusPriv *) g_malloc0(sizeof(*s));

    s->dat_out = dat;
-    s->cbus.clk = qemu_allocate_irqs(cbus_clk, s, 1)[0];
-    s->cbus.dat = qemu_allocate_irqs(cbus_dat, s, 1)[0];
-    s->cbus.sel = qemu_allocate_irqs(cbus_sel, s, 1)[0];
+    s->cbus.clk = qemu_allocate_irq(cbus_clk, s, 0);
+    s->cbus.dat = qemu_allocate_irq(cbus_dat, s, 0);
+    s->cbus.sel = qemu_allocate_irq(cbus_sel, s, 0);

    s->sel = 1;
    s->clk = 0;
--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .0.50
 .1.1