Compare commits

...

286 Commits

Author SHA1 Message Date
Yang Hongyang
4d77b1f238 vnc: fix bug: vnc server can't start when 'to' is specified
Commit e0d03b8ceb converted VNC startup to use SocketAddress. The
socket_listen interface doesn't have a port_offset parameter, so
we need to add the port offset (5900) to both the 'port' and 'to'
options. Currently only 'port' has the offset added.
This patch adds the port offset to the 'to' option as well.
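
A minimal, self-contained sketch of the intended arithmetic (not QEMU's
actual code; the option values and variable names are illustrative): both
the lower and the upper bound of the display range get the 5900 offset
before being handed to the socket layer.

  #include <stdio.h>
  #include <stdlib.h>

  int main(void)
  {
      const char *port_opt = "1";            /* e.g. -vnc :1,to=9 */
      const char *to_opt   = "9";

      long port = strtol(port_opt, NULL, 10) + 5900;
      long to   = strtol(to_opt,   NULL, 10) + 5900;  /* previously missing */

      printf("listen on ports %ld..%ld\n", port, to);
      return 0;
  }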

Signed-off-by: Yang Hongyang <hongyang.yang@easystack.cn>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 1445926252-14830-1-git-send-email-hongyang.yang@easystack.cn
Cc: Daniel P. Berrange <berrange@redhat.com>
Cc: Eric Blake <eblake@redhat.com>
Cc: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2015-11-03 10:21:49 +01:00
Peter Lieven
de3f7de7f4 vnc: allow fall back to RAW encoding
I have observed that, depending on the contents and the encoding, sending
data as RAW sometimes takes less space than the encoded data.
This is especially the case for small updates or areas with high-color images.
If sending RAW data is beneficial, allow a fall back to RAW encoding
for the framebuffer update.
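
A hedged sketch of the decision (the function and parameter names are made
up for illustration): the update falls back to RAW whenever the encoder's
output would be at least as large as the raw pixel data.

  #include <stdbool.h>
  #include <stddef.h>

  /* Return true if this framebuffer update should be sent RAW instead. */
  static bool vnc_prefer_raw(size_t encoded_bytes, size_t raw_bytes)
  {
      return encoded_bytes >= raw_bytes;
  }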

Signed-off-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2015-11-03 10:21:49 +01:00
OGAWA Hirofumi
fb71956367 ui/opengl: Reduce build required libraries for opengl
We now use epoxy to load OpenGL libraries. This means we don't need to
link OpenGL libraries directly if their interfaces are handled by epoxy.
With this, we only need the epoxy headers and epoxy's *.so to build.

Tested with epoxy-1.3.1.

- the sdl2/gtk/console EGL code requires nothing other than epoxy
- the milkymist-tmu2 GLX code requires nothing other than epoxy

(The lm32 test is limited because mmone-bios.bin can't be found, so we just
test that libGL loads with "./lm32-softmmu/qemu-system-lm32 -M milkymist,accel=qtest".)

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>

[ lm32 tested by kraxel ]

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2015-11-03 10:13:42 +01:00
OGAWA Hirofumi
e72df72a55 ui/curses: Fix pageup/pagedown on -curses
The current KEY_NPAGE/KEY_PPAGE handling is broken on -curses. Those keys
use "GREY", but "KEY_MASK" masked "GREY" out.

To fix this, we have to use the correct mask value - SCANCODE_KEYMASK.

This also adds support for "shift + pageup/pagedown". With this,
-curses mode can use scroll-up/down as usual, like other display modes.
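
An illustrative sketch of the masking problem, assuming constants roughly
like QEMU's (the exact values below are placeholders, not the real
definitions): a mask that strips the GREY bit can no longer distinguish
pageup/pagedown, so the wider scancode mask has to be used.

  #define GREY              0x80   /* placeholder value                    */
  #define KEY_MASK          0x7f   /* masks GREY out -> pageup/down broken */
  #define SCANCODE_KEYMASK  0xff   /* keeps GREY     -> correct            */

  static int curses_keycode(int sym)
  {
      return sym & SCANCODE_KEYMASK;   /* was: sym & KEY_MASK */
  }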

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2015-11-03 10:12:46 +01:00
OGAWA Hirofumi
e2368dc968 ui/curses: Support line graphics chars on -curses mode
This converts VGA characters to curses characters in console_write_bh().

With this change, we can see line graphics (which dialog uses, for
example) correctly.

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2015-11-03 10:12:46 +01:00
OGAWA Hirofumi
615220ddaf ui/curses: Fix monitor color with -curses when 256 colors
If TERM=xterm-256color, COLOR_PAIRS==256 and the monitor passes chtype
values like 0x74xx. The code then uses an uninitialized color pair. As a
result, the monitor uses black for both the fg and bg colors, i.e. the
terminal is filled with black.

To fix this, initialize the pairs above 64 with the default colors
(fg=white, bg=black).

FIXME: with 256 colors, curses might allow better VGA color emulation.
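
A hedged sketch of the fix, assuming a standard curses API (the pair range
and default colors follow the commit message; everything else is
illustrative):

  #include <curses.h>

  static void init_fallback_pairs(void)
  {
      /* Pairs above 64 were left uninitialized; give them a sane default
       * so they can never come out as black on black. */
      for (int i = 64; i < COLOR_PAIRS && i < 256; i++) {
          init_pair(i, COLOR_WHITE, COLOR_BLACK);
      }
  }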

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2015-11-03 10:12:45 +01:00
Peter Maydell
3d861a0109 Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2015-11-02' into staging
QAPI patches

# gpg: Signature made Mon 02 Nov 2015 09:07:23 GMT using RSA key ID EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"

* remotes/armbru/tags/pull-qapi-2015-11-02: (25 commits)
  qapi: Simplify gen_struct_field()
  qapi: Reserve 'u' member name
  qapi: Finish converting to new qapi union layout
  tpm: Convert to new qapi union layout
  memory: Convert to new qapi union layout
  input: Convert to new qapi union layout
  char: Convert to new qapi union layout
  net: Convert to new qapi union layout
  sockets: Convert to new qapi union layout
  block: Convert to new qapi union layout
  tests: Convert to new qapi union layout
  qapi-visit: Convert to new qapi union layout
  qapi: Start converting to new qapi union layout
  qapi-visit: Remove redundant functions for flat union base
  qapi: Unbox base members
  qapi: Prefer typesafe upcasts to qapi base classes
  qapi-types: Refactor base fields output
  qapi-visit: Split off visit_type_FOO_fields forward decl
  vnc: Hoist allocation of VncBasicInfo to callers
  qapi: Reserve 'q_*' and 'has_*' member names
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-11-02 11:11:39 +00:00
Eric Blake
32bc6879be qapi: Simplify gen_struct_field()
Rather than having all callers pass a name, type, and optional
flag, have them instead pass a QAPISchemaObjectTypeMember which
already has all that information.

No change to generated code.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1445898903-12082-25-git-send-email-eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2015-11-02 08:30:28 +01:00
Eric Blake
5e59baf90a qapi: Reserve 'u' member name
Now that we have separated union tag values from colliding with
non-variant C names, by naming the union 'u', we should reserve
this name for our use.  Note that we want to forbid 'u' even in
a struct with no variants, because it is possible for a future
qemu release to extend QMP in a backwards-compatible manner while
converting from a struct to a flat union.  Fortunately, no
existing clients were using this member name.  If we ever find
the need for QMP to have a member 'u', we could at that time
relax things, perhaps by having c_name() munge the QMP member to
'q_u'.

Note that we cannot forbid 'u' everywhere (by adding the
rejection code to check_name()), because the existing QKeyCode
enum already uses it; therefore we only reserve it as a struct
type member name.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1445898903-12082-24-git-send-email-eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2015-11-02 08:30:28 +01:00
Eric Blake
e4ba22b319 qapi: Finish converting to new qapi union layout
We have two issues with our qapi union layout:
1) Even though the QMP wire format spells the tag 'type', the
C code spells it 'kind', requiring some hacks in the generator.
2) The C struct uses an anonymous union, which places all tag
values in the same namespace as all non-variant members. This
leads to spurious collisions if a tag value matches a non-variant
member's name.

This patch is the back end for a series that converts to a
saner qapi union layout.  Now that all clients have been
converted to use 'type' and 'obj->u.value', we can drop the
temporary parallel support for 'kind' and 'obj->value'.

Given a simple union qapi type:

{ 'union':'Foo', 'data': { 'a':'int', 'b':'bool' } }

this is the overall effect, when compared to the state before
this series of patches:

| struct Foo {
|-    FooKind kind;
|-    union { /* union tag is @kind */
|+    FooKind type;
|+    union { /* union tag is @type */
|         void *data;
|         int64_t a;
|         bool b;
|-    };
|+    } u;
| };

The testsuite still contains some examples of artificial restrictions
(see flat-union-clash-type.json, for example) that are no longer
technically necessary, now that there is no longer a collision between
enum tag values and non-variant member names; but fixing this will be
done in later patches, in part because some further changes are required
to keep QAPISchema*.check() from asserting.  Also, a later patch will
add a reservation for the member name 'u' to avoid a collision between a
user's non-variant names and our internal choice of C union name.

Note, however, that we do not rename the generated enum, which
is still 'FooKind'.  A further patch could generate implicit
enums as 'FooType', but while the generator already reserved
the '*Kind' namespace (commit 4dc2e69), there are already QMP
constructs with '*Type' naming, which means changing our
reservation namespace would cause lots of churn in C code to
deal with a forced name change.
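
As a hedged, self-contained usage sketch of the final layout for the Foo
example above (the type definition is reproduced from the diff; the enum
constant names follow the usual qapi convention and are assumptions here):

  #include <inttypes.h>
  #include <stdbool.h>
  #include <stdio.h>
  #include <stdlib.h>

  typedef enum FooKind { FOO_KIND_A, FOO_KIND_B } FooKind;

  typedef struct Foo {
      FooKind type;            /* tag is now spelled 'type', not 'kind'   */
      union {                  /* variants live under the named union 'u' */
          void *data;
          int64_t a;
          bool b;
      } u;
  } Foo;

  static void describe(const Foo *foo)
  {
      switch (foo->type) {
      case FOO_KIND_A:
          printf("a = %" PRId64 "\n", foo->u.a);
          break;
      case FOO_KIND_B:
          printf("b = %s\n", foo->u.b ? "true" : "false");
          break;
      default:
          abort();
      }
  }

  int main(void)
  {
      Foo f = { .type = FOO_KIND_A, .u.a = 42 };
      describe(&f);
      return 0;
  }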

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1445898903-12082-23-git-send-email-eblake@redhat.com>
[Commit message tweaked]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2015-11-02 08:30:28 +01:00
Eric Blake
ce21131a0b tpm: Convert to new qapi union layout
We have two issues with our qapi union layout:
1) Even though the QMP wire format spells the tag 'type', the
C code spells it 'kind', requiring some hacks in the generator.
2) The C struct uses an anonymous union, which places all tag
values in the same namespace as all non-variant members. This
leads to spurious collisions if a tag value matches a non-variant
member's name.

Make the conversion to the new layout for TPM-related code.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1445898903-12082-22-git-send-email-eblake@redhat.com>
[Commit message tweaked slightly]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2015-11-02 08:30:28 +01:00
Eric Blake
1fd5d4fea4 memory: Convert to new qapi union layout
We have two issues with our qapi union layout:
1) Even though the QMP wire format spells the tag 'type', the
C code spells it 'kind', requiring some hacks in the generator.
2) The C struct uses an anonymous union, which places all tag
values in the same namespace as all non-variant members. This
leads to spurious collisions if a tag value matches a non-variant
member's name.

Make the conversion to the new layout for memory-related code.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1445898903-12082-21-git-send-email-eblake@redhat.com>
[Commit message tweaked slightly]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2015-11-02 08:30:28 +01:00
Eric Blake
568c73a478 input: Convert to new qapi union layout
We have two issues with our qapi union layout:
1) Even though the QMP wire format spells the tag 'type', the
C code spells it 'kind', requiring some hacks in the generator.
2) The C struct uses an anonymous union, which places all tag
values in the same namespace as all non-variant members. This
leads to spurious collisions if a tag value matches a non-variant
member's name.

Make the conversion to the new layout for input-related code.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1445898903-12082-20-git-send-email-eblake@redhat.com>
[Commit message tweaked slightly]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2015-11-02 08:30:28 +01:00
Eric Blake
130257dc44 char: Convert to new qapi union layout
We have two issues with our qapi union layout:
1) Even though the QMP wire format spells the tag 'type', the
C code spells it 'kind', requiring some hacks in the generator.
2) The C struct uses an anonymous union, which places all tag
values in the same namespace as all non-variant members. This
leads to spurious collisions if a tag value matches a non-variant
member's name.

Make the conversion to the new layout for character-related
code.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1445898903-12082-19-git-send-email-eblake@redhat.com>
[Commit message tweaked slightly]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2015-11-02 08:30:27 +01:00
Eric Blake
8d0bcba837 net: Convert to new qapi union layout
We have two issues with our qapi union layout:
1) Even though the QMP wire format spells the tag 'type', the
C code spells it 'kind', requiring some hacks in the generator.
2) The C struct uses an anonymous union, which places all tag
values in the same namespace as all non-variant members. This
leads to spurious collisions if a tag value matches a non-variant
member's name.

Make the conversion to the new layout for net-related code.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1445898903-12082-18-git-send-email-eblake@redhat.com>
[Commit message tweaked slightly]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2015-11-02 08:30:27 +01:00
Eric Blake
2d32addae7 sockets: Convert to new qapi union layout
We have two issues with our qapi union layout:
1) Even though the QMP wire format spells the tag 'type', the
C code spells it 'kind', requiring some hacks in the generator.
2) The C struct uses an anonymous union, which places all tag
values in the same namespace as all non-variant members. This
leads to spurious collisions if a tag value matches a non-variant
member's name.

Make the conversion to the new layout for socket-related code.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1445898903-12082-17-git-send-email-eblake@redhat.com>
[Commit message tweaked slightly]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2015-11-02 08:30:27 +01:00
Eric Blake
6a8f9661dc block: Convert to new qapi union layout
We have two issues with our qapi union layout:
1) Even though the QMP wire format spells the tag 'type', the
C code spells it 'kind', requiring some hacks in the generator.
2) The C struct uses an anonymous union, which places all tag
values in the same namespace as all non-variant members. This
leads to spurious collisions if a tag value matches a non-variant
member's name.

Make the conversion to the new layout for block-related code.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1445898903-12082-16-git-send-email-eblake@redhat.com>
[Commit message tweaked slightly]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2015-11-02 08:30:27 +01:00
Eric Blake
c363acef77 tests: Convert to new qapi union layout
We have two issues with our qapi union layout:
1) Even though the QMP wire format spells the tag 'type', the
C code spells it 'kind', requiring some hacks in the generator.
2) The C struct uses an anonymous union, which places all tag
values in the same namespace as all non-variant members. This
leads to spurious collisions if a tag value matches a non-variant
member's name.

Make the conversion to the new layout for testsuite code.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1445898903-12082-15-git-send-email-eblake@redhat.com>
[Commit message tweaked slightly]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2015-11-02 08:30:27 +01:00
Eric Blake
150d0564a4 qapi-visit: Convert to new qapi union layout
We have two issues with our qapi union layout:
1) Even though the QMP wire format spells the tag 'type', the
C code spells it 'kind', requiring some hacks in the generator.
2) The C struct uses an anonymous union, which places all tag
values in the same namespace as all non-variant members. This
leads to spurious collisions if a tag value matches a non-variant
member's name.

Make the conversion to the new layout for qapi-visit.py.

Generated code changes look like:

|@@ -4912,16 +4912,16 @@ void visit_type_MemoryDeviceInfo(Visitor
|     if (!*obj) {
|         goto out_obj;
|     }
|-    visit_type_MemoryDeviceInfoKind(v, &(*obj)->kind, "type", &err);
|+    visit_type_MemoryDeviceInfoKind(v, &(*obj)->type, "type", &err);
|     if (err) {
|         goto out_obj;
|     }
|-    if (!visit_start_union(v, !!(*obj)->data, &err) || err) {
|+    if (!visit_start_union(v, !!(*obj)->u.data, &err) || err) {
|         goto out_obj;
|     }
|-    switch ((*obj)->kind) {
|+    switch ((*obj)->type) {
|     case MEMORY_DEVICE_INFO_KIND_DIMM:
|-        visit_type_PCDIMMDeviceInfo(v, &(*obj)->dimm, "data", &err);
|+        visit_type_PCDIMMDeviceInfo(v, &(*obj)->u.dimm, "data", &err);
|         break;
|     default:
|         abort();
|@@ -4930,7 +4930,7 @@ out_obj:
|     error_propagate(errp, err);
|     err = NULL;
|     if (*obj) {
|-        visit_end_union(v, !!(*obj)->data, &err);
|+        visit_end_union(v, !!(*obj)->u.data, &err);
|     }
|     error_propagate(errp, err);
|     err = NULL;

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1445898903-12082-14-git-send-email-eblake@redhat.com>
[Commit message tweaked slightly]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2015-11-02 08:30:27 +01:00
Eric Blake
f51d8fab44 qapi: Start converting to new qapi union layout
We have two issues with our qapi union layout:
1) Even though the QMP wire format spells the tag 'type', the
C code spells it 'kind', requiring some hacks in the generator.
2) The C struct uses an anonymous union, which places all tag
values in the same namespace as all non-variant members. This
leads to spurious collisions if a tag value matches a non-variant
member's name.

This patch is the front end for a series that converts to a
saner qapi union layout.  By the end of the series, we will no
longer have the type/kind mismatch, and all tag values will be
under a named union, which requires clients to access
'obj->u.value' instead of 'obj->value'.  But since the
conversion touches a number of files, it is easiest if we
temporarily support BOTH layouts simultaneously.

Given a simple union qapi type:

{ 'union':'Foo', 'data': { 'a':'int', 'b':'bool' } }

make the following changes in generated qapi-types.h:

| struct Foo {
|-    FooKind kind;
|-    union { /* union tag is @kind */
|+    union {
|+        FooKind kind;
|+        FooKind type;
|+    };
|+    union { /* union tag is @type */
|         void *data;
|         int64_t a;
|         bool b;
|+        union { /* union tag is @type */
|+            void *data;
|+            int64_t a;
|+            bool b;
|+        } u;
|     };
| };

Flat unions do not need the anonymous union for the tag member,
as we already fixed that to use the member name instead of 'kind'
back in commit 0f61af3e.

One additional change is needed in qapi.py: check_union() now
needs to check for collisions with 'type' in addition to those
with 'kind'.

Later, when the conversions are complete, we will remove the
duplication hacks, and also drop the check_union() restrictions.

Note, however, that we do not rename the generated enum, which
is still 'FooKind'.  A further patch could generate implicit
enums as 'FooType', but while the generator already reserved
the '*Kind' namespace (commit 4dc2e69), there are already QMP
constructs with '*Type' naming, which means changing our
reservation namespace would cause lots of churn in C code to
deal with a forced name change.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1445898903-12082-13-git-send-email-eblake@redhat.com>
[Commit message tweaked slightly]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2015-11-02 08:30:27 +01:00
Eric Blake
5c5e51a05b qapi-visit: Remove redundant functions for flat union base
The code for visiting the base class of a child struct created
visit_type_Base_fields() which covers all fields of Base; while
the code for visiting the base class of a flat union created
visit_type_Union_fields() covering all fields of the base
except the discriminator.  But since the base class includes
the discriminator of a flat union, we can just visit the entire
base, without needing a separate visit of the discriminator.
Not only is consistently visiting all fields easier to
understand, it lets us share code.

The generated code in qapi-visit.c loses several now-unused
visit_type_UNION_fields(), along with changes like:

|@@ -1654,11 +1557,7 @@ void visit_type_BlockdevOptions(Visitor
|     if (!*obj) {
|         goto out_obj;
|     }
|-    visit_type_BlockdevOptions_fields(v, obj, &err);
|-    if (err) {
|-        goto out_obj;
|-    }
|-    visit_type_BlockdevDriver(v, &(*obj)->driver, "driver", &err);
|+    visit_type_BlockdevOptionsBase_fields(v, (BlockdevOptionsBase **)obj, &err);
|     if (err) {
|         goto out_obj;
|     }

and forward declarations where needed.  Note that the cast of obj
to BASE ** is necessary to call visit_type_BASE_fields() (and we
can't use our upcast wrappers, because those work on pointers while
we have a pointer-to-pointer).

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1445898903-12082-12-git-send-email-eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2015-11-02 08:30:27 +01:00
Eric Blake
ddf2190896 qapi: Unbox base members
Rather than storing a base class as a pointer to a box, just
store the fields of that base class in the same order, so that
a child struct can be directly cast to its parent.  This gives
less malloc overhead, less pointer dereferencing, and even less
generated code.  Compare to the earlier commit 1e6c1616a "qapi:
Generate a nicer struct for flat unions" (although that patch
had fewer places to change, as less of qemu was directly using
qapi structs for flat unions).  It also allows us to turn on
automatic type-safe wrappers for upcasting to the base class
of a struct.

Changes to the generated code look like this in qapi-types.h:

| struct SpiceChannel {
|-    SpiceBasicInfo *base;
|+    /* Members inherited from SpiceBasicInfo: */
|+    char *host;
|+    char *port;
|+    NetworkAddressFamily family;
|+    /* Own members: */
|     int64_t connection_id;

as well as additional upcast functions like qapi_SpiceChannel_base().
Meanwhile, changes to qapi-visit.c look like:

| static void visit_type_SpiceChannel_fields(Visitor *v, SpiceChannel **obj, Error **errp)
| {
|     Error *err = NULL;
|
|-    visit_type_implicit_SpiceBasicInfo(v, &(*obj)->base, &err);
|+    visit_type_SpiceBasicInfo_fields(v, (SpiceBasicInfo **)obj, &err);
|     if (err) {

(the cast is necessary, since our upcast wrappers only deal with a
single pointer, not pointer-to-pointer); plus the wholesale
elimination of some now-unused visit_type_implicit_FOO() functions.

Without boxing, the corner case of one empty struct having
another empty struct as its base type now requires inserting a
dummy member (previously, the 'Base *base' member sufficed).

And now that we no longer consume a 'base' member in the generated
C struct, we can delete the former negative struct-base-clash-base
test.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1445898903-12082-11-git-send-email-eblake@redhat.com>
[Commit message tweaked slightly]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2015-11-02 08:30:26 +01:00
Eric Blake
30594fe1cd qapi: Prefer typesafe upcasts to qapi base classes
A previous patch (commit 1e6c1616) made it possible to
directly cast from a qapi flat union type to its base type.
However, it requires the use of a C cast, which turns off
compiler type-safety checks.  Fortunately, no such casts
exist, just yet.

Regardless, add inline type-safe wrappers named
qapi_FOO_base() for any union type FOO that has a base,
which can be used for a safer upcast, and enhance the
testsuite to cover the new functionality.

A future patch will extend the upcast support to structs,
where such conversions do exist already.

Note that C makes const-correct upcasts annoying because
it lacks overloads; these functions cast away const so that
they can accept user pointers whether const or not, and the
result in turn can be assigned to normal or const pointers.
Alternatively, this could have been done with macros, but
type-safe macros are hairy, and not worthwhile here.

This patch just adds upcasts.  None of our code needed to
downcast from a base qapi class to a child.  Also, in the
case of grandchildren (such as BlockdevOptionsQcow2), the
caller will need to call two functions to get to the inner
base (although it wouldn't be too hard to generate a
qapi_FOO_base_base() if desired).  If a user changes qapi
to alter the base class hierarchy, such as going from
'A -> C' to 'A -> B -> C', it will change the type of
'qapi_C_base()', and the compiler will point out the places
that are affected by the new base.

One alternative was proposed, but was deemed too ugly to use
in practice: the generators could output redundant
information using anonymous types:
| struct Child {
|     union {
|         struct {
|             Type1 parent_member1;
|             Type2 parent_member2;
|         };
|         Parent base;
|     };
| };
With that ugly proposal, for a given qapi type, obj->member
and obj->base.member would refer to the same storage, allowing
convenient access to members without needing 'base.', allowing
a typesafe upcast without needing a C cast by accessing
'&obj->base', and making downcasts from the parent back to
the child possible through container_of(obj, Child, base).
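
A hedged, self-contained sketch of the kind of wrapper described above, for
an imaginary type Child whose base is Parent (plain structs for brevity;
all names and members are illustrative, not generated QEMU code):

  #include <stdio.h>

  typedef struct Parent { int common; } Parent;

  typedef struct Child {
      /* Members inherited from Parent: */
      int common;
      /* Own members: */
      int extra;
  } Child;

  /* Casts away const so it accepts const and non-const pointers alike. */
  static inline Parent *qapi_Child_base(const Child *obj)
  {
      return (Parent *)obj;
  }

  int main(void)
  {
      Child c = { .common = 1, .extra = 2 };
      Parent *p = qapi_Child_base(&c);    /* type-safe upcast, no bare cast */
      printf("%d\n", p->common);
      return 0;
  }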

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1445898903-12082-10-git-send-email-eblake@redhat.com>
[Commit message tweaked]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2015-11-02 08:30:26 +01:00
Eric Blake
f87ab7f9bd qapi-types: Refactor base fields output
Move code from gen_union() into gen_struct_fields() in order for
a later patch to share code when enumerating inherited fields
for struct types.

No change to generated code.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1445898903-12082-9-git-send-email-eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2015-11-02 08:30:26 +01:00
Eric Blake
d02cf37766 qapi-visit: Split off visit_type_FOO_fields forward decl
We generate a static visit_type_FOO_fields() for every type
FOO.  However, sometimes we need a forward declaration. Split
the code to generate the forward declaration out of
gen_visit_implicit_struct() into a new gen_visit_fields_decl(),
and also prepare for a forward declaration to be emitted
during gen_visit_struct(), so that a future patch can switch
from using visit_type_FOO_implicit() to the simpler
visit_type_FOO_fields() as part of unboxing the base class
of a struct.

No change to generated code.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1445898903-12082-8-git-send-email-eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2015-11-02 08:30:26 +01:00
Eric Blake
98481bfcd6 vnc: Hoist allocation of VncBasicInfo to callers
A future qapi patch will rework generated structs with a base
class to be unboxed.  In preparation for that, change the code
that allocates then populates an info struct to instead merely
populate the fields of an info struct passed in as a parameter
(renaming vnc_basic_info_get* to vnc_init_basic_info*). Add
rudimentary Error handling at the lowest levels for cases
where the old code returned NULL; but rather than plumb Error
all the way through the stack, the callers drop the error and
return NULL as before.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1445898903-12082-7-git-send-email-eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2015-11-02 08:30:26 +01:00
Eric Blake
9fb081e0b9 qapi: Reserve 'q_*' and 'has_*' member names
c_name() produces names starting with 'q_' when protecting a
dictionary member name that would fail to directly compile, but
in doing so can cause clashes with any member name already
beginning with 'q-' or 'q_'.  Likewise, we create a C name 'has_'
for any optional member that can clash with any member name
beginning with 'has-' or 'has_'.

Technically, rather than blindly reserving the namespace,
we could try to complain about user names only when an actual
collision occurs, or even teach c_name() how to munge names
to avoid collisions.  But it is not trivial, especially when
collisions can occur across multiple types (such as via
inheritance or flat unions).  Besides, no existing .json
files are trying to use these names.  So it's easier to just
outright forbid the potential for collision.  We can always
relax things in the future if a real need arises for QMP to
express member names that have been forbidden here.

'has_' only has to be reserved for struct/union member names,
while 'q_' is reserved everywhere (matching the fact that
only members can be optional, while we use c_name() for munging
both members and entities).  Note that we could relax 'q_'
restrictions on entities independently from member names; for
example, c_name('qmp_' + 'unix') would result in a different
function name than our current 'qmp_' + c_name('unix').

Update and add tests to cover the new error messages.
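
A hedged illustration of the collisions being reserved against (the member
names below are invented for the example): for an optional member 'foo' the
generator already emits a 'has_foo' flag, and a member spelled 'unix' gets
munged by c_name() to 'q_unix', so user members already starting with
'has_' or 'q_' would clash with those generated names.

  #include <stdbool.h>
  #include <stdint.h>

  struct Example {
      bool has_foo;      /* generated flag for optional member 'foo' */
      int64_t foo;
      char *q_unix;      /* c_name() munging of QMP member 'unix'    */
  };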

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1445898903-12082-6-git-send-email-eblake@redhat.com>
[Consistently pass protect=False to c_name(); commit message tweaked
slightly]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2015-11-02 08:30:26 +01:00
Eric Blake
255960dd37 qapi: Reserve '*List' type names for list types
Type names ending in 'List' can clash with qapi list types in
generated C.  We don't currently use such names. It is easier to
outlaw them now than to worry about how to resolve such a clash
in the future. For precedent, see commit 4dc2e69, which did the
same for names ending in 'Kind' versus implicit enum types for
qapi unions.

Update the testsuite to match.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1445898903-12082-5-git-send-email-eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2015-11-02 08:30:26 +01:00
Eric Blake
f9e6102b48 qapi: More robust conditions for when labels are needed
We were using regular expressions to see if ret included
any earlier text that emitted a 'goto out;' line, to decide
whether we needed to output an 'out:' label.  But this is
fragile, if the ret text can possibly combine more than one
generated function body, where the first function used a
goto but the second does not.  Change the code to just check
for the known conditions which cause an error check to be
needed.  Besides, it's slightly more efficient to use plain
checks than regular expression searching.

No change to generated code.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1445898903-12082-4-git-send-email-eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2015-11-02 08:30:26 +01:00
Eric Blake
8712fa5333 qapi: More idiomatic string operations
Rather than slicing the end of a string, we can use python's
endswith().  And rather than creating a set of characters,
we can search for a character within a string.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1445898903-12082-3-git-send-email-eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2015-11-02 08:30:26 +01:00
Eric Blake
1976708321 tests/qapi-schema: Test for reserved names, empty struct
Add some testsuite coverage to ensure future patches are on
the right track:

Our current C representation of qapi arrays is done by appending
'List' to the element name; but we are not preventing the
creation of an object type with the same name.  Add
reserved-type-list.json to test this.  Then rename
enum-union-clash.json to reserved-type-kind.json to cover the
reservation that we DO detect, and shorten it to match the fact
that the name is reserved even if there is no clash.

We are failing to detect a collision between a dictionary member
and the implicit 'has_*' flag for another optional member. The
easiest fix would be for a future patch to reserve the entire
"has[-_]" namespace for member names (the collision is also
possible for branch names within flat unions, but only as long as
branch names can collide with (non-variant) members; however,
since future patches are about to remove that, it is not worth
testing here). Add reserved-member-has.json to test this.

A similar collision exists between a dictionary member where
c_name() munges what might otherwise be a reserved name to start
with 'q_', and another member explicitly starts with "q[-_]".
Again, the easiest solution for a future patch will be reserving
the entire namespace, but here for commands as well as members.
Add reserved-member-q.json and reserved-command-q.json to test
this; separate tests since arguably our munging of command 'unix'
to 'qmp_q_unix()' could be done without a q_, which is different
than the munging of a member 'unix' to 'foo.q_unix'.

Finally, our testsuite does not have any compilation coverage
of struct inheritance with empty qapi structs.  Update
qapi-schema-test.json to test this.

Note that there is currently no technical reason to forbid type
name patterns from member names, or member name patterns from
types, since the two are not in the same namespace in C and
won't collide; but it's not worth adding positive tests of these
corner cases at this time, especially while there is other churn
pending in patches that rearrange which collisions actually
happen.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1445898903-12082-2-git-send-email-eblake@redhat.com>
[Commit message tweaked slightly]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2015-11-02 08:30:25 +01:00
Daniel P. Berrange
2ea1793bd9 qapi-schema: mark InetSocketAddress as mandatory again
Revert the qapi-schema.json change done in:

  commit 0983f5e6af
  Author: Daniel P. Berrange <berrange@redhat.com>
  Date:   Tue Sep 1 14:46:50 2015 +0100

    sockets: allow port to be NULL when listening on IP address

Switching "port" from mandatory to optional causes the QAPI
code generator to add a 'has_port' field to the InetSocketAddress
struct. No code that created InetSocketAddress objects was updated
to set 'has_port = true', which caused the non-NULL port strings
to be silently dropped when copying InetSocketAddress objects.
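
A self-contained sketch of the failure mode (data types heavily simplified;
QEMU's generated structs and copy visitors look different): once 'port' is
optional, a has_port flag exists, and any producer that forgets to set it
loses the port on copy.

  #include <stdbool.h>
  #include <stdio.h>
  #include <string.h>

  /* Simplified stand-in for the generated struct once 'port' is optional. */
  typedef struct InetSocketAddress {
      char host[64];
      bool has_port;     /* added by the generator for the optional member */
      char port[16];
  } InetSocketAddress;

  /* A QAPI-style copy only keeps optional members whose has_ flag is set. */
  static void copy_addr(InetSocketAddress *dst, const InetSocketAddress *src)
  {
      strcpy(dst->host, src->host);
      dst->has_port = src->has_port;
      if (src->has_port) {
          strcpy(dst->port, src->port);
      }
  }

  int main(void)
  {
      InetSocketAddress a = { .host = "::", .port = "5900" };  /* has_port left false */
      InetSocketAddress b = { 0 };
      copy_addr(&b, &a);
      printf("copied port: '%s'\n", b.port);   /* empty: the port was dropped */
      return 0;
  }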

Reported-by: Knut Omang <knuto@ifi.uio.no>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-Id: <1445509543-30679-1-git-send-email-berrange@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2015-11-02 08:30:25 +01:00
Peter Maydell
24f4a0f5c9 Merge remote-tracking branch 'remotes/rth/tags/pull-tile-20151030' into staging
Prefetch in y2 pipe

# gpg: Signature made Fri 30 Oct 2015 20:40:02 GMT using RSA key ID 4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"

* remotes/rth/tags/pull-tile-20151030:
  target-tilegx: Implement prefetch instructions in pipe y2

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-30 21:59:48 +00:00
Peter Maydell
3a958f559e Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
# gpg: Signature made Thu 29 Oct 2015 18:09:16 GMT using RSA key ID 81AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"

* remotes/stefanha/tags/block-pull-request:
  block: Consider all child nodes in bdrv_requests_pending()
  target-arm: xlnx-zynqmp: Add sdhci support.
  sdhci: Split sdhci.h for public and internal device usage
  sd.h: Move sd.h to include/hw/sd/
  virtio: sync the dataplane vring state to the virtqueue before virtio_save
  gdb command: qemu handlers
  virtio-blk: switch off scsi-passthrough by default
  ppc/spapr: add 2.4 compat props
  s390x: include HW_COMPAT_* props
  qemu-gdb: add $qemu_coroutine_sp and $qemu_coroutine_pc
  qemu-gdb: extract parts of "qemu coroutine" implementation
  qemu-gdb: allow using glibc_pointer_guard() on core dumps

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-30 19:47:47 +00:00
Peter Maydell
e79ea9e424 Merge remote-tracking branch 'remotes/lalrae/tags/mips-20151030' into staging
MIPS patches 2015-10-30

Changes:
* R6 CPU can be woken up by non-enabled interrupts
* PC fix in KVM
* Coprocessor 0 XContext calculation fix
* various MIPS R6 updates

# gpg: Signature made Fri 30 Oct 2015 14:51:56 GMT using RSA key ID 0B29DA6B
# gpg: Good signature from "Leon Alrae <leon.alrae@imgtec.com>"

* remotes/lalrae/tags/mips-20151030:
  target-mips: fix updating XContext on mmu exception
  target-mips: add SIGRIE instruction
  target-mips: Set Config5.XNP for R6 cores
  target-mips: add PC, XNP reg numbers to RDHWR
  hw/mips_malta: Fix KVM PC initialisation
  target-mips: Add enum for BREAK32
  target-mips: update writing to CP0.Status.KX/SX/UX in MIPS Release R6
  target-mips: implement the CPU wake-up on non-enabled interrupts in R6
  target-mips: move the test for enabled interrupts to a separate function

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-30 16:30:25 +00:00
Yongbok Kim
60270f85cc target-mips: fix updating XContext on mmu exception
Correctly update the XContext.Region field on MMU exceptions.
If Config3.CTXTC = 0 then the R field of XContext has to be updated
with the value of bits 63..62 of the virtual address upon a TLB
exception.
Also fixed the line below it, which exceeded 80 characters.

Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
Reviewed-by: James Hogan <james.hogan@imgtec.com>
Reviewed-by: Leon Alrae <leon.alrae@imgtec.com>
Signed-off-by: Leon Alrae <leon.alrae@imgtec.com>
2015-10-30 14:36:19 +00:00
Yongbok Kim
bb238210bb target-mips: add SIGRIE instruction
Add SIGRIE (Signal Reserved Instruction Exception) for both MIPS and
microMIPS.
The instruction makes the 16-bit code field available for software use.
This instruction is introduced in, and required as of, Release 6.

Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
Reviewed-by: Leon Alrae <leon.alrae@imgtec.com>
Signed-off-by: Leon Alrae <leon.alrae@imgtec.com>
2015-10-30 14:36:19 +00:00
Yongbok Kim
35ac9e342e target-mips: Set Config5.XNP for R6 cores
Set Config5.XNP for R6 cores to indicate that the extended LL/SC family
of instructions is NOT present.

Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
Reviewed-by: Leon Alrae <leon.alrae@imgtec.com>
Signed-off-by: Leon Alrae <leon.alrae@imgtec.com>
2015-10-30 14:36:19 +00:00
Yongbok Kim
b00c72180c target-mips: add PC, XNP reg numbers to RDHWR
Add Performance Counter (4) and XNP (5) register numbers to RDHWR.
Add check_hwrena() to simplify the access control checks.
Add RDHWR support to microMIPS R6.

Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
Reviewed-by: Leon Alrae <leon.alrae@imgtec.com>
Signed-off-by: Leon Alrae <leon.alrae@imgtec.com>
2015-10-30 14:35:52 +00:00
James Hogan
ca2f6bbbce hw/mips_malta: Fix KVM PC initialisation
Commit 71c199c81d ("mips_malta: provide ememsize env variable to
kernels") changed the meaning of loaderparams.ram_size to be the whole
of RAM rather than just the low part below where the boot code is placed
for KVM, but it didn't update the PC initialisation for KVM to use
ram_low_size. Fix that now.

Fixes: 71c199c81d ("mips_malta: provide ememsize env variable to kernels")
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Paul Burton <paul.burton@imgtec.com>
Cc: Leon Alrae <leon.alrae@imgtec.com>
Cc: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Leon Alrae <leon.alrae@imgtec.com>
Signed-off-by: Leon Alrae <leon.alrae@imgtec.com>
2015-10-30 13:30:14 +00:00
Peter Maydell
fdf927621a Merge remote-tracking branch 'remotes/armbru/tags/pull-monitor-2015-10-30' into staging
QMP and QObject patches

# gpg: Signature made Fri 30 Oct 2015 08:06:26 GMT using RSA key ID EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"

* remotes/armbru/tags/pull-monitor-2015-10-30:
  docs: Document QMP event rate limiting
  monitor: Throttle event VSERPORT_CHANGE separately by "id"
  monitor: Turn monitor_qapi_event_state[] into a hash table
  glib: add compatibility interface for g_hash_table_add()
  monitor: Split MonitorQAPIEventConf off MonitorQAPIEventState
  monitor: Switch from timer_new() to timer_new_ns()
  monitor: Simplify event throttling
  monitor: Reduce casting of QAPI event QDict
  qstring: Make conversion from QObject * accept null
  qlist: Make conversion from QObject * accept null
  qfloat qint: Make conversion from QObject * accept null
  qdict: Make conversion from QObject * accept null
  qbool: Make conversion from QObject * accept null
  qobject: Drop QObject_HEAD

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-30 09:41:15 +00:00
Markus Armbruster
7f1e7b23d5 docs: Document QMP event rate limiting
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1444921716-9511-8-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2015-10-30 09:05:38 +01:00
Markus Armbruster
7de0be6573 monitor: Throttle event VSERPORT_CHANGE separately by "id"
VSERPORT_CHANGE is emitted when the guest opens or closes a
virtio-serial port.  The event's member "id" identifies the port.

When several events arrive quickly, throttling drops all but the last
of them.  Because of that, a QMP client must assume that *any* port
may have changed state when it receives a VSERPORT_CHANGE event and
throttling may have happened.

Make the event more useful by throttling it for each port separately.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1444921716-9511-7-git-send-email-armbru@redhat.com>
2015-10-30 09:05:38 +01:00
Markus Armbruster
a24712af54 monitor: Turn monitor_qapi_event_state[] into a hash table
In preparation of finer grained throttling.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1444921716-9511-6-git-send-email-armbru@redhat.com>
2015-10-30 09:05:38 +01:00
Markus Armbruster
8681dffa91 glib: add compatibility interface for g_hash_table_add()
The next commit will use it.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2015-10-30 09:01:03 +01:00
Kevin Wolf
37a639a7fb block: Consider all child nodes in bdrv_requests_pending()
The function manually recursed into bs->file and bs->backing to check
whether there were any requests pending, but it ignored other children.

There's no need to special case file and backing here, so just replace
these two explicit recursions by a loop recursing for all child nodes.
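
A hedged sketch of the shape of the change (data structures simplified to a
NULL-terminated child array; QEMU's real code walks its list of child
nodes): instead of recursing only into the file and backing children,
recurse into every child.

  #include <stdbool.h>
  #include <stddef.h>

  typedef struct Node {
      bool busy;                /* stands in for the per-node request checks */
      struct Node **children;   /* NULL-terminated list of child nodes       */
  } Node;

  static bool requests_pending(Node *n)
  {
      if (n->busy) {
          return true;
      }
      for (Node **c = n->children; c && *c; c++) {
          if (requests_pending(*c)) {   /* every child, not just file/backing */
              return true;
          }
      }
      return false;
  }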

Reported-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 1446029211-27148-1-git-send-email-kwolf@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2015-10-29 17:59:27 +00:00
Sai Pavan Boddu
33108e9f33 target-arm: xlnx-zynqmp: Add sdhci support.
Add two SYSBUS_SDHCI devices for xlnx-zynqmp

Signed-off-by: Sai Pavan Boddu <saipava@xilinx.com>
Reviewed-by: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2015-10-29 17:59:27 +00:00
Sai Pavan Boddu
637d23beb6 sdhci: Split sdhci.h for public and internal device usage
Split sdhci.h into a public version (i.e. include/hw/sd/sdhci.h) and an
internal version (i.e. hw/sd/sdhci-internal.h) based on the register
declarations and the object declaration.

Signed-off-by: Sai Pavan Boddu <saipava@xilinx.com>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2015-10-29 17:59:27 +00:00
Sai Pavan Boddu
e3382ef0ea sd.h: Move sd.h to include/hw/sd/
Create a sd directory under include/hw/ and move sd.h to
include/hw/sd/

Signed-off-by: Sai Pavan Boddu <saipava@xilinx.com>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2015-10-29 17:59:27 +00:00
Pavel Butsykin
10a06fd65f virtio: sync the dataplane vring state to the virtqueue before virtio_save
When creating a snapshot with the dataplane enabled, the snapshot file does
not get the actual state of the virtqueue, because the current state is
stored in VirtIOBlockDataPlane. Therefore, before saving a snapshot we need
to sync the dataplane vring state to the virtqueue. The dataplane will
resume its work at the next virtqueue notification.

When snapshot loads with loadvm we get a message:
VQ 0 size 0x80 Guest index 0x15f5 inconsistent with Host index 0x0:
    delta 0x15f5
error while loading state for instance 0x0 of device
    '0000:00:08.0/virtio-blk'
Error -1 while loading VM state

To reproduce the error I used the following HMP commands:
savevm snap1
loadvm snap1

qemu parameters:
--enable-kvm -smp 4 -m 1024 -drive file=/var/lib/libvirt/images/centos6.4.qcow2,if=none,id=drive-virtio-disk0,format=qcow2,cache=none,aio=native -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x8,drive=drive-virtio-disk0,id=virtio-disk0 -set device.virtio-disk0.x-data-plane=on

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Message-id: 1445859777-2982-1-git-send-email-den@openvz.org
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: "Michael S. Tsirkin" <mst@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2015-10-29 17:59:27 +00:00
Dr. David Alan Gilbert
c900ef86c5 gdb command: qemu handlers
A new gdb command is added:

  qemu handlers

     That dumps an AioContext list (by default qemu_aio_context)
     possibly including a backtrace for cases it knows about
     (with the verbose option).  Intended to help find why something
     is hanging waiting for IO.

  Use 'qemu handlers --verbose iohandler_ctx'  to find out why
your incoming migration is stuck.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: 1445951385-11924-1-git-send-email-dgilbert@redhat.com

V2:
  Merge into one command with optional handlers arg, and only do
    backtrace in verbose mode

 (gdb) qemu handlers
 ----
 {pfd = {fd = 6, events = 25, revents = 0}, io_read = 0x55869656ffd0
 <event_notifier_dummy_cb>, io_write = 0x0, deleted = 0, opaque =
 0x558698c4ce08, node = {le_next = 0x0, le_prev = 0x558698c4cdc0}}

 (gdb) qemu handlers iohandler_ctx
 ----
 {pfd = {fd = 9, events = 25, revents = 0}, io_read = 0x558696581380
 <fd_coroutine_enter>, io_write = 0x0, deleted = 0, opaque =
 0x558698dc99d0, node = {le_next = 0x558698c4cca0, le_prev =
 0x558698c4c1d0}}
 ----
 {pfd = {fd = 4, events = 25, revents = 0}, io_read = 0x55869657b330
 <sigfd_handler>, io_write = 0x0, deleted = 0, opaque = 0x4, node =
 {le_next = 0x558698c4c260, le_prev = 0x558699f72508}}
 ----
 {pfd = {fd = 5, events = 25, revents = 0}, io_read = 0x55869656ffd0
 <event_notifier_dummy_cb>, io_write = 0x0, deleted = 0, opaque =
 0x558698c4c218, node = {le_next = 0x0, le_prev = 0x558698c4ccc8}}
 ----
 (gdb) qemu handlers --verbose iohandler_ctx
 ----
 {pfd = {fd = 9, events = 25, revents = 0}, io_read = 0x558696581380
 <fd_coroutine_enter>, io_write = 0x0, deleted = 0, opaque =
 0x558698dc99d0, node = {le_next = 0x558698c4cca0, le_prev =
 0x558698c4c1d0}}
 #0  0x0000558696581820 in qemu_coroutine_switch
 (from_=from_@entry=0x558698cb3cf0, to_=to_@entry=0x7f421c37eac8,
 action=action@entry=COROUTINE_YIELD) at
 /home/dgilbert/git/qemu/coroutine-ucontext.c:177
 #1  0x0000558696580c00 in qemu_coroutine_yield () at
 /home/dgilbert/git/qemu/qemu-coroutine.c:145
 #2  0x00005586965814f5 in yield_until_fd_readable (fd=9) at
 /home/dgilbert/git/qemu/qemu-coroutine-io.c:90
 #3  0x0000558696523937 in socket_get_buffer (opaque=0x55869a3dc620,
 buf=0x558698c505a0 "", pos=<optimized out>, size=32768) at
 /home/dgilbert/git/qemu/migration/qemu-file-unix.c:101
 #4  0x0000558696521fac in qemu_fill_buffer (f=0x558698c50570) at
 /home/dgilbert/git/qemu/migration/qemu-file.c:227
 #5  0x0000558696522989 in qemu_peek_byte (f=0x558698c50570, offset=0)
     at /home/dgilbert/git/qemu/migration/qemu-file.c:507
 #6  0x0000558696522bf4 in qemu_get_be32 (f=0x558698c50570) at
 /home/dgilbert/git/qemu/migration/qemu-file.c:520
 #7  0x0000558696522bf4 in qemu_get_be32 (f=f@entry=0x558698c50570)
     at /home/dgilbert/git/qemu/migration/qemu-file.c:604
 #8  0x0000558696347e5c in qemu_loadvm_state (f=f@entry=0x558698c50570)
     at /home/dgilbert/git/qemu/migration/savevm.c:1821
 #9  0x000055869651de8c in process_incoming_migration_co
 (opaque=0x558698c50570)
     at /home/dgilbert/git/qemu/migration/migration.c:336
 #10 0x000055869658188a in coroutine_trampoline (i0=<optimized out>,
 i1=<optimized out>)
     at /home/dgilbert/git/qemu/coroutine-ucontext.c:80
 #11 0x00007f420f05df10 in __start_context () at /lib64/libc.so.6
 #12 0x00007ffc40815f50 in  ()
 #13 0x0000000000000000 in  ()

  ----
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2015-10-29 17:59:27 +00:00
Cornelia Huck
ed65fd1a27 virtio-blk: switch off scsi-passthrough by default
Devices that are compliant with virtio-1 do not support scsi
passthrough any more (and it has not been a recommended setup
anyway for quite some time). To avoid having to switch it off
explicitly in newer qemus that turn on virtio-1 by default, let's
switch the default to scsi=false for 2.5.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Message-id: 1444991154-79217-4-git-send-email-cornelia.huck@de.ibm.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2015-10-29 17:59:27 +00:00
Cornelia Huck
80fd50f96b ppc/spapr: add 2.4 compat props
HW_COMPAT_2_4 will become non-empty: prepare for it.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Message-id: 1444991154-79217-3-git-send-email-cornelia.huck@de.ibm.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2015-10-29 17:59:26 +00:00
Cornelia Huck
54d8ec84fa s390x: include HW_COMPAT_* props
We want to inherit generic hw compat as well.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Message-id: 1444991154-79217-2-git-send-email-cornelia.huck@de.ibm.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2015-10-29 17:59:26 +00:00
Paolo Bonzini
a201b0ff28 qemu-gdb: add $qemu_coroutine_sp and $qemu_coroutine_pc
These can be useful to manually get a stack trace of a coroutine inside
a core dump.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1444636974-19950-4-git-send-email-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2015-10-29 17:59:26 +00:00
Paolo Bonzini
80ab31b257 qemu-gdb: extract parts of "qemu coroutine" implementation
Provide useful Python functions to reach and decipher a jmpbuf.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1444636974-19950-3-git-send-email-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2015-10-29 17:59:26 +00:00
Paolo Bonzini
1138f24645 qemu-gdb: allow using glibc_pointer_guard() on core dumps
get_fs_base() cannot be run on a core dump, because it uses the arch_prctl
system call.  The fs base is the value that is returned by pthread_self(),
and it would be nice to just glean it from the "info threads" output:

* 1    Thread 0x7f16a3fff700 (LWP 33642) pthread_cond_wait@@GLIBC_2.3.2 ()
              ^^^^^^^^^^^^^^

but unfortunately the gdb API does not provide that.  Instead, we can
look for the "arg" argument of the start_thread function if glibc debug
information is available.  If not, fall back to the old mechanism.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1444636974-19950-2-git-send-email-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2015-10-29 17:59:26 +00:00
Yongbok Kim
dbd8af9824 target-mips: Add enum for BREAK32
Add enum for BREAK32

Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
Reviewed-by: Leon Alrae <leon.alrae@imgtec.com>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Leon Alrae <leon.alrae@imgtec.com>
2015-10-29 16:16:44 +00:00
Leon Alrae
2dcf7908d9 target-mips: update writing to CP0.Status.KX/SX/UX in MIPS Release R6
Implement the relationship between CP0.Status.KX, SX and UX. It should not
be possible to set the UX bit if SX is 0; the same applies to setting SX
if KX is 0.

Signed-off-by: Leon Alrae <leon.alrae@imgtec.com>
2015-10-29 16:16:44 +00:00
Leon Alrae
7540a43a1d target-mips: implement the CPU wake-up on non-enabled interrupts in R6
In Release 6, the behaviour of WAIT has been modified to make it a
requirement that a processor that has disabled operation as a result of
executing a WAIT will resume operation on arrival of an interrupt even if
interrupts are not enabled.

Signed-off-by: Leon Alrae <leon.alrae@imgtec.com>
2015-10-29 16:16:44 +00:00
Leon Alrae
71ca034a0d target-mips: move the test for enabled interrupts to a separate function
Signed-off-by: Leon Alrae <leon.alrae@imgtec.com>
2015-10-29 16:16:44 +00:00
Markus Armbruster
b9b03ab0d4 monitor: Split MonitorQAPIEventConf off MonitorQAPIEventState
In preparation of turning monitor_qapi_event_state[] into a hash table
for finer grained throttling.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1444921716-9511-5-git-send-email-armbru@redhat.com>
2015-10-29 14:34:45 +01:00
Markus Armbruster
1824c41a62 monitor: Switch from timer_new() to timer_new_ns()
We don't actually care for the scale, so we can just as well use the
simpler interface.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1444921716-9511-4-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2015-10-29 14:34:45 +01:00
Markus Armbruster
93f8f982fe monitor: Simplify event throttling
The event throttling state machine is hard to understand.  I'm not
sure it's entirely correct.  Rewrite it in a more straightforward
manner:

State 1: No event sent recently (less than evconf->rate ns ago)

    Invariant: evstate->timer is not pending, evstate->qdict is null

    On event: send event, arm timer, goto state 2

State 2: Event sent recently, no additional event being delayed

    Invariant: evstate->timer is pending, evstate->qdict is null

    On event: store it in evstate->qdict, goto state 3

    On timer: goto state 1

State 3: Event sent recently, additional event being delayed

    Invariant: evstate->timer is pending, evstate->qdict is non-null

    On event: store it in evstate->qdict, goto state 3

    On timer: send evstate->qdict, clear evstate->qdict,
              arm timer, goto state 2
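
A self-contained sketch of that state machine (QEMU's timers and QDicts are
replaced by plain fields; all names are illustrative):

  #include <stdbool.h>
  #include <stddef.h>

  typedef struct EvState {
      bool timer_pending;     /* armed for evconf->rate ns after a send */
      const char *pending;    /* delayed event, or NULL                 */
  } EvState;

  static void send_event(const char *ev) { (void)ev; /* emit to client */ }

  /* Called when a new event arrives. */
  static void on_event(EvState *s, const char *ev)
  {
      if (!s->timer_pending) {            /* state 1 */
          send_event(ev);
          s->timer_pending = true;        /* arm timer -> state 2 */
      } else {
          s->pending = ev;                /* state 2 or 3 -> state 3 */
      }
  }

  /* Called when the rate timer fires. */
  static void on_timer(EvState *s)
  {
      if (s->pending) {                   /* state 3 */
          send_event(s->pending);
          s->pending = NULL;              /* timer is re-armed -> state 2 */
      } else {
          s->timer_pending = false;       /* state 2 -> state 1 */
      }
  }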

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1444921716-9511-3-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2015-10-29 14:34:45 +01:00
Markus Armbruster
688b4b7de7 monitor: Reduce casting of QAPI event QDict
Make the variables holding the event QDicts instead of QObjects.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1444921716-9511-2-git-send-email-armbru@redhat.com>
2015-10-29 14:34:45 +01:00
Markus Armbruster
7f0278435d qstring: Make conversion from QObject * accept null
qobject_to_qstring() crashes on null, which is a trap for the unwary.
Return null instead, and simplify a few callers.
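
A hedged sketch of the new behaviour (types pared down to the bare minimum;
QEMU's real QObject machinery differs): the converter now tolerates a null
or wrong-typed argument and returns null instead of crashing.

  #include <stddef.h>

  typedef enum { QTYPE_QSTRING, QTYPE_OTHER } qtype_code;
  typedef struct QObject { qtype_code type; } QObject;
  typedef struct QString { QObject base; const char *string; } QString;

  static QString *qobject_to_qstring(const QObject *obj)
  {
      if (!obj || obj->type != QTYPE_QSTRING) {
          return NULL;                    /* previously: crashed on NULL */
      }
      return (QString *)obj;
  }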

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1444918537-18107-7-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
2015-10-29 14:34:45 +01:00
Markus Armbruster
2d6421a900 qlist: Make conversion from QObject * accept null
qobject_to_qlist() crashes on null, which is a trap for the unwary.
Return null instead.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1444918537-18107-6-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
2015-10-29 14:34:45 +01:00
Markus Armbruster
fcf73f66a6 qfloat qint: Make conversion from QObject * accept null
qobject_to_qfloat() and qobject_to_qint() crash on null, which is a
trap for the unwary.  Return null instead, and simplify a few callers.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1444918537-18107-5-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
2015-10-29 14:34:45 +01:00
Markus Armbruster
89cad9f3ec qdict: Make conversion from QObject * accept null
qobject_to_qdict() crashes on null, which is a trap for the unwary.
Return null instead, and simplify a few callers.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1444918537-18107-4-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
2015-10-29 14:34:45 +01:00
Markus Armbruster
14b6160099 qbool: Make conversion from QObject * accept null
qobject_to_qbool() crashes on null, which is a trap for the unwary.
Return null instead, and simplify a few callers.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1444918537-18107-3-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
2015-10-29 14:34:44 +01:00
Markus Armbruster
c7c462123c qobject: Drop QObject_HEAD
QObject_HEAD is a macro expanding into the common part of structs that
are sub-types of QObject.  It's always been just QObject base, and
unlikely to change.  Drop the macro, because the code is clearer
without it.

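For illustration, using QBool as an example (a sketch; the other
sub-types change the same way):

    /* before */
    struct QBool {
        QObject_HEAD;   /* always expanded to: QObject base; */
        bool value;
    };

    /* after */
    struct QBool {
        QObject base;
        bool value;
    };
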
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1444918537-18107-2-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
2015-10-29 14:34:44 +01:00
Peter Maydell
7bc8e0c967 Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
virtio, pc, memory: fixes+features for 2.5

New features:
    This enables hotplug for multifunction devices.
    Patches are very small, so I think it's OK to merge
    at this stage.

    There's also some new infrastructure for vhost-user testing,
    not enabled yet, so it's harmless to merge.

I've reverted the "gap between DIMMs" workaround, as it seems too risky, and
applied my own patch in virtio, but not in dataplane code.  This means that
dataplane is broken for some complex DIMM configurations for now.  Waiting for
Stefan to review the dataplane fix.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Thu 29 Oct 2015 09:36:16 GMT using RSA key ID D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"

* remotes/mst/tags/for_upstream:
  enable multi-function hot-add
  remove function during multi-function hot-add
  tests/vhost-user-bridge: add vhost-user bridge application
  Revert "memhp: extend address auto assignment to support gaps"
  Revert "pc: memhp: force gaps between DIMM's GPA"
  virtio: drop virtqueue_map_sg
  virtio-scsi: convert to virtqueue_map
  virtio-serial: convert to virtio_map
  virtio-blk: convert to virtqueue_map
  virtio: switch to virtio_map
  virtio: introduce virtio_map
  mmap-alloc: fix error handling
  pc: memhp: do not emit inserting event for coldplugged DIMMs
  vhost-user-test: fix up rhel6 build
  vhost-user: cleanup msg size math
  vhost-user: cleanup struct size math

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-29 09:49:52 +00:00
Cao jin
3f1e1478db enable multi-function hot-add
Enable PCIe device multi-function hot-add: just ensure that function 0 is
added last, so the guest driver gets the notification to scan the slot.

Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2015-10-29 11:17:53 +02:00
Cao jin
0d1c7d88ad remove function during multi-function hot-add
In case the user wants to cancel the hot-add operation, they should be able
to roll back: device_del the added functions that are not working yet.

Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2015-10-29 11:17:52 +02:00
Victor Kaplansky
3595e2eb0a tests/vhost-user-bridge: add vhost-user bridge application
The existing test in QEMU for the vhost-user feature is good for
testing the management protocol, but does not allow actual
traffic. This patch proposes the Vhost-User Bridge application, which
can serve the QEMU community as a comprehensive test by running
real internet traffic over the vhost-user interface.

Essentially the Vhost-User Bridge is a very basic vhost-user
backend for QEMU. It runs as a standalone user-level process.
For packet processing Vhost-User Bridge uses an additional QEMU
instance with a backend configured by "-net socket" as a shared
VLAN.  This way another QEMU virtual machine can effectively
serve as a shared bus by means of UDP communication.

For a simpler setup, the QEMU instance running the
SLiRP backend can be the same QEMU instance running the vhost-user
client.

This Vhost-User Bridge implementation is very preliminary.  It is
missing many features. I have been studying the vhost-user protocol
internals, so I've written vhost-user-bridge bit by bit as I
progressed through the protocol.  Most probably its internal
architecture will change significantly.

To run Vhost-User Bridge application:

1. Build vhost-user-bridge with a regular procedure. This will
create a vhost-user-bridge executable under tests directory:

    $ configure; make tests/vhost-user-bridge

2. Ensure the machine has hugepages enabled in the kernel, with a
command line like:

    default_hugepagesz=2M hugepagesz=2M hugepages=2048

3. Run Vhost-User Bridge with:

    $ tests/vhost-user-bridge

The above will run a vhost-user server listening for connections
on the UNIX domain socket /tmp/vubr.sock. It will try to connect
by UDP to the VLAN bridge at localhost:5555, while listening on
localhost:4444.

Run qemu with a virtio-net backed by vhost-user:

    $ qemu \
        -enable-kvm -m 512 -smp 2 \
        -object memory-backend-file,id=mem,size=512M,mem-path=/dev/hugepages,share=on \
        -numa node,memdev=mem -mem-prealloc \
        -chardev socket,id=char0,path=/tmp/vubr.sock \
        -netdev type=vhost-user,id=mynet1,chardev=char0,vhostforce \
        -device virtio-net-pci,netdev=mynet1 \
        -net none \
        -net socket,vlan=0,udp=localhost:4444,localaddr=localhost:5555 \
        -net user,vlan=0 \
        disk.img

vhost-user-bridge was tested very lightly: it's able to bring up a
Linux guest in the client VM with the virtio-net driver, and to transmit
and receive traffic to the internet. I tested with "wget redhat.com" and
"dig redhat.com".

PS. I've consulted DPDK's code for vhost-user during Vhost-User
Bridge implementation.

Signed-off-by: Victor Kaplansky <victork@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2015-10-29 11:11:07 +02:00
Michael S. Tsirkin
d6a9b0b89d Revert "memhp: extend address auto assignment to support gaps"
This reverts commit df0acded19.

There's no point to it now that the only user has been reverted.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2015-10-29 11:11:07 +02:00
Michael S. Tsirkin
340065e5a1 Revert "pc: memhp: force gaps between DIMM's GPA"
This reverts commit aa8580cddf.

As described in
http://article.gmane.org/gmane.comp.emulators.qemu/371432
that commit causes linux guests to crash on memory hot-unplug.

The original problem it's trying to solve has now
been addressed within virtio.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2015-10-29 11:11:07 +02:00
Michael S. Tsirkin
3945ecf1ec virtio: drop virtqueue_map_sg
Deprecated in favor of virtqueue_map.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
2015-10-29 11:05:24 +02:00
Michael S. Tsirkin
4ada533189 virtio-scsi: convert to virtqueue_map
Note: virtqueue_map already validates input
so virtio-scsi does not have to.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
2015-10-29 11:05:24 +02:00
Michael S. Tsirkin
bff712dc22 virtio-serial: convert to virtio_map
This also fixes a minor bug:
-                virtqueue_map_sg(port->elem.out_sg, port->elem.out_addr,
-                                 port->elem.out_num, 1);
is wrong: out_sg is not written so should not be marked dirty.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
2015-10-29 11:05:24 +02:00
Michael S. Tsirkin
3d8db153b4 virtio-blk: convert to virtqueue_map
Drop deprecated use of virtqueue_map_sg.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
2015-10-29 11:05:24 +02:00
Michael S. Tsirkin
13972ac5e2 virtio: switch to virtio_map
Drop use of the deprecated virtio_map_sg in virtio core.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
2015-10-29 11:05:24 +02:00
Michael S. Tsirkin
8059feee00 virtio: introduce virtio_map
virtio_map_sg currently fails if one of the entries it's mapping is
contiguous in GPA but not HVA address space.  Introduce virtio_map which
handles this by splitting sg entries.

This new API generally turns out to be a good idea since it's harder to
misuse: at least in one case the existing one was used incorrectly.

This will still fail if there's no space left in the sg, but luckily max
queue size in use is currently 256, while max sg size is 1024, so we
should be OK even if all entries happen to cross a single DIMM boundary.

Won't work well with very small DIMM sizes, unfortunately:
e.g. this will fail with 4K DIMMs where a single
request might span a large number of DIMMs.

Let's hope these are uncommon - at least we are not breaking things.

Note: virtio-scsi calls virtio_map_sg on data loaded from network, and
validates input, asserting on failure.  Copy the validating code here -
it will be dropped from virtio-scsi in a follow-up patch.

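The heart of the splitting logic is roughly the following (a sketch
with illustrative names; the real code also takes care of the
is_write/dirty-marking details):

    /* Map one guest-physical range into host iovecs, splitting whenever
     * the host mapping comes back shorter than requested. */
    static int map_range(struct iovec *sg, unsigned *num, unsigned max,
                         hwaddr pa, hwaddr sz, bool is_write)
    {
        while (sz) {
            hwaddr len = sz;
            void *p = cpu_physical_memory_map(pa, &len, is_write);

            if (!p || *num >= max) {
                return -1;              /* unmappable, or sg array exhausted */
            }
            sg[*num].iov_base = p;
            sg[*num].iov_len = len;
            (*num)++;
            pa += len;
            sz -= len;                  /* continue with the remainder */
        }
        return 0;
    }
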
Reported-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
2015-10-29 11:05:24 +02:00
Michael S. Tsirkin
9d4ec9370a mmap-alloc: fix error handling
Existing callers are checking for MAP_FAILED,
so we should return that on error.

Reported-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2015-10-29 11:05:24 +02:00
Igor Mammedov
4828b10bda pc: memhp: do not emit inserting event for coldplugged DIMMs
currently acpi_memory_plug_cb() sets is_inserting for
cold- and hot-plugged DIMMs; as a result, the ASL MHPD.MSCN()
method issues a device check event even for every coldplugged
DIMM. There isn't much harm in that, but if we try to
unplug such a DIMM, OSPM will issue a device check
instead of a device eject event. So OSPM won't eject the
memory module as expected, and it will try to eject it
only when another memory device is hot-(un)plugged.

As a fix, do not set the 'is_inserting' event and do not
issue an SCI for cold-plugged DIMMs, as they are
enumerated and activated by OSPM during the guest's boot.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2015-10-29 11:05:24 +02:00
Michael S. Tsirkin
12ebf69083 vhost-user-test: fix up rhel6 build
Build on RHEL6 fails:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=42875

Apparently unnamed unions couldn't use C99  named field initializers.
Let's just name the payload union field.

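The problem and the fix boil down to this (generic illustration, not
the actual test code):

    #include <stdint.h>

    struct msg_anon {
        uint32_t request;
        union {                 /* anonymous union */
            uint64_t u64;
        };
    };
    /* gcc 4.4 (RHEL6) rejects this designated initializer: */
    struct msg_anon m1 = { .request = 1, .u64 = 0 };

    struct msg_named {
        uint32_t request;
        union {
            uint64_t u64;
        } payload;              /* named union field */
    };
    /* ...while this builds everywhere: */
    struct msg_named m2 = { .request = 1, .payload.u64 = 0 };
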
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2015-10-29 11:05:24 +02:00
Michael S. Tsirkin
7fc0246c07 vhost-user: cleanup msg size math
We are sending msg fields, use sizeof on these
and not on local variables which happen to
have a matching type.

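In other words (field and helper names are illustrative):

    uint64_t features = get_features();    /* hypothetical source */

    /* before: sizeof on a local that merely happens to match the field */
    msg.payload.u64 = features;
    msg.size = sizeof(features);

    /* after: take the size of the message field actually being sent */
    msg.payload.u64 = features;
    msg.size = sizeof(msg.payload.u64);
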
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2015-10-29 11:05:24 +02:00
Michael S. Tsirkin
86abad0fed vhost-user: cleanup struct size math
We are using local msg structures everywhere, use them
for sizeof as well.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2015-10-29 11:05:24 +02:00
Peter Maydell
331c5e2091 Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20151028' into staging
Breakpoint fixes

# gpg: Signature made Wed 28 Oct 2015 17:58:52 GMT using RSA key ID 4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"

* remotes/rth/tags/pull-tcg-20151028:
  target-*: Advance pc after recognizing a breakpoint

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-28 20:10:22 +00:00
Richard Henderson
522a0d4e3c target-*: Advance pc after recognizing a breakpoint
Some targets already had this within their logic, but make sure
it's present for all targets.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2015-10-28 10:57:16 -07:00
Peter Maydell
496fedddce Merge remote-tracking branch 'remotes/ehabkost/tags/x86-pull-request' into staging
target-i386: finally enable "check" mode by default

# gpg: Signature made Wed 28 Oct 2015 14:13:10 GMT using RSA key ID 984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"

* remotes/ehabkost/tags/x86-pull-request:
  target-i386: Enable "check" mode by default
  target-i386: Don't left shift negative constant

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-28 15:08:36 +00:00
Peter Maydell
739680da59 Merge remote-tracking branch 'remotes/mcayland/tags/qemu-openbios-signed' into staging
Update OpenBIOS images

# gpg: Signature made Wed 28 Oct 2015 00:02:46 GMT using RSA key ID AE0F321F
# gpg: Good signature from "Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>"

* remotes/mcayland/tags/qemu-openbios-signed:
  Update OpenBIOS images

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-28 14:02:27 +00:00
Mark Cave-Ayland
637016c260 Update OpenBIOS images
Update OpenBIOS images to SVN r1353 built from submodule.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2015-10-28 00:01:28 +00:00
Eduardo Habkost
15e4134590 target-i386: Enable "check" mode by default
Current default behavior of QEMU is to silently disable features that
are not supported by the host when a CPU model is requested in the
command-line. This means that in addition to risking breaking guest ABI
by default, we are silent about it.

I would like to enable "enforce" by default, but this can easily break
existing production systems because of the way libvirt makes assumptions
about CPU models today (this will change in the future, once QEMU
provides a proper interface for checking if a CPU model is runnable).

But there's no reason we should be silent about it. So, change
target-i386 to enable "check" mode by default so at least we have some
warning printed to stderr (and hopefully logged somewhere) when QEMU
disables a feature that is not supported by the host system.

Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2015-10-27 16:12:15 -02:00
Eduardo Habkost
712b4243c7 target-i386: Don't left shift negative constant
Left shift of negative values is undefined behavior. Detected by clang:
  qemu/target-i386/translate.c:2423:26: runtime error:
    left shift of negative value -8

This changes the code to reverse the sign after the left shift.

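The pattern, in general terms (illustrative):

    /* undefined behaviour: left-shifting a negative value */
    offset = -8 << shift;

    /* well defined: shift the positive value, then negate the result */
    offset = -(8 << shift);
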
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2015-10-27 15:52:11 -02:00
Peter Maydell
c012e1b7ad Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20151027-1' into staging
target-arm queue:
 * more EL2 preparation: handling for stage 2 translations
 * standardize debug macros in i.MX devices
 * improve error message in a corner case for virt board
 * disable live migration of KVM GIC if the kernel can't handle it
 * add SPSR_(ABT|UND|IRQ|FIQ) registers
 * handle non-executable page-straddling Thumb instructions
 * fix a "no 64-bit EL2" assumption in arm_excp_unmasked()

# gpg: Signature made Tue 27 Oct 2015 16:03:31 GMT using RSA key ID 14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>"
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"

* remotes/pmaydell/tags/pull-target-arm-20151027-1: (27 commits)
  target-arm: Add support for S1 + S2 MMU translations
  target-arm: Route S2 MMU faults to EL2
  target-arm: Add S2 translation to 32bit S1 PTWs
  target-arm: Add S2 translation to 64bit S1 PTWs
  target-arm: Add ARMMMUFaultInfo
  target-arm: Avoid inline for get_phys_addr
  target-arm: Add support for S2 page-table protection bits
  target-arm: Add computation of starting level for S2 PTW
  target-arm: lpae: Rename granule_sz to stride
  target-arm: lpae: Replace tsz with computed inputsize
  target-arm: Add support for AArch32 S2 negative t0sz
  target-arm: lpae: Move declaration of t0sz and t1sz
  target-arm: lpae: Make t0sz and t1sz signed integers
  target-arm: Add HPFAR_EL2
  i.MX: Standardize i.MX GPT debug
  i.MX: Standardize i.MX EPIT debug
  i.MX: Standardize i.MX FEC debug
  i.MX: Standardize i.MX CCM debug
  i.MX: Standardize i.MX AVIC debug
  i.MX: Standardize i.MX I2C debug
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-27 16:17:55 +00:00
Edgar E. Iglesias
9b539263fa target-arm: Add support for S1 + S2 MMU translations
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Message-id: 1445864527-14520-15-git-send-email-edgar.iglesias@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-27 15:59:47 +00:00
Edgar E. Iglesias
d759a457a1 target-arm: Route S2 MMU faults to EL2
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Message-id: 1445864527-14520-14-git-send-email-edgar.iglesias@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-27 15:59:47 +00:00
Edgar E. Iglesias
a614e69854 target-arm: Add S2 translation to 32bit S1 PTWs
Add support for applying S2 translation to 32bit S1
page-table walks.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Message-id: 1445864527-14520-13-git-send-email-edgar.iglesias@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-27 15:59:47 +00:00
Edgar E. Iglesias
3778597762 target-arm: Add S2 translation to 64bit S1 PTWs
Add support for applying S2 translation to 64bit S1
page-table walks.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Message-id: 1445864527-14520-12-git-send-email-edgar.iglesias@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-27 15:59:47 +00:00
Edgar E. Iglesias
e14b5a23d8 target-arm: Add ARMMMUFaultInfo
Introduce ARMMMUFaultInfo to propagate MMU Fault information
across the MMU translation code path. This is in preparation for
adding Stage-2 translation.

No functional changes.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Message-id: 1445864527-14520-11-git-send-email-edgar.iglesias@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-27 15:59:47 +00:00
Edgar E. Iglesias
af51f566ec target-arm: Avoid inline for get_phys_addr
Avoid inline for get_phys_addr() to prepare for future recursive use.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Message-id: 1445864527-14520-10-git-send-email-edgar.iglesias@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-27 15:59:47 +00:00
Edgar E. Iglesias
6ab1a5ee1c target-arm: Add support for S2 page-table protection bits
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Message-id: 1445864527-14520-9-git-send-email-edgar.iglesias@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-27 15:59:47 +00:00
Edgar E. Iglesias
1853d5a9dc target-arm: Add computation of starting level for S2 PTW
The starting level for S2 pagetable walks is computed
differently from the S1 starting level. Implement the S2
variant.

Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Message-id: 1445864527-14520-8-git-send-email-edgar.iglesias@gmail.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-27 15:59:47 +00:00
Edgar E. Iglesias
973a543482 target-arm: lpae: Rename granule_sz to stride
Rename granule_sz to stride to better match the reference manuals.

No functional change.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Message-id: 1445864527-14520-7-git-send-email-edgar.iglesias@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-27 15:59:46 +00:00
Edgar E. Iglesias
4ca6a05175 target-arm: lpae: Replace tsz with computed inputsize
Remove the tsz variable and introduce inputsize.
This simplifies the code a little and makes it easier to
compare with the reference manuals.

No functional change.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Message-id: 1445864527-14520-6-git-send-email-edgar.iglesias@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-27 15:59:46 +00:00
Edgar E. Iglesias
4ee3809801 target-arm: Add support for AArch32 S2 negative t0sz
Add support for AArch32 S2 negative t0sz. In preparation for
using 40bit IPAs on AArch32.

Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Message-id: 1445864527-14520-5-git-send-email-edgar.iglesias@gmail.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-27 15:59:46 +00:00
Edgar E. Iglesias
1f4c8c18a5 target-arm: lpae: Move declaration of t0sz and t1sz
Move declaration of t0sz and t1sz to the top of the function
avoiding a mix of code and variable declarations.

No functional change.

Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Message-id: 1445864527-14520-4-git-send-email-edgar.iglesias@gmail.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-27 15:59:46 +00:00
Edgar E. Iglesias
5c31a10d16 target-arm: lpae: Make t0sz and t1sz signed integers
Make t0sz and t1sz signed integers to match tsz and to make
it easier to implement support for AArch32 negative t0sz.
t1sz is changed for consistency.

No functional change.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Message-id: 1445864527-14520-3-git-send-email-edgar.iglesias@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-27 15:59:46 +00:00
Edgar E. Iglesias
59e0553073 target-arm: Add HPFAR_EL2
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Message-id: 1445864527-14520-2-git-send-email-edgar.iglesias@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-27 15:59:46 +00:00
Jean-Christophe Dubois
054535262f i.MX: Standardize i.MX GPT debug
The goal is to have debug code always compiled during build.

We standardize all debug output on the following format:

[QOM_TYPE_NAME]reporting_function: debug message

We also replace IPRINTF with qemu_log_mask(). The qemu_log_mask() output
is following the same format as the above debug.

Reviewed-by: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Signed-off-by: Jean-Christophe Dubois <jcd@tribudubois.net>
Message-id: b7ce7e98a051479453744aded122789531d80a44.1445781957.git.jcd@tribudubois.net
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-27 15:59:46 +00:00
Jean-Christophe Dubois
4929f6563c i.MX: Standardize i.MX EPIT debug
The goal is to have debug code always compiled during build.

We standardize all debug output on the following format:

[QOM_TYPE_NAME]reporting_function: debug message

We also replace IPRINTF with qemu_log_mask(). The qemu_log_mask() output
is following the same format as the above debug.

Reviewed-by: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Signed-off-by: Jean-Christophe Dubois <jcd@tribudubois.net>
Message-id: 5bbad71517ca728d8865f7b9f998baa0df022794.1445781957.git.jcd@tribudubois.net
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-27 15:59:46 +00:00
Jean-Christophe Dubois
b72d8d257c i.MX: Standardize i.MX FEC debug
The goal is to have debug code always compiled during build.

We standardize all debug output on the following format:

[QOM_TYPE_NAME]reporting_function: debug message

The qemu_log_mask() output is following the same format as the
above debug.

Reviewed-by: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Signed-off-by: Jean-Christophe Dubois <jcd@tribudubois.net>
Message-id: 57e565982db94fb433c32dfa17608888464d21de.1445781957.git.jcd@tribudubois.net
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-27 15:59:46 +00:00
Jean-Christophe Dubois
4a6aa0af85 i.MX: Standardize i.MX CCM debug
The goal is to have debug code always compiled during build.

We standardize all debug output on the following format:

[QOM_TYPE_NAME]reporting_function: debug message

The qemu_log_mask() output is following the same format as the
above debug.

Also add some missing qemu_log_mask() calls for bad registers.

Reviewed-by: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Signed-off-by: Jean-Christophe Dubois <jcd@tribudubois.net>
Message-id: 293e08f31cbb4df84d58f693243e61e770c73b3a.1445781957.git.jcd@tribudubois.net
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-27 15:59:46 +00:00
Jean-Christophe Dubois
f50ed7853a i.MX: Standardize i.MX AVIC debug
The goal is to have debug code always compiled during build.

We standardize all debug output on the following format:

[QOM_TYPE_NAME]reporting_function: debug message

We also replace IPRINTF with qemu_log_mask(). The qemu_log_mask() output
is following the same format as the above debug.

Reviewed-by: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Signed-off-by: Jean-Christophe Dubois <jcd@tribudubois.net>
Message-id: 29885ffea2577eaf2288c1d17fd87ee951748b49.1445781957.git.jcd@tribudubois.net
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-27 15:59:46 +00:00
Jean-Christophe Dubois
3afcbb01bc i.MX: Standardize i.MX I2C debug
The goal is to have debug code always compiled during build.

We standardize all debug output on the following format:

[QOM_TYPE_NAME]reporting_function: debug message

The qemu_log_mask() output is following the same format as
the above debug.

Reviewed-by: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Signed-off-by: Jean-Christophe Dubois <jcd@tribudubois.net>
Message-id: 328acfe6fc09a5afdbfbfd5220e0869fd5082660.1445781957.git.jcd@tribudubois.net
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-27 15:59:46 +00:00
Jean-Christophe Dubois
5641112574 i.MX: Standardize i.MX GPIO debug
The goal is to have debug code always compiled during build.

We standardize all debug output on the following format:

[QOM_TYPE_NAME]reporting_function: debug message

The qemu_log_mask() output is following the same format as
the above debug.

Reviewed-by: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Signed-off-by: Jean-Christophe Dubois <jcd@tribudubois.net>
Message-id: 4f2007adcf0f579864bb4dd8a825824e0e9098b8.1445781957.git.jcd@tribudubois.net
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-27 15:59:46 +00:00
Jean-Christophe Dubois
8ccce77c04 i.MX: Standardize i.MX serial debug.
The goal is to have debug code always compiled during build.

We standardize all debug output on the following format:

[QOM_TYPE_NAME]reporting_function: debug message

We also replace IPRINTF with qemu_log_mask(). The qemu_log_mask() output
is following the same format as the above debug.

Reviewed-by: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Signed-off-by: Jean-Christophe Dubois <jcd@tribudubois.net>
Message-id: 47b8759b251d356c633faf7ea34f897f340aea4e.1445781957.git.jcd@tribudubois.net
[PMM: Drop attempt to print the ram_addr of a memory region in
 one DPRINTF, which (a) was using the wrong format string so
 didn't build on 32-bit and (b) was incorrectly looking at a
 private field of a MemoryRegion struct]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-27 13:16:21 +00:00
Andrew Jones
4b280b726a hw/arm/virt: don't use a15memmap directly
We should always go through VirtBoardInfo when we need the memmap.
To avoid using a15memmap directly, in this case, we need to defer
the max-cpus check from class init time to instance init time. In
class init we now use MAX_CPUMASK_BITS for max_cpus initialization,
which is the maximum QEMU supports, and also, incidentally, the
maximum KVM/gicv3 currently supports. Also, a nice side-effect of
delaying the max-cpus check is that we now get more appropriate
error messages for gicv2 machines that try to configure more than
123 cpus. Before this patch it would complain that the requested
number of cpus was greater than 123, but for gicv2 configs, it
should complain that the number is greater than 8.

Signed-off-by: Andrew Jones <drjones@redhat.com>
Message-id: 1445189728-860-3-git-send-email-drjones@redhat.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-27 12:00:50 +00:00
Pavel Fedin
24182fbc19 arm_gic_kvm: Disable live migration if not supported
Currently, if the kernel does not provide the live migration API, migration
will still be attempted, but the vGIC save/restore functions will just not do
anything. This results in a broken machine state.

This patch fixes the problem by adding a migration blocker if the kernel API
is not supported.

Signed-off-by: Pavel Fedin <p.fedin@samsung.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-27 12:00:50 +00:00
Soren Brinkmann
b876452507 target-arm: Add support for SPSR_(ABT|UND|IRQ|FIQ)
Signed-off-by: Soren Brinkmann <soren.brinkmann@xilinx.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-27 12:00:50 +00:00
Peter Maydell
541ebcd401 target-arm/translate.c: Handle non-executable page-straddling Thumb insns
When the memory we're trying to translate code from is not executable we have
to turn this into a guest fault. In order to report the correct PC for this
fault, and to make sure it is not reported until after any other possible
faults for instructions earlier in execution, we must terminate TBs at
the end of a page, in case the next instruction is in a non-executable page.
This is simple for T16, A32 and A64 instructions, which are always aligned
to their size. However T32 instructions may be 32-bits but only 16-aligned,
so they can straddle a page boundary.

Correct the condition that checks whether the next instruction will touch
the following page, to ensure that if we're 2 bytes before the boundary
and this insn is T32 then we end the TB.

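Schematically, the end-of-TB condition becomes something like this
(a sketch; insn_crosses_page() stands for a check that the current
insn is a 32-bit Thumb insn starting 2 bytes before the boundary):

    end_of_page = dc->pc >= next_page_start ||
                  (dc->pc >= next_page_start - 3 && insn_crosses_page(env, dc));
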
Reported-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
Reviewed-by: Laurent Desnogues <laurent.desnogues@gmail.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-27 12:00:50 +00:00
Peter Maydell
7cd6de3bb1 target-arm: Fix "no 64-bit EL2" assumption in arm_excp_unmasked()
The code in arm_excp_unmasked() suppresses the ability of PSTATE.AIF
to mask exceptions from a lower EL targeting EL2 or EL3 if the
CPU is 64-bit. This is correct for a target of EL3, but not correct
for targeting EL2. Further, we go to some effort to calculate
scr and hcr values which are not used at all for the 64-bit CPU
case.

Rearrange the code to correctly implement the 64-bit CPU logic
and keep the hcr/scr calculations in the 32-bit CPU codepath.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1444327729-4120-1-git-send-email-peter.maydell@linaro.org
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Tested-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2015-10-27 12:00:50 +00:00
Peter Maydell
7e038b94e7 Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging
# gpg: Signature made Tue 27 Oct 2015 05:47:28 GMT using RSA key ID 398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 215D 46F4 8246 689E C77F  3562 EF04 965B 398D 6211

* remotes/jasowang/tags/net-pull-request:
  net: free the string returned by object_get_canonical_path_component
  net: make iov_to_buf take right size argument in nc_sendv_compat()
  net: Remove duplicate data from query-rx-filter on multiqueue net devices
  vmxnet3: Do not fill stats if device is inactive
  options: Add documentation for filter-dump
  net/dump: Provide the dumping facility as a net-filter
  net/dump: Separate the NetClientState from the DumpState
  net/dump: Rework net-dump init functions
  net/dump: Add support for receive_iov function
  net: cadence_gem: Set initial MAC address

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-27 10:10:46 +00:00
Yang Hongyang
a3e8a3f382 net: free the string returned by object_get_canonical_path_component
The value returned from object_get_canonical_path_component
must be freed.

Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2015-10-27 10:30:42 +08:00
Yang Hongyang
edc981443d net: make iov_to_buf take right size argument in nc_sendv_compat()
We want "buf, sizeof(buf)" here.  sizeof(buffer) is the size of a
pointer, which is wrong.
Thanks to Paolo for pointing it out.

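The pitfall, in general terms (illustrative):

    uint8_t buf[4096];
    uint8_t *buffer = buf;

    size_t good = sizeof(buf);      /* size of the array: 4096 */
    size_t bad  = sizeof(buffer);   /* size of a pointer: 4 or 8 */
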
Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2015-10-27 10:30:40 +08:00
Vladislav Yasevich
5320c2caf4 net: Remove duplicate data from query-rx-filter on multiqueue net devices
When responding to a query-rx-filter command on a multiqueue
netdev, qemu reports the data for each queue.  The data, however,
is not per-queue, but per device and the same data is reported
multiple times.  This causes confusion and may also cause extra
unnecessary processing when looking at the data.

Commit 638fb14169 (net: Make qmp_query_rx_filter() with name argument
more obvious) partially addresses this issue, by limiting the output
when the name is specified.  However, when the name is not specified,
the issue still persists.

Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2015-10-27 10:30:39 +08:00
Shmulik Ladkani
eedeeeffd4 vmxnet3: Do not fill stats if device is inactive
The guest OS may issue VMXNET3_CMD_GET_STATS even before the device is
activated (for example in Linux, after insmod but prior to net-dev open).

Accessing shared descriptors prior to device activation is illegal, as the
VMXNET3State structures have not been fully initialized.

As a result, guest memory gets corrupted and may lead to guest OS
crashes.

Fix this by not filling the stats descriptors if the device is inactive.

Reported-by: Leonid Shatz <leonid.shatz@ravellosystems.com>
Acked-by: Dmitry Fleytman <dmitry@daynix.com>
Signed-off-by: Dana Rubin <dana.rubin@ravellosystems.com>
Signed-off-by: Shmulik Ladkani <shmulik.ladkani@ravellosystems.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2015-10-27 10:30:38 +08:00
Thomas Huth
d3e0c032f5 options: Add documentation for filter-dump
Add a short description for the filter-dump command line options.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2015-10-27 10:30:36 +08:00
Thomas Huth
9d3e12e881 net/dump: Provide the dumping facility as a net-filter
Use the net-filter infrastructure to provide the dumping
functions for netdev devices, too.

Reviewed-by: Yang Hongyang <yanghy@cn.fujitsu.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2015-10-27 10:30:35 +08:00
Thomas Huth
75310e3486 net/dump: Separate the NetClientState from the DumpState
With the upcoming dumping-via-netfilter patch, the DumpState
should not be related to NetClientState anymore, so move the
related information to a new struct called DumpNetClient.

Reviewed-by: Yang Hongyang <yanghy@cn.fujitsu.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2015-10-27 10:30:34 +08:00
Thomas Huth
7bc3074c27 net/dump: Rework net-dump init functions
Move the creation of the dump client from net_dump_init() into
net_init_dump(), so we can later use the former function for
dump via netfilter, too. Also rename net_dump_init() to
net_dump_state_init() to make it easier distinguishable from
net_init_dump().

Reviewed-by: Yang Hongyang <yanghy@cn.fujitsu.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2015-10-27 10:30:32 +08:00
Thomas Huth
43192fcc1a net/dump: Add support for receive_iov function
Adding a proper receive_iov function to the net dump module.
This will make it easier to support the dump filter feature for
the -netdev option in later patches.

Reviewed-by: Yang Hongyang <yanghy@cn.fujitsu.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2015-10-27 10:30:31 +08:00
Sebastian Huber
afb4c51fad net: cadence_gem: Set initial MAC address
Set initial MAC address to the one specified by the command line.

Signed-off-by: Sebastian Huber <sebastian.huber@embedded-brains.de>
Reviewed-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2015-10-27 10:30:30 +08:00
Peter Maydell
9666248a85 Merge remote-tracking branch 'remotes/sstabellini/tags/xen-2015-10-26' into staging
Xen 2015-10-26

# gpg: Signature made Mon 26 Oct 2015 11:32:50 GMT using RSA key ID 70E1AE90
# gpg: Good signature from "Stefano Stabellini <stefano.stabellini@eu.citrix.com>"

* remotes/sstabellini/tags/xen-2015-10-26:
  xen-platform: Replace assert() with appropriate error reporting
  xen_platform: switch to realize
  Qemu/Xen: Fix early freeing MSIX MMIO memory region

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-26 13:13:38 +00:00
Eduardo Habkost
b1ecd51bdb xen-platform: Replace assert() with appropriate error reporting
Commit dbb7405d8c made it possible to
trigger an assert using "-device xen-platform". Replace it with
appropriate error reporting.

Before:

  $ qemu-system-x86_64 -device xen-platform
  qemu-system-x86_64: hw/i386/xen/xen_platform.c:391: xen_platform_initfn: Assertion `xen_enabled()' failed.
  Aborted (core dumped)
  $

After:

  $ qemu-system-x86_64 -device xen-platform
  qemu-system-x86_64: -device xen-platform: xen-platform device requires the Xen accelerator
  $

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Reviewed-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
2015-10-26 11:32:24 +00:00
Stefano Stabellini
4098d49db5 xen_platform: switch to realize
Use realize to initialize the xen_platform device

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2015-10-26 11:32:24 +00:00
Peter Maydell
251d7e6014 Merge remote-tracking branch 'remotes/elmarco/tags/ivshmem-pull-request' into staging
ivshmem series

# gpg: Signature made Mon 26 Oct 2015 09:27:46 GMT using RSA key ID 75969CE5
# gpg: Good signature from "Marc-André Lureau <marcandre.lureau@redhat.com>"
# gpg:                 aka "Marc-André Lureau <marcandre.lureau@gmail.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 87A9 BD93 3F87 C606 D276  F62D DAE8 E109 7596 9CE5

* remotes/elmarco/tags/ivshmem-pull-request: (51 commits)
  doc: document ivshmem & hugepages
  ivshmem: use little-endian int64_t for the protocol
  ivshmem: use kvm irqfd for msi notifications
  ivshmem: rename MSI eventfd_table
  ivshmem: remove EventfdEntry.vector
  ivshmem: add hostmem backend
  ivshmem: use qemu_strtosz()
  ivshmem: do not keep shm_fd open
  tests: add ivshmem qtest
  qtest: add qtest_add_abrt_handler()
  msix: implement pba write (but read-only)
  contrib: remove unnecessary strdup()
  ivshmem: add check on protocol version in QEMU
  docs: update ivshmem device spec
  ivshmem-server: fix hugetlbfs support
  ivshmem-server: use a uint16 for client ID
  ivshmem-client: check the number of vectors
  contrib: add ivshmem client and server
  util: const event_notifier_get_fd() argument
  ivshmem: reset mask on device reset
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-26 11:32:20 +00:00
Lan Tianyu
4e494de668 Qemu/Xen: Fix early freeing MSIX MMIO memory region
msix->mmio is added to XenPCIPassthroughState's object as a property.
object_finalize_child_property() is called for XenPCIPassthroughState's
object; it calls object_property_del_all(), which is going to try to
delete msix->mmio. object_finalize_child_property() will access
msix->mmio's obj, but the whole msix struct has already been freed
by xen_pt_msix_delete. This will cause a segmentation fault when
msix->mmio has been overwritten.

This patch fixes the issue.

Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
Reviewed-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
2015-10-26 11:32:18 +00:00
Marc-André Lureau
7d4f4bdaf7 doc: document ivshmem & hugepages
Document and give some examples of hugepages support with ivshmem device
and server.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2015-10-26 10:19:53 +01:00
Marc-André Lureau
f7a199b2b4 ivshmem: use little-endian int64_t for the protocol
The current ivshmem protocol uses 'long' for integers. But the
sizeof(long) depends on the host and the endianness is not defined, which
may cause portability troubles.

Instead, switch to using little-endian int64_t. This breaks the
protocol, except on x64 little-endian host where this change
should be compatible.

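Concretely, values on the wire go through an explicit little-endian
conversion, along these lines (sketch):

    /* write a protocol value as little-endian int64_t regardless of host */
    int64_t v = GINT64_TO_LE(peer_id);
    ssize_t ret = write(fd, &v, sizeof(v));

    /* ...and convert back on the reading side with GINT64_FROM_LE(). */
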
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-26 10:19:53 +01:00
Marc-André Lureau
660c97eef6 ivshmem: use kvm irqfd for msi notifications
Use irqfd for improving context switch when notifying the guest.
If the host doesn't support kvm irqfd, regular msi notifications are
still supported.

Note: the ivshmem implementation doesn't allow switching between MSI and
IO interrupts, this patch doesn't either.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
2015-10-26 10:19:53 +01:00
Marc-André Lureau
0f57350e5c ivshmem: rename MSI eventfd_table
The array is used to hold vector-specific data, so use a more
descriptive name.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-26 10:19:53 +01:00
Marc-André Lureau
d160f3f791 ivshmem: remove EventfdEntry.vector
No need to store an extra int for the vector number when it can be
computed easily by looking at the position in the array.

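The vector number can instead be recovered from the entry's position,
e.g. (sketch; the array name is illustrative):

    /* entry points into the s->msi_vectors[] array */
    int vector = entry - s->msi_vectors;
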
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-26 10:19:53 +01:00
Marc-André Lureau
d9453c93fe ivshmem: add hostmem backend
Instead of handling allocation, teach ivshmem to use a memory backend.
This now allows using hugetlbfs-backed memory.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-26 10:19:53 +01:00
Marc-André Lureau
2c04752cc8 ivshmem: use qemu_strtosz()
Use the common qemu utility function to parse the memory size.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-26 10:19:53 +01:00
Marc-André Lureau
f689d2811a ivshmem: do not keep shm_fd open
Remove shm_fd from device state, closing it as early as possible to avoid leaks.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-26 10:19:53 +01:00
Marc-André Lureau
ddef6a0d68 tests: add ivshmem qtest
Adds 4 ivshmem tests:
- single qemu instance and basic IO
- pair of instances, check memory sharing
- pair of instances with server, and MSIX
- hot plug/unplug

A temporary shm is created, as well as a directory to place the server
socket; both should be cleaned up on exit and abort.

Cc: Cam Macdonell <cam@cs.ualberta.ca>
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-10-26 10:19:48 +01:00
Marc-André Lureau
063c23d909 qtest: add qtest_add_abrt_handler()
Allow a test to add abort handlers, use GHook for all handlers.

There is currently no way to remove a handler, but it could be
added later if needed.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:03:18 +02:00
Marc-André Lureau
43b11a91dd msix: implement pba write (but read-only)
qpci_msix_pending() writes on pba region, causing qemu to SEGV:

  Program received signal SIGSEGV, Segmentation fault.
  [Switching to Thread 0x7ffff7fba8c0 (LWP 25882)]
  0x0000000000000000 in ?? ()
  (gdb) bt
  #0  0x0000000000000000 in  ()
  #1  0x00005555556556c5 in memory_region_oldmmio_write_accessor (mr=0x5555579f3f80, addr=0, value=0x7fffffffbf68, size=4, shift=0, mask=4294967295, attrs=...) at /home/elmarco/src/qemu/memory.c:434
  #2  0x00005555556558e1 in access_with_adjusted_size (addr=0, value=0x7fffffffbf68, size=4, access_size_min=1, access_size_max=4, access=0x55555565563e <memory_region_oldmmio_write_accessor>, mr=0x5555579f3f80, attrs=...) at /home/elmarco/src/qemu/memory.c:506
  #3  0x00005555556581eb in memory_region_dispatch_write (mr=0x5555579f3f80, addr=0, data=0, size=4, attrs=...) at /home/elmarco/src/qemu/memory.c:1176
  #4  0x000055555560b6f9 in address_space_rw (as=0x555555eff4e0 <address_space_memory>, addr=3759147008, attrs=..., buf=0x7fffffffc1b0 "", len=4, is_write=true) at /home/elmarco/src/qemu/exec.c:2439
  #5  0x000055555560baa2 in cpu_physical_memory_rw (addr=3759147008, buf=0x7fffffffc1b0 "", len=4, is_write=1) at /home/elmarco/src/qemu/exec.c:2534
  #6  0x000055555564c005 in cpu_physical_memory_write (addr=3759147008, buf=0x7fffffffc1b0, len=4) at /home/elmarco/src/qemu/include/exec/cpu-common.h:80
  #7  0x000055555564cd9c in qtest_process_command (chr=0x55555642b890, words=0x5555578de4b0) at /home/elmarco/src/qemu/qtest.c:378
  #8  0x000055555564db77 in qtest_process_inbuf (chr=0x55555642b890, inbuf=0x55555641b340) at /home/elmarco/src/qemu/qtest.c:569
  #9  0x000055555564dc07 in qtest_read (opaque=0x55555642b890, buf=0x7fffffffc2e0 "writel 0xe0100800 0x0\n", size=22) at /home/elmarco/src/qemu/qtest.c:581
  #10 0x000055555574ce3e in qemu_chr_be_write (s=0x55555642b890, buf=0x7fffffffc2e0 "writel 0xe0100800 0x0\n", len=22) at qemu-char.c:306
  #11 0x0000555555751263 in tcp_chr_read (chan=0x55555642bcf0, cond=G_IO_IN, opaque=0x55555642b890) at qemu-char.c:2876
  #12 0x00007ffff64c9a8a in g_main_context_dispatch (context=0x55555641c400) at gmain.c:3122

(without this patch, this can be reproduced with the ivshmem qtest)

Implement an empty mmio write to avoid the crash.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
2015-10-24 18:03:18 +02:00
Marc-André Lureau
45b00c44ce contrib: remove unnecessary strdup()
getopt()'s optarg points to argv memory, so there is no need to dup those
values; this fixes small leaks detected by clang-analyzer.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2015-10-24 18:03:18 +02:00
David Marchand
5105b1d8c2 ivshmem: add check on protocol version in QEMU
Send a protocol version as the first message from the server; clients must
close the communication if they don't support this protocol version.  Older
QEMUs should be fine with this change in the protocol since they
override their own vm_id on reception of an id associated with no
eventfd.

Signed-off-by: David Marchand <david.marchand@6wind.com>
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
[use fifo_update_and_get()]
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:03:18 +02:00
David Marchand
8c4ef202b9 docs: update ivshmem device spec
Add some notes on the parts needed to use ivshmem devices: more specifically,
explain the purpose of an ivshmem server and the basic concepts for using
ivshmem devices in guests.
Move some parts of the documentation and re-organise it.

Signed-off-by: David Marchand <david.marchand@6wind.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
2015-10-24 18:03:17 +02:00
Marc-André Lureau
1e21feb628 ivshmem-server: fix hugetlbfs support
As pointed out on the ML by Andrew Jones, glibc no longer permits
creating POSIX shm on hugetlbfs directly. When given a hugetlbfs path,
create a shareable file there.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2015-10-24 18:03:16 +02:00
Marc-André Lureau
022cffe313 ivshmem-server: use a uint16 for client ID
In practice, the number of VMs is limited to MAXUINT16 in ivshmem, so use
the same limit on the server (this removes a theoretical infinite loop).

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:03:16 +02:00
Marc-André Lureau
95204aa951 ivshmem-client: check the number of vectors
Check the number of vectors received from the server, to avoid
out-of-bounds array access.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:03:16 +02:00
David Marchand
a75eb03b9f contrib: add ivshmem client and server
When using ivshmem devices, notifications between guests can be sent as
interrupts using an ivshmem-server (typical use is described in the
documentation). The client is provided as a debug tool.

Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
Signed-off-by: David Marchand <david.marchand@6wind.com>
[fix a valgrind warning, option and server_close() segvs, extra server
headers includes, getopt() return type, out-of-tree build, use qemu
event_notifier instead of eventfd, fix x86/osx warnings - Marc-André]
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
2015-10-24 18:03:16 +02:00
Marc-André Lureau
12f0b68c82 util: const event_notifier_get_fd() argument
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
2015-10-24 18:02:49 +02:00
Marc-André Lureau
972ad21553 ivshmem: reset mask on device reset
The interrupt mask is a state value; it should be reset, like the
interrupt status.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:02:49 +02:00
Marc-André Lureau
1ee57de444 ivshmem: error on too many eventfd received
The number of eventfds that can be handled per peer is limited by the
number of vectors. Return an error when receiving too many of them.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:02:49 +02:00
Marc-André Lureau
f456179fae ivshmem: replace 'guest' for 'peer' appropriately
The terms 'guest' and 'peer' are sometimes used interchangeably, which may
be confusing. Instead, use 'peer' for the remote instances of ivshmem
clients, and 'guest' for the local VM.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:02:49 +02:00
Marc-André Lureau
f64a078d45 ivshmem: fix pci_ivshmem_exit()
Free all objects owned by the device, making sure the device is fully
freed; this fixes hot-unplug.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:02:49 +02:00
Marc-André Lureau
d383537d01 ivshmem: add device description
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:02:49 +02:00
Marc-André Lureau
945001a1af ivshmem: check shm isn't already initialized
The server should not change the shm; this isn't currently handled by
qemu, so verify it in qemu.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:02:49 +02:00
Marc-André Lureau
86d471bfa4 ivshmem: shmfd can be 0
0 is a valid fd value, so change the conditions and set the value to -1 early.

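That is (illustrative):

    int shm_fd = -1;                /* initialize early: -1 means "not set" */

    /* when checking or cleaning up, test >= 0, since fd 0 is valid */
    if (shm_fd >= 0) {
        close(shm_fd);
    }
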
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:02:49 +02:00
Marc-André Lureau
1f8552df2c ivshmem: migrate with VMStateDescription
load_state_old() is used to keep compatibility with version 0.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:02:49 +02:00
Marc-André Lureau
e309366337 ivshmem: use common is_power_of_2()
The common version correctly checks for the 0 value case.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:02:49 +02:00
Marc-André Lureau
6f8a16d55d ivshmem: use common return
Both if branches return; move the return out to a common end.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:02:49 +02:00
Marc-André Lureau
9a2f0e64ae ivshmem: simplify a bit the code
Use some more explicit variables to simplify the code.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:02:49 +02:00
Marc-André Lureau
ffa99afd6e ivshmem: print error on invalid peer id
The server shouldn't send an invalid peer id, so print an error if it
does.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:02:49 +02:00
Marc-André Lureau
36617792b4 ivshmem: improve error handling
The test whether the chardev is an AF_UNIX socket rejects
"-chardev socket,id=chr0,path=/tmp/foo,server,nowait -device
ivshmem,chardev=chr0", but fails to explain why.

Use an explicit error explaining why a chardev may be rejected.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:02:49 +02:00
Marc-André Lureau
f59bb37898 ivshmem: improve debug messages
Some misc improvements to ivshmem debug.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:02:49 +02:00
Marc-André Lureau
95c8425cc3 ivshmem: remove max_peer field
max_peer isn't really useful: it tracks the maximum received VM id, but
that quickly matches nb_peers, the size of the peers array. Since VMs
come and go, there might be sparse peers, so it doesn't help much in
general to have this value around.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:02:48 +02:00
Marc-André Lureau
95e7c8a0f6 ivshmem: initialize max_peer to -1
There is no peer when the device is initialized, so do not allow doorbell
rings for the nonexistent peer 0.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:02:48 +02:00
Marc-André Lureau
d8a5da075a ivshmem: remove useless ivshmem_update_irq() val argument
val isn't used in ivshmem_update_irq() function.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:02:48 +02:00
Marc-André Lureau
81e507f0bc ivshmem: allocate eventfds in resize_peers()
It simplifies the code a bit to allocate the array when setting the
number of peers, instead of lazily when receiving the first vector.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:02:48 +02:00
Marc-André Lureau
1300b2733a ivshmem: simplify around increase_dynamic_storage()
Set the number of peers and array allocation in a single place. Rename
to better reflect the function content.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:02:48 +02:00
Marc-André Lureau
61ea2d8648 ivshmem: limit maximum number of peers to G_MAXUINT16
Limit the maximum number of peers to MAXUINT16. This is more realistic
and better matches the limit of the doorbell register.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:02:48 +02:00
Marc-André Lureau
03977ad552 ivshmem: remove last exit(1)
Failing to create a chardev shouldn't be fatal.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:02:48 +02:00
Marc-André Lureau
d58d7e848e ivshmem: more qdev conversion
Use the latest QEMU device modeling API; in particular, convert to
realize to fix the error handling. Right now a botched device_add
ivshmem command kills the VM.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:02:48 +02:00
Marc-André Lureau
49b2951f84 ivshmem: remove useless doorbell field
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:02:48 +02:00
Marc-André Lureau
9113e3f394 ivshmem: remove superflous ivshmem_attr field
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:02:48 +02:00
Marc-André Lureau
dee2151e72 ivshmem: remove unnecessary dup()
qemu_chr_fe_get_msgfd() transfers ownership, so there is no need to dup
the fd.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:02:48 +02:00
Marc-André Lureau
0f14fd71c1 ivshmem: factor out the incoming fifo handling
Make a new function fifo_update_and_get() that can be reused by other
functions (in next commits).

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:02:48 +02:00
Marc-André Lureau
951dada665 ivshmem: fix number of bytes to push to fifo
If the fifo has 0 bytes and the read is of size 1, the call to
fifo8_push_all() will copy data past the buffer boundary.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:02:48 +02:00
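
As an illustration only (not the literal patch), the safe pattern is to
clamp the pushed byte count to the fifo's free space, using the helpers
from qemu/fifo8.h:

    /* Sketch: "s" is the ivshmem device state, "buf"/"size" the bytes just
     * received from the server socket; never push more than the fifo can
     * currently accept. */
    uint32_t num = MIN(size, fifo8_num_free(&s->incoming_fifo));

    fifo8_push_all(&s->incoming_fifo, buf, num);
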
Marc-André Lureau
b8ab854b27 ivhsmem: read do not accept more than sizeof(long)
ivshmem_read() only reads sizeof(long) bytes from the input buffer.
Accepting more could lead to a fifo8 abort() on 32-bit systems if the
fifo is not empty.

A following patch will change the protocol to 64-bit little-endian
instead.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:02:48 +02:00
Marc-André Lureau
c246a62f26 msix: add VMSTATE_MSIX_TEST
ivshmem is going to use MSIX state conditionally.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:02:48 +02:00
Marc-André Lureau
1ad78ea51a char: add qemu_chr_free()
If a chardev is allowed to be created outside of QMP, then it must also
be possible to free it. This is useful for ivshmem, which creates
chardevs anonymously and must be able to free them.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
2015-10-24 18:02:48 +02:00
Andreas Färber
bbfc2efefe tests: Add ivshmem qtest
Note that it launches two instances, as sharing memory is the purpose of
ivshmem.

Cc: Cam Macdonell <cam@cs.ualberta.ca>
Cc: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Andreas Färber <afaerber@suse.de>
[ Remove Nahanni codename, add test to pci set - Marc-André ]
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
2015-10-24 18:02:48 +02:00
Marc-André Lureau
dd054be35f config: enable ivshmem on POSIX
ivshmem doesn't actually require KVM, so enable it whenever POSIX is
enabled. (KVM is required, however, when ioeventfd is enabled.)

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
2015-10-24 18:02:47 +02:00
Peter Maydell
af25e7277d Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches

# gpg: Signature made Fri 23 Oct 2015 17:59:56 BST using RSA key ID C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"

* remotes/kevin/tags/for-upstream: (37 commits)
  tests: Add test case for aio_disable_external
  block: Add "drained begin/end" for internal snapshot
  block: Add "drained begin/end" for transactional blockdev-backup
  block: Add "drained begin/end" for transactional backup
  block: Add "drained begin/end" for transactional external snapshot
  block: Introduce "drained begin/end" API
  aio: introduce aio_{disable,enable}_external
  dataplane: Mark host notifiers' client type as "external"
  nbd: Mark fd handlers client type as "external"
  aio: Add "is_external" flag for event handlers
  throttle: Remove throttle_group_lock/unlock()
  blockdev: Allow more options for BB-less BDS tree
  blockdev: Pull out blockdev option extraction
  blockdev: Do not create BDS for empty drive
  block: Prepare for NULL BDS
  block: Add blk_insert_bs()
  block: Prepare remaining BB functions for NULL BDS
  block: Fail requests to empty BlockBackend
  block: Make some BB functions fall back to BBRS
  block: Add BlockBackendRootState
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-23 18:14:42 +01:00
Fam Zheng
c07bc2c165 tests: Add test case for aio_disable_external
Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:24 +02:00
Fam Zheng
507306cc8e block: Add "drained begin/end" for internal snapshot
This ensures the atomicity of the transaction by avoiding processing of
external requests such as those from ioeventfd.

state->bs is now assigned right after bdrv_drained_begin. Because it
used to serve as the flag for whether to delete in abort, we now need a
separate flag, InternalSnapshotState.created.

Signed-off-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:24 +02:00
Fam Zheng
ff52bf36a3 block: Add "drained begin/end" for transactional blockdev-backup
Similar to the previous patch, make sure that external events are not
dispatched during transaction operations.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:24 +02:00
Fam Zheng
1fdd4b7be3 block: Add "drained begin/end" for transactional backup
This ensures the atomicity of the transaction by avoiding processing of
external requests such as those from ioeventfd.

Move the assignment to state->bs up right after bdrv_drained_begin, so
that we can use it in the clean callback. The abort callback will still
check bs->job and state->job, so it's OK.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:24 +02:00
Fam Zheng
da763e8301 block: Add "drained begin/end" for transactional external snapshot
This ensures the atomicity of the transaction by avoiding processing of
external requests such as those from ioeventfd.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:24 +02:00
Fam Zheng
51288d7917 block: Introduce "drained begin/end" API
The semantics are that after bdrv_drained_begin(bs), bs will not get new
external requests until the matching bdrv_drained_end(bs).

Signed-off-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:24 +02:00
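
A minimal usage sketch of the pair introduced here (the body of the
critical section is a placeholder):

    /* Keep external request sources (ioeventfd, NBD clients, ...) away
     * from bs for the duration of the critical section. */
    bdrv_drained_begin(bs);

    /* ... modify bs here, e.g. take a snapshot or start a backup ... */

    bdrv_drained_end(bs);   /* must match the begin call above */
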
Fam Zheng
c1e1e5fa8f aio: introduce aio_{disable,enable}_external
Signed-off-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:24 +02:00
Fam Zheng
3a1e8074d7 dataplane: Mark host notifiers' client type as "external"
They will be excluded by type in the nested event loops in block layer,
so that unwanted events won't be processed there.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:24 +02:00
Fam Zheng
172cc129a5 nbd: Mark fd handlers client type as "external"
So we can distinguish them from internally used fds, and thus avoid
handling unwanted events in nested aio polls.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:23 +02:00
Fam Zheng
dca21ef23b aio: Add "is_external" flag for event handlers
All callers pass in false, and the real external ones will switch to
true in coming patches.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:23 +02:00
Alberto Garcia
d87d01e16a throttle: Remove throttle_group_lock/unlock()
The group throttling code was always meant to handle its locking
internally. However, bdrv_swap() was touching the ThrottleGroup
structure directly and therefore needed an API for that.

Now that bdrv_swap() no longer exists there's no need for the
throttle_group_lock() API anymore.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:23 +02:00
Max Reitz
bd745e238b blockdev: Allow more options for BB-less BDS tree
Most of the options which blockdev_init() parses for both the
BlockBackend and the root BDS are valid for just the root BDS as well
(e.g. read-only). This patch allows specifying these options even if not
creating a BlockBackend.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:23 +02:00
Max Reitz
fbf8175eac blockdev: Pull out blockdev option extraction
Extract some of the blockdev option extraction code from blockdev_init()
into its own function. This simplifies blockdev_init() and will allow
reusing the code in a different function added in a follow-up patch.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:23 +02:00
Max Reitz
5ec18f8c83 blockdev: Do not create BDS for empty drive
Do not use "rudimentary" BDSs for empty drives any longer (for
freshly created drives).

After a follow-up patch, empty drives will generally use a NULL BDS, not
only the freshly created drives.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:23 +02:00
Max Reitz
5433c24f0f block: Prepare for NULL BDS
blk_bs() will not necessarily return a non-NULL value any more (unless
blk_is_available() is true, or a non-NULL value can otherwise be
assumed, e.g. because it is called immediately after a successful
blk_new_with_bs() or blk_new_open()).

Signed-off-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:23 +02:00
Max Reitz
0c3c36d651 block: Add blk_insert_bs()
This function associates the given BlockDriverState with the given
BlockBackend.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:23 +02:00
Max Reitz
a46fc9c950 block: Prepare remaining BB functions for NULL BDS
There are several BlockBackend functions which, in theory, cannot fail.
This patch makes them cope with the BlockDriverState pointer being NULL
by making them fall back to some default action like ignoring the value
in setters and returning the default in getters.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:23 +02:00
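
Roughly, the pattern looks like the hypothetical getter below (the
helper names are made up for illustration; they are not the functions
touched by the patch):

    static int example_blk_get_some_limit(BlockBackend *blk)
    {
        BlockDriverState *bs = blk_bs(blk);

        if (!bs) {
            return 0;                              /* no BDS attached: report a default */
        }
        return example_bdrv_get_some_limit(bs);    /* assumed per-BDS getter */
    }
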
Max Reitz
c09ba36c9a block: Fail requests to empty BlockBackend
If there is no BlockDriverState in a BlockBackend or if the tray of the
guest device is open, fail all requests (where that is possible) with
-ENOMEDIUM.

The reason the status of the guest device is taken into account is
because once the guest device's tray is opened, any request on the same
BlockBackend as the guest uses should fail. If the BDS tree is supposed
to be usable even after ejecting it from the guest, a different
BlockBackend must be used.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:23 +02:00
Max Reitz
061959e8da block: Make some BB functions fall back to BBRS
If there is no BDS tree attached to a BlockBackend, functions that can
do so should fall back to the BlockBackendRootState structure.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:23 +02:00
Max Reitz
281d22d86c block: Add BlockBackendRootState
This structure will store some of the state of the root BDS if the BDS
tree is removed, so that state can be restored once a new BDS tree is
inserted.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:23 +02:00
Max Reitz
973f2ddf7b block/throttle-groups: Make incref/decref public
Throttle groups are not necessarily referenced by BDSs alone; a later
patch will essentially allow BBs to reference them, too. Make the
ref/unref functions public so that references can be properly accounted
for.

Their interface is slightly adjusted in that they return and take a
ThrottleState pointer, respectively, instead of a ThrottleGroup pointer.
Functionally, they are equivalent, but since ThrottleGroup is not meant
to be used outside of block/throttle-groups.c, ThrottleState is easier
to handle.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:23 +02:00
Max Reitz
373340b26c block: Move I/O status and error actions into BB
These options are only relevant for the user of a whole BDS tree (like a
guest device or a block job) and should thus be moved into the
BlockBackend.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:23 +02:00
Max Reitz
7f0e9da6f1 block: Move BlockAcctStats into BlockBackend
As the comment above bdrv_get_stats() says, BlockAcctStats is something
which belongs to the device instead of each BlockDriverState. This patch
therefore moves it into the BlockBackend.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:23 +02:00
Max Reitz
53d8f9d8fb block: Remove wr_highest_sector from BlockAcctStats
BlockAcctStats contains statistics about the data transferred from and
to the device; wr_highest_sector does not fit in with the rest.

Furthermore, those statistics are supposed to be specific for a certain
device and not necessarily for a BDS (see the comment above
bdrv_get_stats()); on the other hand, wr_highest_sector may be rather
important information to know for each BDS. When BlockAcctStats is
finally removed from the BDS, we will want to keep wr_highest_sector in
the BDS.

Finally, wr_highest_sector is renamed to wr_highest_offset and given the
appropriate meaning. Externally, it is represented as an offset so there
is no point in doing something different internally. Its definition is
changed to match that in qapi/block-core.json which is "the offset after
the greatest byte written to". Doing so should not cause any harm since
if external programs tried to calculate the volume usage by
(wr_highest_offset + 512) / volume_size, after this patch they will just
assume the volume to be full slightly earlier than before.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:23 +02:00
Max Reitz
68e9ec017b block: Move guest_block_size into BlockBackend
guest_block_size is a guest device property so it should be moved into
the interface between block layer and guest devices, which is the
BlockBackend.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:23 +02:00
Max Reitz
4981bdec0d block: Fix BB AIOCB AioContext without BDS
Fix the BlockBackend's AIOCB AioContext for aborting AIO in case there
is no BDS. If there is no implementation of AIOCBInfo::get_aio_context()
the AioContext is derived from the BDS the AIOCB belongs to. If that BDS
is NULL (because it has been removed from the BB) this will not work.

This patch makes blk_get_aio_context() fall back to the main loop
context if the BDS pointer is NULL and implements
AIOCBInfo::get_aio_context() (blk_aiocb_get_aio_context()) which invokes
blk_get_aio_context().

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:23 +02:00
Max Reitz
7d3467d903 hw/usb-storage: Check whether BB is inserted
Only call bdrv_add_key() on the BlockDriverState if it is not NULL.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:23 +02:00
Max Reitz
2e1280e8ff hw/block/fdc: Implement tray status
The tray of an FDD is open iff there is no medium inserted (there are
only two states for an FDD: "medium inserted" or "no medium inserted").

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:23 +02:00
Max Reitz
b4d02820d9 block: Invoke change media CB before NULLing drv
In order to handle host device passthrough, some guest device models
may call blk_is_inserted() to check whether the medium is inserted on
the host, when checking the guest tray status.

This tray status is inquired by blk_dev_change_media_cb(); because
bdrv_is_inserted() (invoked by blk_is_inserted()) always returns false
for BDS with drv set to NULL, blk_dev_change_media_cb() should therefore
be called before drv is set to NULL.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:23 +02:00
Max Reitz
1354c47378 block/raw_bsd: Drop raw_is_inserted()
With the new automatically-recursive implementation of
bdrv_is_inserted() checking by default whether all the children of a BDS
are inserted, we can drop raw's own implementation.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:23 +02:00
Max Reitz
28d7a78996 block: Make bdrv_is_inserted() recursive
If bdrv_is_inserted() is called on the top level BDS, it should make
sure all nodes in the BDS tree are actually inserted.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:22 +02:00
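
A simplified sketch of the recursive check (approximating, not quoting,
the QEMU code; the child-list iteration is assumed to follow the usual
BdrvChild layout):

    bool bdrv_is_inserted(BlockDriverState *bs)
    {
        BlockDriver *drv = bs->drv;
        BdrvChild *child;

        if (!drv) {
            return false;
        }
        if (drv->bdrv_is_inserted) {
            return drv->bdrv_is_inserted(bs);
        }
        /* No driver callback: the node counts as inserted iff all of its
         * children are inserted. */
        QLIST_FOREACH(child, &bs->children, next) {
            if (!bdrv_is_inserted(child->bs)) {
                return false;
            }
        }
        return true;
    }
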
Max Reitz
db0284f86a block: Add blk_is_available()
blk_is_available() returns true iff the BDS is inserted (which means
blk_bs() is not NULL and bdrv_is_inserted() returns true) and if the
tray of the guest device is closed.

blk_is_inserted() is changed to return true only if blk_bs() is not
NULL.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:22 +02:00
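
In code, the semantics described above amount to roughly this (the
tray-state helper name is an assumption):

    bool blk_is_inserted(BlockBackend *blk)
    {
        BlockDriverState *bs = blk_bs(blk);

        return bs && bdrv_is_inserted(bs);
    }

    bool blk_is_available(BlockBackend *blk)
    {
        /* blk_dev_is_tray_open() stands in for whatever tray-state query
         * the guest device side provides. */
        return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk);
    }
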
Max Reitz
e031f75048 block: Make bdrv_is_inserted() return a bool
Make bdrv_is_inserted(), blk_is_inserted(), and the callback
BlockDriver.bdrv_is_inserted() return a bool.

Suggested-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:22 +02:00
Max Reitz
8e9e653038 iotests: Only create BB if necessary
Tests 071 and 081 test giving references in blockdev-add. It is not
necessary to create a BlockBackend here, so omit it.

While at it, fix up some blockdev-add invocations in the vicinity
(s/raw/$IMGFMT/ in 081, drop the format BDS for blkverify's raw child in
071).

Signed-off-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:22 +02:00
Max Reitz
be4b67bc7d blockdev: Allow creation of BDS trees without BB
If the "id" field is missing from the options given to blockdev-add,
just omit the BlockBackend and create the BlockDriverState tree alone.

However, if "id" is missing, "node-name" must be specified; otherwise,
the BDS tree would no longer be accessible.

Many BDS options which are not parsed by bdrv_open() (like caching)
cannot be specified for these BB-less BDS trees yet. A future patch will
remove this limitation.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:22 +02:00
Max Reitz
d44f928a54 block: Set BDRV_O_INCOMING in bdrv_fill_options()
This flag should not be set for the root BDS only, but for any BDS that
is being created while incoming migration is pending, so setting it is
moved from blockdev_init() to bdrv_fill_options().

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:22 +02:00
Max Reitz
f709623b3d block: Remove host floppy support
It has been deprecated as of 2.3, so we can now remove it.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-10-23 18:18:22 +02:00
Peter Maydell
bc79082e4c Merge remote-tracking branch 'remotes/ehabkost/tags/x86-pull-request' into staging
X86 queue, 2015-10-23

# gpg: Signature made Fri 23 Oct 2015 16:30:58 BST using RSA key ID 984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"

* remotes/ehabkost/tags/x86-pull-request:
  vl: trivial: minor tweaks to a max-cpu error msg
  target-i386: Use 1UL for bit shift
  target-i386: Add DE to TCG_FEATURES
  target-i386: Ensure always-1 bits on DR6 can't be cleared
  target-i386: Check CR4[DE] for processing DR4/DR5
  target-i386: Handle I/O breakpoints
  target-i386: Optimize setting dr[0-3]
  target-i386: Move hw_*breakpoint_* functions
  target-i386: Ensure bit 10 on DR7 is never cleared
  target-i386: Re-introduce optimal breakpoint removal
  target-i386: Introduce cpu_x86_update_dr7
  target-i386: Disable cache info passthrough by default
  target-i386: allow any alignment for SMBASE

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-23 16:35:43 +01:00
Andrew Jones
31bfa2a400 vl: trivial: minor tweaks to a max-cpu error msg
Signed-off-by: Andrew Jones <drjones@redhat.com>
2015-10-23 13:11:52 -02:00
Eduardo Habkost
72370dc114 target-i386: Use 1UL for bit shift
Fix undefined behavior detected by clang runtime check:

  qemu/target-i386/cpu.c:1494:15: runtime error:
    left shift of 1 by 31 places cannot be represented in type 'int'

While doing that, add extra parenthesis for clarity.

Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2015-10-23 13:07:27 -02:00
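
The issue in miniature, as a standalone example (not the QEMU code):

    #include <stdint.h>

    uint32_t bad(int bit)     /* e.g. bit == 31 */
    {
        return 1 << bit;      /* undefined for bit 31: shifts into the sign bit of int */
    }

    uint32_t good(int bit)
    {
        return 1UL << bit;    /* well defined: the shift is done on unsigned long */
    }
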
Eduardo Habkost
b6c5a6f021 target-i386: Add DE to TCG_FEATURES
Now DE is supported by TCG so it can be enabled in CPUID bits.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2015-10-23 12:59:27 -02:00
Eduardo Habkost
462f8ed1f1 target-i386: Ensure always-1 bits on DR6 can't be cleared
Bits 4-11 and 16-31 on DR6 are documented as always 1, so ensure they
can't be cleared by software.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2015-10-23 12:59:27 -02:00
Richard Henderson
d005233923 target-i386: Check CR4[DE] for processing DR4/DR5
Introduce helper_get_dr so that we don't have to put CR4[DE]
into the scarce HFLAGS resource.  At the same time, rename
helper_movl_drN_T0 to helper_set_dr and set the helper flags.

Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2015-10-23 12:59:27 -02:00
Eduardo Habkost
5223a9423c target-i386: Handle I/O breakpoints
Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2015-10-23 12:59:27 -02:00
Richard Henderson
7525b55051 target-i386: Optimize setting dr[0-3]
If the debug register is not enabled, we need
do nothing besides update the register.

Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2015-10-23 12:59:27 -02:00
Richard Henderson
696ad9e4b2 target-i386: Move hw_*breakpoint_* functions
They're only used from bpt_helper.c now.

Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2015-10-23 12:59:27 -02:00
Eduardo Habkost
9055330ffb target-i386: Ensure bit 10 on DR7 is never cleared
Bit 10 of DR7 is documented as always set to 1, so ensure that's
always the case.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2015-10-23 12:59:27 -02:00
Richard Henderson
36eb6e0967 target-i386: Re-introduce optimal breakpoint removal
Before the last patch, we had an efficient loop that disabled
local breakpoints on task switch.  Re-add that, but in a more
general way that handles changes to the global enable bits too.

Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2015-10-23 12:59:27 -02:00
Richard Henderson
93d00d0fbe target-i386: Introduce cpu_x86_update_dr7
This moves the last of the iteration over breakpoints into
the bpt_helper.c file.  This also allows us to make several
breakpoint functions static.

Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2015-10-23 12:59:27 -02:00
Eduardo Habkost
e265e3e480 target-i386: Disable cache info passthrough by default
The host cache information may not make sense for the guest if the VM
CPU topology doesn't match the host CPU topology. To make sure we won't
expose broken cache information to the guest, disable cache info
passthrough by default, and add a new "host-cache-info" property that
can be used to enable the old behavior for users that really need it.

Cc: Benoît Canet <benoit@irqsave.net>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2015-10-23 12:59:27 -02:00
Paolo Bonzini
dd75d4fcb4 target-i386: allow any alignment for SMBASE
Processors up to the Pentium (says Bochs---I do not have old enough
manuals) require a 32KiB alignment for the SMBASE, but newer processors
do not need that, and Tiano Core will use non-aligned SMBASE values.

Reported-by: Michael D Kinney <michael.d.kinney@intel.com>
Cc: Laszlo Ersek <lersek@redhat.com>
Cc: Jordan Justen <jordan.l.justen@intel.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2015-10-23 12:59:26 -02:00
Peter Maydell
1e700f4c6c Merge remote-tracking branch 'remotes/mdroth/tags/qga-pull-2015-10-23-tag' into staging
qemu-ga patch queue

* unbreak qga-test unit test on travis-ci systems by not assuming a
  disk-based filesystem must be present

# gpg: Signature made Fri 23 Oct 2015 15:01:47 BST using RSA key ID F108B584
# gpg: Good signature from "Michael Roth <flukshun@gmail.com>"
# gpg:                 aka "Michael Roth <mdroth@utexas.edu>"
# gpg:                 aka "Michael Roth <mdroth@linux.vnet.ibm.com>"

* remotes/mdroth/tags/qga-pull-2015-10-23-tag:
  tests: test-qga, loosen assumptions about host filesystems

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-23 15:55:50 +01:00
Michael Roth
b3e9e584fc tests: test-qga, loosen assumptions about host filesystems
QGA skips pseudo-filesystems when querying filesystems via
guest-get-fsinfo. On some hosts, such as travis-ci which uses
containers with simfs filesystems, QGA might not report *any*
filesystems. Our test case assumes there would be at least one,
leading to false error messages in these situations.

Instead, sanity-check values iff we get at least one filesystem.

Cc: Marc-André Lureau <marcandre.lureau@redhat.com>
Cc: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2015-10-23 08:57:45 -05:00
Peter Maydell
147482ae35 Merge remote-tracking branch 'remotes/dgibson/tags/ppc-next-20151023' into staging
ppc patch queue - 2015-10-23

sPAPR highlights:
  * Allow VFIO devices on the spapr-pci-host-bridge
  * Allow virtio VGA
  * Safer handling of HTAB allocation
  * ibm,pa-features device tree property

non-sPAPR highlights:
  * Categorization of many ppc specific devices in help output
  * Tweaks to MMU type constants

# gpg: Signature made Fri 23 Oct 2015 07:27:56 BST using RSA key ID 20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-next-20151023: (21 commits)
  prep: do not use CPU_LOG_IOPORT, convert to tracepoints
  openpic: add to misc category
  macio-nvram: add to misc category
  macio: add to bridge category
  uninorth: add to bridge category
  macio-ide: add to storage category
  cuda: add to bridge category
  grackle: add to bridge category
  escc: add to input category
  cmd646: add to storage category
  adb: add to input category
  ppc/spapr: Add "ibm,pa-features" property to the device-tree
  ppc: Add mmu_model defines for arch 2.03 and 2.07
  hw/scsi/spapr_vscsi: Remove superfluous memset
  spapr_pci: Allow VFIO devices to work on the normal PCI host bridge
  spapr_iommu: Provide a function to switch a TCE table to allowing VFIO
  spapr_iommu: Rename vfio_accel parameter
  spapr_pci: Allow PCI host bridge DMA window to be configured
  spapr: Add "slb-size" property to CPU device tree nodes
  spapr: Abort when HTAB of requested size isn't allocated
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-23 13:09:09 +01:00
Peter Maydell
431429a5b8 Merge remote-tracking branch 'remotes/berrange/tags/qcrypto-fixes-pull-20151022-2' into staging
Merge qcrypto-fixes 2015/10/22

# gpg: Signature made Thu 22 Oct 2015 19:03:45 BST using RSA key ID 15104FDF
# gpg: Good signature from "Daniel P. Berrange <dan@berrange.com>"
# gpg:                 aka "Daniel P. Berrange <berrange@redhat.com>"

* remotes/berrange/tags/qcrypto-fixes-pull-20151022-2:
  configure: avoid polluting global CFLAGS with tasn1 flags
  crypto: add sanity checking of plaintext/ciphertext length
  crypto: don't let builtin aes crash if no IV is provided
  crypto: allow use of nettle/gcrypt to be selected explicitly

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-23 12:09:02 +01:00
Peter Maydell
dfbe0642ef Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
vhost: build fix

Fix build breakages when using older gcc.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Thu 22 Oct 2015 20:36:07 BST using RSA key ID D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"

* remotes/mst/tags/for_upstream:
  vhost-user: fix up rhel6 build

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-23 10:24:08 +01:00
Paolo Bonzini
659f7f6556 prep: do not use CPU_LOG_IOPORT, convert to tracepoints
These messages are disabled by default; a perfect usecase for tracepoints.
Convert them over.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2015-10-23 12:38:28 +11:00
Laurent Vivier
29f8dd66e8 openpic: add to misc category
openpic is a programmable interrupt controller, so
add it to the misc category.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2015-10-23 12:35:18 +11:00
Laurent Vivier
175fe9e7c8 macio-nvram: add to misc category
The macio nvram is a non-volatile RAM, so add it to the misc category.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2015-10-23 12:35:18 +11:00
Laurent Vivier
f9f2a9f26f macio: add to bridge category
macio is a bridge between the PCI bus and the Mac nvram,
IDE controller and PIC, so add it to the bridge category.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2015-10-23 12:35:18 +11:00
Laurent Vivier
1d16f86a43 uninorth: add to bridge category
Uninorth is the mac99 PCI host controller, so add
it to the bridge category.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2015-10-23 12:35:18 +11:00
Laurent Vivier
3469d9bce8 macio-ide: add to storage category
macio-ide is an IDE controller, so add it
to the storage category.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2015-10-23 12:35:18 +11:00
Laurent Vivier
599d7326c3 cuda: add to bridge category
Cuda is a bridge between PowerMac system bus and the ADB controller,
real-time clock, pram and the power management unit.

So add it to the bridge category.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2015-10-23 12:35:18 +11:00
Laurent Vivier
e16244355f grackle: add to bridge category
Grackle is the PCI host controller of oldworld powermac,
so add it to the bridge category.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2015-10-23 12:35:18 +11:00
Laurent Vivier
f8d4c07c78 escc: add to input category
ESCC is a serial port controller, so add it
to the input category.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2015-10-23 12:35:17 +11:00
Laurent Vivier
74623e7369 cmd646: add to storage category
cmd646 is an IDE controller, so add it to the
storage category.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2015-10-23 12:35:17 +11:00
Laurent Vivier
32f3a8992e adb: add to input category
The Apple Desktop Bus is used to connect a keyboard and a mouse,
so add it to the input category.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2015-10-23 12:35:17 +11:00
Benjamin Herrenschmidt
90da0d5a70 ppc/spapr: Add "ibm,pa-features" property to the device-tree
LoPAPR defines an "ibm,pa-features" per-CPU device tree property which
describes extended features of the Processor Architecture.

This adds the property to the device tree. At the moment this is a copy
of what pHyp advertises, except "I=1 (cache inhibited) Large Pages",
which is enabled for TCG and disabled when running under an HV KVM host
with a 4K system page size.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
[aik: rebased, changed commit log, moved ci_large_pages initialization,
renamed pa_features arrays]
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2015-10-23 12:22:40 +11:00
Benjamin Herrenschmidt
aa4bb58752 ppc: Add mmu_model defines for arch 2.03 and 2.07
This removes unused POWERPC_MMU_2_06a/POWERPC_MMU_2_06d.

This replaces POWERPC_MMU_64B with POWERPC_MMU_2_03 for POWER5+ to be
more explicit about the version of the PowerISA supported.

This defines POWERPC_MMU_2_07 and uses it for the POWER8 CPU family.
This will not have an immediate effect now but it will in the following
patch.

This should cause no behavioural change.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
[aik: rebased, changed commit log]
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2015-10-23 12:22:40 +11:00
Thomas Huth
a23dec105c hw/scsi/spapr_vscsi: Remove superfluous memset
g_malloc0 already clears the memory, so no need for
the additional memset here.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: Alexander Graf <agraf@suse.de>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2015-10-23 10:38:11 +11:00
David Gibson
185181f883 spapr_pci: Allow VFIO devices to work on the normal PCI host bridge
The core VFIO infrastructure more or less allows VFIO devices to work
on any normal guest PCI host bridge (PHB) without extra logic.
However, the "spapr-pci-host-bridge" device (as opposed to the special
"spapr-pci-vfio-host-bridge" device) breaks this by using a partially
KVM accelerated implementation of the guest kernel IOMMU which won't
work with VFIO devices, without additional kernel support.

This patch allows VFIO devices to work on the spapr-pci-host-bridge,
by having it switch off KVM TCE acceleration when a VFIO device is
added to the PHB (either on startup, or by hotplug).

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
2015-10-23 10:38:10 +11:00
David Gibson
c10325d6f9 spapr_iommu: Provide a function to switch a TCE table to allowing VFIO
Because of the way non-VFIO guest IOMMU operations are KVM accelerated, not
all TCE tables (guest IOMMU contexts) can support VFIO devices.  Currently,
this is decided at creation time.

To support hotplug of VFIO devices, we need to allow a TCE table which
previously didn't allow VFIO devices to be switched so that it can.  This
patch adds an spapr_tce_set_need_vfio() function to do this, by
reallocating the table in userspace if necessary.

Currently this doesn't allow the KVM acceleration to be re-enabled if all
the VFIO devices are removed.  That's an optimization for another time.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
2015-10-23 10:38:10 +11:00
David Gibson
6a81dd172c spapr_iommu: Rename vfio_accel parameter
The vfio_accel parameter used when creating a new TCE table (guest IOMMU
context) has a confusing name.  What it really means is whether we need the
TCE table created to be able to support VFIO devices.

VFIO is relevant, because when available we use in-kernel acceleration of
the TCE table, but that may not work with VFIO devices because updates to
the table are handled in the kernel, bypassing qemu, and so don't hit qemu's
infrastructure for keeping the VFIO host IOMMU state in sync with the guest
IOMMU state.

Rename the parameter to "need_vfio" throughout.  This is a cosmetic change,
with no impact on the logic.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
2015-10-23 10:38:10 +11:00
David Gibson
f93caaac36 spapr_pci: Allow PCI host bridge DMA window to be configured
At present the PCI host bridge (PHB) for the pseries machine type has a
fixed DMA window from 0..1GB (in PCI address space) which is mapped to real
memory via the PAPR paravirtualized IOMMU.

For better support of VFIO devices, we're going to want to allow for
different configurations of the DMA window.

Eventually we'll want to allow the guest itself to reconfigure the window
via the PAPR dynamic DMA window interface, but as a preliminary this patch
allows the user to reconfigure the window with new properties on the PHB
device.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
2015-10-23 10:38:10 +11:00
Thomas Huth
fd5da5c472 spapr: Add "slb-size" property to CPU device tree nodes
According to a commit message in the Linux kernel (see here
https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=b60c31d85a2a
for example), the name of the property that carries the information
about the number of SLB entries should be called "slb-size", and
not "ibm,slb-size". The Linux kernel can deal with both names, but
to be on the safe side we should support the official name, too.

[Now that LoPAPR is public, the relevant requirement can be found in
section C.6.1.8 --dwg]

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2015-10-23 10:38:10 +11:00
Bharata B Rao
7735fedaf4 spapr: Abort when HTAB of requested size isn't allocated
Terminate the guest when HTAB of requested size isn't allocated by
the host.

When memory hotplug is attempted on a guest that has booted with
less than requested HTAB size, the guest kernel will not be able
to gracefully fail the hotplug request. This patch will ensure that
we never end up in a situation where memory hotplug fails due to
less than requested HTAB size.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2015-10-23 10:38:10 +11:00
Bharata B Rao
b817772a25 spapr: Allocate HTAB from machine init
Allocate HTAB from ppc_spapr_init() so that we can abort the guest
if the requested HTAB size isn't allocated by the host. However, retain the
htab reset call in spapr_reset_htab() so that HTAB gets reset (and
not allocated) during machine reset.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2015-10-23 10:38:10 +11:00
Michael S. Tsirkin
7f4a930e64 vhost-user: fix up rhel6 build
Build on RHEL6 fails:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=42875

Apparently unnamed unions can't use C99 named field initializers with
that gcc. Let's just name the payload union field.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2015-10-22 22:34:59 +03:00
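
A standalone illustration of the gcc limitation (hypothetical message
struct, not the vhost-user one):

    #include <stdint.h>

    typedef struct Msg {
        uint32_t request;
        union {
            uint64_t u64;
            uint32_t state;
        } payload;                 /* naming the union member ... */
    } Msg;

    static const Msg m = {
        .request = 1,
        .payload.u64 = 42,         /* ... lets old gcc accept this initializer */
    };
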
Daniel P. Berrange
9024603776 configure: avoid polluting global CFLAGS with tasn1 flags
The previous commit

  commit 9a2fd4347c
  Author: Daniel P. Berrange <berrange@redhat.com>
  Date:   Mon Apr 13 14:01:39 2015 +0100

    crypto: add sanity checking of TLS x509 credentials

defined new variables $TEST_LIBS and $TEST_CFLAGS and
used them in tests/Makefile to augment $LIBS and $CFLAGS.

Unfortunately this overlooks the fact that tests/Makefile
is not executed via recursive-make, it is just pulled into
the top level Makefile via an include statement. So rather
than just augmenting the compiler/linker flags for tests
it polluted the global flags.

This is thought to be behind a reported failure when
building the pixman module as a sub-module, since global
$CFLAGS are passed down to configure in pixman.

This change removes the $TEST_LIBS and $TEST_CFLAGS
replacing them with $TASN1_LIBS and $TASN1_CFLAGS,
setting only against specific objects/executables
that need them.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2015-10-22 19:03:08 +01:00
Daniel P. Berrange
3a661f1eab crypto: add sanity checking of plaintext/ciphertext length
When encrypting/decrypting data, the plaintext/ciphertext
buffers are required to be a multiple of the cipher block
size. If this is not done, nettle will abort and gcrypt
will report an error. To get consistent behaviour add
explicit checks upfront for the buffer sizes.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2015-10-22 19:03:08 +01:00
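
Conceptually the added check is just an upfront length test, along these
lines (a sketch; the variable names are illustrative, error_setg() as in
qapi/error.h):

    /* Reject buffers that are not a whole number of cipher blocks instead
     * of letting the backend library abort or fail obscurely. */
    if (len % block_size != 0) {
        error_setg(errp,
                   "Length %zu must be a multiple of the cipher block size %zu",
                   len, block_size);
        return -1;
    }
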
Daniel P. Berrange
eb2a770b17 crypto: don't let builtin aes crash if no IV is provided
If no IV is provided, then use a default IV of all-zeros
instead of crashing. This gives parity with gcrypt and
nettle backends.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2015-10-22 19:03:08 +01:00
Daniel P. Berrange
91bfcdb01d crypto: allow use of nettle/gcrypt to be selected explicitly
Currently the choice of whether to use nettle or gcrypt is
made based on what gnutls is linked to. There are times
when it is desirable to be able to force build against a
specific library. For example, if testing changes to QEMU's
crypto code all 3 possible backends need to be checked
regardless of what the local gnutls uses.

It is also desirable to be able to enable nettle/gcrypt
for cipher/hash algorithms, without enabling gnutls
for TLS support.

This gives two new configure flags, which allow the
following possibilities

Automatically determine nettle vs gcrypt from what
gnutls links to (recommended to minimize number of
crypto libraries linked to)

 ./configure

Automatically determine nettle vs gcrypt based on
which is installed

 ./configure --disable-gnutls

Force use of nettle

 ./configure --enable-nettle

Force use of gcrypt

 ./configure --enable-gcrypt

Force use of built-in AES & crippled-DES

 ./configure --disable-nettle --disable-gcrypt

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2015-10-22 19:03:07 +01:00
Chen Gang
2a080ce266 target-tilegx: Implement prefetch instructions in pipe y2
Originally, tilegx qemu only implemented prefetch instructions in pipe
x1 and did not implement them in pipe y2.

Signed-off-by: Chen Gang <gang.chen.5i5j@gmail.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2015-10-22 07:51:49 -10:00
Peter Maydell
6a6739de51 Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20151021' into staging
Collected tcg backend patches

# gpg: Signature made Wed 21 Oct 2015 22:34:28 BST using RSA key ID 4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"

* remotes/rth/tags/pull-tcg-20151021:
  cpu-exec: Add "nochain" debug flag
  tcg/mips: Support r6 SEL{NE, EQ}Z instead of MOVN/MOVZ
  tcg/mips: Support r6 multiply/divide encodings
  tcg/mips: Support r6 JR encoding
  tcg/mips: Add use_mips32r6_instructions definition
  disas/mips: Add R6 jr/jr.hb to disassembler
  tcg-opc.h: Simplify insn_start def
  tcg/ppc: Prefer mask over andi.
  tcg/ppc: Revise goto_tb implementation
  tcg/ppc: Adjust exit_tb for change in prologue placement

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-22 18:01:53 +01:00
Peter Maydell
b803894e2c Merge remote-tracking branch 'remotes/afaerber/tags/qom-cpu-for-peter' into staging
QOM CPUState and X86CPU

* Adoption of CPUClass::disas_set_info() hook

# gpg: Signature made Thu 22 Oct 2015 17:11:24 BST using RSA key ID 3E7E013F
# gpg: Good signature from "Andreas Färber <afaerber@suse.de>"
# gpg:                 aka "Andreas Färber <afaerber@suse.com>"

* remotes/afaerber/tags/qom-cpu-for-peter:
  disas: QOMify alpha specific disas setup
  disas: QOMify mips specific disas setup
  disas: QOMify sh4 specific disas setup
  disas: QOMify lm32 specific disas setup
  disas: QOMify sparc specific disas setup
  disas: QOMify m68k specific disas setup
  disas: QOMify moxie specific disas setup
  disas: QOMify s390x specific disas setup

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-10-22 17:33:54 +01:00
Richard Henderson
89a82cd4b6 cpu-exec: Add "nochain" debug flag
Respect it to avoid linking TBs together.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2015-10-19 11:04:39 -10:00
James Hogan
137d63902f tcg/mips: Support r6 SEL{NE, EQ}Z instead of MOVN/MOVZ
Extend MIPS movcond implementation to support the SELNEZ/SELEQZ
instructions introduced in MIPS r6 (where MOVN/MOVZ have been removed).

Whereas the "MOVN/MOVZ rd, rs, rt" instructions have the following
semantics:
 rd = [!]rt ? rs : rd

The "SELNEZ/SELEQZ rd, rs, rt" instructions are slightly different:
 rd = [!]rt ? rs : 0

First we ensure that if one of the movcond input values is zero that it
comes last (we can swap the input arguments if we invert the condition).
This is so that it can exactly match one of the SELNEZ/SELEQZ
instructions and avoid the need to emit the other one.

Otherwise we emit the opposite instruction first into a temporary
register, and OR that into the result:
 SELNEZ/SELEQZ  TMP1, v2, c1
 SELEQZ/SELNEZ  ret, v1, c1
 OR             ret, ret, TMP1

Which does the following:
 ret = cond ? v1 : v2

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <1443788657-14537-7-git-send-email-james.hogan@imgtec.com>
2015-10-19 11:04:39 -10:00
James Hogan
bc6d0c22b0 tcg/mips: Support r6 multiply/divide encodings
MIPSr6 adds several new integer multiply, divide, and modulo
instructions, and removes several pre-r6 encodings, along with the HI/LO
registers which were the implicit operands of some of those
instructions. Update TCG to use the new instructions when built for r6.

The new instructions actually map much more directly to the TCG ops, as
they only provide a single 32-bit half of the result and in a normal
general purpose register instead of HI or LO.

The mulu2_i32 and muls2_i32 operations are no longer appropriate for r6,
so they are removed from the TCG opcode table. This is because they
would need to emit two separate host instructions anyway (for the high
and low half of the result), which TCG can arrange automatically for us
in the absence of mulu2_i32/muls2_i32 by splitting it into mul_i32 and
mul*h_i32 TCG ops.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <1443788657-14537-6-git-send-email-james.hogan@imgtec.com>
2015-10-19 11:04:39 -10:00
James Hogan
6e0d096989 tcg/mips: Support r6 JR encoding
MIPSr6 encodes JR as JALR with zero as the link register, and the pre-r6
JR encoding is removed. Update TCG to use the new encoding when built
for r6.

We still use the old encoding for pre-r6, so as not to confuse return
prediction stack hardware which may detect only particular encodings of
the return instruction.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <1443788657-14537-5-git-send-email-james.hogan@imgtec.com>
2015-10-19 11:04:38 -10:00
James Hogan
ce14bd4d46 tcg/mips: Add use_mips32r6_instructions definition
Add definition use_mips32r6_instructions to the MIPS TCG backend which
is constant 1 when built for MIPS release 6. This will be used to decide
between pre-R6 and R6 instruction encodings.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <1443788657-14537-4-git-send-email-james.hogan@imgtec.com>
2015-10-19 11:04:38 -10:00
James Hogan
d76f365350 disas/mips: Add R6 jr/jr.hb to disassembler
MIPS r6 encodes jr as jalr zero, and jr.hb as jalr.hb zero, so add these
encodings to the MIPS disassembly table.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Leon Alrae <leon.alrae@imgtec.com>
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <1443788657-14537-3-git-send-email-james.hogan@imgtec.com>
2015-10-19 11:04:38 -10:00
James Hogan
c0e40dbdcc tcg-opc.h: Simplify insn_start def
We already have a TLADDR_ARGS definition, so rearrange the order
slightly and use it in the definition of insn_start, instead of
having an #ifdef.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <1443788657-14537-2-git-send-email-james.hogan@imgtec.com>
2015-10-19 11:04:38 -10:00
Richard Henderson
1e1df962e3 tcg/ppc: Prefer mask over andi.
Prefer the instruction that isn't required to modify cr0.

Signed-off-by: Richard Henderson <rth@twiddle.net>
2015-10-19 11:04:38 -10:00
Richard Henderson
5bfd75a35c tcg/ppc: Revise goto_tb implementation
Restrict the size of code_gen_buffer to 2GB on ppc64, which
lets us assert that everything is reachable with addis+addi
from tb_ret_addr.  This lets us use a max of 4 insns for goto_tb
instead of 7.

Emit the indirect branch portion of goto_tb up front, which
means we only have to update two insns to update any link.
With a 64-bit store, we can update the link atomically, which
may be required in future.

Signed-off-by: Richard Henderson <rth@twiddle.net>
2015-10-19 11:04:37 -10:00
Richard Henderson
70f897bdc4 tcg/ppc: Adjust exit_tb for change in prologue placement
Changing the prologue to the beginning of the code_gen_buffer
changes the direction of the "return" branch.  Need to change
the logic to match.

Signed-off-by: Richard Henderson <rth@twiddle.net>
2015-10-19 11:04:37 -10:00
307 changed files with 8937 additions and 2940 deletions

View File

@@ -151,6 +151,8 @@ dummy := $(call unnest-vars,, \
                 stub-obj-y \
                 util-obj-y \
                 qga-obj-y \
+                ivshmem-client-obj-y \
+                ivshmem-server-obj-y \
                 qga-vss-dll-obj-y \
                 block-obj-y \
                 block-obj-m \
@@ -323,6 +325,11 @@ ifneq ($(EXESUF),)
 qemu-ga: qemu-ga$(EXESUF) $(QGA_VSS_PROVIDER) $(QEMU_GA_MSI)
 endif
+ivshmem-client$(EXESUF): $(ivshmem-client-obj-y)
+	$(call LINK, $^)
+ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) libqemuutil.a libqemustub.a
+	$(call LINK, $^)
 clean:
 # avoid old build problems by removing potentially incorrect old files
 	rm -f config.mak op-i386.h opc-i386.h gen-op-i386.h op-arm.h opc-arm.h gen-op-arm.h

View File

@@ -104,3 +104,8 @@ target-obj-y += trace/
 # by libqemuutil.a. These should be moved to a separate .json schema.
 qga-obj-y = qga/
 qga-vss-dll-obj-y = qga/
+
+######################################################################
+# contrib
+ivshmem-client-obj-y = contrib/ivshmem-client/
+ivshmem-server-obj-y = contrib/ivshmem-server/

View File

@@ -25,6 +25,7 @@ struct AioHandler
IOHandler *io_write;
int deleted;
void *opaque;
bool is_external;
QLIST_ENTRY(AioHandler) node;
};
@@ -43,6 +44,7 @@ static AioHandler *find_aio_handler(AioContext *ctx, int fd)
void aio_set_fd_handler(AioContext *ctx,
int fd,
bool is_external,
IOHandler *io_read,
IOHandler *io_write,
void *opaque)
@@ -82,6 +84,7 @@ void aio_set_fd_handler(AioContext *ctx,
node->io_read = io_read;
node->io_write = io_write;
node->opaque = opaque;
node->is_external = is_external;
node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
@@ -92,10 +95,11 @@ void aio_set_fd_handler(AioContext *ctx,
void aio_set_event_notifier(AioContext *ctx,
EventNotifier *notifier,
bool is_external,
EventNotifierHandler *io_read)
{
aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
(IOHandler *)io_read, NULL, notifier);
is_external, (IOHandler *)io_read, NULL, notifier);
}
bool aio_prepare(AioContext *ctx)
@@ -257,7 +261,8 @@ bool aio_poll(AioContext *ctx, bool blocking)
/* fill pollfds */
QLIST_FOREACH(node, &ctx->aio_handlers, node) {
if (!node->deleted && node->pfd.events) {
if (!node->deleted && node->pfd.events
&& aio_node_check(ctx, node->is_external)) {
add_pollfd(node);
}
}
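
(The aio_node_check() helper used above is not part of this hunk; presumably it is a small inline in the aio header along these lines. This is a sketch from memory, and the counter field name is an assumption:)

static inline bool aio_node_check(AioContext *ctx, bool is_external)
{
    /* Skip handlers marked "external" while external events are disabled
     * for this context, e.g. inside a drained section (see the
     * bdrv_drained_begin()/bdrv_drained_end() hunk later in this diff). */
    return !is_external || !atomic_read(&ctx->external_disable_cnt);
}
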

View File

@@ -28,11 +28,13 @@ struct AioHandler {
GPollFD pfd;
int deleted;
void *opaque;
bool is_external;
QLIST_ENTRY(AioHandler) node;
};
void aio_set_fd_handler(AioContext *ctx,
int fd,
bool is_external,
IOHandler *io_read,
IOHandler *io_write,
void *opaque)
@@ -86,6 +88,7 @@ void aio_set_fd_handler(AioContext *ctx,
node->opaque = opaque;
node->io_read = io_read;
node->io_write = io_write;
node->is_external = is_external;
event = event_notifier_get_handle(&ctx->notifier);
WSAEventSelect(node->pfd.fd, event,
@@ -98,6 +101,7 @@ void aio_set_fd_handler(AioContext *ctx,
void aio_set_event_notifier(AioContext *ctx,
EventNotifier *e,
bool is_external,
EventNotifierHandler *io_notify)
{
AioHandler *node;
@@ -133,6 +137,7 @@ void aio_set_event_notifier(AioContext *ctx,
node->e = e;
node->pfd.fd = (uintptr_t)event_notifier_get_handle(e);
node->pfd.events = G_IO_IN;
node->is_external = is_external;
QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
g_source_add_poll(&ctx->source, &node->pfd);
@@ -304,7 +309,8 @@ bool aio_poll(AioContext *ctx, bool blocking)
/* fill fd sets */
count = 0;
QLIST_FOREACH(node, &ctx->aio_handlers, node) {
if (!node->deleted && node->io_notify) {
if (!node->deleted && node->io_notify
&& aio_node_check(ctx, node->is_external)) {
events[count++] = event_notifier_get_handle(node->e);
}
}

View File

@@ -247,7 +247,7 @@ aio_ctx_finalize(GSource *source)
}
qemu_mutex_unlock(&ctx->bh_lock);
aio_set_event_notifier(ctx, &ctx->notifier, NULL);
aio_set_event_notifier(ctx, &ctx->notifier, false, NULL);
event_notifier_cleanup(&ctx->notifier);
rfifolock_destroy(&ctx->lock);
qemu_mutex_destroy(&ctx->bh_lock);
@@ -329,6 +329,7 @@ AioContext *aio_context_new(Error **errp)
}
g_source_set_can_recurse(&ctx->source, true);
aio_set_event_notifier(ctx, &ctx->notifier,
false,
(EventNotifierHandler *)
event_notifier_dummy_cb);
ctx->thread_pool = NULL;

180
block.c
View File

@@ -257,7 +257,6 @@ BlockDriverState *bdrv_new(void)
for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
QLIST_INIT(&bs->op_blockers[i]);
}
bdrv_iostatus_disable(bs);
notifier_list_init(&bs->close_notifiers);
notifier_with_return_list_init(&bs->before_write_notifiers);
qemu_co_queue_init(&bs->throttled_reqs[0]);
@@ -857,7 +856,6 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
goto fail_opts;
}
bs->guest_block_size = 512;
bs->request_alignment = 512;
bs->zero_beyond_eof = true;
open_flags = bdrv_open_flags(bs, flags);
@@ -1081,6 +1079,10 @@ static int bdrv_fill_options(QDict **options, const char **pfilename,
}
}
if (runstate_check(RUN_STATE_INMIGRATE)) {
*flags |= BDRV_O_INCOMING;
}
return 0;
}
@@ -1908,6 +1910,10 @@ void bdrv_close(BlockDriverState *bs)
bdrv_drain(bs); /* in case flush left pending I/O */
notifier_list_notify(&bs->close_notifiers, bs);
if (bs->blk) {
blk_dev_change_media_cb(bs->blk, false);
}
if (bs->drv) {
BdrvChild *child, *next;
@@ -1946,10 +1952,6 @@ void bdrv_close(BlockDriverState *bs)
bs->full_open_options = NULL;
}
if (bs->blk) {
blk_dev_change_media_cb(bs->blk, false);
}
QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
g_free(ban);
}
@@ -1998,19 +2000,10 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
/* move some fields that need to stay attached to the device */
/* dev info */
bs_dest->guest_block_size = bs_src->guest_block_size;
bs_dest->copy_on_read = bs_src->copy_on_read;
bs_dest->enable_write_cache = bs_src->enable_write_cache;
/* r/w error */
bs_dest->on_read_error = bs_src->on_read_error;
bs_dest->on_write_error = bs_src->on_write_error;
/* i/o status */
bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
bs_dest->iostatus = bs_src->iostatus;
/* dirty bitmap */
bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
}
@@ -2497,82 +2490,6 @@ void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
*nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
}
void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
BlockdevOnError on_write_error)
{
bs->on_read_error = on_read_error;
bs->on_write_error = on_write_error;
}
BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
{
return is_read ? bs->on_read_error : bs->on_write_error;
}
BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
{
BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
switch (on_err) {
case BLOCKDEV_ON_ERROR_ENOSPC:
return (error == ENOSPC) ?
BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
case BLOCKDEV_ON_ERROR_STOP:
return BLOCK_ERROR_ACTION_STOP;
case BLOCKDEV_ON_ERROR_REPORT:
return BLOCK_ERROR_ACTION_REPORT;
case BLOCKDEV_ON_ERROR_IGNORE:
return BLOCK_ERROR_ACTION_IGNORE;
default:
abort();
}
}
static void send_qmp_error_event(BlockDriverState *bs,
BlockErrorAction action,
bool is_read, int error)
{
IoOperationType optype;
optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
bdrv_iostatus_is_enabled(bs),
error == ENOSPC, strerror(error),
&error_abort);
}
/* This is done by device models because, while the block layer knows
* about the error, it does not know whether an operation comes from
* the device or the block layer (from a job, for example).
*/
void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
bool is_read, int error)
{
assert(error >= 0);
if (action == BLOCK_ERROR_ACTION_STOP) {
/* First set the iostatus, so that "info block" returns an iostatus
* that matches the events raised so far (an additional error iostatus
* is fine, but not a lost one).
*/
bdrv_iostatus_set_err(bs, error);
/* Then raise the request to stop the VM and the event.
* qemu_system_vmstop_request_prepare has two effects. First,
* it ensures that the STOP event always comes after the
* BLOCK_IO_ERROR event. Second, it ensures that even if management
* can observe the STOP event and do a "cont" before the STOP
* event is issued, the VM will not stop. In this case, vm_start()
* also ensures that the STOP/RESUME pair of events is emitted.
*/
qemu_system_vmstop_request_prepare();
send_qmp_error_event(bs, action, is_read, error);
qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
} else {
send_qmp_error_event(bs, action, is_read, error);
}
}
int bdrv_is_read_only(BlockDriverState *bs)
{
return bs->read_only;
@@ -2766,6 +2683,11 @@ BlockDriverState *bdrv_lookup_bs(const char *device,
blk = blk_by_name(device);
if (blk) {
if (!blk_bs(blk)) {
error_setg(errp, "Device '%s' has no medium", device);
return NULL;
}
return blk_bs(blk);
}
}
@@ -3136,15 +3058,23 @@ void bdrv_invalidate_cache_all(Error **errp)
/**
* Return TRUE if the media is present
*/
int bdrv_is_inserted(BlockDriverState *bs)
bool bdrv_is_inserted(BlockDriverState *bs)
{
BlockDriver *drv = bs->drv;
BdrvChild *child;
if (!drv)
return 0;
if (!drv->bdrv_is_inserted)
return 1;
return drv->bdrv_is_inserted(bs);
if (!drv) {
return false;
}
if (drv->bdrv_is_inserted) {
return drv->bdrv_is_inserted(bs);
}
QLIST_FOREACH(child, &bs->children, next) {
if (!bdrv_is_inserted(child->bs)) {
return false;
}
}
return true;
}
/**
@@ -3195,11 +3125,6 @@ void bdrv_lock_medium(BlockDriverState *bs, bool locked)
}
}
void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
{
bs->guest_block_size = align;
}
BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
{
BdrvDirtyBitmap *bm;
@@ -3597,46 +3522,6 @@ bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
return true;
}
void bdrv_iostatus_enable(BlockDriverState *bs)
{
bs->iostatus_enabled = true;
bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}
/* The I/O status is only enabled if the drive explicitly
* enables it _and_ the VM is configured to stop on errors */
bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
{
return (bs->iostatus_enabled &&
(bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
}
void bdrv_iostatus_disable(BlockDriverState *bs)
{
bs->iostatus_enabled = false;
}
void bdrv_iostatus_reset(BlockDriverState *bs)
{
if (bdrv_iostatus_is_enabled(bs)) {
bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
if (bs->job) {
block_job_iostatus_reset(bs->job);
}
}
}
void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
{
assert(bdrv_iostatus_is_enabled(bs));
if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
BLOCK_DEVICE_IO_STATUS_FAILED;
}
}
void bdrv_img_create(const char *filename, const char *fmt,
const char *base_filename, const char *base_fmt,
char *options, uint64_t img_size, int flags,
@@ -4148,14 +4033,3 @@ void bdrv_refresh_filename(BlockDriverState *bs)
QDECREF(json);
}
}
/* This accessor function purpose is to allow the device models to access the
* BlockAcctStats structure embedded inside a BlockDriverState without being
* aware of the BlockDriverState structure layout.
* It will go away when the BlockAcctStats structure will be moved inside
* the device models.
*/
BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
{
return &bs->stats;
}

View File

@@ -47,14 +47,6 @@ void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
}
void block_acct_highest_sector(BlockAcctStats *stats, int64_t sector_num,
unsigned int nb_sectors)
{
if (stats->wr_highest_sector < sector_num + nb_sectors - 1) {
stats->wr_highest_sector = sector_num + nb_sectors - 1;
}
}
void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type,
int num_requests)
{

View File

@@ -21,6 +21,7 @@
#include "block/blockjob.h"
#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
#include "sysemu/block-backend.h"
#define BACKUP_CLUSTER_BITS 16
#define BACKUP_CLUSTER_SIZE (1 << BACKUP_CLUSTER_BITS)
@@ -215,7 +216,9 @@ static void backup_iostatus_reset(BlockJob *job)
{
BackupBlockJob *s = container_of(job, BackupBlockJob, common);
bdrv_iostatus_reset(s->target);
if (s->target->blk) {
blk_iostatus_reset(s->target->blk);
}
}
static const BlockJobDriver backup_job_driver = {
@@ -360,8 +363,10 @@ static void coroutine_fn backup_run(void *opaque)
job->bitmap = hbitmap_alloc(end, 0);
bdrv_set_enable_write_cache(target, true);
bdrv_set_on_error(target, on_target_error, on_target_error);
bdrv_iostatus_enable(target);
if (target->blk) {
blk_set_on_error(target->blk, on_target_error, on_target_error);
blk_iostatus_enable(target->blk);
}
bdrv_add_before_write_notifier(bs, &before_write);
@@ -451,7 +456,9 @@ static void coroutine_fn backup_run(void *opaque)
}
hbitmap_free(job->bitmap);
bdrv_iostatus_disable(target);
if (target->blk) {
blk_iostatus_disable(target->blk);
}
bdrv_op_unblock_all(target, job->common.blocker);
data = g_malloc(sizeof(*data));
@@ -480,7 +487,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
!bdrv_iostatus_is_enabled(bs)) {
(!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
error_setg(errp, QERR_INVALID_PARAMETER, "on-source-error");
return;
}

View File

@@ -12,12 +12,17 @@
#include "sysemu/block-backend.h"
#include "block/block_int.h"
#include "block/blockjob.h"
#include "block/throttle-groups.h"
#include "sysemu/blockdev.h"
#include "sysemu/sysemu.h"
#include "qapi-event.h"
/* Number of coroutines to reserve per attached device model */
#define COROUTINE_POOL_RESERVATION 64
static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
struct BlockBackend {
char *name;
int refcnt;
@@ -29,15 +34,31 @@ struct BlockBackend {
/* TODO change to DeviceState when all users are qdevified */
const BlockDevOps *dev_ops;
void *dev_opaque;
/* the block size for which the guest device expects atomicity */
int guest_block_size;
/* If the BDS tree is removed, some of its options are stored here (which
* can be used to restore those options in the new BDS on insert) */
BlockBackendRootState root_state;
/* I/O stats (display with "info blockstats"). */
BlockAcctStats stats;
BlockdevOnError on_read_error, on_write_error;
bool iostatus_enabled;
BlockDeviceIoStatus iostatus;
};
typedef struct BlockBackendAIOCB {
BlockAIOCB common;
QEMUBH *bh;
BlockBackend *blk;
int ret;
} BlockBackendAIOCB;
static const AIOCBInfo block_backend_aiocb_info = {
.get_aio_context = blk_aiocb_get_aio_context,
.aiocb_size = sizeof(BlockBackendAIOCB),
};
@@ -145,6 +166,10 @@ static void blk_delete(BlockBackend *blk)
bdrv_unref(blk->bs);
blk->bs = NULL;
}
if (blk->root_state.throttle_state) {
g_free(blk->root_state.throttle_group);
throttle_group_unref(blk->root_state.throttle_state);
}
/* Avoid double-remove after blk_hide_on_behalf_of_hmp_drive_del() */
if (blk->name[0]) {
QTAILQ_REMOVE(&blk_backends, blk, link);
@@ -308,6 +333,17 @@ void blk_hide_on_behalf_of_hmp_drive_del(BlockBackend *blk)
}
}
/*
* Associates a new BlockDriverState with @blk.
*/
void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
{
assert(!blk->bs && !bs->blk);
bdrv_ref(bs);
blk->bs = bs;
bs->blk = blk;
}
/*
* Attach device model @dev to @blk.
* Return 0 on success, -EBUSY when a device model is attached already.
@@ -320,7 +356,7 @@ int blk_attach_dev(BlockBackend *blk, void *dev)
}
blk_ref(blk);
blk->dev = dev;
bdrv_iostatus_reset(blk->bs);
blk_iostatus_reset(blk);
return 0;
}
@@ -347,7 +383,7 @@ void blk_detach_dev(BlockBackend *blk, void *dev)
blk->dev = NULL;
blk->dev_ops = NULL;
blk->dev_opaque = NULL;
bdrv_set_guest_block_size(blk->bs, 512);
blk->guest_block_size = 512;
blk_unref(blk);
}
@@ -452,7 +488,47 @@ void blk_dev_resize_cb(BlockBackend *blk)
void blk_iostatus_enable(BlockBackend *blk)
{
bdrv_iostatus_enable(blk->bs);
blk->iostatus_enabled = true;
blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}
/* The I/O status is only enabled if the drive explicitly
* enables it _and_ the VM is configured to stop on errors */
bool blk_iostatus_is_enabled(const BlockBackend *blk)
{
return (blk->iostatus_enabled &&
(blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
blk->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
blk->on_read_error == BLOCKDEV_ON_ERROR_STOP));
}
BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk)
{
return blk->iostatus;
}
void blk_iostatus_disable(BlockBackend *blk)
{
blk->iostatus_enabled = false;
}
void blk_iostatus_reset(BlockBackend *blk)
{
if (blk_iostatus_is_enabled(blk)) {
blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
if (blk->bs && blk->bs->job) {
block_job_iostatus_reset(blk->bs->job);
}
}
}
void blk_iostatus_set_err(BlockBackend *blk, int error)
{
assert(blk_iostatus_is_enabled(blk));
if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
BLOCK_DEVICE_IO_STATUS_FAILED;
}
}
static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
@@ -464,7 +540,7 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
return -EIO;
}
if (!blk_is_inserted(blk)) {
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
@@ -558,6 +634,7 @@ static BlockAIOCB *abort_aio_request(BlockBackend *blk, BlockCompletionFunc *cb,
QEMUBH *bh;
acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
acb->blk = blk;
acb->ret = ret;
bh = aio_bh_new(blk_get_aio_context(blk), error_callback_bh, acb);
@@ -602,16 +679,28 @@ int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count)
int64_t blk_getlength(BlockBackend *blk)
{
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
return bdrv_getlength(blk->bs);
}
void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
{
bdrv_get_geometry(blk->bs, nb_sectors_ptr);
if (!blk->bs) {
*nb_sectors_ptr = 0;
} else {
bdrv_get_geometry(blk->bs, nb_sectors_ptr);
}
}
int64_t blk_nb_sectors(BlockBackend *blk)
{
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
return bdrv_nb_sectors(blk->bs);
}
@@ -642,6 +731,10 @@ BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num,
BlockAIOCB *blk_aio_flush(BlockBackend *blk,
BlockCompletionFunc *cb, void *opaque)
{
if (!blk_is_available(blk)) {
return abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
}
return bdrv_aio_flush(blk->bs, cb, opaque);
}
@@ -683,12 +776,20 @@ int blk_aio_multiwrite(BlockBackend *blk, BlockRequest *reqs, int num_reqs)
int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
{
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
return bdrv_ioctl(blk->bs, req, buf);
}
BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
BlockCompletionFunc *cb, void *opaque)
{
if (!blk_is_available(blk)) {
return abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
}
return bdrv_aio_ioctl(blk->bs, req, buf, cb, opaque);
}
@@ -704,11 +805,19 @@ int blk_co_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
int blk_co_flush(BlockBackend *blk)
{
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
return bdrv_co_flush(blk->bs);
}
int blk_flush(BlockBackend *blk)
{
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
return bdrv_flush(blk->bs);
}
@@ -719,7 +828,9 @@ int blk_flush_all(void)
void blk_drain(BlockBackend *blk)
{
bdrv_drain(blk->bs);
if (blk->bs) {
bdrv_drain(blk->bs);
}
}
void blk_drain_all(void)
@@ -727,76 +838,178 @@ void blk_drain_all(void)
bdrv_drain_all();
}
void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
BlockdevOnError on_write_error)
{
blk->on_read_error = on_read_error;
blk->on_write_error = on_write_error;
}
BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read)
{
return bdrv_get_on_error(blk->bs, is_read);
return is_read ? blk->on_read_error : blk->on_write_error;
}
BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
int error)
{
return bdrv_get_error_action(blk->bs, is_read, error);
BlockdevOnError on_err = blk_get_on_error(blk, is_read);
switch (on_err) {
case BLOCKDEV_ON_ERROR_ENOSPC:
return (error == ENOSPC) ?
BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
case BLOCKDEV_ON_ERROR_STOP:
return BLOCK_ERROR_ACTION_STOP;
case BLOCKDEV_ON_ERROR_REPORT:
return BLOCK_ERROR_ACTION_REPORT;
case BLOCKDEV_ON_ERROR_IGNORE:
return BLOCK_ERROR_ACTION_IGNORE;
default:
abort();
}
}
static void send_qmp_error_event(BlockBackend *blk,
BlockErrorAction action,
bool is_read, int error)
{
IoOperationType optype;
optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
qapi_event_send_block_io_error(blk_name(blk), optype, action,
blk_iostatus_is_enabled(blk),
error == ENOSPC, strerror(error),
&error_abort);
}
/* This is done by device models because, while the block layer knows
* about the error, it does not know whether an operation comes from
* the device or the block layer (from a job, for example).
*/
void blk_error_action(BlockBackend *blk, BlockErrorAction action,
bool is_read, int error)
{
bdrv_error_action(blk->bs, action, is_read, error);
assert(error >= 0);
if (action == BLOCK_ERROR_ACTION_STOP) {
/* First set the iostatus, so that "info block" returns an iostatus
* that matches the events raised so far (an additional error iostatus
* is fine, but not a lost one).
*/
blk_iostatus_set_err(blk, error);
/* Then raise the request to stop the VM and the event.
* qemu_system_vmstop_request_prepare has two effects. First,
* it ensures that the STOP event always comes after the
* BLOCK_IO_ERROR event. Second, it ensures that even if management
* can observe the STOP event and do a "cont" before the STOP
* event is issued, the VM will not stop. In this case, vm_start()
* also ensures that the STOP/RESUME pair of events is emitted.
*/
qemu_system_vmstop_request_prepare();
send_qmp_error_event(blk, action, is_read, error);
qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
} else {
send_qmp_error_event(blk, action, is_read, error);
}
}
int blk_is_read_only(BlockBackend *blk)
{
return bdrv_is_read_only(blk->bs);
if (blk->bs) {
return bdrv_is_read_only(blk->bs);
} else {
return blk->root_state.read_only;
}
}
int blk_is_sg(BlockBackend *blk)
{
if (!blk->bs) {
return 0;
}
return bdrv_is_sg(blk->bs);
}
int blk_enable_write_cache(BlockBackend *blk)
{
return bdrv_enable_write_cache(blk->bs);
if (blk->bs) {
return bdrv_enable_write_cache(blk->bs);
} else {
return !!(blk->root_state.open_flags & BDRV_O_CACHE_WB);
}
}
void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
{
bdrv_set_enable_write_cache(blk->bs, wce);
if (blk->bs) {
bdrv_set_enable_write_cache(blk->bs, wce);
} else {
if (wce) {
blk->root_state.open_flags |= BDRV_O_CACHE_WB;
} else {
blk->root_state.open_flags &= ~BDRV_O_CACHE_WB;
}
}
}
void blk_invalidate_cache(BlockBackend *blk, Error **errp)
{
if (!blk->bs) {
error_setg(errp, "Device '%s' has no medium", blk->name);
return;
}
bdrv_invalidate_cache(blk->bs, errp);
}
int blk_is_inserted(BlockBackend *blk)
bool blk_is_inserted(BlockBackend *blk)
{
return bdrv_is_inserted(blk->bs);
return blk->bs && bdrv_is_inserted(blk->bs);
}
bool blk_is_available(BlockBackend *blk)
{
return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk);
}
void blk_lock_medium(BlockBackend *blk, bool locked)
{
bdrv_lock_medium(blk->bs, locked);
if (blk->bs) {
bdrv_lock_medium(blk->bs, locked);
}
}
void blk_eject(BlockBackend *blk, bool eject_flag)
{
bdrv_eject(blk->bs, eject_flag);
if (blk->bs) {
bdrv_eject(blk->bs, eject_flag);
}
}
int blk_get_flags(BlockBackend *blk)
{
return bdrv_get_flags(blk->bs);
if (blk->bs) {
return bdrv_get_flags(blk->bs);
} else {
return blk->root_state.open_flags;
}
}
int blk_get_max_transfer_length(BlockBackend *blk)
{
return blk->bs->bl.max_transfer_length;
if (blk->bs) {
return blk->bs->bl.max_transfer_length;
} else {
return 0;
}
}
void blk_set_guest_block_size(BlockBackend *blk, int align)
{
bdrv_set_guest_block_size(blk->bs, align);
blk->guest_block_size = align;
}
void *blk_blockalign(BlockBackend *blk, size_t size)
@@ -806,40 +1019,64 @@ void *blk_blockalign(BlockBackend *blk, size_t size)
bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
{
if (!blk->bs) {
return false;
}
return bdrv_op_is_blocked(blk->bs, op, errp);
}
void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
{
bdrv_op_unblock(blk->bs, op, reason);
if (blk->bs) {
bdrv_op_unblock(blk->bs, op, reason);
}
}
void blk_op_block_all(BlockBackend *blk, Error *reason)
{
bdrv_op_block_all(blk->bs, reason);
if (blk->bs) {
bdrv_op_block_all(blk->bs, reason);
}
}
void blk_op_unblock_all(BlockBackend *blk, Error *reason)
{
bdrv_op_unblock_all(blk->bs, reason);
if (blk->bs) {
bdrv_op_unblock_all(blk->bs, reason);
}
}
AioContext *blk_get_aio_context(BlockBackend *blk)
{
return bdrv_get_aio_context(blk->bs);
if (blk->bs) {
return bdrv_get_aio_context(blk->bs);
} else {
return qemu_get_aio_context();
}
}
static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
{
BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb);
return blk_get_aio_context(blk_acb->blk);
}
void blk_set_aio_context(BlockBackend *blk, AioContext *new_context)
{
bdrv_set_aio_context(blk->bs, new_context);
if (blk->bs) {
bdrv_set_aio_context(blk->bs, new_context);
}
}
void blk_add_aio_context_notifier(BlockBackend *blk,
void (*attached_aio_context)(AioContext *new_context, void *opaque),
void (*detach_aio_context)(void *opaque), void *opaque)
{
bdrv_add_aio_context_notifier(blk->bs, attached_aio_context,
detach_aio_context, opaque);
if (blk->bs) {
bdrv_add_aio_context_notifier(blk->bs, attached_aio_context,
detach_aio_context, opaque);
}
}
void blk_remove_aio_context_notifier(BlockBackend *blk,
@@ -848,28 +1085,36 @@ void blk_remove_aio_context_notifier(BlockBackend *blk,
void (*detach_aio_context)(void *),
void *opaque)
{
bdrv_remove_aio_context_notifier(blk->bs, attached_aio_context,
detach_aio_context, opaque);
if (blk->bs) {
bdrv_remove_aio_context_notifier(blk->bs, attached_aio_context,
detach_aio_context, opaque);
}
}
void blk_add_close_notifier(BlockBackend *blk, Notifier *notify)
{
bdrv_add_close_notifier(blk->bs, notify);
if (blk->bs) {
bdrv_add_close_notifier(blk->bs, notify);
}
}
void blk_io_plug(BlockBackend *blk)
{
bdrv_io_plug(blk->bs);
if (blk->bs) {
bdrv_io_plug(blk->bs);
}
}
void blk_io_unplug(BlockBackend *blk)
{
bdrv_io_unplug(blk->bs);
if (blk->bs) {
bdrv_io_unplug(blk->bs);
}
}
BlockAcctStats *blk_get_stats(BlockBackend *blk)
{
return bdrv_get_stats(blk->bs);
return &blk->stats;
}
void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
@@ -902,6 +1147,10 @@ int blk_write_compressed(BlockBackend *blk, int64_t sector_num,
int blk_truncate(BlockBackend *blk, int64_t offset)
{
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
return bdrv_truncate(blk->bs, offset);
}
@@ -918,20 +1167,67 @@ int blk_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
int64_t pos, int size)
{
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
return bdrv_save_vmstate(blk->bs, buf, pos, size);
}
int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
{
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
return bdrv_load_vmstate(blk->bs, buf, pos, size);
}
int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
{
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
return bdrv_probe_blocksizes(blk->bs, bsz);
}
int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
{
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
return bdrv_probe_geometry(blk->bs, geo);
}
/*
* Updates the BlockBackendRootState object with data from the currently
* attached BlockDriverState.
*/
void blk_update_root_state(BlockBackend *blk)
{
assert(blk->bs);
blk->root_state.open_flags = blk->bs->open_flags;
blk->root_state.read_only = blk->bs->read_only;
blk->root_state.detect_zeroes = blk->bs->detect_zeroes;
if (blk->root_state.throttle_group) {
g_free(blk->root_state.throttle_group);
throttle_group_unref(blk->root_state.throttle_state);
}
if (blk->bs->throttle_state) {
const char *name = throttle_group_get_name(blk->bs);
blk->root_state.throttle_group = g_strdup(name);
blk->root_state.throttle_state = throttle_group_incref(name);
} else {
blk->root_state.throttle_group = NULL;
blk->root_state.throttle_state = NULL;
}
}
BlockBackendRootState *blk_get_root_state(BlockBackend *blk)
{
return &blk->root_state;
}

View File

@@ -17,6 +17,7 @@
#include "block/blockjob.h"
#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
#include "sysemu/block-backend.h"
enum {
/*
@@ -213,7 +214,7 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
!bdrv_iostatus_is_enabled(bs)) {
(!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
error_setg(errp, "Invalid parameter combination");
return;
}

View File

@@ -154,18 +154,20 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd);
switch (action) {
case CURL_POLL_IN:
aio_set_fd_handler(s->aio_context, fd, curl_multi_read,
NULL, state);
aio_set_fd_handler(s->aio_context, fd, false,
curl_multi_read, NULL, state);
break;
case CURL_POLL_OUT:
aio_set_fd_handler(s->aio_context, fd, NULL, curl_multi_do, state);
aio_set_fd_handler(s->aio_context, fd, false,
NULL, curl_multi_do, state);
break;
case CURL_POLL_INOUT:
aio_set_fd_handler(s->aio_context, fd, curl_multi_read,
curl_multi_do, state);
aio_set_fd_handler(s->aio_context, fd, false,
curl_multi_read, curl_multi_do, state);
break;
case CURL_POLL_REMOVE:
aio_set_fd_handler(s->aio_context, fd, NULL, NULL, NULL);
aio_set_fd_handler(s->aio_context, fd, false,
NULL, NULL, NULL);
break;
}

View File

@@ -23,6 +23,7 @@
*/
#include "trace.h"
#include "sysemu/block-backend.h"
#include "block/blockjob.h"
#include "block/block_int.h"
#include "block/throttle-groups.h"
@@ -215,6 +216,8 @@ void bdrv_disable_copy_on_read(BlockDriverState *bs)
/* Check if any requests are in-flight (including throttled requests) */
bool bdrv_requests_pending(BlockDriverState *bs)
{
BdrvChild *child;
if (!QLIST_EMPTY(&bs->tracked_requests)) {
return true;
}
@@ -224,12 +227,13 @@ bool bdrv_requests_pending(BlockDriverState *bs)
if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
return true;
}
if (bs->file && bdrv_requests_pending(bs->file->bs)) {
return true;
}
if (bs->backing && bdrv_requests_pending(bs->backing->bs)) {
return true;
QLIST_FOREACH(child, &bs->children, next) {
if (bdrv_requests_pending(child->bs)) {
return true;
}
}
return false;
}
@@ -1151,7 +1155,9 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
bdrv_set_dirty(bs, sector_num, nb_sectors);
block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);
if (bs->wr_highest_offset < offset + bytes) {
bs->wr_highest_offset = offset + bytes;
}
if (ret >= 0) {
bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
@@ -1903,7 +1909,10 @@ static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
}
}
block_acct_merge_done(&bs->stats, BLOCK_ACCT_WRITE, num_reqs - outidx - 1);
if (bs->blk) {
block_acct_merge_done(blk_get_stats(bs->blk), BLOCK_ACCT_WRITE,
num_reqs - outidx - 1);
}
return outidx + 1;
}
@@ -2618,3 +2627,20 @@ void bdrv_flush_io_queue(BlockDriverState *bs)
}
bdrv_start_throttled_reqs(bs);
}
void bdrv_drained_begin(BlockDriverState *bs)
{
if (!bs->quiesce_counter++) {
aio_disable_external(bdrv_get_aio_context(bs));
}
bdrv_drain(bs);
}
void bdrv_drained_end(BlockDriverState *bs)
{
assert(bs->quiesce_counter > 0);
if (--bs->quiesce_counter > 0) {
return;
}
aio_enable_external(bdrv_get_aio_context(bs));
}
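
(For orientation, a minimal hypothetical caller of the new drained-section API would bracket work that must not be interleaved with externally driven I/O:)

bdrv_drained_begin(bs);
/* ... modify or reconfigure bs while no external requests can arrive ... */
bdrv_drained_end(bs);
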

View File

@@ -291,8 +291,8 @@ iscsi_set_events(IscsiLun *iscsilun)
int ev = iscsi_which_events(iscsi);
if (ev != iscsilun->events) {
aio_set_fd_handler(iscsilun->aio_context,
iscsi_get_fd(iscsi),
aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsi),
false,
(ev & POLLIN) ? iscsi_process_read : NULL,
(ev & POLLOUT) ? iscsi_process_write : NULL,
iscsilun);
@@ -1280,9 +1280,8 @@ static void iscsi_detach_aio_context(BlockDriverState *bs)
{
IscsiLun *iscsilun = bs->opaque;
aio_set_fd_handler(iscsilun->aio_context,
iscsi_get_fd(iscsilun->iscsi),
NULL, NULL, NULL);
aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi),
false, NULL, NULL, NULL);
iscsilun->events = 0;
if (iscsilun->nop_timer) {

View File

@@ -287,7 +287,7 @@ void laio_detach_aio_context(void *s_, AioContext *old_context)
{
struct qemu_laio_state *s = s_;
aio_set_event_notifier(old_context, &s->e, NULL);
aio_set_event_notifier(old_context, &s->e, false, NULL);
qemu_bh_delete(s->completion_bh);
}
@@ -296,7 +296,8 @@ void laio_attach_aio_context(void *s_, AioContext *new_context)
struct qemu_laio_state *s = s_;
s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
aio_set_event_notifier(new_context, &s->e, qemu_laio_completion_cb);
aio_set_event_notifier(new_context, &s->e, false,
qemu_laio_completion_cb);
}
void *laio_init(void)

View File

@@ -14,6 +14,7 @@
#include "trace.h"
#include "block/blockjob.h"
#include "block/block_int.h"
#include "sysemu/block-backend.h"
#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
#include "qemu/bitmap.h"
@@ -599,7 +600,9 @@ immediate_exit:
g_free(s->cow_bitmap);
g_free(s->in_flight_bitmap);
bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
bdrv_iostatus_disable(s->target);
if (s->target->blk) {
blk_iostatus_disable(s->target->blk);
}
data = g_malloc(sizeof(*data));
data->ret = ret;
@@ -621,7 +624,9 @@ static void mirror_iostatus_reset(BlockJob *job)
{
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
bdrv_iostatus_reset(s->target);
if (s->target->blk) {
blk_iostatus_reset(s->target->blk);
}
}
static void mirror_complete(BlockJob *job, Error **errp)
@@ -704,7 +709,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
!bdrv_iostatus_is_enabled(bs)) {
(!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
error_setg(errp, QERR_INVALID_PARAMETER, "on-source-error");
return;
}
@@ -740,8 +745,10 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
return;
}
bdrv_set_enable_write_cache(s->target, true);
bdrv_set_on_error(s->target, on_target_error, on_target_error);
bdrv_iostatus_enable(s->target);
if (s->target->blk) {
blk_set_on_error(s->target->blk, on_target_error, on_target_error);
blk_iostatus_enable(s->target->blk);
}
s->common.co = qemu_coroutine_create(mirror_run);
trace_mirror_start(bs, s, s->common.co, opaque);
qemu_coroutine_enter(s->common.co, s);

View File

@@ -124,7 +124,7 @@ static int nbd_co_send_request(BlockDriverState *bs,
s->send_coroutine = qemu_coroutine_self();
aio_context = bdrv_get_aio_context(bs);
aio_set_fd_handler(aio_context, s->sock,
aio_set_fd_handler(aio_context, s->sock, false,
nbd_reply_ready, nbd_restart_write, bs);
if (qiov) {
if (!s->is_unix) {
@@ -144,7 +144,8 @@ static int nbd_co_send_request(BlockDriverState *bs,
} else {
rc = nbd_send_request(s->sock, request);
}
aio_set_fd_handler(aio_context, s->sock, nbd_reply_ready, NULL, bs);
aio_set_fd_handler(aio_context, s->sock, false,
nbd_reply_ready, NULL, bs);
s->send_coroutine = NULL;
qemu_co_mutex_unlock(&s->send_mutex);
return rc;
@@ -348,14 +349,15 @@ int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num,
void nbd_client_detach_aio_context(BlockDriverState *bs)
{
aio_set_fd_handler(bdrv_get_aio_context(bs),
nbd_get_client_session(bs)->sock, NULL, NULL, NULL);
nbd_get_client_session(bs)->sock,
false, NULL, NULL, NULL);
}
void nbd_client_attach_aio_context(BlockDriverState *bs,
AioContext *new_context)
{
aio_set_fd_handler(new_context, nbd_get_client_session(bs)->sock,
nbd_reply_ready, NULL, bs);
false, nbd_reply_ready, NULL, bs);
}
void nbd_client_close(BlockDriverState *bs)

View File

@@ -206,24 +206,24 @@ static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, char **export,
saddr = g_new0(SocketAddress, 1);
if (qdict_haskey(options, "path")) {
saddr->kind = SOCKET_ADDRESS_KIND_UNIX;
saddr->q_unix = g_new0(UnixSocketAddress, 1);
saddr->q_unix->path = g_strdup(qdict_get_str(options, "path"));
saddr->type = SOCKET_ADDRESS_KIND_UNIX;
saddr->u.q_unix = g_new0(UnixSocketAddress, 1);
saddr->u.q_unix->path = g_strdup(qdict_get_str(options, "path"));
qdict_del(options, "path");
} else {
saddr->kind = SOCKET_ADDRESS_KIND_INET;
saddr->inet = g_new0(InetSocketAddress, 1);
saddr->inet->host = g_strdup(qdict_get_str(options, "host"));
saddr->type = SOCKET_ADDRESS_KIND_INET;
saddr->u.inet = g_new0(InetSocketAddress, 1);
saddr->u.inet->host = g_strdup(qdict_get_str(options, "host"));
if (!qdict_get_try_str(options, "port")) {
saddr->inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT);
saddr->u.inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT);
} else {
saddr->inet->port = g_strdup(qdict_get_str(options, "port"));
saddr->u.inet->port = g_strdup(qdict_get_str(options, "port"));
}
qdict_del(options, "host");
qdict_del(options, "port");
}
s->client.is_unix = saddr->kind == SOCKET_ADDRESS_KIND_UNIX;
s->client.is_unix = saddr->type == SOCKET_ADDRESS_KIND_UNIX;
*export = g_strdup(qdict_get_try_str(options, "export"));
if (*export) {

View File

@@ -63,11 +63,10 @@ static void nfs_set_events(NFSClient *client)
{
int ev = nfs_which_events(client->context);
if (ev != client->events) {
aio_set_fd_handler(client->aio_context,
nfs_get_fd(client->context),
aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
false,
(ev & POLLIN) ? nfs_process_read : NULL,
(ev & POLLOUT) ? nfs_process_write : NULL,
client);
(ev & POLLOUT) ? nfs_process_write : NULL, client);
}
client->events = ev;
@@ -242,9 +241,8 @@ static void nfs_detach_aio_context(BlockDriverState *bs)
{
NFSClient *client = bs->opaque;
aio_set_fd_handler(client->aio_context,
nfs_get_fd(client->context),
NULL, NULL, NULL);
aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
false, NULL, NULL, NULL);
client->events = 0;
}
@@ -263,9 +261,8 @@ static void nfs_client_close(NFSClient *client)
if (client->fh) {
nfs_close(client->context, client->fh);
}
aio_set_fd_handler(client->aio_context,
nfs_get_fd(client->context),
NULL, NULL, NULL);
aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
false, NULL, NULL, NULL);
nfs_destroy_context(client->context);
}
memset(client, 0, sizeof(NFSClient));

View File

@@ -301,17 +301,17 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
info->tray_open = blk_dev_is_tray_open(blk);
}
if (bdrv_iostatus_is_enabled(bs)) {
if (blk_iostatus_is_enabled(blk)) {
info->has_io_status = true;
info->io_status = bs->iostatus;
info->io_status = blk_iostatus(blk);
}
if (!QLIST_EMPTY(&bs->dirty_bitmaps)) {
if (bs && !QLIST_EMPTY(&bs->dirty_bitmaps)) {
info->has_dirty_bitmaps = true;
info->dirty_bitmaps = bdrv_query_dirty_bitmaps(bs);
}
if (bs->drv) {
if (bs && bs->drv) {
info->has_inserted = true;
info->inserted = bdrv_block_device_info(bs, errp);
if (info->inserted == NULL) {
@@ -344,18 +344,22 @@ static BlockStats *bdrv_query_stats(const BlockDriverState *bs,
}
s->stats = g_malloc0(sizeof(*s->stats));
s->stats->rd_bytes = bs->stats.nr_bytes[BLOCK_ACCT_READ];
s->stats->wr_bytes = bs->stats.nr_bytes[BLOCK_ACCT_WRITE];
s->stats->rd_operations = bs->stats.nr_ops[BLOCK_ACCT_READ];
s->stats->wr_operations = bs->stats.nr_ops[BLOCK_ACCT_WRITE];
s->stats->rd_merged = bs->stats.merged[BLOCK_ACCT_READ];
s->stats->wr_merged = bs->stats.merged[BLOCK_ACCT_WRITE];
s->stats->wr_highest_offset =
bs->stats.wr_highest_sector * BDRV_SECTOR_SIZE;
s->stats->flush_operations = bs->stats.nr_ops[BLOCK_ACCT_FLUSH];
s->stats->wr_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_WRITE];
s->stats->rd_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_READ];
s->stats->flush_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_FLUSH];
if (bs->blk) {
BlockAcctStats *stats = blk_get_stats(bs->blk);
s->stats->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ];
s->stats->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE];
s->stats->rd_operations = stats->nr_ops[BLOCK_ACCT_READ];
s->stats->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE];
s->stats->rd_merged = stats->merged[BLOCK_ACCT_READ];
s->stats->wr_merged = stats->merged[BLOCK_ACCT_WRITE];
s->stats->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH];
s->stats->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE];
s->stats->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ];
s->stats->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH];
}
s->stats->wr_highest_offset = bs->wr_highest_offset;
if (bs->file) {
s->has_parent = true;

View File

@@ -2738,18 +2738,16 @@ static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs)
ImageInfoSpecific *spec_info = g_new(ImageInfoSpecific, 1);
*spec_info = (ImageInfoSpecific){
.kind = IMAGE_INFO_SPECIFIC_KIND_QCOW2,
{
.qcow2 = g_new(ImageInfoSpecificQCow2, 1),
},
.type = IMAGE_INFO_SPECIFIC_KIND_QCOW2,
.u.qcow2 = g_new(ImageInfoSpecificQCow2, 1),
};
if (s->qcow_version == 2) {
*spec_info->qcow2 = (ImageInfoSpecificQCow2){
*spec_info->u.qcow2 = (ImageInfoSpecificQCow2){
.compat = g_strdup("0.10"),
.refcount_bits = s->refcount_bits,
};
} else if (s->qcow_version == 3) {
*spec_info->qcow2 = (ImageInfoSpecificQCow2){
*spec_info->u.qcow2 = (ImageInfoSpecificQCow2){
.compat = g_strdup("1.1"),
.lazy_refcounts = s->compatible_features &
QCOW2_COMPAT_LAZY_REFCOUNTS,

View File

@@ -127,11 +127,6 @@ do { \
#define FTYPE_FILE 0
#define FTYPE_CD 1
#define FTYPE_FD 2
/* if the FD is not accessed during that time (in ns), we try to
reopen it to see if the disk has been changed */
#define FD_OPEN_TIMEOUT (1000000000)
#define MAX_BLOCKSIZE 4096
@@ -141,13 +136,6 @@ typedef struct BDRVRawState {
int open_flags;
size_t buf_align;
#if defined(__linux__)
/* linux floppy specific */
int64_t fd_open_time;
int64_t fd_error_time;
int fd_got_error;
int fd_media_changed;
#endif
#ifdef CONFIG_LINUX_AIO
int use_aio;
void *aio_ctx;
@@ -635,7 +623,7 @@ static int raw_reopen_prepare(BDRVReopenState *state,
}
#endif
if (s->type == FTYPE_FD || s->type == FTYPE_CD) {
if (s->type == FTYPE_CD) {
raw_s->open_flags |= O_NONBLOCK;
}
@@ -2187,47 +2175,6 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
}
#if defined(__linux__)
/* Note: we do not have a reliable method to detect if the floppy is
present. The current method is to try to open the floppy at every
I/O and to keep it opened during a few hundreds of ms. */
static int fd_open(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
int last_media_present;
if (s->type != FTYPE_FD)
return 0;
last_media_present = (s->fd >= 0);
if (s->fd >= 0 &&
(qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
qemu_close(s->fd);
s->fd = -1;
DPRINTF("Floppy closed\n");
}
if (s->fd < 0) {
if (s->fd_got_error &&
(qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->fd_error_time) < FD_OPEN_TIMEOUT) {
DPRINTF("No floppy (open delayed)\n");
return -EIO;
}
s->fd = qemu_open(bs->filename, s->open_flags & ~O_NONBLOCK);
if (s->fd < 0) {
s->fd_error_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
s->fd_got_error = 1;
if (last_media_present)
s->fd_media_changed = 1;
DPRINTF("No floppy\n");
return -EIO;
}
DPRINTF("Floppy opened\n");
}
if (!last_media_present)
s->fd_media_changed = 1;
s->fd_open_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
s->fd_got_error = 0;
return 0;
}
static int hdev_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
BDRVRawState *s = bs->opaque;
@@ -2256,8 +2203,8 @@ static BlockAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
}
#endif /* linux */
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
static int fd_open(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
@@ -2267,14 +2214,6 @@ static int fd_open(BlockDriverState *bs)
return 0;
return -EIO;
}
#else /* !linux && !FreeBSD */
static int fd_open(BlockDriverState *bs)
{
return 0;
}
#endif /* !linux && !FreeBSD */
static coroutine_fn BlockAIOCB *hdev_aio_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
@@ -2318,14 +2257,13 @@ static int hdev_create(const char *filename, QemuOpts *opts,
int64_t total_size = 0;
bool has_prefix;
/* This function is used by all three protocol block drivers and therefore
* any of these three prefixes may be given.
/* This function is used by both protocol block drivers and therefore either
* of these prefixes may be given.
* The return value has to be stored somewhere, otherwise this is an error
* due to -Werror=unused-value. */
has_prefix =
strstart(filename, "host_device:", &filename) ||
strstart(filename, "host_cdrom:" , &filename) ||
strstart(filename, "host_floppy:", &filename);
strstart(filename, "host_cdrom:" , &filename);
(void)has_prefix;
@@ -2405,155 +2343,6 @@ static BlockDriver bdrv_host_device = {
#endif
};
#ifdef __linux__
static void floppy_parse_filename(const char *filename, QDict *options,
Error **errp)
{
/* The prefix is optional, just as for "file". */
strstart(filename, "host_floppy:", &filename);
qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
}
static int floppy_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
BDRVRawState *s = bs->opaque;
Error *local_err = NULL;
int ret;
s->type = FTYPE_FD;
/* open will not fail even if no floppy is inserted, so add O_NONBLOCK */
ret = raw_open_common(bs, options, flags, O_NONBLOCK, &local_err);
if (ret) {
if (local_err) {
error_propagate(errp, local_err);
}
return ret;
}
/* close fd so that we can reopen it as needed */
qemu_close(s->fd);
s->fd = -1;
s->fd_media_changed = 1;
error_report("Host floppy pass-through is deprecated");
error_printf("Support for it will be removed in a future release.\n");
return 0;
}
static int floppy_probe_device(const char *filename)
{
int fd, ret;
int prio = 0;
struct floppy_struct fdparam;
struct stat st;
if (strstart(filename, "/dev/fd", NULL) &&
!strstart(filename, "/dev/fdset/", NULL) &&
!strstart(filename, "/dev/fd/", NULL)) {
prio = 50;
}
fd = qemu_open(filename, O_RDONLY | O_NONBLOCK);
if (fd < 0) {
goto out;
}
ret = fstat(fd, &st);
if (ret == -1 || !S_ISBLK(st.st_mode)) {
goto outc;
}
/* Attempt to detect via a floppy specific ioctl */
ret = ioctl(fd, FDGETPRM, &fdparam);
if (ret >= 0)
prio = 100;
outc:
qemu_close(fd);
out:
return prio;
}
static int floppy_is_inserted(BlockDriverState *bs)
{
return fd_open(bs) >= 0;
}
static int floppy_media_changed(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
int ret;
/*
* XXX: we do not have a true media changed indication.
* It does not work if the floppy is changed without trying to read it.
*/
fd_open(bs);
ret = s->fd_media_changed;
s->fd_media_changed = 0;
DPRINTF("Floppy changed=%d\n", ret);
return ret;
}
static void floppy_eject(BlockDriverState *bs, bool eject_flag)
{
BDRVRawState *s = bs->opaque;
int fd;
if (s->fd >= 0) {
qemu_close(s->fd);
s->fd = -1;
}
fd = qemu_open(bs->filename, s->open_flags | O_NONBLOCK);
if (fd >= 0) {
if (ioctl(fd, FDEJECT, 0) < 0)
perror("FDEJECT");
qemu_close(fd);
}
}
static BlockDriver bdrv_host_floppy = {
.format_name = "host_floppy",
.protocol_name = "host_floppy",
.instance_size = sizeof(BDRVRawState),
.bdrv_needs_filename = true,
.bdrv_probe_device = floppy_probe_device,
.bdrv_parse_filename = floppy_parse_filename,
.bdrv_file_open = floppy_open,
.bdrv_close = raw_close,
.bdrv_reopen_prepare = raw_reopen_prepare,
.bdrv_reopen_commit = raw_reopen_commit,
.bdrv_reopen_abort = raw_reopen_abort,
.bdrv_create = hdev_create,
.create_opts = &raw_create_opts,
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
.bdrv_io_unplug = raw_aio_unplug,
.bdrv_flush_io_queue = raw_aio_flush_io_queue,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
.has_variable_length = true,
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
.bdrv_detach_aio_context = raw_detach_aio_context,
.bdrv_attach_aio_context = raw_attach_aio_context,
/* removable device support */
.bdrv_is_inserted = floppy_is_inserted,
.bdrv_media_changed = floppy_media_changed,
.bdrv_eject = floppy_eject,
};
#endif
#if defined(__linux__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
static void cdrom_parse_filename(const char *filename, QDict *options,
Error **errp)
@@ -2609,15 +2398,13 @@ out:
return prio;
}
static int cdrom_is_inserted(BlockDriverState *bs)
static bool cdrom_is_inserted(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
int ret;
ret = ioctl(s->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
if (ret == CDS_DISC_OK)
return 1;
return 0;
return ret == CDS_DISC_OK;
}
static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
@@ -2743,7 +2530,7 @@ static int cdrom_reopen(BlockDriverState *bs)
return 0;
}
static int cdrom_is_inserted(BlockDriverState *bs)
static bool cdrom_is_inserted(BlockDriverState *bs)
{
return raw_getlength(bs) > 0;
}
@@ -2831,7 +2618,6 @@ static void bdrv_file_init(void)
bdrv_register(&bdrv_file);
bdrv_register(&bdrv_host_device);
#ifdef __linux__
bdrv_register(&bdrv_host_floppy);
bdrv_register(&bdrv_host_cdrom);
#endif
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)

View File

@@ -154,11 +154,6 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset)
return bdrv_truncate(bs->file->bs, offset);
}
static int raw_is_inserted(BlockDriverState *bs)
{
return bdrv_is_inserted(bs->file->bs);
}
static int raw_media_changed(BlockDriverState *bs)
{
return bdrv_media_changed(bs->file->bs);
@@ -264,7 +259,6 @@ BlockDriver bdrv_raw = {
.bdrv_refresh_limits = &raw_refresh_limits,
.bdrv_probe_blocksizes = &raw_probe_blocksizes,
.bdrv_probe_geometry = &raw_probe_geometry,
.bdrv_is_inserted = &raw_is_inserted,
.bdrv_media_changed = &raw_media_changed,
.bdrv_eject = &raw_eject,
.bdrv_lock_medium = &raw_lock_medium,

View File

@@ -651,14 +651,16 @@ static coroutine_fn void do_co_req(void *opaque)
unsigned int *rlen = srco->rlen;
co = qemu_coroutine_self();
aio_set_fd_handler(srco->aio_context, sockfd, NULL, restart_co_req, co);
aio_set_fd_handler(srco->aio_context, sockfd, false,
NULL, restart_co_req, co);
ret = send_co_req(sockfd, hdr, data, wlen);
if (ret < 0) {
goto out;
}
aio_set_fd_handler(srco->aio_context, sockfd, restart_co_req, NULL, co);
aio_set_fd_handler(srco->aio_context, sockfd, false,
restart_co_req, NULL, co);
ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
if (ret != sizeof(*hdr)) {
@@ -683,7 +685,8 @@ static coroutine_fn void do_co_req(void *opaque)
out:
/* there is at most one request for this sockfd, so it is safe to
* set each handler to NULL. */
aio_set_fd_handler(srco->aio_context, sockfd, NULL, NULL, NULL);
aio_set_fd_handler(srco->aio_context, sockfd, false,
NULL, NULL, NULL);
srco->ret = ret;
srco->finished = true;
@@ -735,7 +738,8 @@ static coroutine_fn void reconnect_to_sdog(void *opaque)
BDRVSheepdogState *s = opaque;
AIOReq *aio_req, *next;
aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL);
aio_set_fd_handler(s->aio_context, s->fd, false, NULL,
NULL, NULL);
close(s->fd);
s->fd = -1;
@@ -938,7 +942,8 @@ static int get_sheep_fd(BDRVSheepdogState *s, Error **errp)
return fd;
}
aio_set_fd_handler(s->aio_context, fd, co_read_response, NULL, s);
aio_set_fd_handler(s->aio_context, fd, false,
co_read_response, NULL, s);
return fd;
}
@@ -1199,7 +1204,7 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
qemu_co_mutex_lock(&s->lock);
s->co_send = qemu_coroutine_self();
aio_set_fd_handler(s->aio_context, s->fd,
aio_set_fd_handler(s->aio_context, s->fd, false,
co_read_response, co_write_request, s);
socket_set_cork(s->fd, 1);
@@ -1218,7 +1223,8 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
}
out:
socket_set_cork(s->fd, 0);
aio_set_fd_handler(s->aio_context, s->fd, co_read_response, NULL, s);
aio_set_fd_handler(s->aio_context, s->fd, false,
co_read_response, NULL, s);
s->co_send = NULL;
qemu_co_mutex_unlock(&s->lock);
}
@@ -1368,7 +1374,8 @@ static void sd_detach_aio_context(BlockDriverState *bs)
{
BDRVSheepdogState *s = bs->opaque;
aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL);
aio_set_fd_handler(s->aio_context, s->fd, false, NULL,
NULL, NULL);
}
static void sd_attach_aio_context(BlockDriverState *bs,
@@ -1377,7 +1384,8 @@ static void sd_attach_aio_context(BlockDriverState *bs,
BDRVSheepdogState *s = bs->opaque;
s->aio_context = new_context;
aio_set_fd_handler(new_context, s->fd, co_read_response, NULL, s);
aio_set_fd_handler(new_context, s->fd, false,
co_read_response, NULL, s);
}
/* TODO Convert to fine grained options */
@@ -1490,7 +1498,8 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
g_free(buf);
return 0;
out:
aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, NULL, NULL, NULL);
aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd,
false, NULL, NULL, NULL);
if (s->fd >= 0) {
closesocket(s->fd);
}
@@ -1528,7 +1537,8 @@ static void sd_reopen_commit(BDRVReopenState *state)
BDRVSheepdogState *s = state->bs->opaque;
if (s->fd) {
aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL);
aio_set_fd_handler(s->aio_context, s->fd, false,
NULL, NULL, NULL);
closesocket(s->fd);
}
@@ -1551,7 +1561,8 @@ static void sd_reopen_abort(BDRVReopenState *state)
}
if (re_s->fd) {
aio_set_fd_handler(s->aio_context, re_s->fd, NULL, NULL, NULL);
aio_set_fd_handler(s->aio_context, re_s->fd, false,
NULL, NULL, NULL);
closesocket(re_s->fd);
}
@@ -1935,7 +1946,8 @@ static void sd_close(BlockDriverState *bs)
error_report("%s, %s", sd_strerror(rsp->result), s->name);
}
aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, NULL, NULL, NULL);
aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd,
false, NULL, NULL, NULL);
closesocket(s->fd);
g_free(s->host_spec);
}

View File

@@ -800,14 +800,15 @@ static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)
rd_handler, wr_handler);
aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
rd_handler, wr_handler, co);
false, rd_handler, wr_handler, co);
}
static coroutine_fn void clear_fd_handler(BDRVSSHState *s,
BlockDriverState *bs)
{
DPRINTF("s->sock=%d", s->sock);
aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, NULL, NULL, NULL);
aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
false, NULL, NULL, NULL);
}
/* A non-blocking call returned EAGAIN, so yield, ensuring the

View File

@@ -16,6 +16,7 @@
#include "block/blockjob.h"
#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
#include "sysemu/block-backend.h"
enum {
/*
@@ -222,7 +223,7 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base,
if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
!bdrv_iostatus_is_enabled(bs)) {
(!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
error_setg(errp, QERR_INVALID_PARAMETER, "on-error");
return;
}

View File

@@ -33,8 +33,7 @@
* its own locking.
*
* This locking is however handled internally in this file, so it's
* mostly transparent to outside users (but see the documentation in
* throttle_groups_lock()).
* transparent to outside users.
*
* The whole ThrottleGroup structure is private and invisible to
* outside users, that only use it through its ThrottleState.
@@ -76,9 +75,9 @@ static QTAILQ_HEAD(, ThrottleGroup) throttle_groups =
* created.
*
* @name: the name of the ThrottleGroup
* @ret: the ThrottleGroup
* @ret: the ThrottleState member of the ThrottleGroup
*/
static ThrottleGroup *throttle_group_incref(const char *name)
ThrottleState *throttle_group_incref(const char *name)
{
ThrottleGroup *tg = NULL;
ThrottleGroup *iter;
@@ -108,7 +107,7 @@ static ThrottleGroup *throttle_group_incref(const char *name)
qemu_mutex_unlock(&throttle_groups_lock);
return tg;
return &tg->ts;
}
/* Decrease the reference count of a ThrottleGroup.
@@ -116,10 +115,12 @@ static ThrottleGroup *throttle_group_incref(const char *name)
* When the reference count reaches zero the ThrottleGroup is
* destroyed.
*
* @tg: The ThrottleGroup to unref
* @ts: The ThrottleGroup to unref, given by its ThrottleState member
*/
static void throttle_group_unref(ThrottleGroup *tg)
void throttle_group_unref(ThrottleState *ts)
{
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
qemu_mutex_lock(&throttle_groups_lock);
if (--tg->refcount == 0) {
QTAILQ_REMOVE(&throttle_groups, tg, list);
@@ -401,7 +402,8 @@ static void write_timer_cb(void *opaque)
void throttle_group_register_bs(BlockDriverState *bs, const char *groupname)
{
int i;
ThrottleGroup *tg = throttle_group_incref(groupname);
ThrottleState *ts = throttle_group_incref(groupname);
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
int clock_type = QEMU_CLOCK_REALTIME;
if (qtest_enabled()) {
@@ -409,7 +411,7 @@ void throttle_group_register_bs(BlockDriverState *bs, const char *groupname)
clock_type = QEMU_CLOCK_VIRTUAL;
}
bs->throttle_state = &tg->ts;
bs->throttle_state = ts;
qemu_mutex_lock(&tg->lock);
/* If the ThrottleGroup is new set this BlockDriverState as the token */
@@ -461,38 +463,10 @@ void throttle_group_unregister_bs(BlockDriverState *bs)
throttle_timers_destroy(&bs->throttle_timers);
qemu_mutex_unlock(&tg->lock);
throttle_group_unref(tg);
throttle_group_unref(&tg->ts);
bs->throttle_state = NULL;
}
/* Acquire the lock of this throttling group.
*
* You won't normally need to use this. None of the functions from the
* ThrottleGroup API require you to acquire the lock since all of them
* deal with it internally.
*
* This should only be used in exceptional cases when you want to
* access the protected fields of a BlockDriverState directly
* (e.g. bdrv_swap()).
*
* @bs: a BlockDriverState that is member of the group
*/
void throttle_group_lock(BlockDriverState *bs)
{
ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
qemu_mutex_lock(&tg->lock);
}
/* Release the lock of this throttling group.
*
* See the comments in throttle_group_lock().
*/
void throttle_group_unlock(BlockDriverState *bs)
{
ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
qemu_mutex_unlock(&tg->lock);
}
static void throttle_groups_init(void)
{
qemu_mutex_init(&throttle_groups_lock);
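
(With the signatures above, code outside throttle-groups.c, such as the block-backend root-state handling earlier in this diff, can hold a group reference purely through its ThrottleState. A minimal sketch, with an invented group name:)

ThrottleState *ts = throttle_group_incref("somegroup");
/* ... remember ts, e.g. in a BlockBackendRootState ... */
throttle_group_unref(ts);
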

View File

@@ -2161,19 +2161,19 @@ static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs)
ImageInfoList **next;
*spec_info = (ImageInfoSpecific){
.kind = IMAGE_INFO_SPECIFIC_KIND_VMDK,
.type = IMAGE_INFO_SPECIFIC_KIND_VMDK,
{
.vmdk = g_new0(ImageInfoSpecificVmdk, 1),
},
};
*spec_info->vmdk = (ImageInfoSpecificVmdk) {
*spec_info->u.vmdk = (ImageInfoSpecificVmdk) {
.create_type = g_strdup(s->create_type),
.cid = s->cid,
.parent_cid = s->parent_cid,
};
next = &spec_info->vmdk->extents;
next = &spec_info->u.vmdk->extents;
for (i = 0; i < s->num_extents; i++) {
*next = g_new0(ImageInfoList, 1);
(*next)->value = vmdk_get_extent_info(&s->extents[i]);
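
The hunk reflects the QAPI change in which the discriminator is named type and the per-branch payload moves under a u union; a tiny standalone sketch of that layout (hypothetical names, not the generated QAPI types):

#include <stdio.h>
#include <stdlib.h>

typedef enum { KIND_VMDK } InfoKind;

typedef struct VmdkInfo { int cid; } VmdkInfo;

typedef struct Info {
    InfoKind type;               /* discriminator, formerly ".kind" */
    union {
        VmdkInfo *vmdk;          /* branch data now accessed via ".u" */
    } u;
} Info;

int main(void)
{
    Info info = { .type = KIND_VMDK, .u.vmdk = calloc(1, sizeof(VmdkInfo)) };

    info.u.vmdk->cid = 42;
    printf("type=%d cid=%d\n", info.type, info.u.vmdk->cid);
    free(info.u.vmdk);
    return 0;
}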


@@ -174,7 +174,7 @@ int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile)
void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
AioContext *old_context)
{
aio_set_event_notifier(old_context, &aio->e, NULL);
aio_set_event_notifier(old_context, &aio->e, false, NULL);
aio->is_aio_context_attached = false;
}
@@ -182,7 +182,8 @@ void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
AioContext *new_context)
{
aio->is_aio_context_attached = true;
aio_set_event_notifier(new_context, &aio->e, win32_aio_completion_cb);
aio_set_event_notifier(new_context, &aio->e, false,
win32_aio_completion_cb);
}
QEMUWin32AIOState *win32_aio_init(void)

File diff suppressed because it is too large


@@ -29,6 +29,7 @@
#include "block/block.h"
#include "block/blockjob.h"
#include "block/block_int.h"
#include "sysemu/block-backend.h"
#include "qapi/qmp/qerror.h"
#include "qapi/qmp/qjson.h"
#include "qemu/coroutine.h"
@@ -354,8 +355,8 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
job->user_paused = true;
block_job_pause(job);
block_job_iostatus_set_err(job, error);
if (bs != job->bs) {
bdrv_iostatus_set_err(bs, error);
if (bs->blk && bs != job->bs) {
blk_iostatus_set_err(bs->blk, error);
}
}
return action;

configure

@@ -331,6 +331,8 @@ gtkabi=""
gtk_gl="no"
gnutls=""
gnutls_hash=""
nettle=""
gcrypt=""
vte=""
virglrenderer=""
tpm="yes"
@@ -417,9 +419,6 @@ if test "$debug_info" = "yes"; then
LDFLAGS="-g $LDFLAGS"
fi
test_cflags=""
test_libs=""
# make source path absolute
source_path=`cd "$source_path"; pwd`
@@ -1114,6 +1113,14 @@ for opt do
;;
--enable-gnutls) gnutls="yes"
;;
--disable-nettle) nettle="no"
;;
--enable-nettle) nettle="yes"
;;
--disable-gcrypt) gcrypt="no"
;;
--enable-gcrypt) gcrypt="yes"
;;
--enable-rdma) rdma="yes"
;;
--disable-rdma) rdma="no"
@@ -1324,6 +1331,8 @@ disabled with --disable-FEATURE, default is enabled if available:
sparse sparse checker
gnutls GNUTLS cryptography support
nettle nettle cryptography support
gcrypt libgcrypt cryptography support
sdl SDL UI
--with-sdlabi select preferred SDL ABI 1.2 or 2.0
gtk gtk UI
@@ -2254,20 +2263,76 @@ else
gnutls_hash="no"
fi
if test "$gnutls_gcrypt" != "no"; then
if has "libgcrypt-config"; then
# If user didn't give a --disable/enable-gcrypt flag,
# then mark as disabled if user requested nettle
# explicitly, or if gnutls links to nettle
if test -z "$gcrypt"
then
if test "$nettle" = "yes" || test "$gnutls_nettle" = "yes"
then
gcrypt="no"
fi
fi
# If user didn't give a --disable/enable-nettle flag,
# then mark as disabled if user requested gcrypt
# explicitly, or if gnutls links to gcrypt
if test -z "$nettle"
then
if test "$gcrypt" = "yes" || test "$gnutls_gcrypt" = "yes"
then
nettle="no"
fi
fi
has_libgcrypt_config() {
if ! has "libgcrypt-config"
then
return 1
fi
if test -n "$cross_prefix"
then
host=`libgcrypt-config --host`
if test "$host-" != $cross_prefix
then
return 1
fi
fi
return 0
}
if test "$gcrypt" != "no"; then
if has_libgcrypt_config; then
gcrypt_cflags=`libgcrypt-config --cflags`
gcrypt_libs=`libgcrypt-config --libs`
# Debian has removed -lgpg-error from libgcrypt-config
# as it "spreads unnecessary dependencies" which in
# turn breaks static builds...
if test "$static" = "yes"
then
gcrypt_libs="$gcrypt_libs -lgpg-error"
fi
libs_softmmu="$gcrypt_libs $libs_softmmu"
libs_tools="$gcrypt_libs $libs_tools"
QEMU_CFLAGS="$QEMU_CFLAGS $gcrypt_cflags"
gcrypt="yes"
if test -z "$nettle"; then
nettle="no"
fi
else
feature_not_found "gcrypt" "Install gcrypt devel"
if test "$gcrypt" = "yes"; then
feature_not_found "gcrypt" "Install gcrypt devel"
else
gcrypt="no"
fi
fi
fi
if test "$gnutls_nettle" != "no"; then
if test "$nettle" != "no"; then
if $pkg_config --exists "nettle"; then
nettle_cflags=`$pkg_config --cflags nettle`
nettle_libs=`$pkg_config --libs nettle`
@@ -2275,20 +2340,30 @@ if test "$gnutls_nettle" != "no"; then
libs_softmmu="$nettle_libs $libs_softmmu"
libs_tools="$nettle_libs $libs_tools"
QEMU_CFLAGS="$QEMU_CFLAGS $nettle_cflags"
nettle="yes"
else
feature_not_found "nettle" "Install nettle devel"
if test "$nettle" = "yes"; then
feature_not_found "nettle" "Install nettle devel"
else
nettle="no"
fi
fi
fi
if test "$gcrypt" = "yes" && test "$nettle" = "yes"
then
error_exit "Only one of gcrypt & nettle can be enabled"
fi
##########################################
# libtasn1 - only for the TLS creds/session test suite
tasn1=yes
tasn1_cflags=""
tasn1_libs=""
if $pkg_config --exists "libtasn1"; then
tasn1_cflags=`$pkg_config --cflags libtasn1`
tasn1_libs=`$pkg_config --libs libtasn1`
test_cflags="$test_cflags $tasn1_cflags"
test_libs="$test_libs $tasn1_libs"
else
tasn1=no
fi
@@ -3211,25 +3286,11 @@ fi
libs_softmmu="$libs_softmmu $fdt_libs"
##########################################
# opengl probe (for sdl2, milkymist-tmu2)
# GLX probe, used by milkymist-tmu2
# this is temporary, code will be switched to egl mid-term.
cat > $TMPC << EOF
#include <X11/Xlib.h>
#include <GL/gl.h>
#include <GL/glx.h>
int main(void) { glBegin(0); glXQueryVersion(0,0,0); return 0; }
EOF
if compile_prog "" "-lGL -lX11" ; then
have_glx=yes
else
have_glx=no
fi
# opengl probe (for sdl2, gtk, milkymist-tmu2)
if test "$opengl" != "no" ; then
opengl_pkgs="gl glesv2 epoxy egl"
if $pkg_config $opengl_pkgs x11 && test "$have_glx" = "yes"; then
opengl_pkgs="epoxy"
if $pkg_config $opengl_pkgs x11; then
opengl_cflags="$($pkg_config --cflags $opengl_pkgs) $x11_cflags"
opengl_libs="$($pkg_config --libs $opengl_pkgs) $x11_libs"
opengl=yes
@@ -4433,6 +4494,7 @@ if test "$want_tools" = "yes" ; then
tools="qemu-img\$(EXESUF) qemu-io\$(EXESUF) $tools"
if [ "$linux" = "yes" -o "$bsd" = "yes" -o "$solaris" = "yes" ] ; then
tools="qemu-nbd\$(EXESUF) $tools"
tools="ivshmem-client\$(EXESUF) ivshmem-server\$(EXESUF) $tools"
fi
fi
if test "$softmmu" = yes ; then
@@ -4621,8 +4683,8 @@ echo "GTK support $gtk"
echo "GTK GL support $gtk_gl"
echo "GNUTLS support $gnutls"
echo "GNUTLS hash $gnutls_hash"
echo "GNUTLS gcrypt $gnutls_gcrypt"
echo "GNUTLS nettle $gnutls_nettle ${gnutls_nettle+($nettle_version)}"
echo "libgcrypt $gcrypt"
echo "nettle $nettle ${nettle+($nettle_version)}"
echo "libtasn1 $tasn1"
echo "VTE support $vte"
echo "curses support $curses"
@@ -4991,11 +5053,11 @@ fi
if test "$gnutls_hash" = "yes" ; then
echo "CONFIG_GNUTLS_HASH=y" >> $config_host_mak
fi
if test "$gnutls_gcrypt" = "yes" ; then
echo "CONFIG_GNUTLS_GCRYPT=y" >> $config_host_mak
if test "$gcrypt" = "yes" ; then
echo "CONFIG_GCRYPT=y" >> $config_host_mak
fi
if test "$gnutls_nettle" = "yes" ; then
echo "CONFIG_GNUTLS_NETTLE=y" >> $config_host_mak
if test "$nettle" = "yes" ; then
echo "CONFIG_NETTLE=y" >> $config_host_mak
echo "CONFIG_NETTLE_VERSION_MAJOR=${nettle_version%%.*}" >> $config_host_mak
fi
if test "$tasn1" = "yes" ; then
@@ -5330,8 +5392,8 @@ echo "EXESUF=$EXESUF" >> $config_host_mak
echo "DSOSUF=$DSOSUF" >> $config_host_mak
echo "LDFLAGS_SHARED=$LDFLAGS_SHARED" >> $config_host_mak
echo "LIBS_QGA+=$libs_qga" >> $config_host_mak
echo "TEST_LIBS=$test_libs" >> $config_host_mak
echo "TEST_CFLAGS=$test_cflags" >> $config_host_mak
echo "TASN1_LIBS=$tasn1_libs" >> $config_host_mak
echo "TASN1_CFLAGS=$tasn1_cflags" >> $config_host_mak
echo "POD2MAN=$POD2MAN" >> $config_host_mak
echo "TRANSLATE_OPT_CFLAGS=$TRANSLATE_OPT_CFLAGS" >> $config_host_mak
if test "$gcov" = "yes" ; then


@@ -0,0 +1 @@
ivshmem-client-obj-y = ivshmem-client.o main.o


@@ -0,0 +1,446 @@
/*
* Copyright 6WIND S.A., 2014
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* (at your option) any later version. See the COPYING file in the
* top-level directory.
*/
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include "qemu-common.h"
#include "qemu/queue.h"
#include "ivshmem-client.h"
/* log a message on stdout if verbose=1 */
#define IVSHMEM_CLIENT_DEBUG(client, fmt, ...) do { \
if ((client)->verbose) { \
printf(fmt, ## __VA_ARGS__); \
} \
} while (0)
/* read message from the unix socket */
static int
ivshmem_client_read_one_msg(IvshmemClient *client, int64_t *index, int *fd)
{
int ret;
struct msghdr msg;
struct iovec iov[1];
union {
struct cmsghdr cmsg;
char control[CMSG_SPACE(sizeof(int))];
} msg_control;
struct cmsghdr *cmsg;
iov[0].iov_base = index;
iov[0].iov_len = sizeof(*index);
memset(&msg, 0, sizeof(msg));
msg.msg_iov = iov;
msg.msg_iovlen = 1;
msg.msg_control = &msg_control;
msg.msg_controllen = sizeof(msg_control);
ret = recvmsg(client->sock_fd, &msg, 0);
if (ret < sizeof(*index)) {
IVSHMEM_CLIENT_DEBUG(client, "cannot read message: %s\n",
strerror(errno));
return -1;
}
if (ret == 0) {
IVSHMEM_CLIENT_DEBUG(client, "lost connection to server\n");
return -1;
}
*index = GINT64_FROM_LE(*index);
*fd = -1;
for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)) ||
cmsg->cmsg_level != SOL_SOCKET ||
cmsg->cmsg_type != SCM_RIGHTS) {
continue;
}
memcpy(fd, CMSG_DATA(cmsg), sizeof(*fd));
}
return 0;
}
/* free a peer when the server advertises a disconnection or when the
* client is freed */
static void
ivshmem_client_free_peer(IvshmemClient *client, IvshmemClientPeer *peer)
{
unsigned vector;
QTAILQ_REMOVE(&client->peer_list, peer, next);
for (vector = 0; vector < peer->vectors_count; vector++) {
close(peer->vectors[vector]);
}
g_free(peer);
}
/* handle message coming from server (new peer, new vectors) */
static int
ivshmem_client_handle_server_msg(IvshmemClient *client)
{
IvshmemClientPeer *peer;
int64_t peer_id;
int ret, fd;
ret = ivshmem_client_read_one_msg(client, &peer_id, &fd);
if (ret < 0) {
return -1;
}
/* can return a peer or the local client */
peer = ivshmem_client_search_peer(client, peer_id);
/* delete peer */
if (fd == -1) {
if (peer == NULL || peer == &client->local) {
IVSHMEM_CLIENT_DEBUG(client, "receive delete for invalid "
"peer %" PRId64 "\n", peer_id);
return -1;
}
IVSHMEM_CLIENT_DEBUG(client, "delete peer id = %" PRId64 "\n", peer_id);
ivshmem_client_free_peer(client, peer);
return 0;
}
/* new peer */
if (peer == NULL) {
peer = g_malloc0(sizeof(*peer));
peer->id = peer_id;
peer->vectors_count = 0;
QTAILQ_INSERT_TAIL(&client->peer_list, peer, next);
IVSHMEM_CLIENT_DEBUG(client, "new peer id = %" PRId64 "\n", peer_id);
}
/* new vector */
IVSHMEM_CLIENT_DEBUG(client, " new vector %d (fd=%d) for peer id %"
PRId64 "\n", peer->vectors_count, fd, peer->id);
if (peer->vectors_count >= G_N_ELEMENTS(peer->vectors)) {
IVSHMEM_CLIENT_DEBUG(client, "Too many vectors received, failing");
return -1;
}
peer->vectors[peer->vectors_count] = fd;
peer->vectors_count++;
return 0;
}
/* init a new ivshmem client */
int
ivshmem_client_init(IvshmemClient *client, const char *unix_sock_path,
IvshmemClientNotifCb notif_cb, void *notif_arg,
bool verbose)
{
int ret;
unsigned i;
memset(client, 0, sizeof(*client));
ret = snprintf(client->unix_sock_path, sizeof(client->unix_sock_path),
"%s", unix_sock_path);
if (ret < 0 || ret >= sizeof(client->unix_sock_path)) {
IVSHMEM_CLIENT_DEBUG(client, "could not copy unix socket path\n");
return -1;
}
for (i = 0; i < IVSHMEM_CLIENT_MAX_VECTORS; i++) {
client->local.vectors[i] = -1;
}
QTAILQ_INIT(&client->peer_list);
client->local.id = -1;
client->notif_cb = notif_cb;
client->notif_arg = notif_arg;
client->verbose = verbose;
client->shm_fd = -1;
client->sock_fd = -1;
return 0;
}
/* create and connect to the unix socket */
int
ivshmem_client_connect(IvshmemClient *client)
{
struct sockaddr_un sun;
int fd, ret;
int64_t tmp;
IVSHMEM_CLIENT_DEBUG(client, "connect to client %s\n",
client->unix_sock_path);
client->sock_fd = socket(AF_UNIX, SOCK_STREAM, 0);
if (client->sock_fd < 0) {
IVSHMEM_CLIENT_DEBUG(client, "cannot create socket: %s\n",
strerror(errno));
return -1;
}
sun.sun_family = AF_UNIX;
ret = snprintf(sun.sun_path, sizeof(sun.sun_path), "%s",
client->unix_sock_path);
if (ret < 0 || ret >= sizeof(sun.sun_path)) {
IVSHMEM_CLIENT_DEBUG(client, "could not copy unix socket path\n");
goto err_close;
}
if (connect(client->sock_fd, (struct sockaddr *)&sun, sizeof(sun)) < 0) {
IVSHMEM_CLIENT_DEBUG(client, "cannot connect to %s: %s\n", sun.sun_path,
strerror(errno));
goto err_close;
}
/* first, we expect a protocol version */
if (ivshmem_client_read_one_msg(client, &tmp, &fd) < 0 ||
(tmp != IVSHMEM_PROTOCOL_VERSION) || fd != -1) {
IVSHMEM_CLIENT_DEBUG(client, "cannot read from server\n");
goto err_close;
}
/* then, we expect our index + a fd == -1 */
if (ivshmem_client_read_one_msg(client, &client->local.id, &fd) < 0 ||
client->local.id < 0 || fd != -1) {
IVSHMEM_CLIENT_DEBUG(client, "cannot read from server (2)\n");
goto err_close;
}
IVSHMEM_CLIENT_DEBUG(client, "our_id=%" PRId64 "\n", client->local.id);
/* now, we expect shared mem fd + a -1 index, note that shm fd
* is not used */
if (ivshmem_client_read_one_msg(client, &tmp, &fd) < 0 ||
tmp != -1 || fd < 0) {
if (fd >= 0) {
close(fd);
}
IVSHMEM_CLIENT_DEBUG(client, "cannot read from server (3)\n");
goto err_close;
}
client->shm_fd = fd;
IVSHMEM_CLIENT_DEBUG(client, "shm_fd=%d\n", fd);
return 0;
err_close:
close(client->sock_fd);
client->sock_fd = -1;
return -1;
}
/* close connection to the server, and free all peer structures */
void
ivshmem_client_close(IvshmemClient *client)
{
IvshmemClientPeer *peer;
unsigned i;
IVSHMEM_CLIENT_DEBUG(client, "close client\n");
while ((peer = QTAILQ_FIRST(&client->peer_list)) != NULL) {
ivshmem_client_free_peer(client, peer);
}
close(client->shm_fd);
client->shm_fd = -1;
close(client->sock_fd);
client->sock_fd = -1;
client->local.id = -1;
for (i = 0; i < IVSHMEM_CLIENT_MAX_VECTORS; i++) {
close(client->local.vectors[i]);
client->local.vectors[i] = -1;
}
client->local.vectors_count = 0;
}
/* get the fd_set according to the unix socket and peer list */
void
ivshmem_client_get_fds(const IvshmemClient *client, fd_set *fds, int *maxfd)
{
int fd;
unsigned vector;
FD_SET(client->sock_fd, fds);
if (client->sock_fd >= *maxfd) {
*maxfd = client->sock_fd + 1;
}
for (vector = 0; vector < client->local.vectors_count; vector++) {
fd = client->local.vectors[vector];
FD_SET(fd, fds);
if (fd >= *maxfd) {
*maxfd = fd + 1;
}
}
}
/* handle events from eventfd: just print a message on notification */
static int
ivshmem_client_handle_event(IvshmemClient *client, const fd_set *cur, int maxfd)
{
IvshmemClientPeer *peer;
uint64_t kick;
unsigned i;
int ret;
peer = &client->local;
for (i = 0; i < peer->vectors_count; i++) {
if (peer->vectors[i] >= maxfd || !FD_ISSET(peer->vectors[i], cur)) {
continue;
}
ret = read(peer->vectors[i], &kick, sizeof(kick));
if (ret < 0) {
return ret;
}
if (ret != sizeof(kick)) {
IVSHMEM_CLIENT_DEBUG(client, "invalid read size = %d\n", ret);
errno = EINVAL;
return -1;
}
IVSHMEM_CLIENT_DEBUG(client, "received event on fd %d vector %d: %"
PRIu64 "\n", peer->vectors[i], i, kick);
if (client->notif_cb != NULL) {
client->notif_cb(client, peer, i, client->notif_arg);
}
}
return 0;
}
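
The vectors handled here are plain eventfds, so the kick/read dance can be exercised in isolation; a minimal Linux-only demo (an assumed standalone example, not part of ivshmem-client):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/eventfd.h>

int main(void)
{
    uint64_t kick = 1, value = 0;
    int fd = eventfd(0, 0);            /* stands in for one interrupt vector */

    if (fd < 0) {
        perror("eventfd");
        return 1;
    }
    if (write(fd, &kick, sizeof(kick)) != sizeof(kick)) {   /* "notify" side */
        perror("write");
        return 1;
    }
    if (read(fd, &value, sizeof(value)) != sizeof(value)) { /* "handle" side */
        perror("read");
        return 1;
    }
    printf("received kick counter %" PRIu64 "\n", value);
    close(fd);
    return 0;
}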
/* read and handle new messages on the given fd_set */
int
ivshmem_client_handle_fds(IvshmemClient *client, fd_set *fds, int maxfd)
{
if (client->sock_fd < maxfd && FD_ISSET(client->sock_fd, fds) &&
ivshmem_client_handle_server_msg(client) < 0 && errno != EINTR) {
IVSHMEM_CLIENT_DEBUG(client, "ivshmem_client_handle_server_msg() "
"failed\n");
return -1;
} else if (ivshmem_client_handle_event(client, fds, maxfd) < 0 &&
errno != EINTR) {
IVSHMEM_CLIENT_DEBUG(client, "ivshmem_client_handle_event() failed\n");
return -1;
}
return 0;
}
/* send a notification on a vector of a peer */
int
ivshmem_client_notify(const IvshmemClient *client,
const IvshmemClientPeer *peer, unsigned vector)
{
uint64_t kick;
int fd;
if (vector >= peer->vectors_count) {
IVSHMEM_CLIENT_DEBUG(client, "invalid vector %u on peer %" PRId64 "\n",
vector, peer->id);
return -1;
}
fd = peer->vectors[vector];
IVSHMEM_CLIENT_DEBUG(client, "notify peer %" PRId64
" on vector %d, fd %d\n", peer->id, vector, fd);
kick = 1;
if (write(fd, &kick, sizeof(kick)) != sizeof(kick)) {
fprintf(stderr, "could not write to %d: %s\n", peer->vectors[vector],
strerror(errno));
return -1;
}
return 0;
}
/* send a notification to all vectors of a peer */
int
ivshmem_client_notify_all_vects(const IvshmemClient *client,
const IvshmemClientPeer *peer)
{
unsigned vector;
int ret = 0;
for (vector = 0; vector < peer->vectors_count; vector++) {
if (ivshmem_client_notify(client, peer, vector) < 0) {
ret = -1;
}
}
return ret;
}
/* send a notification to all peers */
int
ivshmem_client_notify_broadcast(const IvshmemClient *client)
{
IvshmemClientPeer *peer;
int ret = 0;
QTAILQ_FOREACH(peer, &client->peer_list, next) {
if (ivshmem_client_notify_all_vects(client, peer) < 0) {
ret = -1;
}
}
return ret;
}
/* lookup peer from its id */
IvshmemClientPeer *
ivshmem_client_search_peer(IvshmemClient *client, int64_t peer_id)
{
IvshmemClientPeer *peer;
if (peer_id == client->local.id) {
return &client->local;
}
QTAILQ_FOREACH(peer, &client->peer_list, next) {
if (peer->id == peer_id) {
return peer;
}
}
return NULL;
}
/* dump our info and the list of peers and their vectors on stdout */
void
ivshmem_client_dump(const IvshmemClient *client)
{
const IvshmemClientPeer *peer;
unsigned vector;
/* dump local info */
peer = &client->local;
printf("our_id = %" PRId64 "\n", peer->id);
for (vector = 0; vector < peer->vectors_count; vector++) {
printf(" vector %d is enabled (fd=%d)\n", vector,
peer->vectors[vector]);
}
/* dump peers */
QTAILQ_FOREACH(peer, &client->peer_list, next) {
printf("peer_id = %" PRId64 "\n", peer->id);
for (vector = 0; vector < peer->vectors_count; vector++) {
printf(" vector %d is enabled (fd=%d)\n", vector,
peer->vectors[vector]);
}
}
}


@@ -0,0 +1,213 @@
/*
* Copyright 6WIND S.A., 2014
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* (at your option) any later version. See the COPYING file in the
* top-level directory.
*/
#ifndef _IVSHMEM_CLIENT_H_
#define _IVSHMEM_CLIENT_H_
/**
* This file provides helpers to implement an ivshmem client. It is used
* on the host to ask QEMU to send an interrupt to an ivshmem PCI device in a
* guest. QEMU also implements an ivshmem client similar to this one; they both
* connect to an ivshmem server.
*
* A standalone ivshmem client based on this file is provided for debug/test
* purposes.
*/
#include <limits.h>
#include <sys/select.h>
#include "qemu/queue.h"
#include "hw/misc/ivshmem.h"
/**
* Maximum number of notification vectors supported by the client
*/
#define IVSHMEM_CLIENT_MAX_VECTORS 64
/**
* Structure storing a peer
*
* Each time a client connects to an ivshmem server, it is advertised to
* all connected clients through the unix socket. When our ivshmem
* client receives a notification, it creates an IvshmemClientPeer
* structure to store the information about this peer.
*
* This structure is also used to store the information of our own
* client in (IvshmemClient)->local.
*/
typedef struct IvshmemClientPeer {
QTAILQ_ENTRY(IvshmemClientPeer) next; /**< next in list*/
int64_t id; /**< the id of the peer */
int vectors[IVSHMEM_CLIENT_MAX_VECTORS]; /**< one fd per vector */
unsigned vectors_count; /**< number of vectors */
} IvshmemClientPeer;
QTAILQ_HEAD(IvshmemClientPeerList, IvshmemClientPeer);
typedef struct IvshmemClientPeerList IvshmemClientPeerList;
typedef struct IvshmemClient IvshmemClient;
/**
* Typedef of callback function used when our IvshmemClient receives a
* notification from a peer.
*/
typedef void (*IvshmemClientNotifCb)(
const IvshmemClient *client,
const IvshmemClientPeer *peer,
unsigned vect, void *arg);
/**
* Structure describing an ivshmem client
*
* This structure stores all information related to our client: the name
* of the server unix socket, the list of peers advertised by the
* server, our own client information, and a pointer to the notification
* callback function used when we receive a notification from a peer.
*/
struct IvshmemClient {
char unix_sock_path[PATH_MAX]; /**< path to unix sock */
int sock_fd; /**< unix sock filedesc */
int shm_fd; /**< shm file descriptor */
IvshmemClientPeerList peer_list; /**< list of peers */
IvshmemClientPeer local; /**< our own info */
IvshmemClientNotifCb notif_cb; /**< notification callback */
void *notif_arg; /**< notification argument */
bool verbose; /**< true to enable debug */
};
/**
* Initialize an ivshmem client
*
* @client: A pointer to an uninitialized IvshmemClient structure
* @unix_sock_path: The pointer to the unix socket file name
* @notif_cb: If not NULL, the pointer to the function to be called when
* our IvshmemClient receives a notification from a peer
* @notif_arg: Opaque pointer given as-is to the notification callback
* function
* @verbose: True to enable debug
*
* Returns: 0 on success, or a negative value on error
*/
int ivshmem_client_init(IvshmemClient *client, const char *unix_sock_path,
IvshmemClientNotifCb notif_cb, void *notif_arg,
bool verbose);
/**
* Connect to the server
*
* Connect to the server unix socket, and read the first initial
* messages sent by the server, giving the ID of the client and the file
* descriptor of the shared memory.
*
* @client: The ivshmem client
*
* Returns: 0 on success, or a negative value on error
*/
int ivshmem_client_connect(IvshmemClient *client);
/**
* Close connection to the server and free all peer structures
*
* @client: The ivshmem client
*/
void ivshmem_client_close(IvshmemClient *client);
/**
* Fill a fd_set with file descriptors to be monitored
*
* This function will fill a fd_set with all file descriptors
* that must be polled (unix server socket and peers eventfd). The
* function will not initialize the fd_set, it is up to the caller
* to do this.
*
* @client: The ivshmem client
* @fds: The fd_set to be updated
* @maxfd: Must be set to the max file descriptor + 1 in fd_set. This value is
* updated if this function adds a greater fd in fd_set.
*/
void ivshmem_client_get_fds(const IvshmemClient *client, fd_set *fds,
int *maxfd);
/**
* Read and handle new messages
*
* Given a fd_set filled by select(), handle incoming messages from
* server or peers.
*
* @client: The ivshmem client
* @fds: The fd_set containing the file descriptors to be checked. Note
* that file descriptors that are not related to our client are
* ignored.
* @maxfd: The maximum fd in fd_set, plus one.
*
* Returns: 0 on success, or a negative value on error
*/
int ivshmem_client_handle_fds(IvshmemClient *client, fd_set *fds, int maxfd);
/**
* Send a notification to a vector of a peer
*
* @client: The ivshmem client
* @peer: The peer to be notified
* @vector: The number of the vector
*
* Returns: 0 on success, or a negative value on error
*/
int ivshmem_client_notify(const IvshmemClient *client,
const IvshmemClientPeer *peer, unsigned vector);
/**
* Send a notification to all vectors of a peer
*
* @client: The ivshmem client
* @peer: The peer to be notified
*
* Returns: 0 on success, or a negative value on error (at least one
* notification failed)
*/
int ivshmem_client_notify_all_vects(const IvshmemClient *client,
const IvshmemClientPeer *peer);
/**
* Broadcast a notification to all vectors of all peers
*
* @client: The ivshmem client
*
* Returns: 0 on success, or a negative value on error (at least one
* notification failed)
*/
int ivshmem_client_notify_broadcast(const IvshmemClient *client);
/**
* Search a peer from its identifier
*
* Return the peer structure from its peer_id. If the given peer_id is
* the local id, the function returns the local peer structure.
*
* @client: The ivshmem client
* @peer_id: The identifier of the peer structure
*
* Returns: The peer structure, or NULL if not found
*/
IvshmemClientPeer *
ivshmem_client_search_peer(IvshmemClient *client, int64_t peer_id);
/**
* Dump information of this ivshmem client on stdout
*
* Dump the id and the vectors of the given ivshmem client and the list
* of its peers and their vectors on stdout.
*
* @client: The ivshmem client
*/
void ivshmem_client_dump(const IvshmemClient *client);
#endif /* _IVSHMEM_CLIENT_H_ */


@@ -0,0 +1,240 @@
/*
* Copyright 6WIND S.A., 2014
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* (at your option) any later version. See the COPYING file in the
* top-level directory.
*/
#include "qemu-common.h"
#include "ivshmem-client.h"
#define IVSHMEM_CLIENT_DEFAULT_VERBOSE 0
#define IVSHMEM_CLIENT_DEFAULT_UNIX_SOCK_PATH "/tmp/ivshmem_socket"
typedef struct IvshmemClientArgs {
bool verbose;
const char *unix_sock_path;
} IvshmemClientArgs;
/* show ivshmem_client_usage and exit with given error code */
static void
ivshmem_client_usage(const char *name, int code)
{
fprintf(stderr, "%s [opts]\n", name);
fprintf(stderr, " -h: show this help\n");
fprintf(stderr, " -v: verbose mode\n");
fprintf(stderr, " -S <unix_sock_path>: path to the unix socket\n"
" to connect to.\n"
" default=%s\n", IVSHMEM_CLIENT_DEFAULT_UNIX_SOCK_PATH);
exit(code);
}
/* parse the program arguments, exit on error */
static void
ivshmem_client_parse_args(IvshmemClientArgs *args, int argc, char *argv[])
{
int c;
while ((c = getopt(argc, argv,
"h" /* help */
"v" /* verbose */
"S:" /* unix_sock_path */
)) != -1) {
switch (c) {
case 'h': /* help */
ivshmem_client_usage(argv[0], 0);
break;
case 'v': /* verbose */
args->verbose = 1;
break;
case 'S': /* unix_sock_path */
args->unix_sock_path = optarg;
break;
default:
ivshmem_client_usage(argv[0], 1);
break;
}
}
}
/* show command line help */
static void
ivshmem_client_cmdline_help(void)
{
printf("dump: dump peers (including us)\n"
"int <peer> <vector>: notify one vector on a peer\n"
"int <peer> all: notify all vectors of a peer\n"
"int all: notify all vectors of all peers (excepting us)\n");
}
/* read stdin and handle commands */
static int
ivshmem_client_handle_stdin_command(IvshmemClient *client)
{
IvshmemClientPeer *peer;
char buf[128];
char *s, *token;
int ret;
int peer_id, vector;
memset(buf, 0, sizeof(buf));
ret = read(0, buf, sizeof(buf) - 1);
if (ret < 0) {
return -1;
}
s = buf;
while ((token = strsep(&s, "\n\r;")) != NULL) {
if (!strcmp(token, "")) {
continue;
}
if (!strcmp(token, "?")) {
ivshmem_client_cmdline_help();
}
if (!strcmp(token, "help")) {
ivshmem_client_cmdline_help();
} else if (!strcmp(token, "dump")) {
ivshmem_client_dump(client);
} else if (!strcmp(token, "int all")) {
ivshmem_client_notify_broadcast(client);
} else if (sscanf(token, "int %d %d", &peer_id, &vector) == 2) {
peer = ivshmem_client_search_peer(client, peer_id);
if (peer == NULL) {
printf("cannot find peer_id = %d\n", peer_id);
continue;
}
ivshmem_client_notify(client, peer, vector);
} else if (sscanf(token, "int %d all", &peer_id) == 1) {
peer = ivshmem_client_search_peer(client, peer_id);
if (peer == NULL) {
printf("cannot find peer_id = %d\n", peer_id);
continue;
}
ivshmem_client_notify_all_vects(client, peer);
} else {
printf("invalid command, type help\n");
}
}
printf("cmd> ");
fflush(stdout);
return 0;
}
/* listen on stdin (command line), on unix socket (notifications of new
* and dead peers), and on eventfd (IRQ request) */
static int
ivshmem_client_poll_events(IvshmemClient *client)
{
fd_set fds;
int ret, maxfd;
while (1) {
FD_ZERO(&fds);
FD_SET(0, &fds); /* add stdin in fd_set */
maxfd = 1;
ivshmem_client_get_fds(client, &fds, &maxfd);
ret = select(maxfd, &fds, NULL, NULL, NULL);
if (ret < 0) {
if (errno == EINTR) {
continue;
}
fprintf(stderr, "select error: %s\n", strerror(errno));
break;
}
if (ret == 0) {
continue;
}
if (FD_ISSET(0, &fds) &&
ivshmem_client_handle_stdin_command(client) < 0 && errno != EINTR) {
fprintf(stderr, "ivshmem_client_handle_stdin_command() failed\n");
break;
}
if (ivshmem_client_handle_fds(client, &fds, maxfd) < 0) {
fprintf(stderr, "ivshmem_client_handle_fds() failed\n");
break;
}
}
return ret;
}
/* callback when we receive a notification (just display it) */
static void
ivshmem_client_notification_cb(const IvshmemClient *client,
const IvshmemClientPeer *peer,
unsigned vect, void *arg)
{
(void)client;
(void)arg;
printf("receive notification from peer_id=%" PRId64 " vector=%u\n",
peer->id, vect);
}
int
main(int argc, char *argv[])
{
struct sigaction sa;
IvshmemClient client;
IvshmemClientArgs args = {
.verbose = IVSHMEM_CLIENT_DEFAULT_VERBOSE,
.unix_sock_path = IVSHMEM_CLIENT_DEFAULT_UNIX_SOCK_PATH,
};
/* parse arguments, will exit on error */
ivshmem_client_parse_args(&args, argc, argv);
/* Ignore SIGPIPE, see this link for more info:
* http://www.mail-archive.com/libevent-users@monkey.org/msg01606.html */
sa.sa_handler = SIG_IGN;
sa.sa_flags = 0;
if (sigemptyset(&sa.sa_mask) == -1 ||
sigaction(SIGPIPE, &sa, 0) == -1) {
perror("failed to ignore SIGPIPE; sigaction");
return 1;
}
ivshmem_client_cmdline_help();
printf("cmd> ");
fflush(stdout);
if (ivshmem_client_init(&client, args.unix_sock_path,
ivshmem_client_notification_cb, NULL,
args.verbose) < 0) {
fprintf(stderr, "cannot init client\n");
return 1;
}
while (1) {
if (ivshmem_client_connect(&client) < 0) {
fprintf(stderr, "cannot connect to server, retry in 1 second\n");
sleep(1);
continue;
}
fprintf(stdout, "listen on server socket %d\n", client.sock_fd);
if (ivshmem_client_poll_events(&client) == 0) {
continue;
}
/* disconnected from server, reset all peers */
fprintf(stdout, "disconnected from server\n");
ivshmem_client_close(&client);
}
return 0;
}


@@ -0,0 +1 @@
ivshmem-server-obj-y = ivshmem-server.o main.o


@@ -0,0 +1,491 @@
/*
* Copyright 6WIND S.A., 2014
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* (at your option) any later version. See the COPYING file in the
* top-level directory.
*/
#include "qemu-common.h"
#include "qemu/sockets.h"
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#ifdef CONFIG_LINUX
#include <sys/vfs.h>
#endif
#include "ivshmem-server.h"
/* log a message on stdout if verbose=1 */
#define IVSHMEM_SERVER_DEBUG(server, fmt, ...) do { \
if ((server)->verbose) { \
printf(fmt, ## __VA_ARGS__); \
} \
} while (0)
/** maximum size of a huge page, used by ivshmem_server_ftruncate() */
#define IVSHMEM_SERVER_MAX_HUGEPAGE_SIZE (1024 * 1024 * 1024)
/** default listen backlog (number of sockets not accepted) */
#define IVSHMEM_SERVER_LISTEN_BACKLOG 10
/* send message to a client unix socket */
static int
ivshmem_server_send_one_msg(int sock_fd, int64_t peer_id, int fd)
{
int ret;
struct msghdr msg;
struct iovec iov[1];
union {
struct cmsghdr cmsg;
char control[CMSG_SPACE(sizeof(int))];
} msg_control;
struct cmsghdr *cmsg;
peer_id = GINT64_TO_LE(peer_id);
iov[0].iov_base = &peer_id;
iov[0].iov_len = sizeof(peer_id);
memset(&msg, 0, sizeof(msg));
msg.msg_iov = iov;
msg.msg_iovlen = 1;
/* if fd is specified, add it in a cmsg */
if (fd >= 0) {
memset(&msg_control, 0, sizeof(msg_control));
msg.msg_control = &msg_control;
msg.msg_controllen = sizeof(msg_control);
cmsg = CMSG_FIRSTHDR(&msg);
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
cmsg->cmsg_len = CMSG_LEN(sizeof(int));
memcpy(CMSG_DATA(cmsg), &fd, sizeof(fd));
}
ret = sendmsg(sock_fd, &msg, 0);
if (ret <= 0) {
return -1;
}
return 0;
}
/* free a peer when the server advertises a disconnection or when the
* server is freed */
static void
ivshmem_server_free_peer(IvshmemServer *server, IvshmemServerPeer *peer)
{
unsigned vector;
IvshmemServerPeer *other_peer;
IVSHMEM_SERVER_DEBUG(server, "free peer %" PRId64 "\n", peer->id);
close(peer->sock_fd);
QTAILQ_REMOVE(&server->peer_list, peer, next);
/* advertise the deletion to other peers */
QTAILQ_FOREACH(other_peer, &server->peer_list, next) {
ivshmem_server_send_one_msg(other_peer->sock_fd, peer->id, -1);
}
for (vector = 0; vector < peer->vectors_count; vector++) {
event_notifier_cleanup(&peer->vectors[vector]);
}
g_free(peer);
}
/* send the peer id and the shm_fd just after a new client connection */
static int
ivshmem_server_send_initial_info(IvshmemServer *server, IvshmemServerPeer *peer)
{
int ret;
/* send our protocol version first */
ret = ivshmem_server_send_one_msg(peer->sock_fd, IVSHMEM_PROTOCOL_VERSION,
-1);
if (ret < 0) {
IVSHMEM_SERVER_DEBUG(server, "cannot send version: %s\n",
strerror(errno));
return -1;
}
/* send the peer id to the client */
ret = ivshmem_server_send_one_msg(peer->sock_fd, peer->id, -1);
if (ret < 0) {
IVSHMEM_SERVER_DEBUG(server, "cannot send peer id: %s\n",
strerror(errno));
return -1;
}
/* send the shm_fd */
ret = ivshmem_server_send_one_msg(peer->sock_fd, -1, server->shm_fd);
if (ret < 0) {
IVSHMEM_SERVER_DEBUG(server, "cannot send shm fd: %s\n",
strerror(errno));
return -1;
}
return 0;
}
/* handle message on listening unix socket (new client connection) */
static int
ivshmem_server_handle_new_conn(IvshmemServer *server)
{
IvshmemServerPeer *peer, *other_peer;
struct sockaddr_un unaddr;
socklen_t unaddr_len;
int newfd;
unsigned i;
/* accept the incoming connection */
unaddr_len = sizeof(unaddr);
newfd = qemu_accept(server->sock_fd,
(struct sockaddr *)&unaddr, &unaddr_len);
if (newfd < 0) {
IVSHMEM_SERVER_DEBUG(server, "cannot accept() %s\n", strerror(errno));
return -1;
}
qemu_set_nonblock(newfd);
IVSHMEM_SERVER_DEBUG(server, "accept()=%d\n", newfd);
/* allocate new structure for this peer */
peer = g_malloc0(sizeof(*peer));
peer->sock_fd = newfd;
/* get an unused peer id */
/* XXX: this could use id allocation such as Linux IDA, or simply
* a free-list */
for (i = 0; i < G_MAXUINT16; i++) {
if (ivshmem_server_search_peer(server, server->cur_id) == NULL) {
break;
}
server->cur_id++;
}
if (i == G_MAXUINT16) {
IVSHMEM_SERVER_DEBUG(server, "cannot allocate new client id\n");
goto fail;
}
peer->id = server->cur_id++;
/* create eventfd, one per vector */
peer->vectors_count = server->n_vectors;
for (i = 0; i < peer->vectors_count; i++) {
if (event_notifier_init(&peer->vectors[i], FALSE) < 0) {
IVSHMEM_SERVER_DEBUG(server, "cannot create eventfd\n");
goto fail;
}
}
/* send peer id and shm fd */
if (ivshmem_server_send_initial_info(server, peer) < 0) {
IVSHMEM_SERVER_DEBUG(server, "cannot send initial info\n");
goto fail;
}
/* advertise the new peer to others */
QTAILQ_FOREACH(other_peer, &server->peer_list, next) {
for (i = 0; i < peer->vectors_count; i++) {
ivshmem_server_send_one_msg(other_peer->sock_fd, peer->id,
peer->vectors[i].wfd);
}
}
/* advertise the other peers to the new one */
QTAILQ_FOREACH(other_peer, &server->peer_list, next) {
for (i = 0; i < peer->vectors_count; i++) {
ivshmem_server_send_one_msg(peer->sock_fd, other_peer->id,
other_peer->vectors[i].wfd);
}
}
/* advertise the new peer to itself */
for (i = 0; i < peer->vectors_count; i++) {
ivshmem_server_send_one_msg(peer->sock_fd, peer->id,
event_notifier_get_fd(&peer->vectors[i]));
}
QTAILQ_INSERT_TAIL(&server->peer_list, peer, next);
IVSHMEM_SERVER_DEBUG(server, "new peer id = %" PRId64 "\n",
peer->id);
return 0;
fail:
while (i--) {
event_notifier_cleanup(&peer->vectors[i]);
}
close(newfd);
g_free(peer);
return -1;
}
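
The XXX note above leaves room for a real ID allocator; a minimal free-list sketch of what that could look like (purely illustrative and hypothetical, not part of the server):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct FreeId {
    uint16_t id;
    struct FreeId *next;
} FreeId;

typedef struct IdAllocator {
    FreeId *free_list;           /* ids released by disconnected peers */
    uint16_t next_fresh;         /* never-used ids, handed out in order */
} IdAllocator;

static int id_alloc(IdAllocator *a, uint16_t *out)
{
    if (a->free_list) {
        FreeId *f = a->free_list;
        a->free_list = f->next;
        *out = f->id;
        free(f);
        return 0;
    }
    if (a->next_fresh == UINT16_MAX) {
        return -1;               /* exhausted, like the G_MAXUINT16 check */
    }
    *out = a->next_fresh++;
    return 0;
}

static void id_release(IdAllocator *a, uint16_t id)
{
    FreeId *f = malloc(sizeof(*f));
    if (f) {
        f->id = id;
        f->next = a->free_list;
        a->free_list = f;
    }
}

int main(void)
{
    IdAllocator a = { NULL, 0 };
    uint16_t id;

    id_alloc(&a, &id);           /* -> 0 */
    id_release(&a, id);
    id_alloc(&a, &id);           /* reuses 0 instead of burning a new id */
    printf("got id %u\n", id);
    return 0;
}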
/* Try to ftruncate a file to the next power of 2 of shmsize.
* If it fails, all powers of 2 above shmsize are tested until
* we reach the maximum huge page size. This is useful
* if the shm file is in a hugetlbfs that cannot be truncated to the
* shm_size value. */
static int
ivshmem_server_ftruncate(int fd, unsigned shmsize)
{
int ret;
struct stat mapstat;
/* align shmsize to next power of 2 */
shmsize = pow2ceil(shmsize);
if (fstat(fd, &mapstat) != -1 && mapstat.st_size == shmsize) {
return 0;
}
while (shmsize <= IVSHMEM_SERVER_MAX_HUGEPAGE_SIZE) {
ret = ftruncate(fd, shmsize);
if (ret == 0) {
return ret;
}
shmsize *= 2;
}
return -1;
}
/* Init a new ivshmem server */
int
ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path,
const char *shm_path, size_t shm_size, unsigned n_vectors,
bool verbose)
{
int ret;
memset(server, 0, sizeof(*server));
server->verbose = verbose;
ret = snprintf(server->unix_sock_path, sizeof(server->unix_sock_path),
"%s", unix_sock_path);
if (ret < 0 || ret >= sizeof(server->unix_sock_path)) {
IVSHMEM_SERVER_DEBUG(server, "could not copy unix socket path\n");
return -1;
}
ret = snprintf(server->shm_path, sizeof(server->shm_path),
"%s", shm_path);
if (ret < 0 || ret >= sizeof(server->shm_path)) {
IVSHMEM_SERVER_DEBUG(server, "could not copy shm path\n");
return -1;
}
server->shm_size = shm_size;
server->n_vectors = n_vectors;
QTAILQ_INIT(&server->peer_list);
return 0;
}
#ifdef CONFIG_LINUX
#define HUGETLBFS_MAGIC 0x958458f6
static long gethugepagesize(const char *path)
{
struct statfs fs;
int ret;
do {
ret = statfs(path, &fs);
} while (ret != 0 && errno == EINTR);
if (ret != 0) {
return -1;
}
if (fs.f_type != HUGETLBFS_MAGIC) {
return -1;
}
return fs.f_bsize;
}
#endif
/* open shm, create and bind to the unix socket */
int
ivshmem_server_start(IvshmemServer *server)
{
struct sockaddr_un sun;
int shm_fd, sock_fd, ret;
/* open shm file */
#ifdef CONFIG_LINUX
long hpagesize;
hpagesize = gethugepagesize(server->shm_path);
if (hpagesize < 0 && errno != ENOENT) {
IVSHMEM_SERVER_DEBUG(server, "cannot stat shm file %s: %s\n",
server->shm_path, strerror(errno));
}
if (hpagesize > 0) {
gchar *filename = g_strdup_printf("%s/ivshmem.XXXXXX", server->shm_path);
IVSHMEM_SERVER_DEBUG(server, "Using hugepages: %s\n", server->shm_path);
shm_fd = mkstemp(filename);
unlink(filename);
g_free(filename);
} else
#endif
{
IVSHMEM_SERVER_DEBUG(server, "Using POSIX shared memory: %s\n",
server->shm_path);
shm_fd = shm_open(server->shm_path, O_CREAT|O_RDWR, S_IRWXU);
}
if (shm_fd < 0) {
fprintf(stderr, "cannot open shm file %s: %s\n", server->shm_path,
strerror(errno));
return -1;
}
if (ivshmem_server_ftruncate(shm_fd, server->shm_size) < 0) {
fprintf(stderr, "ftruncate(%s) failed: %s\n", server->shm_path,
strerror(errno));
goto err_close_shm;
}
IVSHMEM_SERVER_DEBUG(server, "create & bind socket %s\n",
server->unix_sock_path);
/* create the unix listening socket */
sock_fd = socket(AF_UNIX, SOCK_STREAM, 0);
if (sock_fd < 0) {
IVSHMEM_SERVER_DEBUG(server, "cannot create socket: %s\n",
strerror(errno));
goto err_close_shm;
}
sun.sun_family = AF_UNIX;
ret = snprintf(sun.sun_path, sizeof(sun.sun_path), "%s",
server->unix_sock_path);
if (ret < 0 || ret >= sizeof(sun.sun_path)) {
IVSHMEM_SERVER_DEBUG(server, "could not copy unix socket path\n");
goto err_close_sock;
}
if (bind(sock_fd, (struct sockaddr *)&sun, sizeof(sun)) < 0) {
IVSHMEM_SERVER_DEBUG(server, "cannot connect to %s: %s\n", sun.sun_path,
strerror(errno));
goto err_close_sock;
}
if (listen(sock_fd, IVSHMEM_SERVER_LISTEN_BACKLOG) < 0) {
IVSHMEM_SERVER_DEBUG(server, "listen() failed: %s\n", strerror(errno));
goto err_close_sock;
}
server->sock_fd = sock_fd;
server->shm_fd = shm_fd;
return 0;
err_close_sock:
close(sock_fd);
err_close_shm:
close(shm_fd);
return -1;
}
/* close connections to clients, the unix socket and the shm fd */
void
ivshmem_server_close(IvshmemServer *server)
{
IvshmemServerPeer *peer, *npeer;
IVSHMEM_SERVER_DEBUG(server, "close server\n");
QTAILQ_FOREACH_SAFE(peer, &server->peer_list, next, npeer) {
ivshmem_server_free_peer(server, peer);
}
unlink(server->unix_sock_path);
close(server->sock_fd);
close(server->shm_fd);
server->sock_fd = -1;
server->shm_fd = -1;
}
/* get the fd_set according to the unix socket and the peer list */
void
ivshmem_server_get_fds(const IvshmemServer *server, fd_set *fds, int *maxfd)
{
IvshmemServerPeer *peer;
if (server->sock_fd == -1) {
return;
}
FD_SET(server->sock_fd, fds);
if (server->sock_fd >= *maxfd) {
*maxfd = server->sock_fd + 1;
}
QTAILQ_FOREACH(peer, &server->peer_list, next) {
FD_SET(peer->sock_fd, fds);
if (peer->sock_fd >= *maxfd) {
*maxfd = peer->sock_fd + 1;
}
}
}
/* process incoming messages on the sockets in fd_set */
int
ivshmem_server_handle_fds(IvshmemServer *server, fd_set *fds, int maxfd)
{
IvshmemServerPeer *peer, *peer_next;
if (server->sock_fd < maxfd && FD_ISSET(server->sock_fd, fds) &&
ivshmem_server_handle_new_conn(server) < 0 && errno != EINTR) {
IVSHMEM_SERVER_DEBUG(server, "ivshmem_server_handle_new_conn() "
"failed\n");
return -1;
}
QTAILQ_FOREACH_SAFE(peer, &server->peer_list, next, peer_next) {
/* any message from a peer socket results in a close() */
IVSHMEM_SERVER_DEBUG(server, "peer->sock_fd=%d\n", peer->sock_fd);
if (peer->sock_fd < maxfd && FD_ISSET(peer->sock_fd, fds)) {
ivshmem_server_free_peer(server, peer);
}
}
return 0;
}
/* lookup peer from its id */
IvshmemServerPeer *
ivshmem_server_search_peer(IvshmemServer *server, int64_t peer_id)
{
IvshmemServerPeer *peer;
QTAILQ_FOREACH(peer, &server->peer_list, next) {
if (peer->id == peer_id) {
return peer;
}
}
return NULL;
}
/* dump the list of peers and their vectors on stdout */
void
ivshmem_server_dump(const IvshmemServer *server)
{
const IvshmemServerPeer *peer;
unsigned vector;
/* dump peers */
QTAILQ_FOREACH(peer, &server->peer_list, next) {
printf("peer_id = %" PRId64 "\n", peer->id);
for (vector = 0; vector < peer->vectors_count; vector++) {
printf(" vector %d is enabled (fd=%d)\n", vector,
event_notifier_get_fd(&peer->vectors[vector]));
}
}
}


@@ -0,0 +1,167 @@
/*
* Copyright 6WIND S.A., 2014
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* (at your option) any later version. See the COPYING file in the
* top-level directory.
*/
#ifndef _IVSHMEM_SERVER_H_
#define _IVSHMEM_SERVER_H_
/**
* The ivshmem server is a daemon that creates a unix socket in listen
* mode. The ivshmem clients (qemu or ivshmem-client) connect to this
* unix socket. For each client, the server will create some eventfds
* (see EVENTFD(2)), one per vector. These fds are transmitted to all
* clients using the SCM_RIGHTS cmsg message. Therefore, each client is
* able to send a notification to another client without being
* proxied by the server.
*
* We use this mechanism to send interrupts between guests.
* QEMU is able to transform an event on an eventfd into a PCI MSI-X
* interrupt in the guest.
*
* The ivshmem server is also able to share the file descriptor
* associated with the ivshmem shared memory.
*/
#include <limits.h>
#include <sys/select.h>
#include <stdint.h>
#include <stdbool.h>
#include "qemu/event_notifier.h"
#include "qemu/queue.h"
#include "hw/misc/ivshmem.h"
/**
* Maximum number of notification vectors supported by the server
*/
#define IVSHMEM_SERVER_MAX_VECTORS 64
/**
* Structure storing a peer
*
* Each time a client connects to an ivshmem server, a new
* IvshmemServerPeer structure is created. This peer and all its
* vectors are advertised to all connected clients through the connected
* unix sockets.
*/
typedef struct IvshmemServerPeer {
QTAILQ_ENTRY(IvshmemServerPeer) next; /**< next in list*/
int sock_fd; /**< connected unix sock */
int64_t id; /**< the id of the peer */
EventNotifier vectors[IVSHMEM_SERVER_MAX_VECTORS]; /**< one per vector */
unsigned vectors_count; /**< number of vectors */
} IvshmemServerPeer;
QTAILQ_HEAD(IvshmemServerPeerList, IvshmemServerPeer);
typedef struct IvshmemServerPeerList IvshmemServerPeerList;
/**
* Structure describing an ivshmem server
*
* This structure stores all information related to our server: the name
* of the server unix socket and the list of connected peers.
*/
typedef struct IvshmemServer {
char unix_sock_path[PATH_MAX]; /**< path to unix socket */
int sock_fd; /**< unix sock file descriptor */
char shm_path[PATH_MAX]; /**< path to shm */
size_t shm_size; /**< size of shm */
int shm_fd; /**< shm file descriptor */
unsigned n_vectors; /**< number of vectors */
uint16_t cur_id; /**< id to be given to next client */
bool verbose; /**< true in verbose mode */
IvshmemServerPeerList peer_list; /**< list of peers */
} IvshmemServer;
/**
* Initialize an ivshmem server
*
* @server: A pointer to an uninitialized IvshmemServer structure
* @unix_sock_path: The pointer to the unix socket file name
* @shm_path: Path to the shared memory. The path corresponds to a POSIX
* shm name or a hugetlbfs mount point.
* @shm_size: Size of shared memory
* @n_vectors: Number of interrupt vectors per client
* @verbose: True to enable verbose mode
*
* Returns: 0 on success, or a negative value on error
*/
int
ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path,
const char *shm_path, size_t shm_size, unsigned n_vectors,
bool verbose);
/**
* Open the shm, then create and bind to the unix socket
*
* @server: The pointer to the initialized IvshmemServer structure
*
* Returns: 0 on success, or a negative value on error
*/
int ivshmem_server_start(IvshmemServer *server);
/**
* Close the server
*
* Close connections to all clients, close the unix socket and the
* shared memory file descriptor. The structure remains initialized, so
* it is possible to call ivshmem_server_start() again after a call to
* ivshmem_server_close().
*
* @server: The ivshmem server
*/
void ivshmem_server_close(IvshmemServer *server);
/**
* Fill a fd_set with file descriptors to be monitored
*
* This function will fill a fd_set with all file descriptors that must
* be polled (unix server socket and peers unix socket). The function
* will not initialize the fd_set; it is up to the caller to do it.
*
* @server: The ivshmem server
* @fds: The fd_set to be updated
* @maxfd: Must be set to the max file descriptor + 1 in fd_set. This value is
* updated if this function adds a greater fd in fd_set.
*/
void
ivshmem_server_get_fds(const IvshmemServer *server, fd_set *fds, int *maxfd);
/**
* Read and handle new messages
*
* Given a fd_set (for instance filled by a call to select()), handle
* incoming messages from peers.
*
* @server: The ivshmem server
* @fds: The fd_set containing the file descriptors to be checked. Note that
* file descriptors that are not related to our server are ignored.
* @maxfd: The maximum fd in fd_set, plus one.
*
* Returns: 0 on success, or a negative value on error
*/
int ivshmem_server_handle_fds(IvshmemServer *server, fd_set *fds, int maxfd);
/**
* Search a peer from its identifier
*
* @server: The ivshmem server
* @peer_id: The identifier of the peer structure
*
* Returns: The peer structure, or NULL if not found
*/
IvshmemServerPeer *
ivshmem_server_search_peer(IvshmemServer *server, int64_t peer_id);
/**
* Dump information of this ivshmem server and its peers on stdout
*
* @server: The ivshmem server
*/
void ivshmem_server_dump(const IvshmemServer *server);
#endif /* _IVSHMEM_SERVER_H_ */


@@ -0,0 +1,263 @@
/*
* Copyright 6WIND S.A., 2014
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* (at your option) any later version. See the COPYING file in the
* top-level directory.
*/
#include "qemu-common.h"
#include "ivshmem-server.h"
#define IVSHMEM_SERVER_DEFAULT_VERBOSE 0
#define IVSHMEM_SERVER_DEFAULT_FOREGROUND 0
#define IVSHMEM_SERVER_DEFAULT_PID_FILE "/var/run/ivshmem-server.pid"
#define IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH "/tmp/ivshmem_socket"
#define IVSHMEM_SERVER_DEFAULT_SHM_PATH "ivshmem"
#define IVSHMEM_SERVER_DEFAULT_SHM_SIZE (4*1024*1024)
#define IVSHMEM_SERVER_DEFAULT_N_VECTORS 1
/* used to quit on signal SIGTERM */
static int ivshmem_server_quit;
/* arguments given by the user */
typedef struct IvshmemServerArgs {
bool verbose;
bool foreground;
const char *pid_file;
const char *unix_socket_path;
const char *shm_path;
uint64_t shm_size;
unsigned n_vectors;
} IvshmemServerArgs;
/* show ivshmem_server_usage and exit with given error code */
static void
ivshmem_server_usage(const char *name, int code)
{
fprintf(stderr, "%s [opts]\n", name);
fprintf(stderr, " -h: show this help\n");
fprintf(stderr, " -v: verbose mode\n");
fprintf(stderr, " -F: foreground mode (default is to daemonize)\n");
fprintf(stderr, " -p <pid_file>: path to the PID file (used in daemon\n"
" mode only).\n"
" Default=%s\n", IVSHMEM_SERVER_DEFAULT_SHM_PATH);
fprintf(stderr, " -S <unix_socket_path>: path to the unix socket\n"
" to listen to.\n"
" Default=%s\n", IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH);
fprintf(stderr, " -m <shm_path>: path to the shared memory.\n"
" The path corresponds to a POSIX shm name or a\n"
" hugetlbfs mount point.\n"
" default=%s\n", IVSHMEM_SERVER_DEFAULT_SHM_PATH);
fprintf(stderr, " -l <size>: size of shared memory in bytes. The suffix\n"
" K, M and G can be used (ex: 1K means 1024).\n"
" default=%u\n", IVSHMEM_SERVER_DEFAULT_SHM_SIZE);
fprintf(stderr, " -n <n_vects>: number of vectors.\n"
" default=%u\n", IVSHMEM_SERVER_DEFAULT_N_VECTORS);
exit(code);
}
/* parse the program arguments, exit on error */
static void
ivshmem_server_parse_args(IvshmemServerArgs *args, int argc, char *argv[])
{
int c;
unsigned long long v;
Error *errp = NULL;
while ((c = getopt(argc, argv,
"h" /* help */
"v" /* verbose */
"F" /* foreground */
"p:" /* pid_file */
"S:" /* unix_socket_path */
"m:" /* shm_path */
"l:" /* shm_size */
"n:" /* n_vectors */
)) != -1) {
switch (c) {
case 'h': /* help */
ivshmem_server_usage(argv[0], 0);
break;
case 'v': /* verbose */
args->verbose = 1;
break;
case 'F': /* foreground */
args->foreground = 1;
break;
case 'p': /* pid_file */
args->pid_file = optarg;
break;
case 'S': /* unix_socket_path */
args->unix_socket_path = optarg;
break;
case 'm': /* shm_path */
args->shm_path = optarg;
break;
case 'l': /* shm_size */
parse_option_size("shm_size", optarg, &args->shm_size, &errp);
if (errp) {
fprintf(stderr, "cannot parse shm size: %s\n",
error_get_pretty(errp));
error_free(errp);
ivshmem_server_usage(argv[0], 1);
}
break;
case 'n': /* n_vectors */
if (parse_uint_full(optarg, &v, 0) < 0) {
fprintf(stderr, "cannot parse n_vectors\n");
ivshmem_server_usage(argv[0], 1);
}
args->n_vectors = v;
break;
default:
ivshmem_server_usage(argv[0], 1);
break;
}
}
if (args->n_vectors > IVSHMEM_SERVER_MAX_VECTORS) {
fprintf(stderr, "too many requested vectors (max is %d)\n",
IVSHMEM_SERVER_MAX_VECTORS);
ivshmem_server_usage(argv[0], 1);
}
if (args->verbose == 1 && args->foreground == 0) {
fprintf(stderr, "cannot use verbose in daemon mode\n");
ivshmem_server_usage(argv[0], 1);
}
}
/* wait for events on listening server unix socket and connected client
* sockets */
static int
ivshmem_server_poll_events(IvshmemServer *server)
{
fd_set fds;
int ret = 0, maxfd;
while (!ivshmem_server_quit) {
FD_ZERO(&fds);
maxfd = 0;
ivshmem_server_get_fds(server, &fds, &maxfd);
ret = select(maxfd, &fds, NULL, NULL, NULL);
if (ret < 0) {
if (errno == EINTR) {
continue;
}
fprintf(stderr, "select error: %s\n", strerror(errno));
break;
}
if (ret == 0) {
continue;
}
if (ivshmem_server_handle_fds(server, &fds, maxfd) < 0) {
fprintf(stderr, "ivshmem_server_handle_fds() failed\n");
break;
}
}
return ret;
}
static void
ivshmem_server_quit_cb(int signum)
{
ivshmem_server_quit = 1;
}
int
main(int argc, char *argv[])
{
IvshmemServer server;
struct sigaction sa, sa_quit;
IvshmemServerArgs args = {
.verbose = IVSHMEM_SERVER_DEFAULT_VERBOSE,
.foreground = IVSHMEM_SERVER_DEFAULT_FOREGROUND,
.pid_file = IVSHMEM_SERVER_DEFAULT_PID_FILE,
.unix_socket_path = IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH,
.shm_path = IVSHMEM_SERVER_DEFAULT_SHM_PATH,
.shm_size = IVSHMEM_SERVER_DEFAULT_SHM_SIZE,
.n_vectors = IVSHMEM_SERVER_DEFAULT_N_VECTORS,
};
int ret = 1;
/* parse arguments, will exit on error */
ivshmem_server_parse_args(&args, argc, argv);
/* Ignore SIGPIPE, see this link for more info:
* http://www.mail-archive.com/libevent-users@monkey.org/msg01606.html */
sa.sa_handler = SIG_IGN;
sa.sa_flags = 0;
if (sigemptyset(&sa.sa_mask) == -1 ||
sigaction(SIGPIPE, &sa, 0) == -1) {
perror("failed to ignore SIGPIPE; sigaction");
goto err;
}
sa_quit.sa_handler = ivshmem_server_quit_cb;
sa_quit.sa_flags = 0;
if (sigemptyset(&sa_quit.sa_mask) == -1 ||
sigaction(SIGTERM, &sa_quit, 0) == -1) {
perror("failed to add SIGTERM handler; sigaction");
goto err;
}
/* init the ivshmem server structure */
if (ivshmem_server_init(&server, args.unix_socket_path, args.shm_path,
args.shm_size, args.n_vectors, args.verbose) < 0) {
fprintf(stderr, "cannot init server\n");
goto err;
}
/* start the ivshmem server (open shm & unix socket) */
if (ivshmem_server_start(&server) < 0) {
fprintf(stderr, "cannot bind\n");
goto err;
}
/* daemonize if asked to */
if (!args.foreground) {
FILE *fp;
if (qemu_daemon(1, 1) < 0) {
fprintf(stderr, "cannot daemonize: %s\n", strerror(errno));
goto err_close;
}
/* write pid file */
fp = fopen(args.pid_file, "w");
if (fp == NULL) {
fprintf(stderr, "cannot write pid file: %s\n", strerror(errno));
goto err_close;
}
fprintf(fp, "%d\n", (int) getpid());
fclose(fp);
}
ivshmem_server_poll_events(&server);
fprintf(stdout, "server disconnected\n");
ret = 0;
err_close:
ivshmem_server_close(&server);
err:
return ret;
}


@@ -477,7 +477,8 @@ int cpu_exec(CPUState *cpu)
/* see if we can patch the calling TB. When the TB
spans two pages, we cannot safely do a direct
jump. */
if (next_tb != 0 && tb->page_addr[1] == -1) {
if (next_tb != 0 && tb->page_addr[1] == -1
&& !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
tb_add_jump((TranslationBlock *)(next_tb & ~TB_EXIT_MASK),
next_tb & TB_EXIT_MASK, tb);
}


@@ -25,8 +25,7 @@ typedef struct QCryptoCipherBuiltinAES QCryptoCipherBuiltinAES;
struct QCryptoCipherBuiltinAES {
AES_KEY encrypt_key;
AES_KEY decrypt_key;
uint8_t *iv;
size_t niv;
uint8_t iv[AES_BLOCK_SIZE];
};
typedef struct QCryptoCipherBuiltinDESRFB QCryptoCipherBuiltinDESRFB;
struct QCryptoCipherBuiltinDESRFB {
@@ -40,6 +39,7 @@ struct QCryptoCipherBuiltin {
QCryptoCipherBuiltinAES aes;
QCryptoCipherBuiltinDESRFB desrfb;
} state;
size_t blocksize;
void (*free)(QCryptoCipher *cipher);
int (*setiv)(QCryptoCipher *cipher,
const uint8_t *iv, size_t niv,
@@ -61,7 +61,6 @@ static void qcrypto_cipher_free_aes(QCryptoCipher *cipher)
{
QCryptoCipherBuiltin *ctxt = cipher->opaque;
g_free(ctxt->state.aes.iv);
g_free(ctxt);
cipher->opaque = NULL;
}
@@ -145,15 +144,13 @@ static int qcrypto_cipher_setiv_aes(QCryptoCipher *cipher,
Error **errp)
{
QCryptoCipherBuiltin *ctxt = cipher->opaque;
if (niv != 16) {
error_setg(errp, "IV must be 16 bytes not %zu", niv);
if (niv != AES_BLOCK_SIZE) {
error_setg(errp, "IV must be %d bytes not %zu",
AES_BLOCK_SIZE, niv);
return -1;
}
g_free(ctxt->state.aes.iv);
ctxt->state.aes.iv = g_new0(uint8_t, niv);
memcpy(ctxt->state.aes.iv, iv, niv);
ctxt->state.aes.niv = niv;
memcpy(ctxt->state.aes.iv, iv, AES_BLOCK_SIZE);
return 0;
}
@@ -185,6 +182,7 @@ static int qcrypto_cipher_init_aes(QCryptoCipher *cipher,
goto error;
}
ctxt->blocksize = AES_BLOCK_SIZE;
ctxt->free = qcrypto_cipher_free_aes;
ctxt->setiv = qcrypto_cipher_setiv_aes;
ctxt->encrypt = qcrypto_cipher_encrypt_aes;
@@ -286,6 +284,7 @@ static int qcrypto_cipher_init_des_rfb(QCryptoCipher *cipher,
memcpy(ctxt->state.desrfb.key, key, nkey);
ctxt->state.desrfb.nkey = nkey;
ctxt->blocksize = 8;
ctxt->free = qcrypto_cipher_free_des_rfb;
ctxt->setiv = qcrypto_cipher_setiv_des_rfb;
ctxt->encrypt = qcrypto_cipher_encrypt_des_rfb;
@@ -374,6 +373,12 @@ int qcrypto_cipher_encrypt(QCryptoCipher *cipher,
{
QCryptoCipherBuiltin *ctxt = cipher->opaque;
if (len % ctxt->blocksize) {
error_setg(errp, "Length %zu must be a multiple of block size %zu",
len, ctxt->blocksize);
return -1;
}
return ctxt->encrypt(cipher, in, out, len, errp);
}
@@ -386,6 +391,12 @@ int qcrypto_cipher_decrypt(QCryptoCipher *cipher,
{
QCryptoCipherBuiltin *ctxt = cipher->opaque;
if (len % ctxt->blocksize) {
error_setg(errp, "Length %zu must be a multiple of block size %zu",
len, ctxt->blocksize);
return -1;
}
return ctxt->decrypt(cipher, in, out, len, errp);
}
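
With these checks in place, un-padded lengths are rejected outright; a caller-side helper that rounds a buffer up to the block size might look like this (assumed example with AES_BLOCK_SIZE taken to be 16; not part of the QEMU crypto API):

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define AES_BLOCK_SIZE 16

/* Copy "in" into a zero-padded buffer whose length is a multiple of the
 * cipher block size, so the multiple-of-blocksize check will pass. */
static uint8_t *pad_to_block(const uint8_t *in, size_t len, size_t *padded_len)
{
    size_t rem = len % AES_BLOCK_SIZE;
    uint8_t *out;

    *padded_len = rem ? len + (AES_BLOCK_SIZE - rem) : len;
    out = calloc(1, *padded_len);
    if (out) {
        memcpy(out, in, len);
    }
    return out;
}

int main(void)
{
    const uint8_t msg[] = "hello";                /* 6 bytes with the NUL */
    size_t padded_len;
    uint8_t *buf = pad_to_block(msg, sizeof(msg), &padded_len);

    printf("%zu -> %zu bytes\n", sizeof(msg), padded_len);   /* 6 -> 16 */
    free(buf);
    return 0;
}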


@@ -34,6 +34,11 @@ bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg)
}
}
typedef struct QCryptoCipherGcrypt QCryptoCipherGcrypt;
struct QCryptoCipherGcrypt {
gcry_cipher_hd_t handle;
size_t blocksize;
};
QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
QCryptoCipherMode mode,
@@ -41,7 +46,7 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
Error **errp)
{
QCryptoCipher *cipher;
gcry_cipher_hd_t handle;
QCryptoCipherGcrypt *ctx;
gcry_error_t err;
int gcryalg, gcrymode;
@@ -87,7 +92,9 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
cipher->alg = alg;
cipher->mode = mode;
err = gcry_cipher_open(&handle, gcryalg, gcrymode, 0);
ctx = g_new0(QCryptoCipherGcrypt, 1);
err = gcry_cipher_open(&ctx->handle, gcryalg, gcrymode, 0);
if (err != 0) {
error_setg(errp, "Cannot initialize cipher: %s",
gcry_strerror(err));
@@ -100,10 +107,12 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
* bizarre RFB variant of DES :-)
*/
uint8_t *rfbkey = qcrypto_cipher_munge_des_rfb_key(key, nkey);
err = gcry_cipher_setkey(handle, rfbkey, nkey);
err = gcry_cipher_setkey(ctx->handle, rfbkey, nkey);
g_free(rfbkey);
ctx->blocksize = 8;
} else {
err = gcry_cipher_setkey(handle, key, nkey);
err = gcry_cipher_setkey(ctx->handle, key, nkey);
ctx->blocksize = 16;
}
if (err != 0) {
error_setg(errp, "Cannot set key: %s",
@@ -111,11 +120,12 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
goto error;
}
cipher->opaque = handle;
cipher->opaque = ctx;
return cipher;
error:
gcry_cipher_close(handle);
gcry_cipher_close(ctx->handle);
g_free(ctx);
g_free(cipher);
return NULL;
}
@@ -123,12 +133,13 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
void qcrypto_cipher_free(QCryptoCipher *cipher)
{
gcry_cipher_hd_t handle;
QCryptoCipherGcrypt *ctx;
if (!cipher) {
return;
}
handle = cipher->opaque;
gcry_cipher_close(handle);
ctx = cipher->opaque;
gcry_cipher_close(ctx->handle);
g_free(ctx);
g_free(cipher);
}
@@ -139,10 +150,16 @@ int qcrypto_cipher_encrypt(QCryptoCipher *cipher,
size_t len,
Error **errp)
{
gcry_cipher_hd_t handle = cipher->opaque;
QCryptoCipherGcrypt *ctx = cipher->opaque;
gcry_error_t err;
err = gcry_cipher_encrypt(handle,
if (len % ctx->blocksize) {
error_setg(errp, "Length %zu must be a multiple of block size %zu",
len, ctx->blocksize);
return -1;
}
err = gcry_cipher_encrypt(ctx->handle,
out, len,
in, len);
if (err != 0) {
@@ -161,10 +178,16 @@ int qcrypto_cipher_decrypt(QCryptoCipher *cipher,
size_t len,
Error **errp)
{
gcry_cipher_hd_t handle = cipher->opaque;
QCryptoCipherGcrypt *ctx = cipher->opaque;
gcry_error_t err;
err = gcry_cipher_decrypt(handle,
if (len % ctx->blocksize) {
error_setg(errp, "Length %zu must be a multiple of block size %zu",
len, ctx->blocksize);
return -1;
}
err = gcry_cipher_decrypt(ctx->handle,
out, len,
in, len);
if (err != 0) {
@@ -180,11 +203,17 @@ int qcrypto_cipher_setiv(QCryptoCipher *cipher,
const uint8_t *iv, size_t niv,
Error **errp)
{
gcry_cipher_hd_t handle = cipher->opaque;
QCryptoCipherGcrypt *ctx = cipher->opaque;
gcry_error_t err;
gcry_cipher_reset(handle);
err = gcry_cipher_setiv(handle, iv, niv);
if (niv != ctx->blocksize) {
error_setg(errp, "Expected IV size %zu not %zu",
ctx->blocksize, niv);
return -1;
}
gcry_cipher_reset(ctx->handle);
err = gcry_cipher_setiv(ctx->handle, iv, niv);
if (err != 0) {
error_setg(errp, "Cannot set IV: %s",
gcry_strerror(err));

View File

@@ -69,7 +69,7 @@ struct QCryptoCipherNettle {
nettle_cipher_func *alg_encrypt;
nettle_cipher_func *alg_decrypt;
uint8_t *iv;
size_t niv;
size_t blocksize;
};
bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg)
@@ -125,7 +125,7 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
ctx->alg_encrypt = des_encrypt_wrapper;
ctx->alg_decrypt = des_decrypt_wrapper;
ctx->niv = DES_BLOCK_SIZE;
ctx->blocksize = DES_BLOCK_SIZE;
break;
case QCRYPTO_CIPHER_ALG_AES_128:
@@ -140,14 +140,14 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
ctx->alg_encrypt = aes_encrypt_wrapper;
ctx->alg_decrypt = aes_decrypt_wrapper;
ctx->niv = AES_BLOCK_SIZE;
ctx->blocksize = AES_BLOCK_SIZE;
break;
default:
error_setg(errp, "Unsupported cipher algorithm %d", alg);
goto error;
}
ctx->iv = g_new0(uint8_t, ctx->niv);
ctx->iv = g_new0(uint8_t, ctx->blocksize);
cipher->opaque = ctx;
return cipher;
@@ -184,6 +184,12 @@ int qcrypto_cipher_encrypt(QCryptoCipher *cipher,
{
QCryptoCipherNettle *ctx = cipher->opaque;
if (len % ctx->blocksize) {
error_setg(errp, "Length %zu must be a multiple of block size %zu",
len, ctx->blocksize);
return -1;
}
switch (cipher->mode) {
case QCRYPTO_CIPHER_MODE_ECB:
ctx->alg_encrypt(ctx->ctx_encrypt, len, out, in);
@@ -191,7 +197,7 @@ int qcrypto_cipher_encrypt(QCryptoCipher *cipher,
case QCRYPTO_CIPHER_MODE_CBC:
cbc_encrypt(ctx->ctx_encrypt, ctx->alg_encrypt,
ctx->niv, ctx->iv,
ctx->blocksize, ctx->iv,
len, out, in);
break;
default:
@@ -211,6 +217,12 @@ int qcrypto_cipher_decrypt(QCryptoCipher *cipher,
{
QCryptoCipherNettle *ctx = cipher->opaque;
if (len % ctx->blocksize) {
error_setg(errp, "Length %zu must be a multiple of block size %zu",
len, ctx->blocksize);
return -1;
}
switch (cipher->mode) {
case QCRYPTO_CIPHER_MODE_ECB:
ctx->alg_decrypt(ctx->ctx_decrypt ? ctx->ctx_decrypt : ctx->ctx_encrypt,
@@ -219,7 +231,7 @@ int qcrypto_cipher_decrypt(QCryptoCipher *cipher,
case QCRYPTO_CIPHER_MODE_CBC:
cbc_decrypt(ctx->ctx_decrypt ? ctx->ctx_decrypt : ctx->ctx_encrypt,
ctx->alg_decrypt, ctx->niv, ctx->iv,
ctx->alg_decrypt, ctx->blocksize, ctx->iv,
len, out, in);
break;
default:
@@ -235,9 +247,9 @@ int qcrypto_cipher_setiv(QCryptoCipher *cipher,
Error **errp)
{
QCryptoCipherNettle *ctx = cipher->opaque;
if (niv != ctx->niv) {
if (niv != ctx->blocksize) {
error_setg(errp, "Expected IV size %zu not %zu",
ctx->niv, niv);
ctx->blocksize, niv);
return -1;
}
memcpy(ctx->iv, iv, niv);

View File

@@ -47,7 +47,7 @@ qcrypto_cipher_validate_key_length(QCryptoCipherAlgorithm alg,
return true;
}
#if defined(CONFIG_GNUTLS_GCRYPT) || defined(CONFIG_GNUTLS_NETTLE)
#if defined(CONFIG_GCRYPT) || defined(CONFIG_NETTLE)
static uint8_t *
qcrypto_cipher_munge_des_rfb_key(const uint8_t *key,
size_t nkey)
@@ -63,11 +63,11 @@ qcrypto_cipher_munge_des_rfb_key(const uint8_t *key,
}
return ret;
}
#endif /* CONFIG_GNUTLS_GCRYPT || CONFIG_GNUTLS_NETTLE */
#endif /* CONFIG_GCRYPT || CONFIG_NETTLE */
#ifdef CONFIG_GNUTLS_GCRYPT
#ifdef CONFIG_GCRYPT
#include "crypto/cipher-gcrypt.c"
#elif defined CONFIG_GNUTLS_NETTLE
#elif defined CONFIG_NETTLE
#include "crypto/cipher-nettle.c"
#else
#include "crypto/cipher-builtin.c"

View File

@@ -24,8 +24,9 @@
#ifdef CONFIG_GNUTLS
#include <gnutls/gnutls.h>
#include <gnutls/crypto.h>
#endif
#ifdef CONFIG_GNUTLS_GCRYPT
#ifdef CONFIG_GCRYPT
#include <gcrypt.h>
#endif
@@ -37,6 +38,7 @@
* - When GNUTLS >= 2.12, we must not initialize gcrypt threading
* because GNUTLS will do that itself
* - When GNUTLS < 2.12 we must always initialize gcrypt threading
* - When GNUTLS is disabled we must always initialize gcrypt threading
*
* But....
*
@@ -47,12 +49,15 @@
*
* - gcrypt < 1.6.0
* AND
* - gnutls < 2.12
* - gnutls < 2.12
* OR
* - gnutls is disabled
*
*/
#if (defined(CONFIG_GNUTLS_GCRYPT) && \
(!defined(GNUTLS_VERSION_NUMBER) || \
#if (defined(CONFIG_GCRYPT) && \
(!defined(CONFIG_GNUTLS) || \
!defined(GNUTLS_VERSION_NUMBER) || \
(GNUTLS_VERSION_NUMBER < 0x020c00)) && \
(!defined(GCRYPT_VERSION_NUMBER) || \
(GCRYPT_VERSION_NUMBER < 0x010600)))
@@ -113,6 +118,7 @@ static struct gcry_thread_cbs qcrypto_gcrypt_thread_impl = {
int qcrypto_init(Error **errp)
{
#ifdef CONFIG_GNUTLS
int ret;
ret = gnutls_global_init();
if (ret < 0) {
@@ -125,8 +131,9 @@ int qcrypto_init(Error **errp)
gnutls_global_set_log_level(10);
gnutls_global_set_log_function(qcrypto_gnutls_log);
#endif
#endif
#ifdef CONFIG_GNUTLS_GCRYPT
#ifdef CONFIG_GCRYPT
if (!gcry_check_version(GCRYPT_VERSION)) {
error_setg(errp, "Unable to initialize gcrypt");
return -1;
@@ -139,12 +146,3 @@ int qcrypto_init(Error **errp)
return 0;
}
#else /* ! CONFIG_GNUTLS */
int qcrypto_init(Error **errp G_GNUC_UNUSED)
{
return 0;
}
#endif /* ! CONFIG_GNUTLS */

View File

@@ -35,5 +35,5 @@ CONFIG_SDHCI=y
CONFIG_EDU=y
CONFIG_VGA=y
CONFIG_VGA_PCI=y
CONFIG_IVSHMEM=$(CONFIG_KVM)
CONFIG_IVSHMEM=$(CONFIG_POSIX)
CONFIG_ROCKER=y

View File

@@ -2420,9 +2420,11 @@ const struct mips_opcode mips_builtin_opcodes[] =
{"hibernate","", 0x42000023, 0xffffffff, 0, 0, V1 },
{"ins", "t,r,+A,+B", 0x7c000004, 0xfc00003f, WR_t|RD_s, 0, I33 },
{"jr", "s", 0x00000008, 0xfc1fffff, UBD|RD_s, 0, I1 },
{"jr", "s", 0x00000009, 0xfc1fffff, UBD|RD_s, 0, I32R6 }, /* jalr */
/* jr.hb is officially MIPS{32,64}R2, but it works on R1 as jr with
the same hazard barrier effect. */
{"jr.hb", "s", 0x00000408, 0xfc1fffff, UBD|RD_s, 0, I32 },
{"jr.hb", "s", 0x00000409, 0xfc1fffff, UBD|RD_s, 0, I32R6 }, /* jalr.hb */
{"j", "s", 0x00000008, 0xfc1fffff, UBD|RD_s, 0, I1 }, /* jr */
/* SVR4 PIC code requires special handling for j, so it must be a
macro. */

View File

@@ -106,12 +106,15 @@ Types, commands, and events share a common namespace. Therefore,
generally speaking, type definitions should always use CamelCase for
user-defined type names, while built-in types are lowercase. Type
definitions should not end in 'Kind', as this namespace is used for
creating implicit C enums for visiting union types. Command names,
creating implicit C enums for visiting union types, or in 'List', as
this namespace is used for creating array types. Command names,
and field names within a type, should be all lower case with words
separated by a hyphen. However, some existing older commands and
complex types use underscore; when extending such expressions,
consistency is preferred over blindly avoiding underscore. Event
names should be ALL_CAPS with words separated by underscore.
names should be ALL_CAPS with words separated by underscore. Field
names cannot start with 'has-' or 'has_', as this is reserved for
tracking optional fields.
Any name (command, event, type, field, or enum value) beginning with
"x-" is marked experimental, and may be withdrawn or changed
@@ -122,9 +125,10 @@ vendor), even if the rest of the name uses dash (example:
__com.redhat_drive-mirror). Other than downstream extensions (with
leading underscore and the use of dots), all names should begin with a
letter, and contain only ASCII letters, digits, dash, and underscore.
It is okay to reuse names that match C keywords; the generator will
rename a field named "default" in the QAPI to "q_default" in the
generated C code.
Names beginning with 'q_' are reserved for the generator: QMP names
that resemble C keywords or other problematic strings will be munged
in C to use this prefix. For example, a field named "default" in
qapi becomes "q_default" in the generated C code.
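As a hedged illustration (the type name below is hypothetical, not taken from the QEMU schema), a struct member named "default" would come out of the generator roughly as:

#include <stdint.h>

/* For a hypothetical schema entry
 *   { 'struct': 'ExampleOptions', 'data': { 'default': 'int' } }
 * the generator renames the keyword-clashing member using the reserved
 * 'q_' prefix: */
typedef struct ExampleOptions ExampleOptions;

struct ExampleOptions {
    int64_t q_default;   /* QMP name "default" */
};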
In the rest of this document, usage lines are given for each
expression type, with literal strings written in lower case and

View File

@@ -28,6 +28,8 @@ Example:
"data": { "actual": 944766976 },
"timestamp": { "seconds": 1267020223, "microseconds": 435656 } }
Note: this event is rate-limited.
BLOCK_IMAGE_CORRUPTED
---------------------
@@ -296,6 +298,8 @@ Example:
"data": { "reference": "usr1", "sector-num": 345435, "sectors-count": 5 },
"timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
Note: this event is rate-limited.
QUORUM_REPORT_BAD
-----------------
@@ -318,6 +322,8 @@ Example:
"data": { "node-name": "1.raw", "sector-num": 345435, "sectors-count": 5 },
"timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
Note: this event is rate-limited.
RESET
-----
@@ -358,6 +364,8 @@ Example:
"data": { "offset": 78 },
"timestamp": { "seconds": 1267020223, "microseconds": 435656 } }
Note: this event is rate-limited.
SHUTDOWN
--------
@@ -632,6 +640,8 @@ Example:
"data": { "id": "channel0", "open": true },
"timestamp": { "seconds": 1401385907, "microseconds": 422329 } }
Note: this event is rate-limited separately for each "id".
WAKEUP
------
@@ -662,3 +672,5 @@ Example:
Note: If action is "reset", "shutdown", or "pause" the WATCHDOG event is
followed respectively by the RESET, SHUTDOWN, or STOP events.
Note: this event is rate-limited.

View File

@@ -175,6 +175,11 @@ The format of asynchronous events is:
For a listing of supported asynchronous events, please, refer to the
qmp-events.txt file.
Some events are rate-limited to at most one per second. If additional
"similar" events arrive within one second, all but the last one are
dropped, and the last one is delayed. "Similar" normally means same
event type. See qmp-events.txt for details.
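The following is only a conceptual sketch of that throttling idea, not QEMU's monitor code; the names are illustrative:

#include <stdbool.h>
#include <stdint.h>

/* Track one event type: emit at most one event per second and remember
 * whether a newer event is waiting to be delivered when the window ends. */
typedef struct {
    int64_t last_emit_ms;
    bool have_pending;
} EventThrottle;

static bool throttle_accept(EventThrottle *t, int64_t now_ms)
{
    if (now_ms - t->last_emit_ms >= 1000) {
        t->last_emit_ms = now_ms;
        t->have_pending = false;
        return true;              /* send this event immediately */
    }
    t->have_pending = true;       /* newer event replaces any older pending one */
    return false;                 /* deliver it once the one-second window ends */
}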
2.5 QGA Synchronization
-----------------------

View File

@@ -2,30 +2,106 @@
Device Specification for Inter-VM shared memory device
------------------------------------------------------
The Inter-VM shared memory device is designed to share a region of memory to
userspace in multiple virtual guests. The memory region does not belong to any
guest, but is a POSIX memory object on the host. Optionally, the device may
support sending interrupts to other guests sharing the same memory region.
The Inter-VM shared memory device is designed to share a memory region (created
on the host via the POSIX shared memory API) between multiple QEMU processes
running different guests. In order for all guests to be able to pick up the
shared memory area, it is modeled by QEMU as a PCI device exposing said memory
to the guest as a PCI BAR.
The memory region does not belong to any guest, but is a POSIX memory object on
the host. The host can access this shared memory if needed.
The device also provides an optional communication mechanism between guests
sharing the same memory object. More details about that are given in the
'Guest to guest communication' section.
The Inter-VM PCI device
-----------------------
*BARs*
From the VM point of view, the ivshmem PCI device supports three BARs.
The device supports three BARs. BAR0 is a 1 Kbyte MMIO region to support
registers. BAR1 is used for MSI-X when it is enabled in the device. BAR2 is
used to map the shared memory object from the host. The size of BAR2 is
specified when the guest is started and must be a power of 2 in size.
- BAR0 is a 1 Kbyte MMIO region to support registers and interrupts when MSI is
not used.
- BAR1 is used for MSI-X when it is enabled in the device.
- BAR2 is used to access the shared memory object.
*Registers*
It is your choice how to use the device, but you must choose between two
behaviors:
The device currently supports 4 registers of 32-bits each. Registers
are used for synchronization between guests sharing the same memory object when
interrupts are supported (this requires using the shared memory server).
- basically, if you only need the shared memory part, you will map BAR2.
This way, you have access to the shared memory in the guest and can use it as
you see fit (memnic, for example, uses it in userland
http://dpdk.org/browse/memnic); a minimal mapping sketch is shown at the end
of this section.
The server assigns each VM an ID number and sends this ID number to the QEMU
process when the guest starts.
- BAR0 and BAR1 are used to implement an optional communication mechanism
through interrupts in the guests. If you need an event mechanism between the
guests accessing the shared memory, you will most likely want to write a
kernel driver that will handle interrupts. See details in the 'Guest to guest
communication' section.
The behavior is chosen when starting your QEMU processes:
- if no communication mechanism is needed, the first QEMU to start creates the
shared memory on the host, and subsequent QEMU processes will use it.
- if a communication mechanism is needed, an ivshmem server must be started
before any QEMU processes, then each QEMU process connects to the server unix
socket.
For more details on the QEMU ivshmem parameters, see qemu-doc documentation.
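For the memory-only case described above (mapping BAR2 directly), a Linux-guest sketch might look like this; it is hedged: the PCI address is hypothetical and the mapping size must not exceed the BAR2 size the device was configured with:

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

/* Hypothetical PCI address; use the one the ivshmem device actually gets. */
#define IVSHMEM_BAR2 "/sys/bus/pci/devices/0000:00:05.0/resource2"

int main(void)
{
    int fd = open(IVSHMEM_BAR2, O_RDWR);
    void *shm;

    if (fd < 0) {
        perror("open");
        return 1;
    }
    /* Map the first 1 MB of the shared region. */
    shm = mmap(NULL, 1 << 20, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (shm == MAP_FAILED) {
        perror("mmap");
        close(fd);
        return 1;
    }
    ((volatile unsigned char *)shm)[0] = 0x42;   /* visible to all peers */
    munmap(shm, 1 << 20);
    close(fd);
    return 0;
}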
Guest to guest communication
----------------------------
This section details the communication mechanism between the guests accessing
the ivshmem shared memory.
*ivshmem server*
This server code is available in qemu.git/contrib/ivshmem-server.
The server must be started on the host before any guest.
It creates a shared memory object then waits for clients to connect on a unix
socket. All the messages are little-endian int64_t integers.
For each client (QEMU process) that connects to the server:
- the server sends a protocol version; if the client does not support it, the
client closes the connection,
- the server assigns an ID for this client and sends this ID to it as the first
message,
- the server sends a fd to the shared memory object to this client,
- the server creates a new set of host eventfds associated with the new client and
sends this set to all already connected clients,
- finally, the server sends all the eventfds sets for all clients to the new
client.
The server signals all clients when one of them disconnects.
The client IDs are limited to 16 bits because of the current implementation (see
Doorbell register in 'PCI device registers' subsection). Hence only 65536
clients are supported.
All the file descriptors (fd to the shared memory, eventfds for each client)
are passed to clients using SCM_RIGHTS over the server unix socket.
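As an illustration of the wire format only, a client might read one message like this (a hedged sketch, not the contrib/ivshmem-client code; socket setup and endianness conversion are left out):

#include <stdint.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

/* Receive one little-endian int64_t message; if an fd (shared memory object
 * or eventfd) rides along via SCM_RIGHTS, return it, otherwise return -1. */
static int ivshmem_recv_msg(int sock, int64_t *msg)
{
    struct iovec iov = { .iov_base = msg, .iov_len = sizeof(*msg) };
    union {
        struct cmsghdr align;
        char buf[CMSG_SPACE(sizeof(int))];
    } control;
    struct msghdr mh = {
        .msg_iov = &iov, .msg_iovlen = 1,
        .msg_control = control.buf, .msg_controllen = sizeof(control.buf),
    };
    struct cmsghdr *cmsg;
    int fd = -1;

    if (recvmsg(sock, &mh, 0) != sizeof(*msg)) {
        return -1;
    }
    for (cmsg = CMSG_FIRSTHDR(&mh); cmsg; cmsg = CMSG_NXTHDR(&mh, cmsg)) {
        if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
            memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
        }
    }
    return fd;
}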
Apart from the current ivshmem implementation in QEMU, an ivshmem client has
been provided in qemu.git/contrib/ivshmem-client for debugging.
*QEMU as an ivshmem client*
At initialisation, when creating the ivshmem device, QEMU first receives a
protocol version and closes communication with the server if it does not match.
Then, QEMU gets its ID from the server then makes it available through BAR0
IVPosition register for the VM to use (see 'PCI device registers' subsection).
QEMU then uses the fd to the shared memory to map it to BAR2.
eventfds for all other clients received from the server are stored to implement
BAR0 Doorbell register (see 'PCI device registers' subsection).
Finally, eventfds assigned to this QEMU process are used to send interrupts in
this VM.
*PCI device registers*
From the VM point of view, the ivshmem PCI device supports 4 registers of
32-bits each.
enum ivshmem_registers {
IntrMask = 0,
@@ -49,8 +125,8 @@ bit to 0 and unmasked by setting the first bit to 1.
IVPosition Register: The IVPosition register is read-only and reports the
guest's ID number. The guest IDs are non-negative integers. When using the
server, since the server is a separate process, the VM ID will only be set when
the device is ready (shared memory is received from the server and accessible via
the device). If the device is not ready, the IVPosition will return -1.
the device is ready (shared memory is received from the server and accessible
via the device). If the device is not ready, the IVPosition will return -1.
Applications should ensure that they have a valid VM ID before accessing the
shared memory.
@@ -59,8 +135,8 @@ Doorbell register. The doorbell register is 32-bits, logically divided into
two 16-bit fields. The high 16-bits are the guest ID to interrupt and the low
16-bits are the interrupt vector to trigger. The semantics of the value
written to the doorbell depends on whether the device is using MSI or a regular
pin-based interrupt. In short, MSI uses vectors while regular interrupts set the
status register.
pin-based interrupt. In short, MSI uses vectors while regular interrupts set
the status register.
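A hedged guest-side sketch of these two registers follows; the offsets (IVPosition at byte 8, Doorbell at byte 12) are assumptions based on the register enum above, and 'bar0' stands for the guest's mapping of BAR0:

#include <stdint.h>

static inline int32_t ivshmem_own_id(volatile uint32_t *bar0)
{
    return (int32_t)bar0[8 / 4];   /* IVPosition: -1 until the handshake completes */
}

static inline void ivshmem_ring_doorbell(volatile uint32_t *bar0,
                                         uint16_t peer_id, uint16_t vector)
{
    /* high 16 bits: destination guest ID, low 16 bits: vector to trigger */
    bar0[12 / 4] = ((uint32_t)peer_id << 16) | vector;
}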
Regular Interrupts
@@ -71,7 +147,7 @@ interrupt in the destination guest.
Message Signalled Interrupts
A ivshmem device may support multiple MSI vectors. If so, the lower 16-bits
An ivshmem device may support multiple MSI vectors. If so, the lower 16-bits
written to the Doorbell register must be between 0 and the maximum number of
vectors the guest supports. The lower 16 bits written to the doorbell is the
MSI vector that will be raised in the destination guest. The number of MSI
@@ -83,14 +159,3 @@ interrupt itself should be communicated via the shared memory region. Devices
supporting multiple MSI vectors can use different vectors to indicate different
events have occurred. The semantics of interrupt vectors are left to the
user's discretion.
Usage in the Guest
------------------
The shared memory device is intended to be used with the provided UIO driver.
Very little configuration is needed. The guest should map BAR0 to access the
registers (an array of 32-bit ints allows simple writing) and map BAR2 to
access the shared memory region itself. The size of the shared memory region
is specified when the guest (or shared memory server) is started. A guest may
map the whole shared memory region or only part of it.

hmp.c
View File

@@ -569,8 +569,8 @@ void hmp_info_vnc(Monitor *mon, const QDict *qdict)
for (client = info->clients; client; client = client->next) {
monitor_printf(mon, "Client:\n");
monitor_printf(mon, " address: %s:%s\n",
client->value->base->host,
client->value->base->service);
client->value->host,
client->value->service);
monitor_printf(mon, " x509_dname: %s\n",
client->value->x509_dname ?
client->value->x509_dname : "none");
@@ -638,7 +638,7 @@ void hmp_info_spice(Monitor *mon, const QDict *qdict)
for (chan = info->channels; chan; chan = chan->next) {
monitor_printf(mon, "Channel:\n");
monitor_printf(mon, " address: %s:%s%s\n",
chan->value->base->host, chan->value->base->port,
chan->value->host, chan->value->port,
chan->value->tls ? " [tls]" : "");
monitor_printf(mon, " session: %" PRId64 "\n",
chan->value->connection_id);
@@ -841,11 +841,11 @@ void hmp_info_tpm(Monitor *mon, const QDict *qdict)
c, TpmModel_lookup[ti->model]);
monitor_printf(mon, " \\ %s: type=%s",
ti->id, TpmTypeOptionsKind_lookup[ti->options->kind]);
ti->id, TpmTypeOptionsKind_lookup[ti->options->type]);
switch (ti->options->kind) {
switch (ti->options->type) {
case TPM_TYPE_OPTIONS_KIND_PASSTHROUGH:
tpo = ti->options->passthrough;
tpo = ti->options->u.passthrough;
monitor_printf(mon, "%s%s%s%s",
tpo->has_path ? ",path=" : "",
tpo->has_path ? tpo->path : "",
@@ -1735,15 +1735,15 @@ void hmp_sendkey(Monitor *mon, const QDict *qdict)
if (*endp != '\0') {
goto err_out;
}
keylist->value->kind = KEY_VALUE_KIND_NUMBER;
keylist->value->number = value;
keylist->value->type = KEY_VALUE_KIND_NUMBER;
keylist->value->u.number = value;
} else {
int idx = index_from_key(keyname_buf);
if (idx == Q_KEY_CODE_MAX) {
goto err_out;
}
keylist->value->kind = KEY_VALUE_KIND_QCODE;
keylist->value->qcode = idx;
keylist->value->type = KEY_VALUE_KIND_QCODE;
keylist->value->u.qcode = idx;
}
if (!separator) {
@@ -1958,12 +1958,12 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
value = info->value;
if (value) {
switch (value->kind) {
switch (value->type) {
case MEMORY_DEVICE_INFO_KIND_DIMM:
di = value->dimm;
di = value->u.dimm;
monitor_printf(mon, "Memory device [%s]: \"%s\"\n",
MemoryDeviceInfoKind_lookup[value->kind],
MemoryDeviceInfoKind_lookup[value->type],
di->id ? di->id : "");
monitor_printf(mon, " addr: 0x%" PRIx64 "\n", di->addr);
monitor_printf(mon, " slot: %" PRId64 "\n", di->slot);

View File

@@ -238,10 +238,12 @@ void acpi_memory_plug_cb(ACPIREGS *ar, qemu_irq irq, MemHotplugState *mem_st,
mdev->dimm = dev;
mdev->is_enabled = true;
mdev->is_inserting = true;
if (dev->hotplugged) {
mdev->is_inserting = true;
/* do ACPI magic */
acpi_send_gpe_event(ar, irq, ACPI_MEMORY_HOTPLUG_STATUS);
/* do ACPI magic */
acpi_send_gpe_event(ar, irq, ACPI_MEMORY_HOTPLUG_STATUS);
}
return;
}

View File

@@ -923,7 +923,7 @@ static void machvirt_init(MachineState *machine)
qemu_irq pic[NUM_IRQS];
MemoryRegion *sysmem = get_system_memory();
int gic_version = vms->gic_version;
int n;
int n, max_cpus;
MemoryRegion *ram = g_new(MemoryRegion, 1);
const char *cpu_model = machine->cpu_model;
VirtBoardInfo *vbi;
@@ -957,6 +957,22 @@ static void machvirt_init(MachineState *machine)
exit(1);
}
/* The maximum number of CPUs depends on the GIC version, or on how
* many redistributors we can fit into the memory map.
*/
if (gic_version == 3) {
max_cpus = vbi->memmap[VIRT_GIC_REDIST].size / 0x20000;
} else {
max_cpus = GIC_NCPU;
}
if (smp_cpus > max_cpus) {
error_report("Number of SMP CPUs requested (%d) exceeds max CPUs "
"supported by machine 'mach-virt' (%d)",
smp_cpus, max_cpus);
exit(1);
}
vbi->smp_cpus = smp_cpus;
if (machine->ram_size > vbi->memmap[VIRT_MEM].size) {
@@ -1155,10 +1171,11 @@ static void virt_class_init(ObjectClass *oc, void *data)
mc->desc = "ARM Virtual Machine",
mc->init = machvirt_init;
/* Our maximum number of CPUs depends on how many redistributors
* we can fit into memory map
/* Start max_cpus at the maximum QEMU supports. We'll further restrict
* it later in machvirt_init, where we have more information about the
* configuration of the particular instance.
*/
mc->max_cpus = a15memmap[VIRT_GIC_REDIST].size / 0x20000;
mc->max_cpus = MAX_CPUMASK_BITS;
mc->has_dynamic_sysbus = true;
mc->block_default_type = IF_VIRTIO;
mc->no_cdrom = 1;

View File

@@ -48,6 +48,14 @@ static const int uart_intr[XLNX_ZYNQMP_NUM_UARTS] = {
21, 22,
};
static const uint64_t sdhci_addr[XLNX_ZYNQMP_NUM_SDHCI] = {
0xFF160000, 0xFF170000,
};
static const int sdhci_intr[XLNX_ZYNQMP_NUM_SDHCI] = {
48, 49,
};
typedef struct XlnxZynqMPGICRegion {
int region_index;
uint32_t address;
@@ -97,6 +105,13 @@ static void xlnx_zynqmp_init(Object *obj)
object_initialize(&s->sata, sizeof(s->sata), TYPE_SYSBUS_AHCI);
qdev_set_parent_bus(DEVICE(&s->sata), sysbus_get_default());
for (i = 0; i < XLNX_ZYNQMP_NUM_SDHCI; i++) {
object_initialize(&s->sdhci[i], sizeof(s->sdhci[i]),
TYPE_SYSBUS_SDHCI);
qdev_set_parent_bus(DEVICE(&s->sdhci[i]),
sysbus_get_default());
}
}
static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp)
@@ -258,6 +273,19 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp)
sysbus_mmio_map(SYS_BUS_DEVICE(&s->sata), 0, SATA_ADDR);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->sata), 0, gic_spi[SATA_INTR]);
for (i = 0; i < XLNX_ZYNQMP_NUM_SDHCI; i++) {
object_property_set_bool(OBJECT(&s->sdhci[i]), true,
"realized", &err);
if (err) {
error_propagate(errp, err);
return;
}
sysbus_mmio_map(SYS_BUS_DEVICE(&s->sdhci[i]), 0,
sdhci_addr[i]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->sdhci[i]), 0,
gic_spi[sdhci_intr[i]]);
}
}
static Property xlnx_zynqmp_props[] = {

View File

@@ -283,7 +283,8 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
/* Get this show started by hooking up our callbacks */
aio_context_acquire(s->ctx);
aio_set_event_notifier(s->ctx, &s->host_notifier, handle_notify);
aio_set_event_notifier(s->ctx, &s->host_notifier, true,
handle_notify);
aio_context_release(s->ctx);
return;
@@ -319,7 +320,7 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s)
aio_context_acquire(s->ctx);
/* Stop notifications for new requests from guest */
aio_set_event_notifier(s->ctx, &s->host_notifier, NULL);
aio_set_event_notifier(s->ctx, &s->host_notifier, true, NULL);
/* Drain and switch bs back to the QEMU main loop */
blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context());

View File

@@ -192,6 +192,8 @@ typedef struct FDrive {
uint8_t ro; /* Is read-only */
uint8_t media_changed; /* Is media changed */
uint8_t media_rate; /* Data rate of medium */
bool media_inserted; /* Is there a medium in the tray */
} FDrive;
static void fd_init(FDrive *drv)
@@ -261,7 +263,7 @@ static int fd_seek(FDrive *drv, uint8_t head, uint8_t track, uint8_t sect,
#endif
drv->head = head;
if (drv->track != track) {
if (drv->blk != NULL && blk_is_inserted(drv->blk)) {
if (drv->media_inserted) {
drv->media_changed = 0;
}
ret = 1;
@@ -270,7 +272,7 @@ static int fd_seek(FDrive *drv, uint8_t head, uint8_t track, uint8_t sect,
drv->sect = sect;
}
if (drv->blk == NULL || !blk_is_inserted(drv->blk)) {
if (!drv->media_inserted) {
ret = 2;
}
@@ -296,7 +298,7 @@ static void fd_revalidate(FDrive *drv)
ro = blk_is_read_only(drv->blk);
pick_geometry(drv->blk, &nb_heads, &max_track,
&last_sect, drv->drive, &drive, &rate);
if (!blk_is_inserted(drv->blk)) {
if (!drv->media_inserted) {
FLOPPY_DPRINTF("No disk in drive\n");
} else {
FLOPPY_DPRINTF("Floppy disk (%d h %d t %d s) %s\n", nb_heads,
@@ -692,7 +694,7 @@ static bool fdrive_media_changed_needed(void *opaque)
{
FDrive *drive = opaque;
return (drive->blk != NULL && drive->media_changed != 1);
return (drive->media_inserted && drive->media_changed != 1);
}
static const VMStateDescription vmstate_fdrive_media_changed = {
@@ -2184,12 +2186,21 @@ static void fdctrl_change_cb(void *opaque, bool load)
{
FDrive *drive = opaque;
drive->media_inserted = load && drive->blk && blk_is_inserted(drive->blk);
drive->media_changed = 1;
fd_revalidate(drive);
}
static bool fdctrl_is_tray_open(void *opaque)
{
FDrive *drive = opaque;
return !drive->media_inserted;
}
static const BlockDevOps fdctrl_block_ops = {
.change_media_cb = fdctrl_change_cb,
.is_tray_open = fdctrl_is_tray_open,
};
/* Init functions */
@@ -2217,6 +2228,7 @@ static void fdctrl_connect_drives(FDCtrl *fdctrl, Error **errp)
fdctrl_change_cb(drive, 0);
if (drive->blk) {
blk_set_dev_ops(drive->blk, &fdctrl_block_ops, drive);
drive->media_inserted = blk_is_inserted(drive->blk);
}
}
}

View File

@@ -798,6 +798,11 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
static void virtio_blk_save(QEMUFile *f, void *opaque)
{
VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
VirtIOBlock *s = VIRTIO_BLK(vdev);
if (s->dataplane) {
virtio_blk_data_plane_stop(s->dataplane);
}
virtio_save(vdev, f);
}
@@ -839,10 +844,7 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f,
req->next = s->rq;
s->rq = req;
virtqueue_map_sg(req->elem.in_sg, req->elem.in_addr,
req->elem.in_num, 1);
virtqueue_map_sg(req->elem.out_sg, req->elem.out_addr,
req->elem.out_num, 0);
virtqueue_map(&req->elem);
}
return 0;
@@ -975,7 +977,7 @@ static Property virtio_blk_properties[] = {
DEFINE_PROP_STRING("serial", VirtIOBlock, conf.serial),
DEFINE_PROP_BIT("config-wce", VirtIOBlock, conf.config_wce, 0, true),
#ifdef __linux__
DEFINE_PROP_BIT("scsi", VirtIOBlock, conf.scsi, 0, true),
DEFINE_PROP_BIT("scsi", VirtIOBlock, conf.scsi, 0, false),
#endif
DEFINE_PROP_BIT("request-merging", VirtIOBlock, conf.request_merging, 0,
true),

View File

@@ -931,9 +931,11 @@ static int blk_connect(struct XenDevice *xendev)
blk_attach_dev_nofail(blkdev->blk, blkdev);
blkdev->file_size = blk_getlength(blkdev->blk);
if (blkdev->file_size < 0) {
BlockDriverState *bs = blk_bs(blkdev->blk);
const char *drv_name = bs ? bdrv_get_format_name(bs) : NULL;
xen_be_printf(&blkdev->xendev, 1, "blk_getlength: %d (%s) | drv %s\n",
(int)blkdev->file_size, strerror(-blkdev->file_size),
bdrv_get_format_name(blk_bs(blkdev->blk)) ?: "-");
drv_name ?: "-");
blkdev->file_size = 0;
}

View File

@@ -842,13 +842,13 @@ static void sunkbd_handle_event(DeviceState *dev, QemuConsole *src,
ChannelState *s = (ChannelState *)dev;
int qcode, keycode;
assert(evt->kind == INPUT_EVENT_KIND_KEY);
qcode = qemu_input_key_value_to_qcode(evt->key->key);
assert(evt->type == INPUT_EVENT_KIND_KEY);
qcode = qemu_input_key_value_to_qcode(evt->u.key->key);
trace_escc_sunkbd_event_in(qcode, QKeyCode_lookup[qcode],
evt->key->down);
evt->u.key->down);
if (qcode == Q_KEY_CODE_CAPS_LOCK) {
if (evt->key->down) {
if (evt->u.key->down) {
s->caps_lock_mode ^= 1;
if (s->caps_lock_mode == 2) {
return; /* Drop second press */
@@ -862,7 +862,7 @@ static void sunkbd_handle_event(DeviceState *dev, QemuConsole *src,
}
if (qcode == Q_KEY_CODE_NUM_LOCK) {
if (evt->key->down) {
if (evt->u.key->down) {
s->num_lock_mode ^= 1;
if (s->num_lock_mode == 2) {
return; /* Drop second press */
@@ -876,7 +876,7 @@ static void sunkbd_handle_event(DeviceState *dev, QemuConsole *src,
}
keycode = qcode_to_keycode[qcode];
if (!evt->key->down) {
if (!evt->u.key->down) {
keycode |= 0x80;
}
trace_escc_sunkbd_event_out(keycode);
@@ -1035,6 +1035,7 @@ static void escc_class_init(ObjectClass *klass, void *data)
dc->reset = escc_reset;
dc->vmsd = &vmstate_escc;
dc->props = escc_properties;
set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
}
static const TypeInfo escc_info = {

View File

@@ -22,25 +22,17 @@
#include "sysemu/sysemu.h"
#include "sysemu/char.h"
//#define DEBUG_SERIAL 1
#ifdef DEBUG_SERIAL
#define DPRINTF(fmt, args...) \
do { printf("%s: " fmt , TYPE_IMX_SERIAL, ##args); } while (0)
#else
#define DPRINTF(fmt, args...) do {} while (0)
#ifndef DEBUG_IMX_UART
#define DEBUG_IMX_UART 0
#endif
/*
* Define to 1 for messages about attempts to
* access unimplemented registers or similar.
*/
//#define DEBUG_IMPLEMENTATION 1
#ifdef DEBUG_IMPLEMENTATION
# define IPRINTF(fmt, args...) \
do { fprintf(stderr, "%s: " fmt, TYPE_IMX_SERIAL, ##args); } while (0)
#else
# define IPRINTF(fmt, args...) do {} while (0)
#endif
#define DPRINTF(fmt, args...) \
do { \
if (DEBUG_IMX_UART) { \
fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_SERIAL, \
__func__, ##args); \
} \
} while (0)
static const VMStateDescription vmstate_imx_serial = {
.name = TYPE_IMX_SERIAL,
@@ -115,7 +107,8 @@ static uint64_t imx_serial_read(void *opaque, hwaddr offset,
IMXSerialState *s = (IMXSerialState *)opaque;
uint32_t c;
DPRINTF("read(offset=%x)\n", offset >> 2);
DPRINTF("read(offset=0x%" HWADDR_PRIx ")\n", offset);
switch (offset >> 2) {
case 0x0: /* URXD */
c = s->readbuff;
@@ -167,7 +160,8 @@ static uint64_t imx_serial_read(void *opaque, hwaddr offset,
return 0x0; /* TODO */
default:
IPRINTF("%s: bad offset: 0x%x\n", __func__, (int)offset);
qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
HWADDR_PRIx "\n", TYPE_IMX_SERIAL, __func__, offset);
return 0;
}
}
@@ -178,9 +172,8 @@ static void imx_serial_write(void *opaque, hwaddr offset,
IMXSerialState *s = (IMXSerialState *)opaque;
unsigned char ch;
DPRINTF("write(offset=%x, value = %x) to %s\n",
offset >> 2,
(unsigned int)value, s->chr ? s->chr->label : "NODEV");
DPRINTF("write(offset=0x%" HWADDR_PRIx ", value = 0x%x) to %s\n",
offset, (unsigned int)value, s->chr ? s->chr->label : "NODEV");
switch (offset >> 2) {
case 0x10: /* UTXD */
@@ -198,7 +191,9 @@ static void imx_serial_write(void *opaque, hwaddr offset,
case 0x20: /* UCR1 */
s->ucr1 = value & 0xffff;
DPRINTF("write(ucr1=%x)\n", (unsigned int)value);
imx_update(s);
break;
@@ -266,12 +261,14 @@ static void imx_serial_write(void *opaque, hwaddr offset,
case 0x2d: /* UTS1 */
case 0x23: /* UCR4 */
IPRINTF("Unimplemented Register %x written to\n", offset >> 2);
qemu_log_mask(LOG_UNIMP, "[%s]%s: Unimplemented reg 0x%"
HWADDR_PRIx "\n", TYPE_IMX_SERIAL, __func__, offset);
/* TODO */
break;
default:
IPRINTF("%s: Bad offset 0x%x\n", __func__, (int)offset);
qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
HWADDR_PRIx "\n", TYPE_IMX_SERIAL, __func__, offset);
}
}
@@ -284,7 +281,9 @@ static int imx_can_receive(void *opaque)
static void imx_put_data(void *opaque, uint32_t value)
{
IMXSerialState *s = (IMXSerialState *)opaque;
DPRINTF("received char\n");
s->usr1 |= USR1_RRDY;
s->usr2 |= USR2_RDR;
s->uts1 &= ~UTS1_RXEMPTY;
@@ -319,8 +318,7 @@ static void imx_serial_realize(DeviceState *dev, Error **errp)
qemu_chr_add_handlers(s->chr, imx_can_receive, imx_receive,
imx_event, s);
} else {
DPRINTF("No char dev for uart at 0x%lx\n",
(unsigned long)s->iomem.ram_addr);
DPRINTF("No char dev for uart\n");
}
}

View File

@@ -705,10 +705,7 @@ static int fetch_active_ports_list(QEMUFile *f, int version_id,
qemu_get_buffer(f, (unsigned char *)&port->elem,
sizeof(port->elem));
virtqueue_map_sg(port->elem.in_sg, port->elem.in_addr,
port->elem.in_num, 1);
virtqueue_map_sg(port->elem.out_sg, port->elem.out_addr,
port->elem.out_num, 1);
virtqueue_map(&port->elem);
/*
* Port was throttled on source machine. Let's

View File

@@ -30,8 +30,8 @@
#include "qemu/error-report.h"
#include <X11/Xlib.h>
#include <GL/gl.h>
#include <GL/glx.h>
#include <epoxy/gl.h>
#include <epoxy/glx.h>
enum {
R_CTL = 0,

View File

@@ -29,11 +29,12 @@ typedef enum IMXGPIOLevel {
} IMXGPIOLevel;
#define DPRINTF(fmt, args...) \
do { \
if (DEBUG_IMX_GPIO) { \
fprintf(stderr, "%s: " fmt , __func__, ##args); \
} \
} while (0)
do { \
if (DEBUG_IMX_GPIO) { \
fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_GPIO, \
__func__, ##args); \
} \
} while (0)
static const char *imx_gpio_reg_name(uint32_t reg)
{
@@ -176,19 +177,19 @@ static uint64_t imx_gpio_read(void *opaque, hwaddr offset, unsigned size)
if (s->has_edge_sel) {
reg_value = s->edge_sel;
} else {
qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: EDGE_SEL register not "
qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: EDGE_SEL register not "
"present on this version of GPIO device\n",
TYPE_IMX_GPIO, __func__);
}
break;
default:
qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad register at offset %d\n",
TYPE_IMX_GPIO, __func__, (int)offset);
qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
HWADDR_PRIx "\n", TYPE_IMX_GPIO, __func__, offset);
break;
}
DPRINTF("(%s) = 0x%"PRIx32"\n", imx_gpio_reg_name(offset), reg_value);
DPRINTF("(%s) = 0x%" PRIx32 "\n", imx_gpio_reg_name(offset), reg_value);
return reg_value;
}
@@ -198,7 +199,7 @@ static void imx_gpio_write(void *opaque, hwaddr offset, uint64_t value,
{
IMXGPIOState *s = IMX_GPIO(opaque);
DPRINTF("(%s, value = 0x%"PRIx32")\n", imx_gpio_reg_name(offset),
DPRINTF("(%s, value = 0x%" PRIx32 ")\n", imx_gpio_reg_name(offset),
(uint32_t)value);
switch (offset) {
@@ -238,15 +239,15 @@ static void imx_gpio_write(void *opaque, hwaddr offset, uint64_t value,
s->edge_sel = value;
imx_gpio_set_all_int_lines(s);
} else {
qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: EDGE_SEL register not "
qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: EDGE_SEL register not "
"present on this version of GPIO device\n",
TYPE_IMX_GPIO, __func__);
}
break;
default:
qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad register at offset %d\n",
TYPE_IMX_GPIO, __func__, (int)offset);
qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
HWADDR_PRIx "\n", TYPE_IMX_GPIO, __func__, offset);
break;
}

View File

@@ -21,13 +21,17 @@
#include "hw/i2c/imx_i2c.h"
#include "hw/i2c/i2c.h"
#ifndef IMX_I2C_DEBUG
#define IMX_I2C_DEBUG 0
#ifndef DEBUG_IMX_I2C
#define DEBUG_IMX_I2C 0
#endif
#if IMX_I2C_DEBUG
#define DPRINT(fmt, args...) \
do { fprintf(stderr, "%s: "fmt, __func__, ## args); } while (0)
#define DPRINTF(fmt, args...) \
do { \
if (DEBUG_IMX_I2C) { \
fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_I2C, \
__func__, ##args); \
} \
} while (0)
static const char *imx_i2c_get_regname(unsigned offset)
{
@@ -46,9 +50,6 @@ static const char *imx_i2c_get_regname(unsigned offset)
return "[?]";
}
}
#else
#define DPRINT(fmt, args...) do { } while (0)
#endif
static inline bool imx_i2c_is_enabled(IMXI2CState *s)
{
@@ -121,11 +122,11 @@ static uint64_t imx_i2c_read(void *opaque, hwaddr offset,
if (s->address == ADDR_RESET) {
/* something is wrong as the address is not set */
qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Trying to read "
qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Trying to read "
"without specifying the slave address\n",
TYPE_IMX_I2C, __func__);
} else if (s->i2cr & I2CR_MTX) {
qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Trying to read "
qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Trying to read "
"but MTX is set\n", TYPE_IMX_I2C, __func__);
} else {
/* get the next byte */
@@ -134,7 +135,7 @@ static uint64_t imx_i2c_read(void *opaque, hwaddr offset,
if (ret >= 0) {
imx_i2c_raise_interrupt(s);
} else {
qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: read failed "
qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: read failed "
"for device 0x%02x\n", TYPE_IMX_I2C,
__func__, s->address);
ret = 0xff;
@@ -143,19 +144,19 @@ static uint64_t imx_i2c_read(void *opaque, hwaddr offset,
s->i2dr_read = ret;
} else {
qemu_log_mask(LOG_UNIMP, "%s[%s]: slave mode not implemented\n",
qemu_log_mask(LOG_UNIMP, "[%s]%s: slave mode not implemented\n",
TYPE_IMX_I2C, __func__);
}
break;
default:
qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad address at offset %d\n",
TYPE_IMX_I2C, __func__, s->address);
qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad address at offset 0x%"
HWADDR_PRIx "\n", TYPE_IMX_I2C, __func__, offset);
value = 0;
break;
}
DPRINT("read %s [0x%02x] -> 0x%02x\n", imx_i2c_get_regname(offset),
(unsigned int)offset, value);
DPRINTF("read %s [0x%" HWADDR_PRIx "] -> 0x%02x\n",
imx_i2c_get_regname(offset), offset, value);
return (uint64_t)value;
}
@@ -165,8 +166,8 @@ static void imx_i2c_write(void *opaque, hwaddr offset,
{
IMXI2CState *s = IMX_I2C(opaque);
DPRINT("write %s [0x%02x] <- 0x%02x\n", imx_i2c_get_regname(offset),
(unsigned int)offset, (int)value);
DPRINTF("write %s [0x%" HWADDR_PRIx "] <- 0x%02x\n",
imx_i2c_get_regname(offset), offset, (int)value);
value &= 0xff;
@@ -264,13 +265,13 @@ static void imx_i2c_write(void *opaque, hwaddr offset,
}
}
} else {
qemu_log_mask(LOG_UNIMP, "%s[%s]: slave mode not implemented\n",
qemu_log_mask(LOG_UNIMP, "[%s]%s: slave mode not implemented\n",
TYPE_IMX_I2C, __func__);
}
break;
default:
qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad address at offset %d\n",
TYPE_IMX_I2C, __func__, s->address);
qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad address at offset 0x%"
HWADDR_PRIx "\n", TYPE_IMX_I2C, __func__, offset);
break;
}
}

View File

@@ -1616,7 +1616,6 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
HotplugHandlerClass *hhc;
Error *local_err = NULL;
PCMachineState *pcms = PC_MACHINE(hotplug_dev);
PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
PCDIMMDevice *dimm = PC_DIMM(dev);
PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
MemoryRegion *mr = ddc->get_memory_region(dimm);
@@ -1632,8 +1631,7 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
goto out;
}
pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align,
pcmc->inter_dimm_gap, &local_err);
pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, &local_err);
if (local_err) {
goto out;
}
@@ -1953,7 +1951,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
PCMachineClass *pcmc = PC_MACHINE_CLASS(oc);
HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
pcmc->inter_dimm_gap = true;
pcmc->get_hotplug_handler = mc->get_hotplug_handler;
mc->get_hotplug_handler = pc_get_hotpug_handler;
mc->cpu_index_to_socket_id = pc_cpu_index_to_socket_id;

View File

@@ -487,7 +487,6 @@ static void pc_i440fx_2_4_machine_options(MachineClass *m)
m->alias = NULL;
m->is_default = 0;
pcmc->broken_reserved_end = true;
pcmc->inter_dimm_gap = false;
SET_MACHINE_COMPAT(m, PC_COMPAT_2_4);
}

View File

@@ -385,7 +385,6 @@ static void pc_q35_2_4_machine_options(MachineClass *m)
pc_q35_2_5_machine_options(m);
m->alias = NULL;
pcmc->broken_reserved_end = true;
pcmc->inter_dimm_gap = false;
SET_MACHINE_COMPAT(m, PC_COMPAT_2_4);
}

View File

@@ -33,6 +33,7 @@
#include "trace.h"
#include "exec/address-spaces.h"
#include "sysemu/block-backend.h"
#include "qemu/error-report.h"
#include <xenguest.h>
@@ -382,13 +383,16 @@ static const VMStateDescription vmstate_xen_platform = {
}
};
static int xen_platform_initfn(PCIDevice *dev)
static void xen_platform_realize(PCIDevice *dev, Error **errp)
{
PCIXenPlatformState *d = XEN_PLATFORM(dev);
uint8_t *pci_conf;
/* Device will crash on reset if xen is not initialized */
assert(xen_enabled());
if (!xen_enabled()) {
error_setg(errp, "xen-platform device requires the Xen accelerator");
return;
}
pci_conf = dev->config;
@@ -407,8 +411,6 @@ static int xen_platform_initfn(PCIDevice *dev)
&d->mmio_bar);
platform_fixed_ioport_init(d);
return 0;
}
static void platform_reset(DeviceState *dev)
@@ -423,7 +425,7 @@ static void xen_platform_class_init(ObjectClass *klass, void *data)
DeviceClass *dc = DEVICE_CLASS(klass);
PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
k->init = xen_platform_initfn;
k->realize = xen_platform_realize;
k->vendor_id = PCI_VENDOR_ID_XEN;
k->device_id = PCI_DEVICE_ID_XEN_PLATFORM;
k->class_id = PCI_CLASS_OTHERS << 8 | 0x80;

View File

@@ -417,6 +417,7 @@ static void cmd646_ide_class_init(ObjectClass *klass, void *data)
k->config_read = cmd646_pci_config_read;
k->config_write = cmd646_pci_config_write;
dc->props = cmd646_ide_properties;
set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
}
static const TypeInfo cmd646_ide_info = {

View File

@@ -590,6 +590,7 @@ static void macio_ide_class_init(ObjectClass *oc, void *data)
dc->realize = macio_ide_realizefn;
dc->reset = macio_ide_reset;
dc->vmsd = &vmstate_pmac;
set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
}
static const TypeInfo macio_ide_type_info = {

View File

@@ -362,6 +362,7 @@ static void adb_kbd_class_init(ObjectClass *oc, void *data)
akc->parent_realize = dc->realize;
dc->realize = adb_kbd_realizefn;
set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
adc->devreq = adb_kbd_request;
dc->reset = adb_kbd_reset;
@@ -566,6 +567,7 @@ static void adb_mouse_class_init(ObjectClass *oc, void *data)
amc->parent_realize = dc->realize;
dc->realize = adb_mouse_realizefn;
set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
adc->devreq = adb_mouse_request;
dc->reset = adb_mouse_reset;

View File

@@ -119,33 +119,33 @@ static void hid_pointer_event(DeviceState *dev, QemuConsole *src,
assert(hs->n < QUEUE_LENGTH);
e = &hs->ptr.queue[(hs->head + hs->n) & QUEUE_MASK];
switch (evt->kind) {
switch (evt->type) {
case INPUT_EVENT_KIND_REL:
if (evt->rel->axis == INPUT_AXIS_X) {
e->xdx += evt->rel->value;
} else if (evt->rel->axis == INPUT_AXIS_Y) {
e->ydy += evt->rel->value;
if (evt->u.rel->axis == INPUT_AXIS_X) {
e->xdx += evt->u.rel->value;
} else if (evt->u.rel->axis == INPUT_AXIS_Y) {
e->ydy += evt->u.rel->value;
}
break;
case INPUT_EVENT_KIND_ABS:
if (evt->rel->axis == INPUT_AXIS_X) {
e->xdx = evt->rel->value;
} else if (evt->rel->axis == INPUT_AXIS_Y) {
e->ydy = evt->rel->value;
if (evt->u.rel->axis == INPUT_AXIS_X) {
e->xdx = evt->u.rel->value;
} else if (evt->u.rel->axis == INPUT_AXIS_Y) {
e->ydy = evt->u.rel->value;
}
break;
case INPUT_EVENT_KIND_BTN:
if (evt->btn->down) {
e->buttons_state |= bmap[evt->btn->button];
if (evt->btn->button == INPUT_BUTTON_WHEEL_UP) {
if (evt->u.btn->down) {
e->buttons_state |= bmap[evt->u.btn->button];
if (evt->u.btn->button == INPUT_BUTTON_WHEEL_UP) {
e->dz--;
} else if (evt->btn->button == INPUT_BUTTON_WHEEL_DOWN) {
} else if (evt->u.btn->button == INPUT_BUTTON_WHEEL_DOWN) {
e->dz++;
}
} else {
e->buttons_state &= ~bmap[evt->btn->button];
e->buttons_state &= ~bmap[evt->u.btn->button];
}
break;
@@ -223,8 +223,8 @@ static void hid_keyboard_event(DeviceState *dev, QemuConsole *src,
int scancodes[3], i, count;
int slot;
count = qemu_input_key_value_to_scancode(evt->key->key,
evt->key->down,
count = qemu_input_key_value_to_scancode(evt->u.key->key,
evt->u.key->down,
scancodes);
if (hs->n + count > QUEUE_LENGTH) {
fprintf(stderr, "usb-kbd: warning: key event queue full\n");

View File

@@ -183,8 +183,8 @@ static void ps2_keyboard_event(DeviceState *dev, QemuConsole *src,
int scancodes[3], i, count;
qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
count = qemu_input_key_value_to_scancode(evt->key->key,
evt->key->down,
count = qemu_input_key_value_to_scancode(evt->u.key->key,
evt->u.key->down,
scancodes);
for (i = 0; i < count; i++) {
ps2_put_keycode(s, scancodes[i]);
@@ -393,25 +393,25 @@ static void ps2_mouse_event(DeviceState *dev, QemuConsole *src,
if (!(s->mouse_status & MOUSE_STATUS_ENABLED))
return;
switch (evt->kind) {
switch (evt->type) {
case INPUT_EVENT_KIND_REL:
if (evt->rel->axis == INPUT_AXIS_X) {
s->mouse_dx += evt->rel->value;
} else if (evt->rel->axis == INPUT_AXIS_Y) {
s->mouse_dy -= evt->rel->value;
if (evt->u.rel->axis == INPUT_AXIS_X) {
s->mouse_dx += evt->u.rel->value;
} else if (evt->u.rel->axis == INPUT_AXIS_Y) {
s->mouse_dy -= evt->u.rel->value;
}
break;
case INPUT_EVENT_KIND_BTN:
if (evt->btn->down) {
s->mouse_buttons |= bmap[evt->btn->button];
if (evt->btn->button == INPUT_BUTTON_WHEEL_UP) {
if (evt->u.btn->down) {
s->mouse_buttons |= bmap[evt->u.btn->button];
if (evt->u.btn->button == INPUT_BUTTON_WHEEL_UP) {
s->mouse_dz--;
} else if (evt->btn->button == INPUT_BUTTON_WHEEL_DOWN) {
} else if (evt->u.btn->button == INPUT_BUTTON_WHEEL_DOWN) {
s->mouse_dz++;
}
} else {
s->mouse_buttons &= ~bmap[evt->btn->button];
s->mouse_buttons &= ~bmap[evt->u.btn->button];
}
break;

View File

@@ -191,44 +191,45 @@ static void virtio_input_handle_event(DeviceState *dev, QemuConsole *src,
virtio_input_event event;
int qcode;
switch (evt->kind) {
switch (evt->type) {
case INPUT_EVENT_KIND_KEY:
qcode = qemu_input_key_value_to_qcode(evt->key->key);
qcode = qemu_input_key_value_to_qcode(evt->u.key->key);
if (qcode && keymap_qcode[qcode]) {
event.type = cpu_to_le16(EV_KEY);
event.code = cpu_to_le16(keymap_qcode[qcode]);
event.value = cpu_to_le32(evt->key->down ? 1 : 0);
event.value = cpu_to_le32(evt->u.key->down ? 1 : 0);
virtio_input_send(vinput, &event);
} else {
if (evt->key->down) {
if (evt->u.key->down) {
fprintf(stderr, "%s: unmapped key: %d [%s]\n", __func__,
qcode, QKeyCode_lookup[qcode]);
}
}
break;
case INPUT_EVENT_KIND_BTN:
if (keymap_button[evt->btn->button]) {
if (keymap_button[evt->u.btn->button]) {
event.type = cpu_to_le16(EV_KEY);
event.code = cpu_to_le16(keymap_button[evt->btn->button]);
event.value = cpu_to_le32(evt->btn->down ? 1 : 0);
event.code = cpu_to_le16(keymap_button[evt->u.btn->button]);
event.value = cpu_to_le32(evt->u.btn->down ? 1 : 0);
virtio_input_send(vinput, &event);
} else {
if (evt->btn->down) {
if (evt->u.btn->down) {
fprintf(stderr, "%s: unmapped button: %d [%s]\n", __func__,
evt->btn->button, InputButton_lookup[evt->btn->button]);
evt->u.btn->button,
InputButton_lookup[evt->u.btn->button]);
}
}
break;
case INPUT_EVENT_KIND_REL:
event.type = cpu_to_le16(EV_REL);
event.code = cpu_to_le16(axismap_rel[evt->rel->axis]);
event.value = cpu_to_le32(evt->rel->value);
event.code = cpu_to_le16(axismap_rel[evt->u.rel->axis]);
event.value = cpu_to_le32(evt->u.rel->value);
virtio_input_send(vinput, &event);
break;
case INPUT_EVENT_KIND_ABS:
event.type = cpu_to_le16(EV_ABS);
event.code = cpu_to_le16(axismap_abs[evt->abs->axis]);
event.value = cpu_to_le32(evt->abs->value);
event.code = cpu_to_le16(axismap_abs[evt->u.abs->axis]);
event.value = cpu_to_le32(evt->u.abs->value);
virtio_input_send(vinput, &event);
break;
default:

View File

@@ -20,6 +20,7 @@
*/
#include "hw/sysbus.h"
#include "migration/migration.h"
#include "sysemu/kvm.h"
#include "kvm_arm.h"
#include "gic_internal.h"
@@ -307,11 +308,6 @@ static void kvm_arm_gic_put(GICState *s)
int num_cpu;
int num_irq;
if (!kvm_arm_gic_can_save_restore(s)) {
DPRINTF("Cannot put kernel gic state, no kernel interface");
return;
}
/* Note: We do the restore in a slightly different order than the save
* (where the order doesn't matter and is simply ordered according to the
* register offset values */
@@ -411,11 +407,6 @@ static void kvm_arm_gic_get(GICState *s)
int i;
int cpu;
if (!kvm_arm_gic_can_save_restore(s)) {
DPRINTF("Cannot get kernel gic state, no kernel interface");
return;
}
/*****************************************************************
* Distributor State
*/
@@ -503,7 +494,10 @@ static void kvm_arm_gic_reset(DeviceState *dev)
KVMARMGICClass *kgc = KVM_ARM_GIC_GET_CLASS(s);
kgc->parent_reset(dev);
kvm_arm_gic_put(s);
if (kvm_arm_gic_can_save_restore(s)) {
kvm_arm_gic_put(s);
}
}
static void kvm_arm_gic_realize(DeviceState *dev, Error **errp)
@@ -573,6 +567,12 @@ static void kvm_arm_gic_realize(DeviceState *dev, Error **errp)
KVM_DEV_ARM_VGIC_GRP_ADDR,
KVM_VGIC_V2_ADDR_TYPE_CPU,
s->dev_fd);
if (!kvm_arm_gic_can_save_restore(s)) {
error_setg(&s->migration_blocker, "This operating system kernel does "
"not support vGICv2 migration");
migrate_add_blocker(s->migration_blocker);
}
}
static void kvm_arm_gic_class_init(ObjectClass *klass, void *data)

View File

@@ -17,27 +17,17 @@
#include "hw/intc/imx_avic.h"
#define DEBUG_INT 1
#undef DEBUG_INT /* comment out for debugging */
#ifndef DEBUG_IMX_AVIC
#define DEBUG_IMX_AVIC 0
#endif
#ifdef DEBUG_INT
#define DPRINTF(fmt, args...) \
do { printf("%s: " fmt , TYPE_IMX_AVIC, ##args); } while (0)
#else
#define DPRINTF(fmt, args...) do {} while (0)
#endif
/*
* Define to 1 for messages about attempts to
* access unimplemented registers or similar.
*/
#define DEBUG_IMPLEMENTATION 1
#if DEBUG_IMPLEMENTATION
# define IPRINTF(fmt, args...) \
do { fprintf(stderr, "%s: " fmt, TYPE_IMX_AVIC, ##args); } while (0)
#else
# define IPRINTF(fmt, args...) do {} while (0)
#endif
do { \
if (DEBUG_IMX_AVIC) { \
fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_AVIC, \
__func__, ##args); \
} \
} while (0)
static const VMStateDescription vmstate_imx_avic = {
.name = TYPE_IMX_AVIC,
@@ -115,8 +105,8 @@ static uint64_t imx_avic_read(void *opaque,
{
IMXAVICState *s = (IMXAVICState *)opaque;
DPRINTF("read(offset = 0x%" HWADDR_PRIx ")\n", offset);
DPRINTF("read(offset = 0x%x)\n", offset >> 2);
switch (offset >> 2) {
case 0: /* INTCNTL */
return s->intcntl;
@@ -213,7 +203,8 @@ static uint64_t imx_avic_read(void *opaque,
return 0x4;
default:
IPRINTF("%s: Bad offset 0x%x\n", __func__, (int)offset);
qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
HWADDR_PRIx "\n", TYPE_IMX_AVIC, __func__, offset);
return 0;
}
}
@@ -225,13 +216,13 @@ static void imx_avic_write(void *opaque, hwaddr offset,
/* Vector Registers not yet supported */
if (offset >= 0x100 && offset <= 0x2fc) {
IPRINTF("%s to vector register %d ignored\n", __func__,
(unsigned int)((offset - 0x100) >> 2));
qemu_log_mask(LOG_UNIMP, "[%s]%s: vector %d ignored\n",
TYPE_IMX_AVIC, __func__, (int)((offset - 0x100) >> 2));
return;
}
DPRINTF("%s(0x%x) = %x\n", __func__,
(unsigned int)offset>>2, (unsigned int)val);
DPRINTF("(0x%" HWADDR_PRIx ") = 0x%x\n", offset, (unsigned int)val);
switch (offset >> 2) {
case 0: /* Interrupt Control Register, INTCNTL */
s->intcntl = val & (ABFEN | NIDIS | FIDIS | NIAD | FIAD | NM);
@@ -305,7 +296,8 @@ static void imx_avic_write(void *opaque, hwaddr offset,
return;
default:
IPRINTF("%s: Bad offset %x\n", __func__, (int)offset);
qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
HWADDR_PRIx "\n", TYPE_IMX_AVIC, __func__, offset);
}
imx_avic_update(s);
}

View File

@@ -1643,6 +1643,7 @@ static void openpic_class_init(ObjectClass *oc, void *data)
dc->props = openpic_properties;
dc->reset = openpic_reset;
dc->vmsd = &vmstate_openpic;
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
}
static const TypeInfo openpic_info = {

View File

@@ -275,6 +275,7 @@ static void kvm_openpic_class_init(ObjectClass *oc, void *data)
dc->realize = kvm_openpic_realize;
dc->props = kvm_openpic_properties;
dc->reset = kvm_openpic_reset;
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
}
static const TypeInfo kvm_openpic_info = {

View File

@@ -88,7 +88,8 @@ static inline DeviceState *milkymist_pfpu_create(hwaddr base,
#ifdef CONFIG_OPENGL
#include <X11/Xlib.h>
#include <GL/glx.h>
#include <epoxy/gl.h>
#include <epoxy/glx.h>
static const int glx_fbconfig_attr[] = {
GLX_GREEN_SIZE, 5,
GLX_GREEN_SIZE, 6,

View File

@@ -33,8 +33,7 @@ typedef struct pc_dimms_capacity {
} pc_dimms_capacity;
void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
MemoryRegion *mr, uint64_t align, bool gap,
Error **errp)
MemoryRegion *mr, uint64_t align, Error **errp)
{
int slot;
MachineState *machine = MACHINE(qdev_get_machine());
@@ -50,7 +49,7 @@ void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
addr = pc_dimm_get_free_addr(hpms->base,
memory_region_size(&hpms->mr),
!addr ? NULL : &addr, align, gap,
!addr ? NULL : &addr, align,
memory_region_size(mr), &local_err);
if (local_err) {
goto out;
@@ -180,7 +179,7 @@ int qmp_pc_dimm_device_list(Object *obj, void *opaque)
NULL);
di->memdev = object_get_canonical_path(OBJECT(dimm->hostmem));
info->dimm = di;
info->u.dimm = di;
elem->value = info;
elem->next = NULL;
**prev = elem;
@@ -204,9 +203,9 @@ ram_addr_t get_current_ram_size(void)
MemoryDeviceInfo *value = info->value;
if (value) {
switch (value->kind) {
switch (value->type) {
case MEMORY_DEVICE_INFO_KIND_DIMM:
size += value->dimm->size;
size += value->u.dimm->size;
break;
default:
break;
@@ -295,8 +294,8 @@ static int pc_dimm_built_list(Object *obj, void *opaque)
uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
uint64_t address_space_size,
uint64_t *hint, uint64_t align, bool gap,
uint64_t size, Error **errp)
uint64_t *hint, uint64_t align, uint64_t size,
Error **errp)
{
GSList *list = NULL, *item;
uint64_t new_addr, ret = 0;
@@ -341,15 +340,13 @@ uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
goto out;
}
if (ranges_overlap(dimm->addr, dimm_size, new_addr,
size + (gap ? 1 : 0))) {
if (ranges_overlap(dimm->addr, dimm_size, new_addr, size)) {
if (hint) {
DeviceState *d = DEVICE(dimm);
error_setg(errp, "address range conflicts with '%s'", d->id);
goto out;
}
new_addr = QEMU_ALIGN_UP(dimm->addr + dimm_size + (gap ? 1 : 0),
align);
new_addr = QEMU_ALIGN_UP(dimm->addr + dimm_size, align);
}
}
ret = new_addr;

View File

@@ -901,7 +901,7 @@ static void main_cpu_reset(void *opaque)
if (kvm_enabled()) {
/* Start running from the bootloader we wrote to end of RAM */
env->active_tc.PC = 0x40000000 + loaderparams.ram_size;
env->active_tc.PC = 0x40000000 + loaderparams.ram_low_size;
}
}

View File

@@ -16,14 +16,18 @@
#define CKIH_FREQ 26000000 /* 26MHz crystal input */
#define CKIL_FREQ 32768 /* nominal 32khz clock */
//#define DEBUG_CCM 1
#ifdef DEBUG_CCM
#define DPRINTF(fmt, args...) \
do { printf("%s: " fmt , TYPE_IMX_CCM, ##args); } while (0)
#else
#define DPRINTF(fmt, args...) do {} while (0)
#ifndef DEBUG_IMX_CCM
#define DEBUG_IMX_CCM 0
#endif
#define DPRINTF(fmt, args...) \
do { \
if (DEBUG_IMX_CCM) { \
fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_CCM, \
__func__, ##args); \
} \
} while (0)
static int imx_ccm_post_load(void *opaque, int version_id);
static const VMStateDescription vmstate_imx_ccm = {
@@ -109,7 +113,7 @@ static void update_clocks(IMXCCMState *s)
s->hsp_clk_freq = s->mcu_clk_freq / (1 + EXTRACT(s->pdr0, HSP));
s->ipg_clk_freq = s->hsp_clk_freq / (1 + EXTRACT(s->pdr0, IPG));
DPRINTF("%s: mcu %uMHz, HSP %uMHz, IPG %uHz\n", __func__,
DPRINTF("mcu %uMHz, HSP %uMHz, IPG %uHz\n",
s->mcu_clk_freq / 1000000,
s->hsp_clk_freq / 1000000,
s->ipg_clk_freq);
@@ -135,7 +139,8 @@ static uint64_t imx_ccm_read(void *opaque, hwaddr offset,
{
IMXCCMState *s = (IMXCCMState *)opaque;
DPRINTF("%s(offset=%x)", __func__, offset >> 2);
DPRINTF("(offset=0x%" HWADDR_PRIx ")\n", offset);
switch (offset >> 2) {
case 0: /* CCMR */
DPRINTF(" ccmr = 0x%x\n", s->ccmr);
@@ -166,9 +171,11 @@ static uint64_t imx_ccm_read(void *opaque, hwaddr offset,
case 23:
DPRINTF(" pcmr0 = 0x%x\n", s->pmcr0);
return s->pmcr0;
default:
qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
HWADDR_PRIx "\n", TYPE_IMX_CCM, __func__, offset);
return 0;
}
DPRINTF(" return 0\n");
return 0;
}
static void imx_ccm_write(void *opaque, hwaddr offset,
@@ -176,8 +183,9 @@ static void imx_ccm_write(void *opaque, hwaddr offset,
{
IMXCCMState *s = (IMXCCMState *)opaque;
DPRINTF("%s(offset=%x, value = %x)\n", __func__,
offset >> 2, (unsigned int)value);
DPRINTF("(offset=0x%" HWADDR_PRIx ", value = 0x%x)\n",
offset, (unsigned int)value);
switch (offset >> 2) {
case 0:
s->ccmr = CCMR_FPMF | (value & 0x3b6fdfff);
@@ -205,6 +213,8 @@ static void imx_ccm_write(void *opaque, hwaddr offset,
return;
default:
qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
HWADDR_PRIx "\n", TYPE_IMX_CCM, __func__, offset);
return;
}
update_clocks(s);
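
The DPRINTF rework in this file replaces an #ifdef'd-out macro with one whose guard is a plain constant, so the fprintf() call is always parsed and type-checked but eliminated as dead code when DEBUG_IMX_CCM is 0. A standalone sketch of the same pattern, with an illustrative device name:

#include <stdio.h>

#ifndef DEBUG_MYDEV
#define DEBUG_MYDEV 0
#endif

#define MYDEV_DPRINTF(fmt, args...) \
    do { \
        if (DEBUG_MYDEV) { \
            fprintf(stderr, "[mydev]%s: " fmt, __func__, ##args); \
        } \
    } while (0)

int main(void)
{
    /* Checked by the compiler even when DEBUG_MYDEV == 0,
     * but the call is removed as dead code. */
    MYDEV_DPRINTF("value = 0x%x\n", 0x1234u);
    return 0;
}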

File diff suppressed because it is too large.


@@ -738,6 +738,7 @@ static void cuda_class_init(ObjectClass *oc, void *data)
dc->reset = cuda_reset;
dc->vmsd = &vmstate_cuda;
dc->props = cuda_properties;
set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
}
static const TypeInfo cuda_type_info = {


@@ -393,6 +393,7 @@ static void macio_class_init(ObjectClass *klass, void *data)
k->vendor_id = PCI_VENDOR_ID_APPLE;
k->class_id = PCI_CLASS_OTHERS << 8;
dc->props = macio_properties;
set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
}
static const TypeInfo macio_oldworld_type_info = {


@@ -964,6 +964,7 @@ static void gem_reset(DeviceState *d)
{
int i;
CadenceGEMState *s = CADENCE_GEM(d);
const uint8_t *a;
DB_PRINT("\n");
@@ -982,6 +983,11 @@ static void gem_reset(DeviceState *d)
s->regs[GEM_DESCONF5] = 0x002f2145;
s->regs[GEM_DESCONF6] = 0x00000200;
/* Set MAC address */
a = &s->conf.macaddr.a[0];
s->regs[GEM_SPADDR1LO] = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24);
s->regs[GEM_SPADDR1HI] = a[4] | (a[5] << 8);
for (i = 0; i < 4; i++) {
s->sar_active[i] = false;
}


@@ -27,31 +27,29 @@
/* For crc32 */
#include <zlib.h>
#ifndef IMX_FEC_DEBUG
#define IMX_FEC_DEBUG 0
#ifndef DEBUG_IMX_FEC
#define DEBUG_IMX_FEC 0
#endif
#ifndef IMX_PHY_DEBUG
#define IMX_PHY_DEBUG 0
#endif
#if IMX_FEC_DEBUG
#define FEC_PRINTF(fmt, ...) \
do { fprintf(stderr, "%s[%s]: " fmt , TYPE_IMX_FEC, __func__, \
## __VA_ARGS__); \
#define FEC_PRINTF(fmt, args...) \
do { \
if (DEBUG_IMX_FEC) { \
fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_FEC, \
__func__, ##args); \
} \
} while (0)
#else
#define FEC_PRINTF(fmt, ...) do {} while (0)
#ifndef DEBUG_IMX_PHY
#define DEBUG_IMX_PHY 0
#endif
#if IMX_PHY_DEBUG
#define PHY_PRINTF(fmt, ...) \
do { fprintf(stderr, "%s.phy[%s]: " fmt , TYPE_IMX_FEC, __func__, \
## __VA_ARGS__); \
#define PHY_PRINTF(fmt, args...) \
do { \
if (DEBUG_IMX_PHY) { \
fprintf(stderr, "[%s.phy]%s: " fmt , TYPE_IMX_FEC, \
__func__, ##args); \
} \
} while (0)
#else
#define PHY_PRINTF(fmt, ...) do {} while (0)
#endif
static const VMStateDescription vmstate_imx_fec = {
.name = TYPE_IMX_FEC,
@@ -182,12 +180,12 @@ static uint32_t do_phy_read(IMXFECState *s, int reg)
case 18:
case 27:
case 31:
qemu_log_mask(LOG_UNIMP, "%s.phy[%s]: reg %d not implemented\n",
qemu_log_mask(LOG_UNIMP, "[%s.phy]%s: reg %d not implemented\n",
TYPE_IMX_FEC, __func__, reg);
val = 0;
break;
default:
qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad address at offset %d\n",
qemu_log_mask(LOG_GUEST_ERROR, "[%s.phy]%s: Bad address at offset %d\n",
TYPE_IMX_FEC, __func__, reg);
val = 0;
break;
@@ -230,11 +228,11 @@ static void do_phy_write(IMXFECState *s, int reg, uint32_t val)
case 18:
case 27:
case 31:
qemu_log_mask(LOG_UNIMP, "%s.phy[%s]: reg %d not implemented\n",
qemu_log_mask(LOG_UNIMP, "[%s.phy)%s: reg %d not implemented\n",
TYPE_IMX_FEC, __func__, reg);
break;
default:
qemu_log_mask(LOG_GUEST_ERROR, "%s.phy[%s]: Bad address at offset %d\n",
qemu_log_mask(LOG_GUEST_ERROR, "[%s.phy]%s: Bad address at offset %d\n",
TYPE_IMX_FEC, __func__, reg);
break;
}
@@ -357,7 +355,7 @@ static uint64_t imx_fec_read(void *opaque, hwaddr addr, unsigned size)
{
IMXFECState *s = IMX_FEC(opaque);
FEC_PRINTF("reading from @ 0x%03x\n", (int)addr);
FEC_PRINTF("reading from @ 0x%" HWADDR_PRIx "\n", addr);
switch (addr & 0x3ff) {
case 0x004:
@@ -417,8 +415,8 @@ static uint64_t imx_fec_read(void *opaque, hwaddr addr, unsigned size)
case 0x308:
return s->miigsk_enr;
default:
qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad address at offset %d\n",
TYPE_IMX_FEC, __func__, (int)addr);
qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad address at offset 0x%"
HWADDR_PRIx "\n", TYPE_IMX_FEC, __func__, addr);
return 0;
}
}
@@ -428,7 +426,7 @@ static void imx_fec_write(void *opaque, hwaddr addr,
{
IMXFECState *s = IMX_FEC(opaque);
FEC_PRINTF("writing 0x%08x @ 0x%03x\n", (int)value, (int)addr);
FEC_PRINTF("writing 0x%08x @ 0x%" HWADDR_PRIx "\n", (int)value, addr);
switch (addr & 0x3ff) {
case 0x004: /* EIR */
@@ -530,8 +528,8 @@ static void imx_fec_write(void *opaque, hwaddr addr,
s->miigsk_enr = (value & 0x2) ? 0x6 : 0;
break;
default:
qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad address at offset %d\n",
TYPE_IMX_FEC, __func__, (int)addr);
qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad address at offset 0x%"
HWADDR_PRIx "\n", TYPE_IMX_FEC, __func__, addr);
break;
}
@@ -561,7 +559,7 @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf,
FEC_PRINTF("len %d\n", (int)size);
if (!s->rx_enabled) {
qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Unexpected packet\n",
qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Unexpected packet\n",
TYPE_IMX_FEC, __func__);
return 0;
}
@@ -592,14 +590,16 @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf,
* save the remainder for when more RX buffers are
* available, or flag an error.
*/
qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Lost end of frame\n",
qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Lost end of frame\n",
TYPE_IMX_FEC, __func__);
break;
}
buf_len = (size <= s->emrbr) ? size : s->emrbr;
bd.length = buf_len;
size -= buf_len;
FEC_PRINTF("rx_bd %x length %d\n", addr, bd.length);
FEC_PRINTF("rx_bd 0x%x length %d\n", addr, bd.length);
/* The last 4 bytes are the CRC. */
if (size < 4) {
buf_len += size - 4;
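
Besides the debug-macro cleanup, the imx_fec hunks standardise guest-visible error reporting on qemu_log_mask() and print addresses with HWADDR_PRIx instead of casting them to int. A hedged sketch of the default case of an MMIO read handler in that style; the device name and register layout are illustrative:

#include "qemu/log.h"      /* qemu_log_mask(), LOG_GUEST_ERROR */
#include "exec/hwaddr.h"   /* hwaddr, HWADDR_PRIx */

static uint64_t mydev_read(void *opaque, hwaddr addr, unsigned size)
{
    switch (addr & 0x3ff) {
    case 0x004:
        return 0;          /* real register reads would go here */
    default:
        qemu_log_mask(LOG_GUEST_ERROR,
                      "[mydev]%s: Bad address at offset 0x%" HWADDR_PRIx "\n",
                      __func__, addr);
        return 0;
    }
}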


@@ -1289,6 +1289,10 @@ static uint32_t vmxnet3_get_interrupt_config(VMXNET3State *s)
static void vmxnet3_fill_stats(VMXNET3State *s)
{
int i;
if (!s->device_active)
return;
for (i = 0; i < s->txq_num; i++) {
cpu_physical_memory_write(s->txq_descr[i].tx_stats_pa,
&s->txq_descr[i].txq_stats,


@@ -123,6 +123,7 @@ static void macio_nvram_class_init(ObjectClass *oc, void *data)
dc->reset = macio_nvram_reset;
dc->vmsd = &vmstate_macio_nvram;
dc->props = macio_nvram_properties;
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
}
static const TypeInfo macio_nvram_type_info = {


@@ -146,8 +146,10 @@ static const TypeInfo grackle_pci_info = {
static void pci_grackle_class_init(ObjectClass *klass, void *data)
{
SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
DeviceClass *dc = DEVICE_CLASS(klass);
k->init = pci_grackle_init_device;
set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
}
static const TypeInfo grackle_pci_host_info = {


@@ -446,8 +446,10 @@ static const TypeInfo unin_internal_pci_host_info = {
static void pci_unin_main_class_init(ObjectClass *klass, void *data)
{
SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass);
DeviceClass *dc = DEVICE_CLASS(klass);
sbc->init = pci_unin_main_init_device;
set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
}
static const TypeInfo pci_unin_main_info = {
@@ -460,8 +462,10 @@ static const TypeInfo pci_unin_main_info = {
static void pci_u3_agp_class_init(ObjectClass *klass, void *data)
{
SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass);
DeviceClass *dc = DEVICE_CLASS(klass);
sbc->init = pci_u3_agp_init_device;
set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
}
static const TypeInfo pci_u3_agp_info = {
@@ -474,8 +478,10 @@ static const TypeInfo pci_u3_agp_info = {
static void pci_unin_agp_class_init(ObjectClass *klass, void *data)
{
SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass);
DeviceClass *dc = DEVICE_CLASS(klass);
sbc->init = pci_unin_agp_init_device;
set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
}
static const TypeInfo pci_unin_agp_info = {
@@ -488,8 +494,10 @@ static const TypeInfo pci_unin_agp_info = {
static void pci_unin_internal_class_init(ObjectClass *klass, void *data)
{
SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass);
DeviceClass *dc = DEVICE_CLASS(klass);
sbc->init = pci_unin_internal_init_device;
set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
}
static const TypeInfo pci_unin_internal_info = {


@@ -200,8 +200,14 @@ static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr,
return pci_get_long(dev->msix_pba + addr);
}
static void msix_pba_mmio_write(void *opaque, hwaddr addr,
uint64_t val, unsigned size)
{
}
static const MemoryRegionOps msix_pba_mmio_ops = {
.read = msix_pba_mmio_read,
.write = msix_pba_mmio_write,
.endianness = DEVICE_LITTLE_ENDIAN,
.valid = {
.min_access_size = 4,


@@ -847,6 +847,9 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
PCIConfigWriteFunc *config_write = pc->config_write;
Error *local_err = NULL;
AddressSpace *dma_as;
DeviceState *dev = DEVICE(pci_dev);
pci_dev->bus = bus;
if (devfn < 0) {
for(devfn = bus->devfn_min ; devfn < ARRAY_SIZE(bus->devices);
@@ -864,9 +867,17 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
PCI_SLOT(devfn), PCI_FUNC(devfn), name,
bus->devices[devfn]->name);
return NULL;
} else if (dev->hotplugged &&
pci_get_function_0(pci_dev)) {
error_setg(errp, "PCI: slot %d function 0 already ocuppied by %s,"
" new func %s cannot be exposed to guest.",
PCI_SLOT(devfn),
bus->devices[PCI_DEVFN(PCI_SLOT(devfn), 0)]->name,
name);
return NULL;
}
pci_dev->bus = bus;
pci_dev->devfn = devfn;
dma_as = pci_device_iommu_address_space(pci_dev);
@@ -2454,6 +2465,33 @@ void pci_bus_get_w64_range(PCIBus *bus, Range *range)
pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
}
static bool pcie_has_upstream_port(PCIDevice *dev)
{
PCIDevice *parent_dev = pci_bridge_get_device(dev->bus);
/* Device associated with an upstream port.
* As there are several types of these, it's easier to check the
* parent device: upstream ports are always connected to
* root or downstream ports.
*/
return parent_dev &&
pci_is_express(parent_dev) &&
parent_dev->exp.exp_cap &&
(pcie_cap_get_type(parent_dev) == PCI_EXP_TYPE_ROOT_PORT ||
pcie_cap_get_type(parent_dev) == PCI_EXP_TYPE_DOWNSTREAM);
}
PCIDevice *pci_get_function_0(PCIDevice *pci_dev)
{
if(pcie_has_upstream_port(pci_dev)) {
/* With an upstream PCIe port, we only support 1 device at slot 0 */
return pci_dev->bus->devices[0];
} else {
/* Other bus types might support multiple devices at slots 0-31 */
return pci_dev->bus->devices[PCI_DEVFN(PCI_SLOT(pci_dev->devfn), 0)];
}
}
static const TypeInfo pci_device_type_info = {
.name = TYPE_PCI_DEVICE,
.parent = TYPE_DEVICE,
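
The new pci_get_function_0() helper leans on the devfn encoding: the slot occupies the upper five bits and the function number the low three, so function 0 of any slot is PCI_DEVFN(PCI_SLOT(devfn), 0). A standalone sketch of that arithmetic, with the macros redefined locally for illustration:

#include <stdio.h>

/* Same encoding as QEMU's PCI_DEVFN/PCI_SLOT/PCI_FUNC helpers. */
#define DEVFN(slot, func)  ((((slot) & 0x1f) << 3) | ((func) & 0x07))
#define SLOT(devfn)        (((devfn) >> 3) & 0x1f)
#define FUNC(devfn)        ((devfn) & 0x07)

int main(void)
{
    int devfn = DEVFN(3, 2);            /* slot 3, function 2 -> 26 */
    int fn0   = DEVFN(SLOT(devfn), 0);  /* function 0 of that slot -> 24 */

    printf("devfn=%d slot=%d func=%d function0=%d\n",
           devfn, SLOT(devfn), FUNC(devfn), fn0);
    return 0;
}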


@@ -20,6 +20,7 @@
#include "hw/pci/pci.h"
#include "hw/pci/pci_host.h"
#include "hw/pci/pci_bus.h"
#include "trace.h"
/* debug PCI */
@@ -52,6 +53,13 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
uint32_t limit, uint32_t val, uint32_t len)
{
assert(len <= 4);
/* non-zero functions are only exposed when function 0 is present,
* allowing direct removal of unexposed functions.
*/
if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) {
return;
}
trace_pci_cfg_write(pci_dev->name, PCI_SLOT(pci_dev->devfn),
PCI_FUNC(pci_dev->devfn), addr, val);
pci_dev->config_write(pci_dev, addr, val, MIN(len, limit - addr));
@@ -63,6 +71,13 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
uint32_t ret;
assert(len <= 4);
/* non-zero functions are only exposed when function 0 is present,
* allowing direct removal of unexposed functions.
*/
if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) {
return ~0x0;
}
ret = pci_dev->config_read(pci_dev, addr, MIN(len, limit - addr));
trace_pci_cfg_read(pci_dev->name, PCI_SLOT(pci_dev->devfn),
PCI_FUNC(pci_dev->devfn), addr, ret);


@@ -249,25 +249,43 @@ void pcie_cap_slot_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
return;
}
/* TODO: multifunction hot-plug.
* Right now, only a device of function = 0 is allowed to be
* hot plugged/unplugged.
/* To enable multifunction hot-plug, we just ensure that function
* 0 is added last. When function 0 is added, we set the sltsta and
* inform the OS via event notification.
*/
assert(PCI_FUNC(pci_dev->devfn) == 0);
if (pci_get_function_0(pci_dev)) {
pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
PCI_EXP_SLTSTA_PDS);
pcie_cap_slot_event(PCI_DEVICE(hotplug_dev),
PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP);
}
}
pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
PCI_EXP_SLTSTA_PDS);
pcie_cap_slot_event(PCI_DEVICE(hotplug_dev),
PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP);
static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque)
{
object_unparent(OBJECT(dev));
}
void pcie_cap_slot_hot_unplug_request_cb(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
uint8_t *exp_cap;
PCIDevice *pci_dev = PCI_DEVICE(dev);
PCIBus *bus = pci_dev->bus;
pcie_cap_slot_hotplug_common(PCI_DEVICE(hotplug_dev), dev, &exp_cap, errp);
/* In case the user cancels a multi-function hot-add operation,
* remove the functions that are not yet exposed to the guest
* individually, without interacting with the guest.
*/
if (pci_dev->devfn &&
!bus->devices[0]) {
pcie_unplug_device(bus, pci_dev, NULL);
return;
}
pcie_cap_slot_push_attention_button(PCI_DEVICE(hotplug_dev));
}
@@ -378,11 +396,6 @@ void pcie_cap_slot_reset(PCIDevice *dev)
hotplug_event_update_event_status(dev);
}
static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque)
{
object_unparent(OBJECT(dev));
}
void pcie_cap_slot_write_config(PCIDevice *dev,
uint32_t addr, uint32_t val, int len)
{


@@ -42,11 +42,9 @@
#include "sysemu/arch_init.h"
#include "sysemu/qtest.h"
#include "exec/address-spaces.h"
#include "trace.h"
#include "elf.h"
//#define HARD_DEBUG_PPC_IO
//#define DEBUG_PPC_IO
/* SMP is not enabled, for now */
#define MAX_CPUS 1
@@ -57,26 +55,6 @@
#define KERNEL_LOAD_ADDR 0x01000000
#define INITRD_LOAD_ADDR 0x01800000
#if defined (HARD_DEBUG_PPC_IO) && !defined (DEBUG_PPC_IO)
#define DEBUG_PPC_IO
#endif
#if defined (HARD_DEBUG_PPC_IO)
#define PPC_IO_DPRINTF(fmt, ...) \
do { \
if (qemu_loglevel_mask(CPU_LOG_IOPORT)) { \
qemu_log("%s: " fmt, __func__ , ## __VA_ARGS__); \
} else { \
printf("%s : " fmt, __func__ , ## __VA_ARGS__); \
} \
} while (0)
#elif defined (DEBUG_PPC_IO)
#define PPC_IO_DPRINTF(fmt, ...) \
qemu_log_mask(CPU_LOG_IOPORT, fmt, ## __VA_ARGS__)
#else
#define PPC_IO_DPRINTF(fmt, ...) do { } while (0)
#endif
/* Constants for devices init */
static const int ide_iobase[2] = { 0x1f0, 0x170 };
static const int ide_iobase2[2] = { 0x3f6, 0x376 };
@@ -199,8 +177,7 @@ static void PREP_io_800_writeb (void *opaque, uint32_t addr, uint32_t val)
{
sysctrl_t *sysctrl = opaque;
PPC_IO_DPRINTF("0x%08" PRIx32 " => 0x%02" PRIx32 "\n",
addr - PPC_IO_BASE, val);
trace_prep_io_800_writeb(addr - PPC_IO_BASE, val);
switch (addr) {
case 0x0092:
/* Special port 92 */
@@ -327,8 +304,7 @@ static uint32_t PREP_io_800_readb (void *opaque, uint32_t addr)
printf("ERROR: unaffected IO port: %04" PRIx32 " read\n", addr);
break;
}
PPC_IO_DPRINTF("0x%08" PRIx32 " <= 0x%02" PRIx32 "\n",
addr - PPC_IO_BASE, retval);
trace_prep_io_800_readb(addr - PPC_IO_BASE, retval);
return retval;
}


@@ -597,6 +597,24 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
uint32_t vcpus_per_socket = smp_threads * smp_cores;
uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
/* Note: we keep CI large pages off for now because a 64K capable guest
* provisioned with large pages might otherwise try to map a qemu
* framebuffer (or other kind of memory mapped PCI BAR) using 64K pages
* even if that qemu runs on a 4k host.
*
* We can later add this bit back when we are confident this is not
* an issue (!HV KVM or 64K host)
*/
uint8_t pa_features_206[] = { 6, 0,
0xf6, 0x1f, 0xc7, 0x00, 0x80, 0xc0 };
uint8_t pa_features_207[] = { 24, 0,
0xf6, 0x1f, 0xc7, 0xc0, 0x80, 0xf0,
0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
0x80, 0x00, 0x80, 0x00, 0x80, 0x00 };
uint8_t *pa_features;
size_t pa_size;
_FDT((fdt_setprop_cell(fdt, offset, "reg", index)));
_FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));
@@ -625,6 +643,7 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
_FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq)));
_FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq)));
_FDT((fdt_setprop_cell(fdt, offset, "slb-size", env->slb_nr)));
_FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr)));
_FDT((fdt_setprop_string(fdt, offset, "status", "okay")));
_FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
@@ -662,6 +681,19 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
page_sizes_prop, page_sizes_prop_size)));
}
/* Do the ibm,pa-features property, adjust it for ci-large-pages */
if (env->mmu_model == POWERPC_MMU_2_06) {
pa_features = pa_features_206;
pa_size = sizeof(pa_features_206);
} else /* env->mmu_model == POWERPC_MMU_2_07 */ {
pa_features = pa_features_207;
pa_size = sizeof(pa_features_207);
}
if (env->ci_large_pages) {
pa_features[3] |= 0x20;
}
_FDT((fdt_setprop(fdt, offset, "ibm,pa-features", pa_features, pa_size)));
_FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id",
cs->cpu_index / vcpus_per_socket)));
@@ -979,7 +1011,7 @@ static void emulate_spapr_hypercall(PowerPCCPU *cpu)
#define CLEAN_HPTE(_hpte) ((*(uint64_t *)(_hpte)) &= tswap64(~HPTE64_V_HPTE_DIRTY))
#define DIRTY_HPTE(_hpte) ((*(uint64_t *)(_hpte)) |= tswap64(HPTE64_V_HPTE_DIRTY))
static void spapr_reset_htab(sPAPRMachineState *spapr)
static void spapr_alloc_htab(sPAPRMachineState *spapr)
{
long shift;
int index;
@@ -992,20 +1024,47 @@ static void spapr_reset_htab(sPAPRMachineState *spapr)
if (shift > 0) {
/* Kernel handles htab, we don't need to allocate one */
if (shift != spapr->htab_shift) {
error_setg(&error_abort, "Failed to allocate HTAB of requested size, try with smaller maxmem");
}
spapr->htab_shift = shift;
kvmppc_kern_htab = true;
} else {
/* Allocate htab */
spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr));
/* And clear it */
memset(spapr->htab, 0, HTAB_SIZE(spapr));
for (index = 0; index < HTAB_SIZE(spapr) / HASH_PTE_SIZE_64; index++) {
DIRTY_HPTE(HPTE(spapr->htab, index));
}
}
}
/*
* Clear HTAB entries during reset.
*
* If host kernel has allocated HTAB, KVM_PPC_ALLOCATE_HTAB ioctl is
* used to clear HTAB. Otherwise QEMU-allocated HTAB is cleared manually.
*/
static void spapr_reset_htab(sPAPRMachineState *spapr)
{
long shift;
int index;
shift = kvmppc_reset_htab(spapr->htab_shift);
if (shift > 0) {
if (shift != spapr->htab_shift) {
error_setg(&error_abort, "Requested HTAB allocation failed during reset");
}
/* Tell readers to update their file descriptor */
if (spapr->htab_fd >= 0) {
spapr->htab_fd_stale = true;
}
} else {
if (!spapr->htab) {
/* Allocate an htab if we don't yet have one */
spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr));
}
/* And clear it */
memset(spapr->htab, 0, HTAB_SIZE(spapr));
for (index = 0; index < HTAB_SIZE(spapr) / HASH_PTE_SIZE_64; index++) {
@@ -1710,6 +1769,7 @@ static void ppc_spapr_init(MachineState *machine)
}
spapr->htab_shift++;
}
spapr_alloc_htab(spapr);
/* Set up Interrupt Controller before we create the VCPUs */
spapr->icp = xics_system_init(machine,
@@ -2097,7 +2157,7 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
goto out;
}
pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, false, &local_err);
pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, &local_err);
if (local_err) {
goto out;
}
@@ -2225,7 +2285,11 @@ static const TypeInfo spapr_machine_info = {
},
};
#define SPAPR_COMPAT_2_4 \
HW_COMPAT_2_4
#define SPAPR_COMPAT_2_3 \
SPAPR_COMPAT_2_4 \
HW_COMPAT_2_3 \
{\
.driver = "spapr-pci-host-bridge",\
@@ -2339,11 +2403,16 @@ static const TypeInfo spapr_machine_2_3_info = {
static void spapr_machine_2_4_class_init(ObjectClass *oc, void *data)
{
static GlobalProperty compat_props[] = {
SPAPR_COMPAT_2_4
{ /* end of list */ }
};
MachineClass *mc = MACHINE_CLASS(oc);
mc->desc = "pSeries Logical Partition (PAPR compliant) v2.4";
mc->alias = "pseries";
mc->is_default = 0;
mc->compat_props = compat_props;
}
static const TypeInfo spapr_machine_2_4_info = {
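
For the ibm,pa-features fix-up in the spapr hunks above: the ci-large-pages adjustment flips a single bit in byte 3 of the chosen feature array before it is written into the device tree. A toy sketch of that byte/bit arithmetic, with values copied from the hunk:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint8_t pa_features_206[] = { 6, 0,
        0xf6, 0x1f, 0xc7, 0x00, 0x80, 0xc0 };
    int ci_large_pages = 1;            /* pretend host/guest allow it */

    if (ci_large_pages) {
        pa_features_206[3] |= 0x20;    /* advertise CI large pages */
    }
    printf("byte 3 = 0x%02x\n", pa_features_206[3]);   /* prints 0x3f */
    return 0;
}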


@@ -146,7 +146,7 @@ static int spapr_tce_table_realize(DeviceState *dev)
tcet->table = kvmppc_create_spapr_tce(tcet->liobn,
window_size,
&tcet->fd,
tcet->vfio_accel);
tcet->need_vfio);
}
if (!tcet->table) {
@@ -168,11 +168,43 @@ static int spapr_tce_table_realize(DeviceState *dev)
return 0;
}
void spapr_tce_set_need_vfio(sPAPRTCETable *tcet, bool need_vfio)
{
size_t table_size = tcet->nb_table * sizeof(uint64_t);
void *newtable;
if (need_vfio == tcet->need_vfio) {
/* Nothing to do */
return;
}
if (!need_vfio) {
/* FIXME: We don't support transition back to KVM accelerated
* TCEs yet */
return;
}
tcet->need_vfio = true;
if (tcet->fd < 0) {
/* Table is already in userspace, nothing to be done */
return;
}
newtable = g_malloc(table_size);
memcpy(newtable, tcet->table, table_size);
kvmppc_remove_spapr_tce(tcet->table, tcet->fd, tcet->nb_table);
tcet->fd = -1;
tcet->table = newtable;
}
sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
uint64_t bus_offset,
uint32_t page_shift,
uint32_t nb_table,
bool vfio_accel)
bool need_vfio)
{
sPAPRTCETable *tcet;
char tmp[64];
@@ -192,7 +224,7 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
tcet->bus_offset = bus_offset;
tcet->page_shift = page_shift;
tcet->nb_table = nb_table;
tcet->vfio_accel = vfio_accel;
tcet->need_vfio = need_vfio;
snprintf(tmp, sizeof(tmp), "tce-table-%x", liobn);
object_property_add_child(OBJECT(owner), tmp, OBJECT(tcet), NULL);


@@ -1083,6 +1083,12 @@ static void spapr_phb_add_pci_device(sPAPRDRConnector *drc,
void *fdt = NULL;
int fdt_start_offset = 0, fdt_size;
if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
sPAPRTCETable *tcet = spapr_tce_find_by_liobn(phb->dma_liobn);
spapr_tce_set_need_vfio(tcet, true);
}
if (dev->hotplugged) {
fdt = create_device_tree(&fdt_size);
fdt_start_offset = spapr_create_pci_child_dt(phb, pdev, fdt, 0);
@@ -1387,7 +1393,7 @@ static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp)
sPAPRTCETable *tcet;
uint32_t nb_table;
nb_table = SPAPR_PCI_DMA32_SIZE >> SPAPR_TCE_PAGE_SHIFT;
nb_table = sphb->dma_win_size >> SPAPR_TCE_PAGE_SHIFT;
tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
0, SPAPR_TCE_PAGE_SHIFT, nb_table, false);
if (!tcet) {
@@ -1397,7 +1403,7 @@ static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp)
}
/* Register default 32bit DMA window */
memory_region_add_subregion(&sphb->iommu_root, 0,
memory_region_add_subregion(&sphb->iommu_root, sphb->dma_win_addr,
spapr_tce_get_iommu(tcet));
}
@@ -1430,6 +1436,9 @@ static Property spapr_phb_properties[] = {
SPAPR_PCI_IO_WIN_SIZE),
DEFINE_PROP_BOOL("dynamic-reconfiguration", sPAPRPHBState, dr_enabled,
true),
/* Default DMA window is 0..1GB */
DEFINE_PROP_UINT64("dma_win_addr", sPAPRPHBState, dma_win_addr, 0),
DEFINE_PROP_UINT64("dma_win_size", sPAPRPHBState, dma_win_size, 0x40000000),
DEFINE_PROP_END_OF_LIST(),
};
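
The two new PHB properties make the default 32-bit DMA window configurable rather than hard-coded. A hedged sketch of how board or test code could override them before the device is realized; qdev_prop_set_uint64() is the generic property setter and the header path is an assumption:

#include "hw/qdev-properties.h"   /* assumed: qdev_prop_set_uint64() */

static void configure_phb_dma_window(DeviceState *phb_dev)
{
    /* Mirror the defaults above: window at 0, 1 GiB in size. */
    qdev_prop_set_uint64(phb_dev, "dma_win_addr", 0x0);
    qdev_prop_set_uint64(phb_dev, "dma_win_size", 0x40000000);
}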

Some files were not shown because too many files have changed in this diff.