Compare commits

...

108 Commits

Gerd Hoffmann
92f2b88cea cirrus: add blit_is_unsafe call to cirrus_bitblt_cputovideo (CVE-2017-2620)
CIRRUS_BLTMODE_MEMSYSSRC blits do NOT check blit destination
and blit width, at all.  Oops.  Fix it.

Security impact: high.

The missing blit destination check allows the guest to write to host
memory.  Basically the same as CVE-2014-8106 for the other blit variants.
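
A minimal sketch of the class of check blit_is_unsafe performs (names
and layout simplified; not the actual QEMU code):

    #include <stdbool.h>
    #include <stdint.h>

    /* Simplified model of the destination check the fix adds to the
     * CPU-to-video blit path; names are illustrative only. */
    static bool blit_region_is_unsafe(uint32_t dst_addr, uint32_t pitch,
                                      uint32_t width, uint32_t height,
                                      uint32_t vram_size)
    {
        if (width == 0 || height == 0 || pitch < width) {
            return true;                  /* malformed blit geometry */
        }
        uint64_t last = (uint64_t)dst_addr
                      + (uint64_t)pitch * (height - 1) + width;
        return last > vram_size;          /* would write past video RAM */
    }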

Cc: qemu-stable@nongnu.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-24 14:35:50 +01:00
Peter Maydell
10f25e4844 Merge remote-tracking branch 'remotes/yongbok/tags/mips-20170222' into staging
MIPS patches 2017-02-22

Changes:
* Add MIPS Boston board support

# gpg: Signature made Wed 22 Feb 2017 00:08:00 GMT
# gpg:                using RSA key 0x2238EB86D5F797C2
# gpg: Good signature from "Yongbok Kim <yongbok.kim@imgtec.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 8600 4CF5 3415 A5D9 4CFA  2B5C 2238 EB86 D5F7 97C2

* remotes/yongbok/tags/mips-20170222:
  hw/mips: MIPS Boston board support
  hw: xilinx-pcie: Add support for Xilinx AXI PCIe Controller
  loader: Support Flattened Image Trees (FIT images)
  dtc: Update requirement to v1.4.2
  target-mips: Provide function to test if a CPU supports an ISA
  hw/mips_gic: Update pin state on mask changes
  hw/mips_gictimer: provide API for retrieving frequency
  hw/mips_cmgcr: allow GCR base to be moved

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-23 09:59:40 +00:00
Paul Burton
d3473e147a hw/mips: MIPS Boston board support
Introduce support for emulating the MIPS Boston development board. The
Boston board is built around an FPGA & 3 PCIe controllers, one of which
is connected to an Intel EG20T Platform Controller Hub. It is used
during the development & debug of new CPUs and the software intended to
run on them, and is essentially the successor to the older MIPS Malta
board.

This patch does not implement the EG20T, instead connecting an already
supported ICH-9 AHCI controller. Whilst this isn't accurate, it's enough
for typical stock Boston software (eg. Linux kernels) to work with hard
disks given that both the ICH-9 & EG20T implement the AHCI
specification.

Boston boards typically boot kernels in the FIT image format, and this
patch will treat kernels provided to QEMU as such. When loading a kernel
directly, the board code will generate minimal firmware much as the
Malta board code does. This firmware will set up the CM, CPC & GIC
register base addresses then set argument registers & jump to the kernel
entry point. Alternatively, bootloader code may be loaded using the bios
argument in which case no firmware will be generated & execution will
proceed from the start of the boot code at the default MIPS boot
exception vector (offset 0x1fc00000 into (c)kseg1).

Currently real Boston boards are always used with FPGA bitfiles that
include a Global Interrupt Controller (GIC), so the interrupt
configuration is only defined for such cases. Therefore the board will
only allow use of CPUs which implement the CPS components, including the
GIC, and will otherwise exit with a message.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Yongbok Kim <yongbok.kim@imgtec.com>
[yongbok.kim@imgtec.com:
  isolated boston machine support for mips64el.
  updated for recent Chardev changes.
  ignore missing bios/kernel for qtest.
  added default -drive to if=ide explicitly]
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 23:49:30 +00:00
Paul Burton
62be393423 hw: xilinx-pcie: Add support for Xilinx AXI PCIe Controller
Add support for emulating the Xilinx AXI Root Port Bridge for PCI
Express as described by Xilinx' PG055 document. This is a PCIe
controller that can be used with certain series of Xilinx FPGAs, and is
used on the MIPS Boston board which will make use of this code.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
[yongbok.kim@imgtec.com:
  removed returning on !level,
  updated IRQ connection with GPIO logic,
  moved xilinx_pcie_init() to boston.c,
  replaced stw_le_p() with pci_set_word(),
  and other cosmetic changes]
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 23:49:29 +00:00
Paul Burton
51b58561c1 loader: Support Flattened Image Trees (FIT images)
Introduce support for loading Flattened Image Trees, as used by modern
U-Boot. FIT images are essentially flattened device tree files which
contain binary images such as kernels, FDTs or ramdisks along with one
or more configuration nodes describing boot configurations.

The MIPS Boston board typically boots kernels in the form of FIT images,
and will make use of this code.
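
A rough sketch of walking a FIT blob's /images node with the libfdt
subnode API this series depends on (error handling abbreviated; this is
not QEMU's actual loader):

    #include <libfdt.h>
    #include <stdio.h>

    /* List the images in a FIT blob.  fdt_first_subnode() and
     * fdt_next_subnode() are the calls that need the dtc v1.4.2 bump
     * made later in this series. */
    static int fit_list_images(const void *fit)
    {
        int images = fdt_path_offset(fit, "/images");
        if (images < 0) {
            return images;             /* not a FIT image */
        }
        for (int node = fdt_first_subnode(fit, images);
             node >= 0;
             node = fdt_next_subnode(fit, node)) {
            const char *type = fdt_getprop(fit, node, "type", NULL);
            printf("image: %s (type %s)\n",
                   fdt_get_name(fit, node, NULL), type ? type : "?");
        }
        return 0;
    }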

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
[yongbok.kim@imgtec.com:
  fixed potential memory leaks,
  isolated building option]
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 23:47:40 +00:00
Paul Burton
6e85fce022 dtc: Update requirement to v1.4.2
In order to obtain fdt_first_subnode & fdt_next_subnode symbols from
libfdt for use by a later patch, bump the requirement for dtc to v1.4.2
& the submodule to that same version.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Yongbok Kim <yongbok.kim@imgtec.com>
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 22:24:58 +00:00
Paul Burton
bed9e5ceb1 target-mips: Provide function to test if a CPU supports an ISA
Provide a new cpu_supports_isa function which allows callers to
determine whether a CPU supports one of the ISA_ flags, by testing
whether the associated struct mips_def_t sets the ISA flags in its
insn_flags field.

An example use of this is to allow boards which generate bootloader code
to determine the properties of the CPU that will be used, for example
whether the CPU is 64 bit or which architecture revision it implements.
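
A simplified model of such a helper, assuming a lookup table of CPU
definitions (the struct here is a stand-in for struct mips_def_t, not
the real target-mips definition):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    struct mips_def {                /* stand-in for struct mips_def_t */
        const char *name;
        uint64_t insn_flags;         /* ISA_* feature bits */
    };

    static bool cpu_supports_isa(const struct mips_def *defs, size_t n,
                                 const char *cpu_model, uint64_t isa)
    {
        for (size_t i = 0; i < n; i++) {
            if (!strcmp(defs[i].name, cpu_model)) {
                return (defs[i].insn_flags & isa) != 0;
            }
        }
        return false;                /* unknown CPU model */
    }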

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Leon Alrae <leon.alrae@imgtec.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 22:24:58 +00:00
Paul Burton
2e2a1b4648 hw/mips_gic: Update pin state on mask changes
If the GIC interrupt mask is changed by a write to the smask (set mask)
or rmask (reset mask) registers, we need to re-evaluate the state of the
pins/IRQs fed to the CPU. Without doing so we risk leaving a pin high
despite the interrupt that led to that state being masked, or losing
interrupts if an already pending interrupt is unmasked.
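
The shape of the fix, sketched with stand-in types (QEMU's mips_gic
code differs in detail): every mask write ends by re-deriving the pin
level from pending & mask:

    #include <stdint.h>

    struct gic {                       /* illustrative, not QEMU's struct */
        uint32_t pending;              /* latched interrupt sources */
        uint32_t mask;                 /* 1 = source enabled */
        void (*set_pin)(int n, int level);
    };

    static void gic_update_pins(struct gic *s)
    {
        for (int n = 0; n < 32; n++) {
            s->set_pin(n, ((s->pending & s->mask) >> n) & 1);
        }
    }

    static void gic_write_smask(struct gic *s, uint32_t val)
    {
        s->mask |= val;                /* set-mask: unmask these sources */
        gic_update_pins(s);            /* the fix: re-evaluate the pins */
    }

    static void gic_write_rmask(struct gic *s, uint32_t val)
    {
        s->mask &= ~val;               /* reset-mask: mask these sources */
        gic_update_pins(s);
    }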

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Leon Alrae <leon.alrae@imgtec.com>
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 22:24:58 +00:00
Paul Burton
eb90ab9437 hw/mips_gictimer: provide API for retrieving frequency
Provide a new function mips_gictimer_get_freq() which returns the
frequency at which a GIC timer will count. This will be useful for
boards which perform setup based upon this frequency.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Leon Alrae <leon.alrae@imgtec.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 22:24:58 +00:00
Paul Burton
08944be1d9 hw/mips_cmgcr: allow GCR base to be moved
Support moving the GCR base address & updating the CPU's CP0 CMGCRBase
register appropriately. This is required if a platform needs to move its
GCRs away from other memory, as the MIPS Boston development board does
to avoid its flash memory.
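
In outline (field names are stand-ins; QEMU keeps the base in the GCR
device state and mirrors it into each CPU's CP0_CMGCRBase), moving the
block means updating both sides:

    #include <stdint.h>

    struct cpu  { uint64_t cp0_cmgcrbase; };  /* stand-in for CPUMIPSState */
    struct gcrs { uint64_t base; };

    static void gcr_move_base(struct gcrs *gcr, struct cpu *cpu,
                              uint64_t new_base)
    {
        gcr->base = new_base;                 /* remap the GCR MMIO block */
        cpu->cp0_cmgcrbase = new_base >> 4;   /* CMGCRBase holds base >> 4 */
    }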

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Leon Alrae <leon.alrae@imgtec.com>
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 22:24:58 +00:00
Peter Maydell
e295a154c2 Merge remote-tracking branch 'remotes/dgilbert/tags/pull-hmp-20170221' into staging
HMP pull

Note: I had seen a failure in the vhost-user/flags-mismatch test on one
host in one build, but not on others with the same patches; and these
patches go nowhere near that code, so I think that's a separate
vhost-user issue.

# gpg: Signature made Tue 21 Feb 2017 18:49:25 GMT
# gpg:                using RSA key 0x0516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A  9FA9 0516 331E BC5B FDE7

* remotes/dgilbert/tags/pull-hmp-20170221:
  monitor: Fix crashes when using HMP commands without CPU
  monitor: add poll-* properties into query-iothreads result
  hmp: fix block_set_io_throttle

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-21 19:41:57 +00:00
Thomas Huth
854e67fea6 monitor: Fix crashes when using HMP commands without CPU
When running certain HMP commands ("info registers", "info cpustats",
"info tlb", "nmi", "memsave" or dumping virtual memory) with the "none"
machine, QEMU crashes with a segmentation fault. This happens because the
"none" machine does not have any CPUs by default, but these HMP commands
did not check for a valid CPU pointer yet. Add such checks now, so we get
an error message about the missing CPU instead.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1484309555-1935-1-git-send-email-thuth@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-21 18:29:01 +00:00
Pavel Hrdina
5fc00480ab monitor: add poll-* properties into query-iothreads result
IOThreads were recently extended with new properties that can
enable/disable and configure aio polling.  This also allows
other tools that use QEMU to probe for the existence of those new
properties via query-qmp-schema.

Signed-off-by: Pavel Hrdina <phrdina@redhat.com>
Message-Id: <3163c16d6ab4257f7be9ad44fe9cc0ce8c359e5a.1486718555.git.phrdina@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-21 18:29:01 +00:00
Eric Blake
3f35c3b166 hmp: fix block_set_io_throttle
Commit 7a9877a made the 'device' parameter to BlockIOThrottle
optional, favoring 'id' instead.  But it forgot to update the
HMP usage to set has_device, which makes all attempts to change
throttling via HMP fail with "Need exactly one of 'device' and 'id'".
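
The class of bug is easy to model: when a QAPI argument becomes
optional, its has_* flag must be set wherever the argument is filled
in.  A sketch (struct abbreviated from the generated QAPI type):

    #include <stdbool.h>

    typedef struct BlockIOThrottle {   /* abbreviated QAPI-generated type */
        bool has_device;
        char *device;
        bool has_id;
        char *id;
        /* ... throttle limits ... */
    } BlockIOThrottle;

    static void hmp_fill_device(BlockIOThrottle *arg, char *device)
    {
        arg->has_device = true;   /* the assignment the HMP path missed */
        arg->device = device;
    }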

CC: qemu-stable@nongnu.org
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170120230359.4244-1-eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-21 18:29:00 +00:00
Peter Maydell
796b288f7b Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into staging
# gpg: Signature made Tue 21 Feb 2017 15:40:05 GMT
# gpg:                using RSA key 0xBDBE7B27C0DE3057
# gpg: Good signature from "Jeffrey Cody <jcody@redhat.com>"
# gpg:                 aka "Jeffrey Cody <jeff@codyprime.org>"
# gpg:                 aka "Jeffrey Cody <codyprime@gmail.com>"
# Primary key fingerprint: 9957 4B4D 3474 90E7 9D98  D624 BDBE 7B27 C0DE 3057

* remotes/cody/tags/block-pull-request:
  qemu-options: Fix broken sheepdog URL
  mirror: do not increase offset during initial zero_or_discard phase
  QAPI: Fix blockdev-add example documentation
  iscsi: Add blockdev-add support
  iscsi: Add timeout option
  iscsi: Add header-digest option
  iscsi: Add initiator-name option
  iscsi: Handle -iscsi user/password in bdrv_parse_filename()
  iscsi: Split URL into individual options

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-21 15:48:22 +00:00
Thomas Huth
6135c5e126 qemu-options: Fix broken sheepdog URL
The sheepdog URL is broken in two ways: first, it uses a duplicated
http:// prefix; second, the website seems to have moved to
https://sheepdog.github.io/sheepdog/ instead.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:38:09 -05:00
Anton Nefedov
90ab48eb07 mirror: do not increase offset during initial zero_or_discard phase
If explicit zeroing out of the target image is required before
mirroring, it moves the block job offset counter to EOF, so the offset
and len counters count the image size twice.  There is no harm, but the
stats are confusing; specifically, management tools always report the
progress of the operation as 99%.

The patch skips the offset increase for the first "technical" pass over
the image.  This should not cause any harm.
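
A minimal sketch of the idea (counter handling simplified from
block/mirror.c):

    #include <stdbool.h>
    #include <stdint.h>

    /* Advance the job's progress counter only in the real copy pass, so
     * the initial zero/discard pass does not count the image twice. */
    static void mirror_account(uint64_t *job_offset, uint64_t bytes,
                               bool initial_zeroing_pass)
    {
        if (!initial_zeroing_pass) {
            *job_offset += bytes;
        }
    }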

Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1486045515-8009-1-git-send-email-den@openvz.org
CC: Jeff Cody <jcody@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Eric Blake <eblake@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:38:00 -05:00
Jeff Cody
b166099712 QAPI: Fix blockdev-add example documentation
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:37:46 -05:00
Kevin Wolf
31eb1202d3 iscsi: Add blockdev-add support
This adds blockdev-add support for iscsi devices.

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:37:34 -05:00
Kevin Wolf
1d56010482 iscsi: Add timeout option
This was previously only available with -iscsi. Again, after this patch,
the -iscsi option only takes effect if a URL is given. New users are
supposed to use the new driver-specific option.

All -iscsi options have a corresponding driver-specific option for the
iscsi block driver now.

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:37:26 -05:00
Kevin Wolf
81aa2a0fb5 iscsi: Add header-digest option
This was previously only available with -iscsi. Again, after this patch,
the -iscsi option only takes effect if a URL is given. New users are
supposed to use the new driver-specific option.

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:37:16 -05:00
Kevin Wolf
d4e799292c iscsi: Add initiator-name option
This was previously only available with -iscsi. Again, after this patch,
the -iscsi option only takes effect if a URL is given. New users are
supposed to use the new driver-specific option.

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:37:08 -05:00
Kevin Wolf
4317142020 iscsi: Handle -iscsi user/password in bdrv_parse_filename()
This splits the logic in the old parse_chap() function into a part that
parses the -iscsi options into the new driver-specific options, and
another part that actually applies those options (called apply_chap()
now).

Note that this means that username and password specified with -iscsi
only take effect when a URL is provided. This is intentional, -iscsi is
a legacy interface only supported for compatibility, new users should
use the proper driver-specific options.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:36:57 -05:00
Kevin Wolf
d5895fcb1d iscsi: Split URL into individual options
This introduces a .bdrv_parse_filename handler for iscsi which parses a
URL if given and translates it to individual options.
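
A rough sketch of the decomposition (QEMU's real hook uses its URI
helpers and a QDict of options; this standalone version only shows the
mapping):

    #include <stdio.h>

    /* Split "iscsi://host/target-iqn/lun" into the individual options
     * the driver takes directly.  host is char[64], target char[128]. */
    static int iscsi_split_url(const char *url,
                               char *host, char *target, int *lun)
    {
        /* e.g. "iscsi://localhost/iqn.2017-02.org.example:disk/0" */
        return sscanf(url, "iscsi://%63[^/]/%127[^/]/%d",
                      host, target, lun) == 3 ? 0 : -1;
    }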

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:36:34 -05:00
Peter Maydell
a1cf5fac2b Merge remote-tracking branch 'remotes/armbru/tags/pull-block-2017-02-21' into staging
Changes to -drive without if= and with if=scsi

# gpg: Signature made Tue 21 Feb 2017 12:22:35 GMT
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-block-2017-02-21:
  hw/i386: Deprecate -drive if=scsi with PC machine types
  hw: Deprecate -drive if=scsi with non-onboard HBAs
  hw/scsi: Concentrate -drive if=scsi auto-create in one place
  hw: Drop superfluous special checks for orphaned -drive
  blockdev: Make orphaned -drive fatal
  blockdev: Improve message for orphaned -drive
  hw/arm/highbank: Default -drive to if=ide instead of if=scsi
  hw: Default -drive to if=none instead of scsi when scsi cannot work
  hw: Default -drive to if=none instead of ide when ide cannot work
  hw/arm/cubieboard hw/arm/xlnx-ep108: Fix units_per_default_bus
  hw: Default -drive to if=ide explicitly where it works

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-21 13:58:50 +00:00
Markus Armbruster
f778a82f0c hw/i386: Deprecate -drive if=scsi with PC machine types
The PC machines (pc-q35-* pc-i440fx-* pc-* isapc xenfv) automatically
create lsi53c895a SCSI HBAs and SCSI devices to honor -drive if=scsi.
For giggles, try -drive if=scsi,bus=25,media=cdrom --- this makes QEMU
create 26 of them (buses are numbered from zero).

lsi53c895a is thoroughly obsolete (PCI Ultra2 SCSI, ca. 2000), and
currently has no maintainer in QEMU.  megasas is a better choice,
except with old OSes that lack drivers.  virtio-scsi is a much better
choice when you have a driver, but only (newish) Linux comes with one
in the box.  There is no good default that works for all guests.

Encourage users to pick a non-obsolete SCSI HBA that works for them by
deprecating -drive if=scsi.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487161136-9018-4-git-send-email-armbru@redhat.com>
Acked-By: Paolo Bonzini <pbonzini@redhat.com>
2017-02-21 13:17:45 +01:00
Markus Armbruster
a64aa5785d hw: Deprecate -drive if=scsi with non-onboard HBAs
Block backends defined with "-drive if=T" with T other than "none" are
meant to be picked up by machine initialization code: a suitable
frontend gets created and wired up automatically.

Drives defined with if=scsi are also picked up by SCSI HBAs added with
-device, unlike other interface types.  Deprecate this usage, as follows.

Create the frontends for onboard HBAs in machine initialization code,
exactly like we do for if=ide and other interface types.  Change
scsi_legacy_handle_cmdline() to create a frontend only when it's still
missing, and warn that this usage is deprecated.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487161136-9018-3-git-send-email-armbru@redhat.com>
2017-02-21 13:17:45 +01:00
Markus Armbruster
fb8b660e17 hw/scsi: Concentrate -drive if=scsi auto-create in one place
The logic to create frontends for -drive if=scsi is in SCSI HBAs.  For
all other interface types, it's in machine initialization code.

A few machine types create the SCSI HBAs necessary for that.  That's
also not done for other interface types.

I'm going to deprecate these SCSI eccentricities.  In preparation for
that, create the frontends in main() instead of the SCSI HBAs, by
calling new function scsi_legacy_handle_cmdline() there.

Note that not all SCSI HBAs create frontends.  Take care not to change
that.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487161136-9018-2-git-send-email-armbru@redhat.com>
Acked-By: Paolo Bonzini <pbonzini@redhat.com>
2017-02-21 13:17:45 +01:00
Markus Armbruster
8f2d75e81d hw: Drop superfluous special checks for orphaned -drive
We've traditionally rejected orphans here and there, but not
systematically.  For instance, the sun4m machines have an onboard SCSI
HBA (bus=0), and have always rejected bus>0.  Other machines with an
onboard SCSI HBA don't.

Commit a66c9dc made all orphans trigger a warning, and the previous
commit turned this into an error.  The checks "here and there" are now
redundant.  Drop them.

Note that the one in mips_jazz.c was wrong: it rejected bus > MAX_FD,
but MAX_FD is the number of floppy drives per bus.

Error messages change from

    $ qemu-system-x86_64 -drive if=ide,bus=2
    qemu-system-x86_64: Too many IDE buses defined (3 > 2)
    $ qemu-system-mips64 -M magnum,accel=qtest -drive if=floppy,bus=2,id=fd1
    qemu: too many floppy drives
    $ qemu-system-sparc -M LX -drive if=scsi,bus=1
    qemu: too many SCSI bus

to

    $ qemu-system-x86_64 -drive if=ide,bus=2
    qemu-system-x86_64: -drive if=ide,bus=2: machine type does not support if=ide,bus=2,unit=0
    $ qemu-system-mips64 -M magnum,accel=qtest -drive if=floppy,bus=2,id=fd1
    qemu-system-mips64: -drive if=floppy,bus=2,id=fd1: machine type does not support if=floppy,bus=2,unit=0
    $ qemu-system-sparc -M LX -drive if=scsi,bus=1
    qemu-system-sparc: -drive if=scsi,bus=1: machine type does not support if=scsi,bus=1,unit=0

Cc: John Snow <jsnow@redhat.com>
Cc: "Hervé Poussineau" <hpoussin@reactos.org>
Cc: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487153147-11530-9-git-send-email-armbru@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2017-02-21 13:17:45 +01:00
Markus Armbruster
720b8dc052 blockdev: Make orphaned -drive fatal
Block backends defined with "-drive if=T" with T other than "none" are
meant to be picked up by machine initialization code: a suitable
frontend gets created and wired up automatically.

If machine initialization code doesn't comply, the block backend
remains unused.  This triggers a warning since commit a66c9dc, v2.2.0.
Drives created by default are exempted; use -nodefaults to get rid of
them.

Turn this warning into an error.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487153147-11530-8-git-send-email-armbru@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2017-02-21 13:17:45 +01:00
Markus Armbruster
664cc623bf blockdev: Improve message for orphaned -drive
We warn when a -drive isn't supported by the machine type (commit
a66c9dc):

    $ qemu-system-x86_64 -S -display none -drive if=mtd
    Warning: Orphaned drive without device: id=mtd0,file=,if=mtd,bus=0,unit=0

Improve this to point to the offending bit of configuration:

    qemu-system-x86_64: -drive if=mtd: warning: machine type does not support if=mtd,bus=0,unit=0

Especially nice when it's hidden behind -readconfig foo.cfg:

    qemu-system-x86_64:foo.cfg:140: warning: machine type does not support if=mtd,bus=0,unit=0

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487153147-11530-7-git-send-email-armbru@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2017-02-21 13:17:40 +01:00
Markus Armbruster
2a7ae4ee50 hw/arm/highbank: Default -drive to if=ide instead of if=scsi
These machines have no onboard SCSI HBA, and no way to plug one.
-drive if=scsi therefore cannot work.  They do have an onboard IDE
controller (sysbus-ahci), but fail to honor if=ide.

Change their default to if=ide, and add a TODO comment on what needs
to be done to actually honor -drive if=ide.

Cc: Rob Herring <robh@kernel.org>
Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: qemu-arm@nongnu.org
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1487153147-11530-6-git-send-email-armbru@redhat.com>
2017-02-21 13:10:53 +01:00
Markus Armbruster
7e465513c1 hw: Default -drive to if=none instead of scsi when scsi cannot work
Block backends defined with -drive if=scsi are meant to be picked up
by machine initialization code: a suitable frontend gets created and
wired up automatically.

if=scsi drives not picked up that way can still be used with -device
as if they had if=none, but that's unclean and best avoided.  Unused
ones produce an "Orphaned drive without device" warning.

A few machine types default to if=scsi, even though they don't
actually have a SCSI HBA.  This makes no sense.  Change their default
to if=none.  Affected machines:

* aarch64/arm: realview-pbx-a9 vexpress-a9 vexpress-a15 xilinx-zynq-a9

Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: "Edgar E. Iglesias" <edgar.iglesias@gmail.com>
Cc: Alistair Francis <alistair.francis@xilinx.com>
Cc: qemu-arm@nongnu.org
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Message-Id: <1487153147-11530-5-git-send-email-armbru@redhat.com>
2017-02-21 13:10:53 +01:00
Markus Armbruster
a27fa28f03 hw: Default -drive to if=none instead of ide when ide cannot work
Block backends defined with -drive if=ide are meant to be picked up by
machine initialization code: a suitable frontend gets created and
wired up automatically.

if=ide drives not picked up that way can still be used with -device as
if they had if=none, but that's unclean and best avoided.  Unused ones
produce an "Orphaned drive without device" warning.

-drive parameter "if" is optional, and the default depends on the
machine type.  If a machine type doesn't specify a default, the
default is "ide".

Many machine types implicitly default to if=ide that way, even though
they don't actually have an IDE controller.  This makes no sense.

Change the implicit default to if=none.  Affected machines:

* all targets: none
* aarch64/arm: akita ast2500 canon cheetah collie connex imx25
  integratorcp kzm lm3s6965evb lm3s811evb mainstone musicpal n800 n810
  netduino2 nuri palmetto realview romulus sabrelite smdkc210 sx1 sx1
  verdex z2
* cris: axis-dev88
* i386/x86_64: xenpv
* lm32: lm32-evr lm32-uclinux milkymist
* m68k: an5206 dummy mcf5208evb
* microblaze/microblazeel: petalogix-ml605 petalogix-s3adsp1800
* mips/mips64/mips64el/mipsel: mipssim
* moxie: moxiesim
* or32: or32-sim
* ppc/ppc64/ppcemb: bamboo ref405ep taihu virtex-ml507
* ppc/ppc64: mpc8544ds ppce500
* sh4/sh4eb: shix
* sparc: leon3_generic
* sparc64: niagara
* tricore: tricore_testboard
* unicore32: puv3
* xtensa/xtensaeb: kc705 lx200 lx60 ml605 sim

None of these machines have an IDE controller, let alone code to
honor if=ide.

Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: qemu-arm@nongnu.org
Cc: Edgar E. Iglesias <edgar.iglesias@gmail.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Anthony Perard <anthony.perard@citrix.com>
Cc: xen-devel@lists.xensource.com
Cc: Michael Walle <michael@walle.cc>
Cc: Laurent Vivier <laurent@vivier.eu>
Cc: Anthony Green <green@moxielogic.com>
Cc: Jia Liu <proljc@gmail.com>
Cc: Alexander Graf <agraf@suse.de>
Cc: qemu-ppc@nongnu.org
Cc: Magnus Damm <magnus.damm@gmail.com>
Cc: Fabien Chouteau <chouteau@adacore.com>
Cc: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Cc: Artyom Tarasenko <atar4qemu@gmail.com>
Cc: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Cc: Guan Xuetao <gxt@mprc.pku.edu.cn>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Acked-By: Artyom Tarasenko <atar4qemu@gmail.com>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1487153147-11530-4-git-send-email-armbru@redhat.com>
2017-02-21 13:10:53 +01:00
Markus Armbruster
e0319b0302 hw/arm/cubieboard hw/arm/xlnx-ep108: Fix units_per_default_bus
Machine types cubieboard, xlnx-ep108, xlnx-zcu102 have an onboard AHCI
controller, but neglect to set their MachineClass member
units_per_default_bus = 1.  This permits -drive if=ide,unit=1, which
makes no sense for AHCI.  It also screws up index=N for odd N, because
it gets desugared to unit=1,bus=N/2 (sketched below).

Doesn't really matter, because these machine types fail to honor
-drive if=ide.  Add the missing units_per_default_bus = 1 anyway,
along with a TODO comment on what needs to be done for -drive if=ide.

Also set block_default_type = IF_IDE explicitly.  It's currently the
default, but the next commit will change it to something more
sensible, and we want to keep the IF_IDE default for these three
machines.  See also the previous commit.
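
The desugaring mentioned above is simple to sketch (following
blockdev.c's index-to-bus/unit mapping, simplified):

    /* index=N maps to a (bus, unit) pair; with units_per_default_bus = 1
     * every index lands on unit 0 of its own bus, which suits AHCI. */
    static void index_to_bus_unit(int index, int units_per_bus,
                                  int *bus, int *unit)
    {
        *bus  = index / units_per_bus;   /* index=3, 2 units/bus -> bus 1 */
        *unit = index % units_per_bus;   /* ... unit 1: invalid for AHCI */
    }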

Cc: Beniamino Galvani <b.galvani@gmail.com>
Cc: Alistair Francis <alistair.francis@xilinx.com>
Cc: "Edgar E. Iglesias" <edgar.iglesias@gmail.com>
Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: qemu-arm@nongnu.org
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Acked-by: Alistair Francis <alistair.francis@xilinx.com>
Message-Id: <1487153147-11530-3-git-send-email-armbru@redhat.com>
2017-02-21 13:10:53 +01:00
Markus Armbruster
2059839baa hw: Default -drive to if=ide explicitly where it works
Block backends defined with -drive if=ide are meant to be picked up by
machine initialization code: a suitable frontend gets created and
wired up automatically.

if=ide drives not picked up that way can still be used with -device as
if they had if=none, but that's unclean and best avoided.  Unused ones
produce an "Orphaned drive without device" warning.

-drive parameter "if" is optional, and the default depends on the
machine type.  If a machine type doesn't specify a default, the
default is "ide".

Many machine types default to if=ide, even though they don't actually
have an IDE controller.  A future patch will change these defaults to
something more sensible.  To prepare for it, this patch makes default
"ide" explicit for the machines that actually pick up if=ide drives:

* alpha: clipper
* arm/aarch64: spitz borzoi terrier tosa
* i386/x86_64: generic-pc-machine (with concrete subtypes pc-q35-*
  pc-i440fx-* pc-* isapc xenfv)
* mips64el: fulong2e
* mips/mipsel/mips64el: malta mips
* ppc/ppc64: mac99 g3beige prep
* sh4/sh4eb: r2d
* sparc64: sun4u sun4v

Note that ppc64 machine powernv already sets an "ide" default
explicitly.  Its IDE controller isn't implemented, yet.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1487153147-11530-2-git-send-email-armbru@redhat.com>
2017-02-21 13:10:53 +01:00
Peter Maydell
a0775e28cd Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
Pull request

v2:
 * Rebased to resolve scsi conflicts

# gpg: Signature made Tue 21 Feb 2017 11:56:24 GMT
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/block-pull-request: (24 commits)
  coroutine-lock: make CoRwlock thread-safe and fair
  coroutine-lock: add mutex argument to CoQueue APIs
  coroutine-lock: place CoMutex before CoQueue in header
  test-aio-multithread: add performance comparison with thread-based mutexes
  coroutine-lock: add limited spinning to CoMutex
  coroutine-lock: make CoMutex thread-safe
  block: document fields protected by AioContext lock
  async: remove unnecessary inc/dec pairs
  aio-posix: partially inline aio_dispatch into aio_poll
  block: explicitly acquire aiocontext in aio callbacks that need it
  block: explicitly acquire aiocontext in bottom halves that need it
  block: explicitly acquire aiocontext in callbacks that need it
  block: explicitly acquire aiocontext in timers that need it
  aio: push aio_context_acquire/release down to dispatching
  qed: introduce qed_aio_start_io and qed_aio_next_io_cb
  blkdebug: reschedule coroutine on the AioContext it is running on
  coroutine-lock: reschedule coroutine on the AioContext it was running on
  nbd: convert to use qio_channel_yield
  io: make qio_channel_yield aware of AioContexts
  io: add methods to set I/O handlers on AioContext
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-21 11:58:03 +00:00
Paolo Bonzini
a7b91d35ba coroutine-lock: make CoRwlock thread-safe and fair
This adds a CoMutex around the existing CoQueue.  Because the write-side
can just take CoMutex, the old "writer" field is not necessary anymore.
Instead of removing it altogether, count the number of pending writers
during a read-side critical section and forbid further readers from
entering.
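
The fairness scheme can be modeled outside QEMU; here is a sketch using
pthreads as a stand-in for the coroutine primitives (the real code uses
CoMutex/CoQueue):

    #include <pthread.h>

    typedef struct {
        pthread_mutex_t mutex;     /* plays the role of the new CoMutex */
        pthread_cond_t  cond;      /* plays the role of the CoQueue */
        int readers;               /* active readers */
        int pending_writers;       /* waiting or active writers */
    } RwLock;

    static void rd_lock(RwLock *l)
    {
        pthread_mutex_lock(&l->mutex);
        while (l->pending_writers > 0) {      /* forbid further readers */
            pthread_cond_wait(&l->cond, &l->mutex);
        }
        l->readers++;
        pthread_mutex_unlock(&l->mutex);
    }

    static void rd_unlock(RwLock *l)
    {
        pthread_mutex_lock(&l->mutex);
        if (--l->readers == 0) {
            pthread_cond_broadcast(&l->cond); /* wake a pending writer */
        }
        pthread_mutex_unlock(&l->mutex);
    }

    static void wr_lock(RwLock *l)       /* write side just takes the mutex */
    {
        pthread_mutex_lock(&l->mutex);
        l->pending_writers++;
        while (l->readers > 0) {
            pthread_cond_wait(&l->cond, &l->mutex);
        }
        /* mutex stays held for the duration of the write */
    }

    static void wr_unlock(RwLock *l)
    {
        l->pending_writers--;
        pthread_cond_broadcast(&l->cond);
        pthread_mutex_unlock(&l->mutex);
    }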

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213181244.16297-7-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
1ace7ceac5 coroutine-lock: add mutex argument to CoQueue APIs
All that CoQueue needs in order to become thread-safe is help
from an external mutex.  Add this to the API.
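
The resulting usage pattern is the classic condition-variable shape; a
sketch assuming QEMU's coroutine header (not standalone-compilable):

    #include "qemu/coroutine.h"

    /* qemu_co_queue_wait() now takes the caller's CoMutex: it drops the
     * mutex while the coroutine sleeps and re-takes it on wakeup. */
    static void coroutine_fn wait_for_condition(CoMutex *mutex,
                                                CoQueue *queue,
                                                bool *condition)
    {
        qemu_co_mutex_lock(mutex);
        while (!*condition) {
            qemu_co_queue_wait(queue, mutex);  /* atomically unlock+sleep */
        }
        /* ... critical section ... */
        qemu_co_mutex_unlock(mutex);
    }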

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213181244.16297-6-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
f8c6e1cbc3 coroutine-lock: place CoMutex before CoQueue in header
This will avoid forward references in the next patch.  It is also
more logical because CoQueue is no longer the basic primitive.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213181244.16297-5-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
c05df34a87 test-aio-multithread: add performance comparison with thread-based mutexes
Add two implementations of the same benchmark as the previous patch,
but using pthreads.  One uses a normal QemuMutex, the other is Linux
only and implements a fair mutex based on MCS locks and futexes.
This shows that the slower performance of the 5-thread case is due to
the fairness of CoMutex, rather than to coroutines.  If fairness does
not matter, as is the case with two threads, CoMutex can actually be
faster than pthreads.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213181244.16297-4-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
480cff6322 coroutine-lock: add limited spinning to CoMutex
Running a very small critical section on pthread_mutex_t and CoMutex
shows that pthread_mutex_t is much faster because it doesn't actually
go to sleep.  What happens is that the critical section is shorter
than the latency of entering the kernel and thus FUTEX_WAIT always
fails.  With CoMutex there is no such latency, but you still want to
avoid the wait and wakeup.  So introduce it artificially, by spinning briefly.

This only works with one waiter; because CoMutex is fair, it will
always have more waits and wakeups than a pthread_mutex_t.
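
A toy standalone version of the bounded-spin idea using C11 atomics
(the real CoMutex only spins under specific conditions):

    #include <stdatomic.h>
    #include <stdbool.h>

    /* Try the fast path a few times before paying for a wait/wakeup. */
    static bool lock_try_spin(atomic_int *locked, int max_spins)
    {
        for (int i = 0; i < max_spins; i++) {
            int expected = 0;
            if (atomic_compare_exchange_weak(locked, &expected, 1)) {
                return true;       /* got the lock without sleeping */
            }
        }
        return false;              /* fall back to the slow (wait) path */
    }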

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213181244.16297-3-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
fed20a70e3 coroutine-lock: make CoMutex thread-safe
This uses the lock-free mutex described in the paper '"Blocking without
Locking", or LFTHREADS: A lock-free thread library' by Gidenstam and
Papatriantafilou.  The same technique is used in OSv, and in fact
the code is essentially a conversion to C of OSv's code.

[Added missing coroutine_fn in tests/test-aio-multithread.c.
--Stefan]

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213181244.16297-2-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
91bcea4899 block: document fields protected by AioContext lock
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-19-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
bd451435c0 async: remove unnecessary inc/dec pairs
Pull the increment/decrement pair out of aio_bh_poll and into the
callers.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-18-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
a153bf52b3 aio-posix: partially inline aio_dispatch into aio_poll
This patch prepares for the removal of unnecessary lockcnt inc/dec pairs.
Extract the dispatching loop for file descriptor handlers into a new
function aio_dispatch_handlers, and then inline aio_dispatch into
aio_poll.

aio_dispatch can now become void.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-17-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:39 +00:00
Paolo Bonzini
b9e413dd37 block: explicitly acquire aiocontext in aio callbacks that need it
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-16-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:39 +00:00
Paolo Bonzini
1919631e6b block: explicitly acquire aiocontext in bottom halves that need it
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-15-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:39 +00:00
Paolo Bonzini
9d45665448 block: explicitly acquire aiocontext in callbacks that need it
This covers both file descriptor callbacks and polling callbacks,
since they execute related code.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-14-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:36 +00:00
Paolo Bonzini
2f47da5f7f block: explicitly acquire aiocontext in timers that need it
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-13-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:08 +00:00
Paolo Bonzini
0836c72f70 aio: push aio_context_acquire/release down to dispatching
The AioContext data structures are now protected by list_lock and/or
they are walked with FOREACH_RCU primitives.  There is no longer any
need to acquire the AioContext for the entire duration of aio_dispatch.
Instead, just acquire it before and after invoking the callbacks.
The next step is then to push it further down.
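
Schematically (types reduced to the relevant fields; the real loop
lives in aio-posix.c):

    typedef struct AioContext AioContext;      /* opaque here */
    typedef struct AioHandler {
        void (*io_read)(void *opaque);
        void *opaque;
    } AioHandler;

    void aio_context_acquire(AioContext *ctx);
    void aio_context_release(AioContext *ctx);

    /* Only the callback itself runs under the AioContext lock; the list
     * walk is protected by list_lock/RCU instead. */
    static void dispatch_one(AioContext *ctx, AioHandler *node)
    {
        aio_context_acquire(ctx);
        node->io_read(node->opaque);
        aio_context_release(ctx);
    }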

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-12-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:08 +00:00
Paolo Bonzini
b20123a28b qed: introduce qed_aio_start_io and qed_aio_next_io_cb
qed_aio_start_io and qed_aio_next_io will not have to acquire/release
the AioContext, while qed_aio_next_io_cb will.  Split the functionality
and gain a little type-safety in the process.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-11-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:08 +00:00
Paolo Bonzini
e5c67ab552 blkdebug: reschedule coroutine on the AioContext it is running on
Keep the coroutine on the same AioContext.  Without this change,
there would be a race between yielding the coroutine and reentering it.
While the race cannot happen now, because the code only runs from a single
AioContext, this will change with multiqueue support in the block layer.

While doing the change, replace custom bottom half with aio_co_schedule.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-10-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:08 +00:00
Paolo Bonzini
a9d9235567 coroutine-lock: reschedule coroutine on the AioContext it was running on
As a small step towards the introduction of multiqueue, we want
coroutines to remain on the same AioContext that started them,
unless they are moved explicitly with e.g. aio_co_schedule.  This patch
avoids that coroutines switch AioContext when they use a CoMutex.
For now it does not make much of a difference, because the CoMutex
is not thread-safe and the AioContext itself is used to protect the
CoMutex from concurrent access.  However, this is going to change.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-9-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:08 +00:00
Paolo Bonzini
ff82911cd3 nbd: convert to use qio_channel_yield
In the client, read the reply headers from a coroutine, switching the
read side between the "read header" coroutine and the I/O coroutine that
reads the body of the reply.

In the server, if more requests can be read, a new "read request"
coroutine is created as soon as a request has been read.  Otherwise,
the new coroutine is created in nbd_request_put.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-8-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:08 +00:00
Paolo Bonzini
c4c497d27f io: make qio_channel_yield aware of AioContexts
Support separate coroutines for reading and writing, and place the
read/write handlers on the AioContext that the QIOChannel is registered
with.

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213135235.12274-7-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:07 +00:00
Paolo Bonzini
bf88c1247f io: add methods to set I/O handlers on AioContext
This is in preparation for making qio_channel_yield work on
AioContexts other than the main one.

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213135235.12274-6-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:07 +00:00
Paolo Bonzini
934ebf48c0 test-thread-pool: use generic AioContext infrastructure
Once the thread pool starts using aio_co_wake, it will also need
qemu_get_current_aio_context().  Make test-thread-pool create
an AioContext with qemu_init_main_loop, so that stubs/iothread.c
and tests/iothread.c can provide the rest.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213135235.12274-5-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:07 +00:00
Paolo Bonzini
35f106e684 block-backend: allow blk_prw from coroutine context
qcow2_create2 calls this.  Do not run a nested event loop, as that
breaks when aio_co_wake tries to queue the coroutine on the co_queue_wakeup
list of the currently running one.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213135235.12274-4-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:07 +00:00
Paolo Bonzini
0c330a734b aio: introduce aio_co_schedule and aio_co_wake
aio_co_wake provides the infrastructure to start a coroutine on a "home"
AioContext.  It will be used by CoMutex and CoQueue, so that coroutines
don't jump from one context to another when they go to sleep on a
mutex or waitqueue.  However, it can also be used as a more efficient
alternative to one-shot bottom halves, and saves the effort of tracking
which AioContext a coroutine is running on.

aio_co_schedule is the part of aio_co_wake that starts a coroutine
on a remote AioContext, but it is also useful to implement e.g.
bdrv_set_aio_context callbacks.

The implementation of aio_co_schedule is based on a lock-free
multiple-producer, single-consumer queue.  The multiple producers use
cmpxchg to add to a LIFO stack.  The consumer (a per-AioContext bottom
half) grabs all items added so far, inverts the list to make it FIFO,
and goes through it one item at a time until it's empty.  The data
structure was inspired by OSv, which uses it in the very code we'll
"port" to QEMU for the thread-safe CoMutex.

Most of the new code is really tests.
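
A standalone model of that queue with C11 atomics (QEMU threads the
link through the coroutine object itself):

    #include <stdatomic.h>
    #include <stddef.h>

    typedef struct Node { struct Node *next; } Node;

    static void mpsc_push(_Atomic(Node *) *head, Node *n)  /* any producer */
    {
        Node *old = atomic_load(head);
        do {
            n->next = old;                    /* cmpxchg onto a LIFO stack */
        } while (!atomic_compare_exchange_weak(head, &old, n));
    }

    static Node *mpsc_take_all(_Atomic(Node *) *head)      /* consumer only */
    {
        Node *lifo = atomic_exchange(head, NULL); /* grab all items at once */
        Node *fifo = NULL;
        while (lifo) {                            /* invert LIFO -> FIFO */
            Node *next = lifo->next;
            lifo->next = fifo;
            fifo = lifo;
            lifo = next;
        }
        return fifo;
    }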

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213135235.12274-3-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:07 +00:00
Paolo Bonzini
c2b38b277a block: move AioContext, QEMUTimer, main-loop to libqemuutil
AioContext is fairly self-contained; the only dependency is QEMUTimer,
but that in turn doesn't need anything else.  So move them out of block-obj-y
to avoid introducing a dependency from io/ to block-obj-y.

main-loop and its dependency iohandler also need to be moved, because
later in this series io/ will call iohandler_get_aio_context.

[Changed copyright "the QEMU team" to "other QEMU contributors" as
suggested by Daniel Berrange and agreed by Paolo.
--Stefan]

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213135235.12274-2-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:07 +00:00
Peter Maydell
b856256179 Merge remote-tracking branch 'remotes/kraxel/tags/pull-usb-20170221-1' into staging
xhci: add qemu-xhci device, some followup cleanups.
ccid: better sanity checking.
ehci: fix memory leak
ohci: bugfixes.

# gpg: Signature made Tue 21 Feb 2017 07:14:35 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-usb-20170221-1:
  usb-ccid: add check message size checks
  usb-ccid: move header size check
  usb-ccid: better bulk_out error handling
  xhci: drop via vendor command handling
  xhci: fix nec vendor quirk handling
  xhci: add qemu xhci controller
  xhci: drop ER_FULL_HACK workaround
  xhci: apply limits to loops
  usb: ohci: limit the number of link eds
  usb: ohci: fix error return code in servicing iso td
  usb: ehci: fix memory leak in ehci

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-21 09:35:15 +00:00
Gerd Hoffmann
31fb4444a4 usb-ccid: add check message size checks
Check the message size too when figuring out whether we should expect
more data.  Fix the debug message to show useful data: p->iov.size is
fixed anyway if we land there, so print how much we got so far instead.

Also check announced message size against actual message size.  That
is a more general fix for CVE-2017-5898 than commit "c7dfbf3 usb: ccid:
check ccid apdu length".

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487250819-23764-4-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Gerd Hoffmann
7569c54642 usb-ccid: move header size check
Move up the header size check, so we can use header fields in sanity checks
(in followup patches).  Also reword the debug message.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487250819-23764-3-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Gerd Hoffmann
0aeebc73b7 usb-ccid: better bulk_out error handling
Add an err goto label that we can jump to from all error conditions.
STALL the request on all errors.  Reset the position on all errors.

Normal request processing is no longer in an else branch, so that code
is reindented; there are no code changes in that part of the code
though.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487250819-23764-2-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Gerd Hoffmann
558ff1b6ef xhci: drop via vendor command handling
Seems pretty pointless; we don't emulate a VIA xHCI controller.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1486382139-30630-5-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Gerd Hoffmann
2992d6b49c xhci: fix nec vendor quirk handling
Only the TYPE_NEC_XHCI controller will have the nec vendor quirks.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1486382139-30630-4-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Gerd Hoffmann
72a810f411 xhci: add qemu xhci controller
Turn the existing TYPE_XHCI into an abstract base class.
Create two child classes, TYPE_NEC_XHCI (same name as old xhci
controller) and TYPE_QEMU_XHCI (using an ID from our namespace).

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Message-id: 1486382139-30630-3-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Gerd Hoffmann
898248a329 xhci: drop ER_FULL_HACK workaround
The nec/renesas driver problems have finally been debugged and root
caused; see commit "7da76e1 xhci: fix event queue IRQ handling".

It's pretty clear now that
 (a) The whole "driver can't handle ring full" story is most likely
     wrong.
 (b) The ER_FULL_HACK workaround, based on that false assumption, doesn't
     help much.  It avoids the driver crashing (without commit 7da76e1),
     but it doesn't make usb work.
 (c) With 7da76e1 applied it doesn't trigger any more.

So, let's kill it.  Or, to be exact, let's almost kill it.  Some data
fields are kept unused in the state struct, for live migration backward
compatibility.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1486382139-30630-2-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Gerd Hoffmann
f89b60f6e5 xhci: apply limits to loops
Limits should be big enough that a normal guest should not hit them.
Add a tracepoint to log them, just in case.  Also, while we're at it,
log the existing link TRB limit too.
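
The general shape of such a defense, sketched (the actual limits and
tracepoints live in hw/usb/hcd-xhci.c):

    enum { LINK_LIMIT = 32 };   /* illustrative bound, not the real value */

    /* Bound a guest-controlled list walk; a looping ring trips the limit. */
    static int walk_ring(int (*fetch_next)(void *opaque), void *opaque)
    {
        for (int hops = 0; hops < LINK_LIMIT; hops++) {
            if (fetch_next(opaque) == 0) {
                return 0;       /* normal end of the ring */
            }
        }
        /* the real code emits a tracepoint here before bailing out */
        return -1;
    }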

Reported-by: 李强 <liqiang6-s@360.cn>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1486383669-6421-1-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Li Qiang
95ed56939e usb: ohci: limit the number of link eds
The guest may build an infinite loop with linked EDs.  This patch
limits the number of linked EDs to avoid this.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Message-id: 5899a02e.45ca240a.6c373.93c1@mx.google.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-21 08:11:42 +01:00
Li Qiang
26f670a244 usb: ohci: fix error return code in servicing iso td
It should return 1 if an error occurs when reading an iso TD.
This avoids an infinite-loop issue in ohci_service_ed_list.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Message-id: 5899ac3e.1033240a.944d5.9a2d@mx.google.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-21 08:11:42 +01:00
Li Qiang
d710e1e7bd usb: ehci: fix memory leak in ehci
The usb_ehci_init function initializes 's->ipacket', but there
is no corresponding function to free it.  As the EHCI device can be
hotplugged and unplugged, this leaks host memory.  In order to keep the
hierarchy clean, add an EHCI PCI finalize function, then call the
cleanup function from the EHCI device.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Message-id: 589a85b8.3c2b9d0a.b8e6.1434@mx.google.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-21 08:11:42 +01:00
Peter Maydell
56f9e46b84 Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2017-02-20' into staging
QAPI patches for 2017-02-20

# gpg: Signature made Mon 20 Feb 2017 13:31:12 GMT
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-qapi-2017-02-20:
  Makefile: Put VERSION info into version.texi rather than using -D
  qapi2texi: replace quotation by bold section name

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-20 17:42:47 +00:00
Peter Maydell
c8f21dbfc3 Merge remote-tracking branch 'remotes/kraxel/tags/pull-ui-20170220-1' into staging
ui: opengl fixes, for spice and egl-helpers.

# gpg: Signature made Mon 20 Feb 2017 13:12:46 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-ui-20170220-1:
  egl-helpers: Support newer MESA versions
  spice: allow to specify drm rendernode

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-20 16:31:38 +00:00
Peter Maydell
6753e4ed15 Merge remote-tracking branch 'remotes/kraxel/tags/pull-input-20170220-1' into staging
input: add wctablet, ps2 fix

# gpg: Signature made Mon 20 Feb 2017 11:42:12 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-input-20170220-1:
  Add wctablet device
  ps2: fix mouse mappings for right/middle button

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-20 13:38:34 +00:00
Peter Maydell
fea346f569 Makefile: Put VERSION info into version.texi rather than using -D
Unfortunately some older versions of makeinfo don't correctly
handle the -D command line option and fail to set the variable.
This then causes them to complain
 docs/qemu-ga-ref.texi:41: warning: undefined flag: VERSION

Work around this by doing as the autotools do, and writing
the information into a version.texi file which we then
include from the .texi files that need it.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487357968-31000-1-git-send-email-peter.maydell@linaro.org>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-02-20 14:11:07 +01:00
Marc-André Lureau
1ede77dfd2 qapi2texi: replace quotation by bold section name
When we build qemu-qmp-ref.txt, the @quotation markup causes texinfo to
complain several times:
"Negative repeat count does nothing at
/usr/share/texinfo/Texinfo/Convert/Line.pm line 124."

It also doesn't display correctly, because the "Notes" text disappears
entirely in the HTML version because it thinks there's no actual
quotation text.

The text file output formatting is also not good.

To solve those problems, remove usage of @quotation, and simply use bold
face for the section name.

Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170217093416.27688-1-marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-02-20 14:10:46 +01:00
Peter Maydell
5d42ff913b Merge remote-tracking branch 'remotes/huth/tags/coldfire-20170219' into staging
Updates for the m68k ColdFire machines:
- Remove the obsolete dummy machine
- QOMify the ColdFire interrupt controller
- Volunteer for maintaining the orphan ColdFire boards

# gpg: Signature made Sat 18 Feb 2017 23:08:55 GMT
# gpg:                using RSA key 0x2ED9D774FE702DB5
# gpg: Good signature from "Thomas Huth <th.huth@gmx.de>"
# gpg:                 aka "Thomas Huth <thuth@redhat.com>"
# gpg:                 aka "Thomas Huth <huth@tuxfamily.org>"
# Primary key fingerprint: 27B8 8847 EEE0 2501 18F3  EAB9 2ED9 D774 FE70 2DB5

* remotes/huth/tags/coldfire-20170219:
  MAINTAINERS: Add odd fixer for the ColdFire boards
  hw/m68k: QOMify the ColdFire interrupt controller
  hw/m68k: Remove dummy machine

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-20 11:55:37 +00:00
Frediano Ziglio
0ea1523fb6 egl-helpers: Support newer MESA versions
According to
https://www.khronos.org/registry/EGL/extensions/MESA/EGL_MESA_platform_gbm.txt
if EGL_MESA_platform_gbm is supported, the display should be initialized
from a GBM handle using eglGetPlatformDisplayEXT.

Signed-off-by: Frediano Ziglio <fziglio@redhat.com>
Message-id: 20170220095055.4234-1-fziglio@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-20 12:46:09 +01:00
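As a rough illustration of the extension's contract (not the QEMU patch
itself; the helper name and surrounding code are invented for the example):

    #include <epoxy/egl.h>   /* QEMU's egl-helpers use libepoxy */
    #include <gbm.h>

    /* With EGL_MESA_platform_gbm, the EGLDisplay comes from a GBM
     * device handle instead of a native X11/Wayland display. */
    static EGLDisplay egl_display_from_gbm(struct gbm_device *gbm)
    {
        return eglGetPlatformDisplayEXT(EGL_PLATFORM_GBM_MESA,
                                        (void *)gbm, NULL);
    }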
Marc-André Lureau
7b5255083b spice: allow to specify drm rendernode
When multiple GPUs are available, picking the first one isn't always the
best choice. Allow specifying a DRM rendernode device instead.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 20170212112118.16044-1-marcandre.lureau@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-20 12:44:32 +01:00
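With this in place a specific GPU can be selected explicitly, along the
lines of (illustrative invocation; the rendernode path varies per host):

    qemu-system-x86_64 ... -spice gl=on,rendernode=/dev/dri/renderD128,disable-ticketing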
Peter Maydell
d514cfd763 Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
virtio, pci: fixes, features

virtio is using region caches for performance
iommu support for IOTLBs
misc fixes

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Fri 17 Feb 2017 19:53:02 GMT
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream: (23 commits)
  intel_iommu: vtd_slpt_level_shift check level
  intel_iommu: convert dbg macros to trace for trans
  intel_iommu: convert dbg macros to traces for inv
  intel_iommu: renaming gpa to iova where proper
  intel_iommu: simplify irq region translation
  intel_iommu: add "caching-mode" option
  vfio: allow to notify unmap for very large region
  vfio: introduce vfio_get_vaddr()
  vfio: trace map/unmap for notify as well
  pcie: simplify pcie_add_capability()
  virtio: Fix no interrupt when not creating msi controller
  virtio: use VRingMemoryRegionCaches for avail and used rings
  virtio: check for vring setup in virtio_queue_update_used_idx
  virtio: use VRingMemoryRegionCaches for descriptor ring
  virtio: add MemoryListener to cache ring translations
  virtio: use MemoryRegionCache to access descriptors
  exec: make address_space_cache_destroy idempotent
  virtio: use address_space_map/unmap to access descriptors
  virtio: add virtio_*_phys_cached
  memory: make memory_listener_unregister idempotent
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-20 09:53:59 +00:00
Thomas Huth
5baf2741b4 MAINTAINERS: Add odd fixer for the ColdFire boards
I did some work with real ColdFire boards in the past, and after
QOMifying most of the ColdFire devices recently, I feel confident
that I could at least take care of odd fixes for these boards.

Signed-off-by: Thomas Huth <huth@tuxfamily.org>
2017-02-18 22:23:31 +01:00
Thomas Huth
88b86983f3 hw/m68k: QOMify the ColdFire interrupt controller
Use type_init() and friends to adapt the ColdFire interrupt
controller to the latest QEMU device conventions.

Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Thomas Huth <huth@tuxfamily.org>
2017-02-18 22:23:31 +01:00
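The conversion follows the usual QOM registration pattern, roughly like
this (type and function names are generic placeholders, not necessarily
the exact ones from the patch):

    static const TypeInfo mcf_intc_info = {
        .name          = "mcf-intc",              /* placeholder name */
        .parent        = TYPE_SYS_BUS_DEVICE,
        .instance_size = sizeof(mcf_intc_state),
        .instance_init = mcf_intc_instance_init,  /* placeholder */
        .class_init    = mcf_intc_class_init,     /* placeholder */
    };

    static void mcf_intc_register_types(void)
    {
        type_register_static(&mcf_intc_info);
    }

    type_init(mcf_intc_register_types)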
Thomas Huth
22f2dbe7ea hw/m68k: Remove dummy machine
Since it is now possible to instantiate a CPU and RAM with the "none"
machine, too, and a kernel can be loaded there with the generic loader
device, there is no longer any need for the m68k "dummy" machine. Thus
let's remove this unmaintained file now.

Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Thomas Huth <huth@tuxfamily.org>
2017-02-18 22:23:25 +01:00
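The replacement workflow looks roughly like this (illustrative command
line; the CPU model, RAM size and file name are examples):

    qemu-system-m68k -M none -cpu m5206 -m 32 \
        -device loader,file=kernel.elf,cpu-num=0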
Peter Xu
7e58326ad7 intel_iommu: vtd_slpt_level_shift check level
This helps in debugging when an incorrect level is passed in.

Reviewed-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
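A plausible shape for such a check (the surrounding code paraphrases
intel_iommu.c; the exact assertion in the patch may differ):

    /* Shift covered by a given second-level page-table level */
    static inline uint32_t vtd_slpt_level_shift(uint32_t level)
    {
        assert(level != 0);   /* catch callers passing a bogus level */
        return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_SL_LEVEL_BITS;
    }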
Peter Xu
6c441e1d61 intel_iommu: convert dbg macros to trace for trans
Another patch to convert the DPRINTF() debug macros. This one focuses on
the address translation path and caching.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Peter Xu
bc535e59c4 intel_iommu: convert dbg macros to traces for inv
The VT-d code still uses the static DEBUG_INTEL_IOMMU macro. That's not
good: we should end the days when we need to recompile the code before
getting useful debugging information for VT-d. Time to switch to the
trace system. This is the first patch to do it.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Peter Xu
6e9055641b intel_iommu: renaming gpa to iova where proper
There are lots of places in the current intel_iommu.c code where an
"iova" is named "gpa". Using the name "gpa" in these places is really
confusing, since it is easily read as "Guest Physical Address", which it
is not. To make the code (much) easier to read, I decided to do this
once and for all.

No functional changes are made, only textual ones.

Reviewed-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Peter Xu
046ab7e9be intel_iommu: simplify irq region translation
Now that we have a standalone memory region for MSI, all IRQ region
requests should be redirected there. Clean up the leftover block,
replacing it with an assertion.

Reviewed-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
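Conceptually, the DMA translation path can then simply assert that it
never sees an address inside the interrupt window (sketch; the helper
name follows intel_iommu.c conventions):

    /* MSI writes go through the dedicated IR memory region, so plain
     * DMA translation is never asked about the interrupt range. */
    assert(!vtd_is_interrupt_addr(addr));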
Aviv Ben-David
3b40f0e53c intel_iommu: add "caching-mode" option
This capability asks the guest to invalidate the cache before each map
operation. We can use these invalidations to trap map operations in the
hypervisor.

Signed-off-by: Aviv Ben-David <bd.aviv@gmail.com>
[peterx: using "caching-mode" instead of "cache-mode" to align with spec]
[peterx: re-write the subject to make it short and clear]
Reviewed-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Aviv Ben-David <bd.aviv@gmail.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
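The option is enabled on the device, e.g. (typical invocation, other
options elided):

    qemu-system-x86_64 -machine q35 -device intel-iommu,caching-mode=on ...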
Peter Xu
dfbd90e5b9 vfio: allow to notify unmap for very large region
The Linux vfio driver supports VFIO_IOMMU_UNMAP_DMA on a very big
region. This can be leveraged by QEMU's IOMMU implementation to clean up
existing page mappings for an entire iova address space (by notifying
with an IOTLB that has an extremely large addr_mask). However, the
current vfio_iommu_map_notify() does not allow that: it makes sure that
every translated address in the IOTLB falls into the RAM range.

That check makes sense for mapping operations, but means little for
unmap operations.

This patch moves the check into the map logic only, so that we get
faster unmap handling (no need to translate again), and can also better
support unmapping a very big region even when it covers non-RAM or
non-existent ranges.

Acked-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
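A minimal sketch of the described split (simplified control flow;
translate_to_ram, do_map and do_unmap are hypothetical stand-ins for the
real vfio_dma_map/vfio_dma_unmap plumbing):

    static void vfio_iommu_map_notify_sketch(IOMMUTLBEntry *iotlb)
    {
        hwaddr len = iotlb->addr_mask + 1;

        if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
            /* Map: must translate, and the target must be RAM. */
            void *vaddr;
            if (!translate_to_ram(iotlb, &vaddr)) {
                return;
            }
            do_map(iotlb->iova, len, vaddr);
        } else {
            /* Unmap: no translation needed; the range may be huge,
             * non-RAM or even non-existent - just tear mappings down. */
            do_unmap(iotlb->iova, len);
        }
    }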
Peter Xu
4a4b88fbe1 vfio: introduce vfio_get_vaddr()
A cleanup for vfio_iommu_map_notify(). Now we will fetch vaddr even if
the operation is unmap, but it won't hurt much.

One thing to mention is that we need the RCU read lock to protect the
whole translation and map/unmap procedure.

Acked-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
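The locking shape described above, roughly (the vfio_get_vaddr signature
is assumed; details simplified):

    rcu_read_lock();
    /* Both the translated_addr -> host vaddr lookup and the following
     * map/unmap must complete before the memory map can change. */
    if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
        void *vaddr;
        bool read_only;
        if (vfio_get_vaddr(iotlb, &vaddr, &read_only)) {
            /* ... vfio_dma_map(...) ... */
        }
    } else {
        /* ... vfio_dma_unmap(...) ... */
    }
    rcu_read_unlock();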
Peter Xu
3213835720 vfio: trace map/unmap for notify as well
We trace its range, but we don't know whether it's a MAP or an UNMAP.
Let's dump that as well.

Acked-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Peter Xu
d4e9b75aa0 pcie: simplify pcie_add_capability()
When we add PCIe extended capabilities, we should be following the rule
that we add the head extended cap (at offset 0x100) first, then the rest
of them. Meanwhile, we are always adding new capability bits at the end
of the list. Here the "next" looks meaningless in all cases since it
should always be zero (along with the "header").

Simplify the function a bit; it is more readable now.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
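In other words, a capability appended at the tail always gets a zero
"next" pointer, so its header can be written directly (sketch using
QEMU's PCIe register helpers; simplified from the real function):

    /* cap_id/cap_ver/offset as passed to pcie_add_capability(); the
     * new capability is the last one, so its "next" field is 0. */
    pci_set_long(dev->config + offset, PCI_EXT_CAP(cap_id, cap_ver, 0));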
Michael S. Tsirkin
b4b9862b53 virtio: Fix no interrupt when not creating msi controller
For the ARM virt machine, if we use virt-2.7, which does not create an
ITS node, virtio-net cannot receive interrupts, so it can't get an IP
address through DHCP.
This fixes commit 83d768b (virtio: set ISR on dataplane notifications).

Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Paolo Bonzini
97cd965c07 virtio: use VRingMemoryRegionCaches for avail and used rings
The virtio-net change is necessary because it uses virtqueue_fill
and virtqueue_flush instead of the more convenient virtqueue_push.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
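For context, virtqueue_push is just the single-element combination of
the two, which is why virtio-net's batched path needs both call sites
converted:

    /* Equivalent of virtqueue_push(vq, elem, len), spelled out: */
    virtqueue_fill(vq, elem, len, 0);   /* stage element at index 0 */
    virtqueue_flush(vq, 1);             /* publish it, bump used_idx */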
Paolo Bonzini
ca0176ad83 virtio: check for vring setup in virtio_queue_update_used_idx
If the vring has not been set up, it is not necessary for vring_used_idx
to do anything (as is already the case when the caller is virtio_load).
This is harmless for now, but it will be a problem when the
MemoryRegionCache has not been set up.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
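The guard amounts to something like the following (paraphrased; field
names follow hw/virtio/virtio.c):

    void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
    {
        VirtQueue *vq = &vdev->vq[n];

        if (vq->vring.desc) {   /* ring set up: safe to read used.idx */
            vq->used_idx = vring_used_idx(vq);
        }
    }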
Paolo Bonzini
991976f751 virtio: use VRingMemoryRegionCaches for descriptor ring
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Paolo Bonzini
c611c76417 virtio: add MemoryListener to cache ring translations
The cached translations are RCU-protected to allow efficient use
when processing virtqueues.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
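On the reader side this looks roughly like the following (the caches
field layout is an assumption; the pointer is published by the listener
callback):

    rcu_read_lock();
    VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
    /* ... access desc/avail/used rings through the caches ... */
    rcu_read_unlock();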
Paolo Bonzini
5eba0404b9 virtio: use MemoryRegionCache to access descriptors
For now, the cache is created on every virtqueue_pop.  Later on,
direct descriptors will be able to reuse it.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
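The per-pop lifecycle is roughly (simplified; error handling elided):

    MemoryRegionCache desc_cache;

    /* Map the descriptor table once, then do cheap repeated reads. */
    int64_t len = address_space_cache_init(&desc_cache, vdev->dma_as,
                                           desc_addr, size, false);
    if (len >= size) {
        /* ... read each descriptor through the cache ... */
    }
    address_space_cache_destroy(&desc_cache);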
Paolo Bonzini
91047df38d exec: make address_space_cache_destroy idempotent
Clear cache->mr so that address_space_cache_destroy does nothing
the second time it is called.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
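The change amounts to a guard plus clearing the field, roughly:

    void address_space_cache_destroy(MemoryRegionCache *cache)
    {
        if (!cache->mr) {
            return;           /* second call: nothing left to release */
        }
        /* ... release the cached mapping ... */
        cache->mr = NULL;     /* make the next call a no-op */
    }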
Paolo Bonzini
9796d0ac8f virtio: use address_space_map/unmap to access descriptors
This makes little difference, but it makes the code change smaller
for the next patch that introduces MemoryRegionCache.  This is
because map/unmap are similar to MemoryRegionCache init/destroy.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Paolo Bonzini
e6a830d6eb virtio: add virtio_*_phys_cached
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Paolo Bonzini
1d8280c18f memory: make memory_listener_unregister idempotent
Make it easy to unregister a MemoryListener without tracking whether it
had been registered before.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Haozhong Zhang
79c0f397fe docs: add document to explain the usage of vNVDIMM
Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
Reviewed-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Fam Zheng
0793169870 virtio: Report real progress in VQ aio poll handler
In virtio_queue_host_notifier_aio_poll, not all "!virtio_queue_empty()"
cases are making true progress.

Currently the offending one is the virtio-scsi event queue, whose
handler does nothing if no event is pending. As a result, aio_poll()
will spin on the "non-empty" VQ and take 100% host CPU.

Fix this by reporting actual progress from virtio queue aio handlers.

Reported-by: Ed Swierk <eswierk@skyportsystems.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Tested-by: Ed Swierk <eswierk@skyportsystems.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
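Sketch of the reporting change (simplified; the real handler chain now
returns whether any request was actually processed):

    static bool virtio_queue_host_notifier_aio_poll(void *opaque)
    {
        EventNotifier *n = opaque;
        VirtQueue *vq = container_of(n, VirtQueue, host_notifier);

        if (virtio_queue_empty(vq)) {
            return false;                /* nothing to do: no progress */
        }
        /* Progress only if the handler consumed something, not merely
         * because the ring looked non-empty. */
        return virtio_queue_notify_aio_vq(vq);
    }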
Michael S. Tsirkin
4bb571d857 pci/pcie: don't assume cap id 0 is reserved
VFIO actually wants to create a capability with ID == 0.
This is done to make guest drivers skip the given capability.
pcie_add_capability then trips up on this capability
when looking for the end of the capability list.

To support this use case, it's easy enough to switch to
e.g. 0xffffffff for these comparisons - we can be sure
it will never match a 16-bit capability ID.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
2017-02-17 21:52:30 +02:00
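Conceptual sketch of the safe list walk (constants from the standard
PCIe headers; simplified):

    uint16_t next = PCI_CONFIG_SPACE_SIZE;   /* 0x100: first ext cap */

    while (next) {
        uint32_t header = pci_get_long(dev->config + next);
        /* PCI_EXT_CAP_ID(header) may legitimately be 0 here, so the
         * walk must terminate on the "next" pointer, never on ID 0. */
        next = PCI_EXT_CAP_NEXT(header);
    }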
172 changed files with 4690 additions and 1475 deletions

.gitignore

@@ -107,6 +107,7 @@ docs/qemu-ga-ref.info*
 docs/qemu-qmp-ref.info*
 /qemu-ga-qapi.texi
 /qemu-qapi.texi
+/version.texi
 *.tps
 .stgit-*
 cscope.*

MAINTAINERS

@@ -561,20 +561,19 @@ F: hw/lm32/milkymist.c
 M68K Machines
 -------------
 an5206
-S: Orphan
+M: Thomas Huth <huth@tuxfamily.org>
+S: Odd Fixes
 F: hw/m68k/an5206.c
 F: hw/m68k/mcf5206.c
 
-dummy_m68k
-S: Orphan
-F: hw/m68k/dummy_m68k.c
-
 mcf5208
-S: Orphan
+M: Thomas Huth <huth@tuxfamily.org>
+S: Odd Fixes
 F: hw/m68k/mcf5208.c
 F: hw/m68k/mcf_intc.c
 F: hw/char/mcf_uart.c
 F: hw/net/mcf_fec.c
 F: include/hw/m68k/mcf*.h
 
 MicroBlaze Machines
 -------------------

Makefile

@@ -516,7 +516,7 @@ distclean: clean
 	rm -f qemu-doc.vr qemu-doc.txt
 	rm -f config.log
 	rm -f linux-headers/asm
-	rm -f qemu-ga-qapi.texi qemu-qapi.texi
+	rm -f qemu-ga-qapi.texi qemu-qapi.texi version.texi
 	rm -f docs/qemu-qmp-ref.7 docs/qemu-ga-ref.7
 	rm -f docs/qemu-qmp-ref.txt docs/qemu-ga-ref.txt
 	rm -f docs/qemu-qmp-ref.pdf docs/qemu-ga-ref.pdf
@@ -663,21 +663,24 @@ ui/console-gl.o: $(SRC_PATH)/ui/console-gl.c \
 # documentation
 MAKEINFO=makeinfo
-MAKEINFOFLAGS=--no-split --number-sections -D 'VERSION $(VERSION)'
-TEXIFLAG=$(if $(V),,--quiet) --command='@set VERSION $(VERSION)'
+MAKEINFOFLAGS=--no-split --number-sections
+TEXIFLAG=$(if $(V),,--quiet)
+version.texi: $(SRC_PATH)/VERSION
+	$(call quiet-command,echo "@set VERSION $(VERSION)" > $@,"GEN","$@")
-%.html: %.texi
+%.html: %.texi version.texi
 	$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \
 	--html $< -o $@,"GEN","$@")
-%.info: %.texi
+%.info: %.texi version.texi
 	$(call quiet-command,$(MAKEINFO) $(MAKEINFOFLAGS) $< -o $@,"GEN","$@")
-%.txt: %.texi
+%.txt: %.texi version.texi
 	$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \
 	--plaintext $< -o $@,"GEN","$@")
-%.pdf: %.texi
+%.pdf: %.texi version.texi
 	$(call quiet-command,texi2pdf $(TEXIFLAG) -I $(SRC_PATH) -I . $< -o $@,"GEN","$@")
 qemu-options.texi: $(SRC_PATH)/qemu-options.hx $(SRC_PATH)/scripts/hxtool

Makefile.objs

@@ -9,12 +9,8 @@ chardev-obj-y = chardev/
#######################################################################
# block-obj-y is code used by both qemu system emulation and qemu-img
block-obj-y = async.o thread-pool.o
block-obj-y += nbd/
block-obj-y += block.o blockjob.o
block-obj-y += main-loop.o iohandler.o qemu-timer.o
block-obj-$(CONFIG_POSIX) += aio-posix.o
block-obj-$(CONFIG_WIN32) += aio-win32.o
block-obj-y += block/
block-obj-y += qemu-io-cmds.o
block-obj-$(CONFIG_REPLICATION) += replication.o

block/backup.c

@@ -64,7 +64,7 @@ static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
         retry = false;
         QLIST_FOREACH(req, &job->inflight_reqs, list) {
             if (end > req->start && start < req->end) {
-                qemu_co_queue_wait(&req->wait_queue);
+                qemu_co_queue_wait(&req->wait_queue, NULL);
                 retry = true;
                 break;
             }

block/blkdebug.c

@@ -405,12 +405,6 @@ out:
return ret;
}
static void error_callback_bh(void *opaque)
{
Coroutine *co = opaque;
qemu_coroutine_enter(co);
}
static int inject_error(BlockDriverState *bs, BlkdebugRule *rule)
{
BDRVBlkdebugState *s = bs->opaque;
@@ -423,8 +417,7 @@ static int inject_error(BlockDriverState *bs, BlkdebugRule *rule)
}
if (!immediately) {
aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), error_callback_bh,
qemu_coroutine_self());
aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
qemu_coroutine_yield();
}

block/blkreplay.c

@@ -60,7 +60,7 @@ static int64_t blkreplay_getlength(BlockDriverState *bs)
static void blkreplay_bh_cb(void *opaque)
{
Request *req = opaque;
qemu_coroutine_enter(req->co);
aio_co_wake(req->co);
qemu_bh_delete(req->bh);
g_free(req);
}

block/block-backend.c

@@ -880,7 +880,6 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
{
QEMUIOVector qiov;
struct iovec iov;
Coroutine *co;
BlkRwCo rwco;
iov = (struct iovec) {
@@ -897,9 +896,14 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
.ret = NOT_DONE,
};
co = qemu_coroutine_create(co_entry, &rwco);
qemu_coroutine_enter(co);
BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
if (qemu_in_coroutine()) {
/* Fast-path if already in coroutine context */
co_entry(&rwco);
} else {
Coroutine *co = qemu_coroutine_create(co_entry, &rwco);
qemu_coroutine_enter(co);
BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
}
return rwco.ret;
}
@@ -979,7 +983,6 @@ static void blk_aio_complete(BlkAioEmAIOCB *acb)
static void blk_aio_complete_bh(void *opaque)
{
BlkAioEmAIOCB *acb = opaque;
assert(acb->has_returned);
blk_aio_complete(acb);
}

block/curl.c

@@ -386,9 +386,8 @@ static void curl_multi_check_completion(BDRVCURLState *s)
}
}
static void curl_multi_do(void *arg)
static void curl_multi_do_locked(CURLState *s)
{
CURLState *s = (CURLState *)arg;
CURLSocket *socket, *next_socket;
int running;
int r;
@@ -406,12 +405,23 @@ static void curl_multi_do(void *arg)
}
}
static void curl_multi_do(void *arg)
{
CURLState *s = (CURLState *)arg;
aio_context_acquire(s->s->aio_context);
curl_multi_do_locked(s);
aio_context_release(s->s->aio_context);
}
static void curl_multi_read(void *arg)
{
CURLState *s = (CURLState *)arg;
curl_multi_do(arg);
aio_context_acquire(s->s->aio_context);
curl_multi_do_locked(s);
curl_multi_check_completion(s->s);
aio_context_release(s->s->aio_context);
}
static void curl_multi_timeout_do(void *arg)
@@ -424,9 +434,11 @@ static void curl_multi_timeout_do(void *arg)
return;
}
aio_context_acquire(s->aio_context);
curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
curl_multi_check_completion(s);
aio_context_release(s->aio_context);
#else
abort();
#endif
@@ -784,13 +796,18 @@ static void curl_readv_bh_cb(void *p)
{
CURLState *state;
int running;
int ret = -EINPROGRESS;
CURLAIOCB *acb = p;
BDRVCURLState *s = acb->common.bs->opaque;
BlockDriverState *bs = acb->common.bs;
BDRVCURLState *s = bs->opaque;
AioContext *ctx = bdrv_get_aio_context(bs);
size_t start = acb->sector_num * BDRV_SECTOR_SIZE;
size_t end;
aio_context_acquire(ctx);
// In case we have the requested data already (e.g. read-ahead),
// we can just call the callback and be done.
switch (curl_find_buf(s, start, acb->nb_sectors * BDRV_SECTOR_SIZE, acb)) {
@@ -798,7 +815,7 @@ static void curl_readv_bh_cb(void *p)
qemu_aio_unref(acb);
// fall through
case FIND_RET_WAIT:
return;
goto out;
default:
break;
}
@@ -806,9 +823,8 @@ static void curl_readv_bh_cb(void *p)
// No cache found, so let's start a new request
state = curl_init_state(acb->common.bs, s);
if (!state) {
acb->common.cb(acb->common.opaque, -EIO);
qemu_aio_unref(acb);
return;
ret = -EIO;
goto out;
}
acb->start = 0;
@@ -822,9 +838,8 @@ static void curl_readv_bh_cb(void *p)
state->orig_buf = g_try_malloc(state->buf_len);
if (state->buf_len && state->orig_buf == NULL) {
curl_clean_state(state);
acb->common.cb(acb->common.opaque, -ENOMEM);
qemu_aio_unref(acb);
return;
ret = -ENOMEM;
goto out;
}
state->acb[0] = acb;
@@ -837,6 +852,13 @@ static void curl_readv_bh_cb(void *p)
/* Tell curl it needs to kick things off */
curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
out:
aio_context_release(ctx);
if (ret != -EINPROGRESS) {
acb->common.cb(acb->common.opaque, ret);
qemu_aio_unref(acb);
}
}
static BlockAIOCB *curl_aio_readv(BlockDriverState *bs,

block/gluster.c

@@ -698,13 +698,6 @@ static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf,
return qemu_gluster_glfs_init(gconf, errp);
}
static void qemu_gluster_complete_aio(void *opaque)
{
GlusterAIOCB *acb = (GlusterAIOCB *)opaque;
qemu_coroutine_enter(acb->coroutine);
}
/*
* AIO callback routine called from GlusterFS thread.
*/
@@ -720,7 +713,7 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
acb->ret = -EIO; /* Partial read/write - fail it */
}
aio_bh_schedule_oneshot(acb->aio_context, qemu_gluster_complete_aio, acb);
aio_co_schedule(acb->aio_context, acb->coroutine);
}
static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags)

block/io.c

@@ -189,7 +189,7 @@ static void bdrv_co_drain_bh_cb(void *opaque)
bdrv_dec_in_flight(bs);
bdrv_drained_begin(bs);
data->done = true;
qemu_coroutine_enter(co);
aio_co_wake(co);
}
static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
@@ -539,7 +539,7 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
* (instead of producing a deadlock in the former case). */
if (!req->waiting_for) {
self->waiting_for = req;
qemu_co_queue_wait(&req->wait_queue);
qemu_co_queue_wait(&req->wait_queue, NULL);
self->waiting_for = NULL;
retry = true;
waited = true;
@@ -813,7 +813,7 @@ static void bdrv_co_io_em_complete(void *opaque, int ret)
CoroutineIOCompletion *co = opaque;
co->ret = ret;
qemu_coroutine_enter(co->coroutine);
aio_co_wake(co->coroutine);
}
static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
@@ -2080,6 +2080,11 @@ void bdrv_aio_cancel(BlockAIOCB *acb)
if (acb->aiocb_info->get_aio_context) {
aio_poll(acb->aiocb_info->get_aio_context(acb), true);
} else if (acb->bs) {
/* qemu_aio_ref and qemu_aio_unref are not thread-safe, so
* assert that we're not using an I/O thread. Thread-safe
* code should use bdrv_aio_cancel_async exclusively.
*/
assert(bdrv_get_aio_context(acb->bs) == qemu_get_aio_context());
aio_poll(bdrv_get_aio_context(acb->bs), true);
} else {
abort();
@@ -2239,35 +2244,6 @@ BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
return &acb->common;
}
void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
BlockCompletionFunc *cb, void *opaque)
{
BlockAIOCB *acb;
acb = g_malloc(aiocb_info->aiocb_size);
acb->aiocb_info = aiocb_info;
acb->bs = bs;
acb->cb = cb;
acb->opaque = opaque;
acb->refcnt = 1;
return acb;
}
void qemu_aio_ref(void *p)
{
BlockAIOCB *acb = p;
acb->refcnt++;
}
void qemu_aio_unref(void *p)
{
BlockAIOCB *acb = p;
assert(acb->refcnt > 0);
if (--acb->refcnt == 0) {
g_free(acb);
}
}
/**************************************************************/
/* Coroutine block device emulation */
@@ -2299,7 +2275,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
/* Wait until any previous flushes are completed */
while (bs->active_flush_req) {
qemu_co_queue_wait(&bs->flush_queue);
qemu_co_queue_wait(&bs->flush_queue, NULL);
}
bs->active_flush_req = true;

block/iscsi.c

@@ -165,8 +165,9 @@ iscsi_schedule_bh(IscsiAIOCB *acb)
static void iscsi_co_generic_bh_cb(void *opaque)
{
struct IscsiTask *iTask = opaque;
iTask->complete = 1;
qemu_coroutine_enter(iTask->co);
aio_co_wake(iTask->co);
}
static void iscsi_retry_timer_expired(void *opaque)
@@ -174,7 +175,7 @@ static void iscsi_retry_timer_expired(void *opaque)
struct IscsiTask *iTask = opaque;
iTask->complete = 1;
if (iTask->co) {
qemu_coroutine_enter(iTask->co);
aio_co_wake(iTask->co);
}
}
@@ -394,8 +395,10 @@ iscsi_process_read(void *arg)
IscsiLun *iscsilun = arg;
struct iscsi_context *iscsi = iscsilun->iscsi;
aio_context_acquire(iscsilun->aio_context);
iscsi_service(iscsi, POLLIN);
iscsi_set_events(iscsilun);
aio_context_release(iscsilun->aio_context);
}
static void
@@ -404,8 +407,10 @@ iscsi_process_write(void *arg)
IscsiLun *iscsilun = arg;
struct iscsi_context *iscsi = iscsilun->iscsi;
aio_context_acquire(iscsilun->aio_context);
iscsi_service(iscsi, POLLOUT);
iscsi_set_events(iscsilun);
aio_context_release(iscsilun->aio_context);
}
static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
@@ -1240,29 +1245,14 @@ retry:
return 0;
}
static void parse_chap(struct iscsi_context *iscsi, const char *target,
static void apply_chap(struct iscsi_context *iscsi, QemuOpts *opts,
Error **errp)
{
QemuOptsList *list;
QemuOpts *opts;
const char *user = NULL;
const char *password = NULL;
const char *secretid;
char *secret = NULL;
list = qemu_find_opts("iscsi");
if (!list) {
return;
}
opts = qemu_opts_find(list, target);
if (opts == NULL) {
opts = QTAILQ_FIRST(&list->head);
if (!opts) {
return;
}
}
user = qemu_opt_get(opts, "user");
if (!user) {
return;
@@ -1293,64 +1283,36 @@ static void parse_chap(struct iscsi_context *iscsi, const char *target,
g_free(secret);
}
static void parse_header_digest(struct iscsi_context *iscsi, const char *target,
static void apply_header_digest(struct iscsi_context *iscsi, QemuOpts *opts,
Error **errp)
{
QemuOptsList *list;
QemuOpts *opts;
const char *digest = NULL;
list = qemu_find_opts("iscsi");
if (!list) {
return;
}
opts = qemu_opts_find(list, target);
if (opts == NULL) {
opts = QTAILQ_FIRST(&list->head);
if (!opts) {
return;
}
}
digest = qemu_opt_get(opts, "header-digest");
if (!digest) {
return;
}
if (!strcmp(digest, "CRC32C")) {
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
} else if (!strcmp(digest, "crc32c")) {
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
} else if (!strcmp(digest, "NONE")) {
} else if (!strcmp(digest, "none")) {
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
} else if (!strcmp(digest, "CRC32C-NONE")) {
} else if (!strcmp(digest, "crc32c-none")) {
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
} else if (!strcmp(digest, "NONE-CRC32C")) {
} else if (!strcmp(digest, "none-crc32c")) {
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
} else {
error_setg(errp, "Invalid header-digest setting : %s", digest);
}
}
static char *parse_initiator_name(const char *target)
static char *get_initiator_name(QemuOpts *opts)
{
QemuOptsList *list;
QemuOpts *opts;
const char *name;
char *iscsi_name;
UuidInfo *uuid_info;
list = qemu_find_opts("iscsi");
if (list) {
opts = qemu_opts_find(list, target);
if (!opts) {
opts = QTAILQ_FIRST(&list->head);
}
if (opts) {
name = qemu_opt_get(opts, "initiator-name");
if (name) {
return g_strdup(name);
}
}
name = qemu_opt_get(opts, "initiator-name");
if (name) {
return g_strdup(name);
}
uuid_info = qmp_query_uuid(NULL);
@@ -1365,43 +1327,24 @@ static char *parse_initiator_name(const char *target)
return iscsi_name;
}
static int parse_timeout(const char *target)
{
QemuOptsList *list;
QemuOpts *opts;
const char *timeout;
list = qemu_find_opts("iscsi");
if (list) {
opts = qemu_opts_find(list, target);
if (!opts) {
opts = QTAILQ_FIRST(&list->head);
}
if (opts) {
timeout = qemu_opt_get(opts, "timeout");
if (timeout) {
return atoi(timeout);
}
}
}
return 0;
}
static void iscsi_nop_timed_event(void *opaque)
{
IscsiLun *iscsilun = opaque;
aio_context_acquire(iscsilun->aio_context);
if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) {
error_report("iSCSI: NOP timeout. Reconnecting...");
iscsilun->request_timed_out = true;
} else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
return;
goto out;
}
timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
iscsi_set_events(iscsilun);
out:
aio_context_release(iscsilun->aio_context);
}
static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
@@ -1474,20 +1417,6 @@ static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
}
}
/* TODO Convert to fine grained options */
static QemuOptsList runtime_opts = {
.name = "iscsi",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = "filename",
.type = QEMU_OPT_STRING,
.help = "URL to the iscsi image",
},
{ /* end of list */ }
},
};
static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
int evpd, int pc, void **inq, Error **errp)
{
@@ -1605,24 +1534,178 @@ out:
}
}
static void iscsi_parse_iscsi_option(const char *target, QDict *options)
{
QemuOptsList *list;
QemuOpts *opts;
const char *user, *password, *password_secret, *initiator_name,
*header_digest, *timeout;
list = qemu_find_opts("iscsi");
if (!list) {
return;
}
opts = qemu_opts_find(list, target);
if (opts == NULL) {
opts = QTAILQ_FIRST(&list->head);
if (!opts) {
return;
}
}
user = qemu_opt_get(opts, "user");
if (user) {
qdict_set_default_str(options, "user", user);
}
password = qemu_opt_get(opts, "password");
if (password) {
qdict_set_default_str(options, "password", password);
}
password_secret = qemu_opt_get(opts, "password-secret");
if (password_secret) {
qdict_set_default_str(options, "password-secret", password_secret);
}
initiator_name = qemu_opt_get(opts, "initiator-name");
if (initiator_name) {
qdict_set_default_str(options, "initiator-name", initiator_name);
}
header_digest = qemu_opt_get(opts, "header-digest");
if (header_digest) {
/* -iscsi takes upper case values, but QAPI only supports lower case
* enum constant names, so we have to convert here. */
char *qapi_value = g_ascii_strdown(header_digest, -1);
qdict_set_default_str(options, "header-digest", qapi_value);
g_free(qapi_value);
}
timeout = qemu_opt_get(opts, "timeout");
if (timeout) {
qdict_set_default_str(options, "timeout", timeout);
}
}
/*
* We support iscsi url's on the form
* iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
*/
static void iscsi_parse_filename(const char *filename, QDict *options,
Error **errp)
{
struct iscsi_url *iscsi_url;
const char *transport_name;
char *lun_str;
iscsi_url = iscsi_parse_full_url(NULL, filename);
if (iscsi_url == NULL) {
error_setg(errp, "Failed to parse URL : %s", filename);
return;
}
#if LIBISCSI_API_VERSION >= (20160603)
switch (iscsi_url->transport) {
case TCP_TRANSPORT:
transport_name = "tcp";
break;
case ISER_TRANSPORT:
transport_name = "iser";
break;
default:
error_setg(errp, "Unknown transport type (%d)",
iscsi_url->transport);
return;
}
#else
transport_name = "tcp";
#endif
qdict_set_default_str(options, "transport", transport_name);
qdict_set_default_str(options, "portal", iscsi_url->portal);
qdict_set_default_str(options, "target", iscsi_url->target);
lun_str = g_strdup_printf("%d", iscsi_url->lun);
qdict_set_default_str(options, "lun", lun_str);
g_free(lun_str);
/* User/password from -iscsi take precedence over those from the URL */
iscsi_parse_iscsi_option(iscsi_url->target, options);
if (iscsi_url->user[0] != '\0') {
qdict_set_default_str(options, "user", iscsi_url->user);
qdict_set_default_str(options, "password", iscsi_url->passwd);
}
iscsi_destroy_url(iscsi_url);
}
static QemuOptsList runtime_opts = {
.name = "iscsi",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = "transport",
.type = QEMU_OPT_STRING,
},
{
.name = "portal",
.type = QEMU_OPT_STRING,
},
{
.name = "target",
.type = QEMU_OPT_STRING,
},
{
.name = "user",
.type = QEMU_OPT_STRING,
},
{
.name = "password",
.type = QEMU_OPT_STRING,
},
{
.name = "password-secret",
.type = QEMU_OPT_STRING,
},
{
.name = "lun",
.type = QEMU_OPT_NUMBER,
},
{
.name = "initiator-name",
.type = QEMU_OPT_STRING,
},
{
.name = "header-digest",
.type = QEMU_OPT_STRING,
},
{
.name = "timeout",
.type = QEMU_OPT_NUMBER,
},
{ /* end of list */ }
},
};
static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
IscsiLun *iscsilun = bs->opaque;
struct iscsi_context *iscsi = NULL;
struct iscsi_url *iscsi_url = NULL;
struct scsi_task *task = NULL;
struct scsi_inquiry_standard *inq = NULL;
struct scsi_inquiry_supported_pages *inq_vpd;
char *initiator_name = NULL;
QemuOpts *opts;
Error *local_err = NULL;
const char *filename;
int i, ret = 0, timeout = 0;
const char *transport_name, *portal, *target;
#if LIBISCSI_API_VERSION >= (20160603)
enum iscsi_transport_type transport;
#endif
int i, ret = 0, timeout = 0, lun;
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -1632,18 +1715,34 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
goto out;
}
filename = qemu_opt_get(opts, "filename");
transport_name = qemu_opt_get(opts, "transport");
portal = qemu_opt_get(opts, "portal");
target = qemu_opt_get(opts, "target");
lun = qemu_opt_get_number(opts, "lun", 0);
iscsi_url = iscsi_parse_full_url(iscsi, filename);
if (iscsi_url == NULL) {
error_setg(errp, "Failed to parse URL : %s", filename);
if (!transport_name || !portal || !target) {
error_setg(errp, "Need all of transport, portal and target options");
ret = -EINVAL;
goto out;
}
if (!strcmp(transport_name, "tcp")) {
#if LIBISCSI_API_VERSION >= (20160603)
transport = TCP_TRANSPORT;
} else if (!strcmp(transport_name, "iser")) {
transport = ISER_TRANSPORT;
#else
/* TCP is what older libiscsi versions always use */
#endif
} else {
error_setg(errp, "Unknown transport: %s", transport_name);
ret = -EINVAL;
goto out;
}
memset(iscsilun, 0, sizeof(IscsiLun));
initiator_name = parse_initiator_name(iscsi_url->target);
initiator_name = get_initiator_name(opts);
iscsi = iscsi_create_context(initiator_name);
if (iscsi == NULL) {
@@ -1652,30 +1751,20 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
goto out;
}
#if LIBISCSI_API_VERSION >= (20160603)
if (iscsi_init_transport(iscsi, iscsi_url->transport)) {
if (iscsi_init_transport(iscsi, transport)) {
error_setg(errp, ("Error initializing transport."));
ret = -EINVAL;
goto out;
}
#endif
if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
if (iscsi_set_targetname(iscsi, target)) {
error_setg(errp, "iSCSI: Failed to set target name.");
ret = -EINVAL;
goto out;
}
if (iscsi_url->user[0] != '\0') {
ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
iscsi_url->passwd);
if (ret != 0) {
error_setg(errp, "Failed to set initiator username and password");
ret = -EINVAL;
goto out;
}
}
/* check if we got CHAP username/password via the options */
parse_chap(iscsi, iscsi_url->target, &local_err);
apply_chap(iscsi, opts, &local_err);
if (local_err != NULL) {
error_propagate(errp, local_err);
ret = -EINVAL;
@@ -1688,10 +1777,8 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
goto out;
}
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
/* check if we got HEADER_DIGEST via the options */
parse_header_digest(iscsi, iscsi_url->target, &local_err);
apply_header_digest(iscsi, opts, &local_err);
if (local_err != NULL) {
error_propagate(errp, local_err);
ret = -EINVAL;
@@ -1699,7 +1786,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
}
/* timeout handling is broken in libiscsi before 1.15.0 */
timeout = parse_timeout(iscsi_url->target);
timeout = qemu_opt_get_number(opts, "timeout", 0);
#if LIBISCSI_API_VERSION >= 20150621
iscsi_set_timeout(iscsi, timeout);
#else
@@ -1708,7 +1795,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
}
#endif
if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
if (iscsi_full_connect_sync(iscsi, portal, lun) != 0) {
error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
iscsi_get_error(iscsi));
ret = -EINVAL;
@@ -1717,7 +1804,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
iscsilun->iscsi = iscsi;
iscsilun->aio_context = bdrv_get_aio_context(bs);
iscsilun->lun = iscsi_url->lun;
iscsilun->lun = lun;
iscsilun->has_write_same = true;
task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
@@ -1820,9 +1907,6 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
out:
qemu_opts_del(opts);
g_free(initiator_name);
if (iscsi_url != NULL) {
iscsi_destroy_url(iscsi_url);
}
if (task != NULL) {
scsi_free_scsi_task(task);
}
@@ -2031,15 +2115,15 @@ static BlockDriver bdrv_iscsi = {
.format_name = "iscsi",
.protocol_name = "iscsi",
.instance_size = sizeof(IscsiLun),
.bdrv_needs_filename = true,
.bdrv_file_open = iscsi_open,
.bdrv_close = iscsi_close,
.bdrv_create = iscsi_create,
.create_opts = &iscsi_create_opts,
.bdrv_reopen_prepare = iscsi_reopen_prepare,
.bdrv_reopen_commit = iscsi_reopen_commit,
.bdrv_invalidate_cache = iscsi_invalidate_cache,
.instance_size = sizeof(IscsiLun),
.bdrv_parse_filename = iscsi_parse_filename,
.bdrv_file_open = iscsi_open,
.bdrv_close = iscsi_close,
.bdrv_create = iscsi_create,
.create_opts = &iscsi_create_opts,
.bdrv_reopen_prepare = iscsi_reopen_prepare,
.bdrv_reopen_commit = iscsi_reopen_commit,
.bdrv_invalidate_cache = iscsi_invalidate_cache,
.bdrv_getlength = iscsi_getlength,
.bdrv_get_info = iscsi_get_info,
@@ -2066,15 +2150,15 @@ static BlockDriver bdrv_iser = {
.format_name = "iser",
.protocol_name = "iser",
.instance_size = sizeof(IscsiLun),
.bdrv_needs_filename = true,
.bdrv_file_open = iscsi_open,
.bdrv_close = iscsi_close,
.bdrv_create = iscsi_create,
.create_opts = &iscsi_create_opts,
.bdrv_reopen_prepare = iscsi_reopen_prepare,
.bdrv_reopen_commit = iscsi_reopen_commit,
.bdrv_invalidate_cache = iscsi_invalidate_cache,
.instance_size = sizeof(IscsiLun),
.bdrv_parse_filename = iscsi_parse_filename,
.bdrv_file_open = iscsi_open,
.bdrv_close = iscsi_close,
.bdrv_create = iscsi_create,
.create_opts = &iscsi_create_opts,
.bdrv_reopen_prepare = iscsi_reopen_prepare,
.bdrv_reopen_commit = iscsi_reopen_commit,
.bdrv_invalidate_cache = iscsi_invalidate_cache,
.bdrv_getlength = iscsi_getlength,
.bdrv_get_info = iscsi_get_info,

block/linux-aio.c

@@ -54,10 +54,10 @@ struct LinuxAioState {
io_context_t ctx;
EventNotifier e;
/* io queue for submit at batch */
/* io queue for submit at batch. Protected by AioContext lock. */
LaioQueue io_q;
/* I/O completion processing */
/* I/O completion processing. Only runs in I/O thread. */
QEMUBH *completion_bh;
int event_idx;
int event_max;
@@ -100,7 +100,7 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
* that!
*/
if (!qemu_coroutine_entered(laiocb->co)) {
qemu_coroutine_enter(laiocb->co);
aio_co_wake(laiocb->co);
}
} else {
laiocb->common.cb(laiocb->common.opaque, ret);
@@ -234,9 +234,12 @@ static void qemu_laio_process_completions(LinuxAioState *s)
static void qemu_laio_process_completions_and_submit(LinuxAioState *s)
{
qemu_laio_process_completions(s);
aio_context_acquire(s->aio_context);
if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
ioq_submit(s);
}
aio_context_release(s->aio_context);
}
static void qemu_laio_completion_bh(void *opaque)
@@ -455,6 +458,7 @@ void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
{
aio_set_event_notifier(old_context, &s->e, false, NULL, NULL);
qemu_bh_delete(s->completion_bh);
s->aio_context = NULL;
}
void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)

block/mirror.c

@@ -69,6 +69,7 @@ typedef struct MirrorBlockJob {
bool waiting_for_io;
int target_cluster_sectors;
int max_iov;
bool initial_zeroing_ongoing;
} MirrorBlockJob;
typedef struct MirrorOp {
@@ -117,9 +118,10 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
if (s->cow_bitmap) {
bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
}
s->common.offset += (uint64_t)op->nb_sectors * BDRV_SECTOR_SIZE;
if (!s->initial_zeroing_ongoing) {
s->common.offset += (uint64_t)op->nb_sectors * BDRV_SECTOR_SIZE;
}
}
qemu_iovec_destroy(&op->qiov);
g_free(op);
@@ -132,6 +134,8 @@ static void mirror_write_complete(void *opaque, int ret)
{
MirrorOp *op = opaque;
MirrorBlockJob *s = op->s;
aio_context_acquire(blk_get_aio_context(s->common.blk));
if (ret < 0) {
BlockErrorAction action;
@@ -142,12 +146,15 @@ static void mirror_write_complete(void *opaque, int ret)
}
}
mirror_iteration_done(op, ret);
aio_context_release(blk_get_aio_context(s->common.blk));
}
static void mirror_read_complete(void *opaque, int ret)
{
MirrorOp *op = opaque;
MirrorBlockJob *s = op->s;
aio_context_acquire(blk_get_aio_context(s->common.blk));
if (ret < 0) {
BlockErrorAction action;
@@ -158,10 +165,11 @@ static void mirror_read_complete(void *opaque, int ret)
}
mirror_iteration_done(op, ret);
return;
} else {
blk_aio_pwritev(s->target, op->sector_num * BDRV_SECTOR_SIZE, &op->qiov,
0, mirror_write_complete, op);
}
blk_aio_pwritev(s->target, op->sector_num * BDRV_SECTOR_SIZE, &op->qiov,
0, mirror_write_complete, op);
aio_context_release(blk_get_aio_context(s->common.blk));
}
static inline void mirror_clip_sectors(MirrorBlockJob *s,
@@ -566,6 +574,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
return 0;
}
s->initial_zeroing_ongoing = true;
for (sector_num = 0; sector_num < end; ) {
int nb_sectors = MIN(end - sector_num,
QEMU_ALIGN_DOWN(INT_MAX, s->granularity) >> BDRV_SECTOR_BITS);
@@ -573,6 +582,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
mirror_throttle(s);
if (block_job_is_cancelled(&s->common)) {
s->initial_zeroing_ongoing = false;
return 0;
}
@@ -587,6 +597,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
}
mirror_wait_for_all_io(s);
s->initial_zeroing_ongoing = false;
}
/* First part, loop on the sectors and initialize the dirty bitmap. */

block/nbd-client.c

@@ -33,8 +33,9 @@
#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
#define INDEX_TO_HANDLE(bs, index) ((index) ^ ((uint64_t)(intptr_t)bs))
static void nbd_recv_coroutines_enter_all(NBDClientSession *s)
static void nbd_recv_coroutines_enter_all(BlockDriverState *bs)
{
NBDClientSession *s = nbd_get_client_session(bs);
int i;
for (i = 0; i < MAX_NBD_REQUESTS; i++) {
@@ -42,6 +43,7 @@ static void nbd_recv_coroutines_enter_all(NBDClientSession *s)
qemu_coroutine_enter(s->recv_coroutine[i]);
}
}
BDRV_POLL_WHILE(bs, s->read_reply_co);
}
static void nbd_teardown_connection(BlockDriverState *bs)
@@ -56,7 +58,7 @@ static void nbd_teardown_connection(BlockDriverState *bs)
qio_channel_shutdown(client->ioc,
QIO_CHANNEL_SHUTDOWN_BOTH,
NULL);
nbd_recv_coroutines_enter_all(client);
nbd_recv_coroutines_enter_all(bs);
nbd_client_detach_aio_context(bs);
object_unref(OBJECT(client->sioc));
@@ -65,54 +67,43 @@ static void nbd_teardown_connection(BlockDriverState *bs)
client->ioc = NULL;
}
static void nbd_reply_ready(void *opaque)
static coroutine_fn void nbd_read_reply_entry(void *opaque)
{
BlockDriverState *bs = opaque;
NBDClientSession *s = nbd_get_client_session(bs);
NBDClientSession *s = opaque;
uint64_t i;
int ret;
if (!s->ioc) { /* Already closed */
return;
}
if (s->reply.handle == 0) {
/* No reply already in flight. Fetch a header. It is possible
* that another thread has done the same thing in parallel, so
* the socket is not readable anymore.
*/
for (;;) {
assert(s->reply.handle == 0);
ret = nbd_receive_reply(s->ioc, &s->reply);
if (ret == -EAGAIN) {
return;
}
if (ret < 0) {
s->reply.handle = 0;
goto fail;
break;
}
/* There's no need for a mutex on the receive side, because the
* handler acts as a synchronization point and ensures that only
* one coroutine is called until the reply finishes.
*/
i = HANDLE_TO_INDEX(s, s->reply.handle);
if (i >= MAX_NBD_REQUESTS || !s->recv_coroutine[i]) {
break;
}
/* We're woken up by the recv_coroutine itself. Note that there
* is no race between yielding and reentering read_reply_co. This
* is because:
*
* - if recv_coroutine[i] runs on the same AioContext, it is only
* entered after we yield
*
* - if recv_coroutine[i] runs on a different AioContext, reentering
* read_reply_co happens through a bottom half, which can only
* run after we yield.
*/
aio_co_wake(s->recv_coroutine[i]);
qemu_coroutine_yield();
}
/* There's no need for a mutex on the receive side, because the
* handler acts as a synchronization point and ensures that only
* one coroutine is called until the reply finishes. */
i = HANDLE_TO_INDEX(s, s->reply.handle);
if (i >= MAX_NBD_REQUESTS) {
goto fail;
}
if (s->recv_coroutine[i]) {
qemu_coroutine_enter(s->recv_coroutine[i]);
return;
}
fail:
nbd_teardown_connection(bs);
}
static void nbd_restart_write(void *opaque)
{
BlockDriverState *bs = opaque;
qemu_coroutine_enter(nbd_get_client_session(bs)->send_coroutine);
s->read_reply_co = NULL;
}
static int nbd_co_send_request(BlockDriverState *bs,
@@ -120,7 +111,6 @@ static int nbd_co_send_request(BlockDriverState *bs,
QEMUIOVector *qiov)
{
NBDClientSession *s = nbd_get_client_session(bs);
AioContext *aio_context;
int rc, ret, i;
qemu_co_mutex_lock(&s->send_mutex);
@@ -141,11 +131,6 @@ static int nbd_co_send_request(BlockDriverState *bs,
return -EPIPE;
}
s->send_coroutine = qemu_coroutine_self();
aio_context = bdrv_get_aio_context(bs);
aio_set_fd_handler(aio_context, s->sioc->fd, false,
nbd_reply_ready, nbd_restart_write, NULL, bs);
if (qiov) {
qio_channel_set_cork(s->ioc, true);
rc = nbd_send_request(s->ioc, request);
@@ -160,9 +145,6 @@ static int nbd_co_send_request(BlockDriverState *bs,
} else {
rc = nbd_send_request(s->ioc, request);
}
aio_set_fd_handler(aio_context, s->sioc->fd, false,
nbd_reply_ready, NULL, NULL, bs);
s->send_coroutine = NULL;
qemu_co_mutex_unlock(&s->send_mutex);
return rc;
}
@@ -174,8 +156,7 @@ static void nbd_co_receive_reply(NBDClientSession *s,
{
int ret;
/* Wait until we're woken up by the read handler. TODO: perhaps
* peek at the next reply and avoid yielding if it's ours? */
/* Wait until we're woken up by nbd_read_reply_entry. */
qemu_coroutine_yield();
*reply = s->reply;
if (reply->handle != request->handle ||
@@ -201,7 +182,7 @@ static void nbd_coroutine_start(NBDClientSession *s,
/* Poor man semaphore. The free_sema is locked when no other request
* can be accepted, and unlocked after receiving one reply. */
if (s->in_flight == MAX_NBD_REQUESTS) {
qemu_co_queue_wait(&s->free_sema);
qemu_co_queue_wait(&s->free_sema, NULL);
assert(s->in_flight < MAX_NBD_REQUESTS);
}
s->in_flight++;
@@ -209,13 +190,19 @@ static void nbd_coroutine_start(NBDClientSession *s,
/* s->recv_coroutine[i] is set as soon as we get the send_lock. */
}
static void nbd_coroutine_end(NBDClientSession *s,
static void nbd_coroutine_end(BlockDriverState *bs,
NBDRequest *request)
{
NBDClientSession *s = nbd_get_client_session(bs);
int i = HANDLE_TO_INDEX(s, request->handle);
s->recv_coroutine[i] = NULL;
if (s->in_flight-- == MAX_NBD_REQUESTS) {
qemu_co_queue_next(&s->free_sema);
s->in_flight--;
qemu_co_queue_next(&s->free_sema);
/* Kick the read_reply_co to get the next reply. */
if (s->read_reply_co) {
aio_co_wake(s->read_reply_co);
}
}
@@ -241,7 +228,7 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
} else {
nbd_co_receive_reply(client, &request, &reply, qiov);
}
nbd_coroutine_end(client, &request);
nbd_coroutine_end(bs, &request);
return -reply.error;
}
@@ -271,7 +258,7 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
} else {
nbd_co_receive_reply(client, &request, &reply, NULL);
}
nbd_coroutine_end(client, &request);
nbd_coroutine_end(bs, &request);
return -reply.error;
}
@@ -306,7 +293,7 @@ int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
} else {
nbd_co_receive_reply(client, &request, &reply, NULL);
}
nbd_coroutine_end(client, &request);
nbd_coroutine_end(bs, &request);
return -reply.error;
}
@@ -331,7 +318,7 @@ int nbd_client_co_flush(BlockDriverState *bs)
} else {
nbd_co_receive_reply(client, &request, &reply, NULL);
}
nbd_coroutine_end(client, &request);
nbd_coroutine_end(bs, &request);
return -reply.error;
}
@@ -357,23 +344,23 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
} else {
nbd_co_receive_reply(client, &request, &reply, NULL);
}
nbd_coroutine_end(client, &request);
nbd_coroutine_end(bs, &request);
return -reply.error;
}
void nbd_client_detach_aio_context(BlockDriverState *bs)
{
aio_set_fd_handler(bdrv_get_aio_context(bs),
nbd_get_client_session(bs)->sioc->fd,
false, NULL, NULL, NULL, NULL);
NBDClientSession *client = nbd_get_client_session(bs);
qio_channel_detach_aio_context(QIO_CHANNEL(client->sioc));
}
void nbd_client_attach_aio_context(BlockDriverState *bs,
AioContext *new_context)
{
aio_set_fd_handler(new_context, nbd_get_client_session(bs)->sioc->fd,
false, nbd_reply_ready, NULL, NULL, bs);
NBDClientSession *client = nbd_get_client_session(bs);
qio_channel_attach_aio_context(QIO_CHANNEL(client->sioc), new_context);
aio_co_schedule(new_context, client->read_reply_co);
}
void nbd_client_close(BlockDriverState *bs)
@@ -434,7 +421,7 @@ int nbd_client_init(BlockDriverState *bs,
/* Now that we're connected, set the socket to be non-blocking and
* kick the reply mechanism. */
qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL);
client->read_reply_co = qemu_coroutine_create(nbd_read_reply_entry, client);
nbd_client_attach_aio_context(bs, bdrv_get_aio_context(bs));
logout("Established connection with NBD server\n");

block/nbd-client.h

@@ -25,7 +25,7 @@ typedef struct NBDClientSession {
CoMutex send_mutex;
CoQueue free_sema;
Coroutine *send_coroutine;
Coroutine *read_reply_co;
int in_flight;
Coroutine *recv_coroutine[MAX_NBD_REQUESTS];

block/nfs.c

@@ -208,15 +208,21 @@ static void nfs_set_events(NFSClient *client)
static void nfs_process_read(void *arg)
{
NFSClient *client = arg;
aio_context_acquire(client->aio_context);
nfs_service(client->context, POLLIN);
nfs_set_events(client);
aio_context_release(client->aio_context);
}
static void nfs_process_write(void *arg)
{
NFSClient *client = arg;
aio_context_acquire(client->aio_context);
nfs_service(client->context, POLLOUT);
nfs_set_events(client);
aio_context_release(client->aio_context);
}
static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
@@ -231,8 +237,9 @@ static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
static void nfs_co_generic_bh_cb(void *opaque)
{
NFSRPC *task = opaque;
task->complete = 1;
qemu_coroutine_enter(task->co);
aio_co_wake(task->co);
}
static void

block/qcow2-cluster.c

@@ -932,9 +932,7 @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
         if (bytes == 0) {
             /* Wait for the dependency to complete. We need to recheck
              * the free/allocated clusters when we continue. */
-            qemu_co_mutex_unlock(&s->lock);
-            qemu_co_queue_wait(&old_alloc->dependent_requests);
-            qemu_co_mutex_lock(&s->lock);
+            qemu_co_queue_wait(&old_alloc->dependent_requests, &s->lock);
             return -EAGAIN;
         }
     }

block/qed-cluster.c

@@ -83,6 +83,7 @@ static void qed_find_cluster_cb(void *opaque, int ret)
unsigned int index;
unsigned int n;
qed_acquire(s);
if (ret) {
goto out;
}
@@ -109,6 +110,7 @@ static void qed_find_cluster_cb(void *opaque, int ret)
out:
find_cluster_cb->cb(find_cluster_cb->opaque, ret, offset, len);
qed_release(s);
g_free(find_cluster_cb);
}

block/qed-table.c

@@ -31,6 +31,7 @@ static void qed_read_table_cb(void *opaque, int ret)
{
QEDReadTableCB *read_table_cb = opaque;
QEDTable *table = read_table_cb->table;
BDRVQEDState *s = read_table_cb->s;
int noffsets = read_table_cb->qiov.size / sizeof(uint64_t);
int i;
@@ -40,13 +41,15 @@ static void qed_read_table_cb(void *opaque, int ret)
}
/* Byteswap offsets */
qed_acquire(s);
for (i = 0; i < noffsets; i++) {
table->offsets[i] = le64_to_cpu(table->offsets[i]);
}
qed_release(s);
out:
/* Completion */
trace_qed_read_table_cb(read_table_cb->s, read_table_cb->table, ret);
trace_qed_read_table_cb(s, read_table_cb->table, ret);
gencb_complete(&read_table_cb->gencb, ret);
}
@@ -84,8 +87,9 @@ typedef struct {
static void qed_write_table_cb(void *opaque, int ret)
{
QEDWriteTableCB *write_table_cb = opaque;
BDRVQEDState *s = write_table_cb->s;
trace_qed_write_table_cb(write_table_cb->s,
trace_qed_write_table_cb(s,
write_table_cb->orig_table,
write_table_cb->flush,
ret);
@@ -97,8 +101,10 @@ static void qed_write_table_cb(void *opaque, int ret)
if (write_table_cb->flush) {
/* We still need to flush first */
write_table_cb->flush = false;
qed_acquire(s);
bdrv_aio_flush(write_table_cb->s->bs, qed_write_table_cb,
write_table_cb);
qed_release(s);
return;
}
@@ -213,6 +219,7 @@ static void qed_read_l2_table_cb(void *opaque, int ret)
CachedL2Table *l2_table = request->l2_table;
uint64_t l2_offset = read_l2_table_cb->l2_offset;
qed_acquire(s);
if (ret) {
/* can't trust loaded L2 table anymore */
qed_unref_l2_cache_entry(l2_table);
@@ -228,6 +235,7 @@ static void qed_read_l2_table_cb(void *opaque, int ret)
request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
assert(request->l2_table != NULL);
}
qed_release(s);
gencb_complete(&read_l2_table_cb->gencb, ret);
}

block/qed.c

@@ -273,7 +273,19 @@ static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
return l2_table;
}
static void qed_aio_next_io(void *opaque, int ret);
static void qed_aio_next_io(QEDAIOCB *acb, int ret);
static void qed_aio_start_io(QEDAIOCB *acb)
{
qed_aio_next_io(acb, 0);
}
static void qed_aio_next_io_cb(void *opaque, int ret)
{
QEDAIOCB *acb = opaque;
qed_aio_next_io(acb, ret);
}
static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
{
@@ -292,7 +304,7 @@ static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
if (acb) {
- qed_aio_next_io(acb, 0);
+ qed_aio_start_io(acb);
}
}
@@ -333,10 +345,22 @@ static void qed_need_check_timer_cb(void *opaque)
trace_qed_need_check_timer_cb(s);
qed_acquire(s);
qed_plug_allocating_write_reqs(s);
/* Ensure writes are on disk before clearing flag */
bdrv_aio_flush(s->bs->file->bs, qed_clear_need_check, s);
qed_release(s);
}
void qed_acquire(BDRVQEDState *s)
{
aio_context_acquire(bdrv_get_aio_context(s->bs));
}
void qed_release(BDRVQEDState *s)
{
aio_context_release(bdrv_get_aio_context(s->bs));
}
static void qed_start_need_check_timer(BDRVQEDState *s)
@@ -721,7 +745,7 @@ static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t l
}
if (cb->co) {
- qemu_coroutine_enter(cb->co);
+ aio_co_wake(cb->co);
}
}
@@ -918,6 +942,7 @@ static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
static void qed_aio_complete_bh(void *opaque)
{
QEDAIOCB *acb = opaque;
BDRVQEDState *s = acb_to_s(acb);
BlockCompletionFunc *cb = acb->common.cb;
void *user_opaque = acb->common.opaque;
int ret = acb->bh_ret;
@@ -925,7 +950,9 @@ static void qed_aio_complete_bh(void *opaque)
qemu_aio_unref(acb);
/* Invoke callback */
qed_acquire(s);
cb(user_opaque, ret);
qed_release(s);
}
static void qed_aio_complete(QEDAIOCB *acb, int ret)
@@ -959,7 +986,7 @@ static void qed_aio_complete(QEDAIOCB *acb, int ret)
QSIMPLEQ_REMOVE_HEAD(&s->allocating_write_reqs, next);
acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
if (acb) {
- qed_aio_next_io(acb, 0);
+ qed_aio_start_io(acb);
} else if (s->header.features & QED_F_NEED_CHECK) {
qed_start_need_check_timer(s);
}
@@ -984,7 +1011,7 @@ static void qed_commit_l2_update(void *opaque, int ret)
acb->request.l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
assert(acb->request.l2_table != NULL);
- qed_aio_next_io(opaque, ret);
+ qed_aio_next_io(acb, ret);
}
/**
@@ -1032,11 +1059,11 @@ static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset)
if (need_alloc) {
/* Write out the whole new L2 table */
qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true,
qed_aio_write_l1_update, acb);
} else {
/* Write out only the updated part of the L2 table */
qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters, false,
- qed_aio_next_io, acb);
+ qed_aio_next_io_cb, acb);
}
return;
@@ -1088,7 +1115,7 @@ static void qed_aio_write_main(void *opaque, int ret)
}
if (acb->find_cluster_ret == QED_CLUSTER_FOUND) {
- next_fn = qed_aio_next_io;
+ next_fn = qed_aio_next_io_cb;
} else {
if (s->bs->backing) {
next_fn = qed_aio_write_flush_before_l2_update;
@@ -1201,7 +1228,7 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
if (acb->flags & QED_AIOCB_ZERO) {
/* Skip ahead if the clusters are already zero */
if (acb->find_cluster_ret == QED_CLUSTER_ZERO) {
- qed_aio_next_io(acb, 0);
+ qed_aio_start_io(acb);
return;
}
@@ -1321,18 +1348,18 @@ static void qed_aio_read_data(void *opaque, int ret,
/* Handle zero cluster and backing file reads */
if (ret == QED_CLUSTER_ZERO) {
qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size);
- qed_aio_next_io(acb, 0);
+ qed_aio_start_io(acb);
return;
} else if (ret != QED_CLUSTER_FOUND) {
qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
- &acb->backing_qiov, qed_aio_next_io, acb);
+ &acb->backing_qiov, qed_aio_next_io_cb, acb);
return;
}
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE,
&acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
- qed_aio_next_io, acb);
+ qed_aio_next_io_cb, acb);
return;
err:
@@ -1342,9 +1369,8 @@ err:
/**
* Begin next I/O or complete the request
*/
- static void qed_aio_next_io(void *opaque, int ret)
+ static void qed_aio_next_io(QEDAIOCB *acb, int ret)
{
- QEDAIOCB *acb = opaque;
BDRVQEDState *s = acb_to_s(acb);
QEDFindClusterFunc *io_fn = (acb->flags & QED_AIOCB_WRITE) ?
qed_aio_write_data : qed_aio_read_data;
@@ -1400,7 +1426,7 @@ static BlockAIOCB *qed_aio_setup(BlockDriverState *bs,
qemu_iovec_init(&acb->cur_qiov, qiov->niov);
/* Start request */
- qed_aio_next_io(acb, 0);
+ qed_aio_start_io(acb);
return &acb->common;
}
@@ -1436,7 +1462,7 @@ static void coroutine_fn qed_co_pwrite_zeroes_cb(void *opaque, int ret)
cb->done = true;
cb->ret = ret;
if (cb->co) {
- qemu_coroutine_enter(cb->co);
+ aio_co_wake(cb->co);
}
}
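qed_acquire()/qed_release() are thin wrappers around aio_context_acquire()/aio_context_release() on the BDS's AioContext, and the hunks above bracket every callback that can fire outside that context with them. A sketch of the intended call shape, reusing the QED names from the diff; illustrative only, not a drop-in patch:

    /* Completion callback that may run outside the BDS's AioContext. */
    static void example_completion_cb(void *opaque, int ret)
    {
        QEDAIOCB *acb = opaque;
        BDRVQEDState *s = acb_to_s(acb);

        qed_acquire(s);    /* aio_context_acquire() on s->bs's context */
        /* ... touch QED state, complete or continue the request ... */
        qed_release(s);    /* matching aio_context_release() */
    }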


@@ -198,6 +198,9 @@ enum {
*/
typedef void QEDFindClusterFunc(void *opaque, int ret, uint64_t offset, size_t len);
void qed_acquire(BDRVQEDState *s);
void qed_release(BDRVQEDState *s);
/**
* Generic callback for chaining async callbacks
*/


@@ -486,7 +486,7 @@ static void wait_for_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *acb)
retry:
QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) {
if (AIOCBOverlapping(acb, cb)) {
- qemu_co_queue_wait(&s->overlapping_queue);
+ qemu_co_queue_wait(&s->overlapping_queue, NULL);
goto retry;
}
}
@@ -575,13 +575,6 @@ static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data,
return ret;
}
- static void restart_co_req(void *opaque)
- {
- Coroutine *co = opaque;
- qemu_coroutine_enter(co);
- }
typedef struct SheepdogReqCo {
int sockfd;
BlockDriverState *bs;
@@ -592,12 +585,19 @@ typedef struct SheepdogReqCo {
unsigned int *rlen;
int ret;
bool finished;
Coroutine *co;
} SheepdogReqCo;
static void restart_co_req(void *opaque)
{
SheepdogReqCo *srco = opaque;
aio_co_wake(srco->co);
}
static coroutine_fn void do_co_req(void *opaque)
{
int ret;
- Coroutine *co;
SheepdogReqCo *srco = opaque;
int sockfd = srco->sockfd;
SheepdogReq *hdr = srco->hdr;
@@ -605,9 +605,9 @@ static coroutine_fn void do_co_req(void *opaque)
unsigned int *wlen = srco->wlen;
unsigned int *rlen = srco->rlen;
- co = qemu_coroutine_self();
+ srco->co = qemu_coroutine_self();
aio_set_fd_handler(srco->aio_context, sockfd, false,
- NULL, restart_co_req, NULL, co);
+ NULL, restart_co_req, NULL, srco);
ret = send_co_req(sockfd, hdr, data, wlen);
if (ret < 0) {
@@ -615,7 +615,7 @@ static coroutine_fn void do_co_req(void *opaque)
}
aio_set_fd_handler(srco->aio_context, sockfd, false,
- restart_co_req, NULL, NULL, co);
+ restart_co_req, NULL, NULL, srco);
ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
if (ret != sizeof(*hdr)) {
@@ -643,6 +643,7 @@ out:
aio_set_fd_handler(srco->aio_context, sockfd, false,
NULL, NULL, NULL, NULL);
srco->co = NULL;
srco->ret = ret;
srco->finished = true;
if (srco->bs) {
@@ -866,7 +867,7 @@ static void coroutine_fn aio_read_response(void *opaque)
* We've finished all requests which belong to the AIOCB, so
* we can switch back to sd_co_readv/writev now.
*/
- qemu_coroutine_enter(acb->coroutine);
+ aio_co_wake(acb->coroutine);
}
return;
@@ -883,14 +884,14 @@ static void co_read_response(void *opaque)
s->co_recv = qemu_coroutine_create(aio_read_response, opaque);
}
- qemu_coroutine_enter(s->co_recv);
+ aio_co_wake(s->co_recv);
}
static void co_write_request(void *opaque)
{
BDRVSheepdogState *s = opaque;
- qemu_coroutine_enter(s->co_send);
+ aio_co_wake(s->co_send);
}
/*
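The sheepdog rework follows one template: the request records its own coroutine, the raw Coroutine pointer handed to aio_set_fd_handler() is replaced by the request itself, and the handler wakes the coroutine with aio_co_wake(). A condensed sketch with hypothetical names, assuming the aio_set_fd_handler() signature shown above:

    #include "qemu/osdep.h"
    #include "block/aio.h"
    #include "qemu/coroutine.h"

    typedef struct ExampleReq {
        int fd;
        AioContext *ctx;
        Coroutine *co;    /* coroutine blocked on this request */
    } ExampleReq;

    static void example_restart(void *opaque)
    {
        ExampleReq *req = opaque;
        aio_co_wake(req->co);    /* re-enter in the right AioContext */
    }

    static void coroutine_fn example_send(ExampleReq *req)
    {
        req->co = qemu_coroutine_self();
        aio_set_fd_handler(req->ctx, req->fd, false,
                           NULL, example_restart, NULL, req);
        qemu_coroutine_yield();    /* woken by example_restart() */
        aio_set_fd_handler(req->ctx, req->fd, false,
                           NULL, NULL, NULL, NULL);
        req->co = NULL;
    }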


@@ -889,10 +889,14 @@ static void restart_coroutine(void *opaque)
DPRINTF("co=%p", co);
- qemu_coroutine_enter(co);
+ aio_co_wake(co);
}
- static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)
+ /* A non-blocking call returned EAGAIN, so yield, ensuring the
+ * handlers are set up so that we'll be rescheduled when there is an
+ * interesting event on the socket.
+ */
+ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
{
int r;
IOHandler *rd_handler = NULL, *wr_handler = NULL;
@@ -912,25 +916,10 @@ static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)
aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
false, rd_handler, wr_handler, NULL, co);
- }
- static coroutine_fn void clear_fd_handler(BDRVSSHState *s,
- BlockDriverState *bs)
- {
- DPRINTF("s->sock=%d", s->sock);
- aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
- false, NULL, NULL, NULL, NULL);
- }
- /* A non-blocking call returned EAGAIN, so yield, ensuring the
- * handlers are set up so that we'll be rescheduled when there is an
- * interesting event on the socket.
- */
- static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
- {
- set_fd_handler(s, bs);
qemu_coroutine_yield();
- clear_fd_handler(s, bs);
+ DPRINTF("s->sock=%d - back", s->sock);
+ aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, false,
+ NULL, NULL, NULL, NULL);
}
/* SFTP has a function `libssh2_sftp_seek64' which seeks to a position


@@ -326,7 +326,7 @@ void coroutine_fn throttle_group_co_io_limits_intercept(BlockBackend *blk,
if (must_wait || blkp->pending_reqs[is_write]) {
blkp->pending_reqs[is_write]++;
qemu_mutex_unlock(&tg->lock);
- qemu_co_queue_wait(&blkp->throttled_reqs[is_write]);
+ qemu_co_queue_wait(&blkp->throttled_reqs[is_write], NULL);
qemu_mutex_lock(&tg->lock);
blkp->pending_reqs[is_write]--;
}
@@ -416,7 +416,9 @@ static void timer_cb(BlockBackend *blk, bool is_write)
qemu_mutex_unlock(&tg->lock);
/* Run the request that was waiting for this timer */
aio_context_acquire(blk_get_aio_context(blk));
empty_queue = !qemu_co_enter_next(&blkp->throttled_reqs[is_write]);
aio_context_release(blk_get_aio_context(blk));
/* If the request queue was empty then we have to take care of
* scheduling the next one */
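The timer can fire in the main loop while the throttled queue belongs to an IOThread, hence the explicit context acquisition around qemu_co_enter_next() above. A sketch of the shape, assuming QEMU's BlockBackend accessors; the helper name is hypothetical:

    /* Wake the next throttled request from a timer callback. */
    static bool example_timer_fire(BlockBackend *blk, CoQueue *queue)
    {
        AioContext *ctx = blk_get_aio_context(blk);
        bool woke;

        aio_context_acquire(ctx);
        woke = qemu_co_enter_next(queue);   /* false if queue was empty */
        aio_context_release(ctx);
        return woke;
    }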


@@ -41,7 +41,7 @@ struct QEMUWin32AIOState {
HANDLE hIOCP;
EventNotifier e;
int count;
- bool is_aio_context_attached;
+ AioContext *aio_ctx;
};
typedef struct QEMUWin32AIOCB {
@@ -87,7 +87,6 @@ static void win32_aio_process_completion(QEMUWin32AIOState *s,
qemu_vfree(waiocb->buf);
}
waiocb->common.cb(waiocb->common.opaque, ret);
qemu_aio_unref(waiocb);
}
@@ -176,13 +175,13 @@ void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
AioContext *old_context)
{
aio_set_event_notifier(old_context, &aio->e, false, NULL, NULL);
- aio->is_aio_context_attached = false;
+ aio->aio_ctx = NULL;
}
void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
AioContext *new_context)
{
- aio->is_aio_context_attached = true;
+ aio->aio_ctx = new_context;
aio_set_event_notifier(new_context, &aio->e, false,
win32_aio_completion_cb, NULL);
}
@@ -212,7 +211,7 @@ out_free_state:
void win32_aio_cleanup(QEMUWin32AIOState *aio)
{
- assert(!aio->is_aio_context_attached);
+ assert(!aio->aio_ctx);
CloseHandle(aio->hIOCP);
event_notifier_cleanup(&aio->e);
g_free(aio);


@@ -227,27 +227,30 @@ DriveInfo *drive_get(BlockInterfaceType type, int bus, int unit)
return NULL;
}
- bool drive_check_orphaned(void)
+ void drive_check_orphaned(void)
{
BlockBackend *blk;
DriveInfo *dinfo;
- bool rs = false;
+ Location loc;
+ bool orphans = false;
for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
dinfo = blk_legacy_dinfo(blk);
/* If dinfo->bdrv->dev is NULL, it has no device attached. */
/* Unless this is a default drive, this may be an oversight. */
if (!blk_get_attached_dev(blk) && !dinfo->is_default &&
dinfo->type != IF_NONE) {
- fprintf(stderr, "Warning: Orphaned drive without device: "
- "id=%s,file=%s,if=%s,bus=%d,unit=%d\n",
- blk_name(blk), blk_bs(blk) ? blk_bs(blk)->filename : "",
- if_name[dinfo->type], dinfo->bus, dinfo->unit);
- rs = true;
loc_push_none(&loc);
qemu_opts_loc_restore(dinfo->opts);
error_report("machine type does not support"
" if=%s,bus=%d,unit=%d",
if_name[dinfo->type], dinfo->bus, dinfo->unit);
loc_pop(&loc);
orphans = true;
}
}
- return rs;
+ if (orphans) {
+ exit(1);
+ }
}
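The orphan check now reuses QEMU's error-location machinery, so the message points at the offending -drive option instead of nowhere. The idiom, extracted into a hypothetical helper; all calls appear in the hunk above:

    /* Report an orphaned -drive together with its command-line location. */
    static void example_report_orphan(DriveInfo *dinfo)
    {
        Location loc;

        loc_push_none(&loc);                 /* save the current location */
        qemu_opts_loc_restore(dinfo->opts);  /* point at the -drive option */
        error_report("machine type does not support if=%s,bus=%d,unit=%d",
                     if_name[dinfo->type], dinfo->bus, dinfo->unit);
        loc_pop(&loc);                       /* restore the saved location */
    }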
DriveInfo *drive_get_by_index(BlockInterfaceType type, int index)

configure

@@ -3378,7 +3378,7 @@ fi
fdt_required=no
for target in $target_list; do
case $target in
- aarch64*-softmmu|arm*-softmmu|ppc*-softmmu|microblaze*-softmmu)
+ aarch64*-softmmu|arm*-softmmu|ppc*-softmmu|microblaze*-softmmu|mips64el-softmmu)
fdt_required=yes
;;
esac
@@ -3396,11 +3396,11 @@ fi
if test "$fdt" != "no" ; then
fdt_libs="-lfdt"
# explicitly check for libfdt_env.h as it is missing in some stable installs
- # and test for required functions to make sure we are on a version >= 1.4.0
+ # and test for required functions to make sure we are on a version >= 1.4.2
cat > $TMPC << EOF
#include <libfdt.h>
#include <libfdt_env.h>
- int main(void) { fdt_get_property_by_offset(0, 0, 0); return 0; }
+ int main(void) { fdt_first_subnode(0, 0); return 0; }
EOF
if compile_prog "" "$fdt_libs" ; then
# system DTC is good - use it
@@ -3418,7 +3418,7 @@ EOF
fdt_libs="-L\$(BUILD_DIR)/dtc/libfdt $fdt_libs"
elif test "$fdt" = "yes" ; then
# have neither and want - prompt for system/submodule install
- error_exit "DTC (libfdt) version >= 1.4.0 not present. Your options:" \
+ error_exit "DTC (libfdt) version >= 1.4.2 not present. Your options:" \
" (1) Preferred: Install the DTC (libfdt) devel package" \
" (2) Fetch the DTC submodule, using:" \
" git submodule update --init dtc"


@@ -10,3 +10,6 @@ CONFIG_JAZZ=y
CONFIG_G364FB=y
CONFIG_JAZZ_LED=y
CONFIG_VT82C686=y
CONFIG_MIPS_BOSTON=y
CONFIG_FITLOADER=y
CONFIG_PCI_XILINX=y


@@ -166,8 +166,10 @@ static void dma_blk_cb(void *opaque, int ret)
QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align));
}
aio_context_acquire(dbs->ctx);
dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
dma_blk_cb, dbs, dbs->io_func_opaque);
aio_context_release(dbs->ctx);
assert(dbs->acb);
}

docs/nvdimm.txt (new file)

@@ -0,0 +1,124 @@
QEMU Virtual NVDIMM
===================
This document explains the usage of virtual NVDIMM (vNVDIMM) feature
which is available since QEMU v2.6.0.
QEMU currently implements only the persistent memory mode of the vNVDIMM
device, not the block window mode.
Basic Usage
-----------
The storage of a vNVDIMM device in QEMU is provided by a memory backend
(i.e. memory-backend-file or memory-backend-ram). A simple way to create
a vNVDIMM device at startup time is via the following command line
options:
-machine pc,nvdimm
-m $RAM_SIZE,slots=$N,maxmem=$MAX_SIZE
-object memory-backend-file,id=mem1,share=on,mem-path=$PATH,size=$NVDIMM_SIZE
-device nvdimm,id=nvdimm1,memdev=mem1
Where,
- the "nvdimm" machine option enables vNVDIMM feature.
- "slots=$N" should be equal to or larger than the total number of
normal RAM devices and vNVDIMM devices, e.g. $N should be >= 2 here.
- "maxmem=$MAX_SIZE" should be equal to or larger than the total size
of normal RAM devices and vNVDIMM devices, e.g. $MAX_SIZE should be
>= $RAM_SIZE + $NVDIMM_SIZE here.
- "object memory-backend-file,id=mem1,share=on,mem-path=$PATH,size=$NVDIMM_SIZE"
creates a backend storage of size $NVDIMM_SIZE on a file $PATH. All
accesses to the virtual NVDIMM device go to the file $PATH.
"share=on/off" controls the visibility of guest writes. If
"share=on", then guest writes will be applied to the backend
file. If another guest uses the same backend file with option
"share=on", then the above writes will be visible to it as well. If
"share=off", then guest writes won't be applied to the backend
file and thus will be invisible to other guests.
- "device nvdimm,id=nvdimm1,memdev=mem1" creates a virtual NVDIMM
device whose storage is provided by the memory backend device above.
Multiple vNVDIMM devices can be created if multiple pairs of "-object"
and "-device" are provided.
With the above command line options, if the guest OS has a proper NVDIMM
driver, it should be able to detect an NVDIMM device in persistent
memory mode with size $NVDIMM_SIZE.
Note:
1. Prior to QEMU v2.8.0, if memory-backend-file is used and the actual
backend file size is not equal to the size given by "size" option,
QEMU will truncate the backend file by ftruncate(2), which will
corrupt the existing data in the backend file, especially for the
shrink case.
QEMU v2.8.0 and later check the backend file size and the "size"
option. If they do not match, QEMU will report an error and abort in
order to avoid data corruption.
2. QEMU v2.6.0 only puts a basic alignment requirement on the "size"
option of memory-backend-file, e.g. 4KB alignment on x86. However,
QEMU v2.7.0 puts an additional alignment requirement, which may
require a larger value than the basic one, e.g. 2MB on x86. This
change breaks the usage of memory-backend-file that only satisfies
the basic alignment.
QEMU v2.8.0 and later remove the additional alignment on non-s390x
architectures, so the broken memory-backend-file can work again.
Label
-----
QEMU v2.7.0 and later implement label support for vNVDIMM devices.
To enable labels on vNVDIMM devices, users can simply add
"label-size=$SZ" option to "-device nvdimm", e.g.
-device nvdimm,id=nvdimm1,memdev=mem1,label-size=128K
Note:
1. The minimal label size is 128KB.
2. QEMU v2.7.0 and later store labels at the end of backend storage.
If a memory backend file, which was previously used as the backend
of a vNVDIMM device without labels, is now used for a vNVDIMM
device with label, the data in the label area at the end of file
will be inaccessible to the guest. If any useful data (e.g. the
meta-data of the file system) was stored there, the latter usage
may result in guest data corruption (e.g. breakage of the guest file
system).
Hotplug
-------
QEMU v2.8.0 and later implement the hotplug support for vNVDIMM
devices. Similarly to the RAM hotplug, the vNVDIMM hotplug is
accomplished by two monitor commands "object_add" and "device_add".
For example, the following commands add another 4GB vNVDIMM device to
the guest:
(qemu) object_add memory-backend-file,id=mem2,share=on,mem-path=new_nvdimm.img,size=4G
(qemu) device_add nvdimm,id=nvdimm2,memdev=mem2
Note:
1. Each hotplugged vNVDIMM device consumes one memory slot. Users
should always ensure the memory option "-m ...,slots=N" specifies
enough number of slots, i.e.
N >= number of RAM devices +
number of statically plugged vNVDIMM devices +
number of hotplugged vNVDIMM devices
2. A similar requirement applies to the memory option "-m ...,maxmem=M", i.e.
M >= size of RAM devices +
size of statically plugged vNVDIMM devices +
size of hotplugged vNVDIMM devices
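Putting the pieces of this document together, a complete invocation with one labeled vNVDIMM device might look like the following; sizes and the backing path are illustrative only:

    qemu-system-x86_64 -machine pc,nvdimm \
        -m 4G,slots=4,maxmem=32G \
        -object memory-backend-file,id=mem1,share=on,mem-path=/tmp/nvdimm.img,size=4G \
        -device nvdimm,id=nvdimm1,memdev=mem1,label-size=128K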


@@ -1,6 +1,8 @@
\input texinfo
@setfilename qemu-ga-ref.info
@include version.texi
@exampleindent 0
@paragraphindent 0


@@ -1,6 +1,8 @@
\input texinfo
@setfilename qemu-qmp-ref.info
@include version.texi
@exampleindent 0
@paragraphindent 0


@@ -61,6 +61,7 @@ PCI devices (other than virtio):
1b36:0009 PCI Expander Bridge (-device pxb)
1b36:000a PCI-PCI bridge (multiseat)
1b36:000b PCIe Expander Bridge (-device pxb-pcie)
1b36:000d PCI xhci usb host adapter
All these devices are documented in docs/specs.

dtc

Submodule dtc updated: 65cc4d2748...ec02b34c05

exec.c

@@ -3166,6 +3166,7 @@ void address_space_cache_destroy(MemoryRegionCache *cache)
xen_invalidate_map_cache_entry(cache->ptr);
}
memory_region_unref(cache->mr);
cache->mr = NULL;
}
/* Called from RCU critical section. This function has the same

hmp.c

@@ -1014,8 +1014,14 @@ void hmp_memsave(Monitor *mon, const QDict *qdict)
const char *filename = qdict_get_str(qdict, "filename");
uint64_t addr = qdict_get_int(qdict, "val");
Error *err = NULL;
+ int cpu_index = monitor_get_cpu_index();
- qmp_memsave(addr, size, filename, true, monitor_get_cpu_index(), &err);
+ if (cpu_index < 0) {
+ monitor_printf(mon, "No CPU available\n");
+ return;
+ }
+ qmp_memsave(addr, size, filename, true, cpu_index, &err);
hmp_handle_error(mon, &err);
}
@@ -1552,6 +1558,7 @@ void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict)
{
Error *err = NULL;
BlockIOThrottle throttle = {
.has_device = true,
.device = (char *) qdict_get_str(qdict, "device"),
.bps = qdict_get_int(qdict, "bps"),
.bps_rd = qdict_get_int(qdict, "bps_rd"),
@@ -2148,10 +2155,15 @@ void hmp_info_iothreads(Monitor *mon, const QDict *qdict)
{
IOThreadInfoList *info_list = qmp_query_iothreads(NULL);
IOThreadInfoList *info;
IOThreadInfo *value;
for (info = info_list; info; info = info->next) {
- monitor_printf(mon, "%s: thread_id=%" PRId64 "\n",
- info->value->id, info->value->thread_id);
value = info->value;
monitor_printf(mon, "%s:\n", value->id);
monitor_printf(mon, " thread_id=%" PRId64 "\n", value->thread_id);
monitor_printf(mon, " poll-max-ns=%" PRId64 "\n", value->poll_max_ns);
monitor_printf(mon, " poll-grow=%" PRId64 "\n", value->poll_grow);
monitor_printf(mon, " poll-shrink=%" PRId64 "\n", value->poll_shrink);
}
qapi_free_IOThreadInfoList(info_list);


@@ -2374,7 +2374,7 @@ static void coroutine_fn v9fs_flush(void *opaque)
/*
* Wait for pdu to complete.
*/
- qemu_co_queue_wait(&cancel_pdu->complete);
+ qemu_co_queue_wait(&cancel_pdu->complete, NULL);
cancel_pdu->cancelled = 0;
pdu_free(cancel_pdu);
}


@@ -177,6 +177,7 @@ static void clipper_machine_init(MachineClass *mc)
{
mc->desc = "Alpha DP264/CLIPPER";
mc->init = clipper_init;
mc->block_default_type = IF_IDE;
mc->max_cpus = 4;
mc->is_default = 1;
}


@@ -71,6 +71,8 @@ static void cubieboard_init(MachineState *machine)
memory_region_add_subregion(get_system_memory(), AW_A10_SDRAM_BASE,
&s->sdram);
/* TODO create and connect IDE devices for ide_drive_get() */
cubieboard_binfo.ram_size = machine->ram_size;
cubieboard_binfo.kernel_filename = machine->kernel_filename;
cubieboard_binfo.kernel_cmdline = machine->kernel_cmdline;
@@ -82,6 +84,8 @@ static void cubieboard_machine_init(MachineClass *mc)
{
mc->desc = "cubietech cubieboard";
mc->init = cubieboard_init;
mc->block_default_type = IF_IDE;
mc->units_per_default_bus = 1;
}
DEFINE_MACHINE("cubieboard", cubieboard_machine_init)


@@ -363,6 +363,8 @@ static void calxeda_init(MachineState *machine, enum cxmachines machine_id)
sysbus_connect_irq(SYS_BUS_DEVICE(dev), 2, pic[82]);
}
/* TODO create and connect IDE devices for ide_drive_get() */
highbank_binfo.ram_size = ram_size;
highbank_binfo.kernel_filename = kernel_filename;
highbank_binfo.kernel_cmdline = kernel_cmdline;
@@ -405,7 +407,8 @@ static void highbank_class_init(ObjectClass *oc, void *data)
mc->desc = "Calxeda Highbank (ECX-1000)";
mc->init = highbank_init;
- mc->block_default_type = IF_SCSI;
+ mc->block_default_type = IF_IDE;
mc->units_per_default_bus = 1;
mc->max_cpus = 4;
}
@@ -421,7 +424,8 @@ static void midway_class_init(ObjectClass *oc, void *data)
mc->desc = "Calxeda Midway (ECX-2000)";
mc->init = midway_init;
- mc->block_default_type = IF_SCSI;
+ mc->block_default_type = IF_IDE;
mc->units_per_default_bus = 1;
mc->max_cpus = 4;
}


@@ -259,7 +259,7 @@ static void realview_init(MachineState *machine,
}
n = drive_get_max_bus(IF_SCSI);
while (n >= 0) {
- pci_create_simple(pci_bus, -1, "lsi53c895a");
+ lsi53c895a_create(pci_bus);
n--;
}
}
@@ -443,7 +443,6 @@ static void realview_pbx_a9_class_init(ObjectClass *oc, void *data)
mc->desc = "ARM RealView Platform Baseboard Explore for Cortex-A9";
mc->init = realview_pbx_a9_init;
- mc->block_default_type = IF_SCSI;
mc->max_cpus = 4;
}


@@ -998,6 +998,7 @@ static void spitzpda_class_init(ObjectClass *oc, void *data)
mc->desc = "Sharp SL-C3000 (Spitz) PDA (PXA270)";
mc->init = spitz_init;
mc->block_default_type = IF_IDE;
}
static const TypeInfo spitzpda_type = {
@@ -1012,6 +1013,7 @@ static void borzoipda_class_init(ObjectClass *oc, void *data)
mc->desc = "Sharp SL-C3100 (Borzoi) PDA (PXA270)";
mc->init = borzoi_init;
mc->block_default_type = IF_IDE;
}
static const TypeInfo borzoipda_type = {
@@ -1026,6 +1028,7 @@ static void terrierpda_class_init(ObjectClass *oc, void *data)
mc->desc = "Sharp SL-C3200 (Terrier) PDA (PXA270)";
mc->init = terrier_init;
mc->block_default_type = IF_IDE;
}
static const TypeInfo terrierpda_type = {


@@ -263,6 +263,7 @@ static void tosapda_machine_init(MachineClass *mc)
{
mc->desc = "Sharp SL-6000 (Tosa) PDA (PXA255)";
mc->init = tosa_init;
mc->block_default_type = IF_IDE;
}
DEFINE_MACHINE("tosa", tosapda_machine_init)


@@ -290,7 +290,7 @@ static void versatile_init(MachineState *machine, int board_id)
}
n = drive_get_max_bus(IF_SCSI);
while (n >= 0) {
- pci_create_simple(pci_bus, -1, "lsi53c895a");
+ lsi53c895a_create(pci_bus);
n--;
}


@@ -752,7 +752,6 @@ static void vexpress_class_init(ObjectClass *oc, void *data)
mc->desc = "ARM Versatile Express";
mc->init = vexpress_common_init;
- mc->block_default_type = IF_SCSI;
mc->max_cpus = 4;
}


@@ -323,7 +323,6 @@ static void zynq_machine_init(MachineClass *mc)
{
mc->desc = "Xilinx Zynq Platform Baseboard for Cortex-A9";
mc->init = zynq_init;
- mc->block_default_type = IF_SCSI;
mc->max_cpus = 1;
mc->no_sdcard = 1;
}


@@ -106,6 +106,8 @@ static void xlnx_ep108_init(MachineState *machine)
sysbus_connect_irq(SYS_BUS_DEVICE(&s->soc.spi[i]), 1, cs_line);
}
/* TODO create and connect IDE devices for ide_drive_get() */
xlnx_ep108_binfo.ram_size = ram_size;
xlnx_ep108_binfo.kernel_filename = machine->kernel_filename;
xlnx_ep108_binfo.kernel_cmdline = machine->kernel_cmdline;
@@ -118,6 +120,8 @@ static void xlnx_ep108_machine_init(MachineClass *mc)
{
mc->desc = "Xilinx ZynqMP EP108 board";
mc->init = xlnx_ep108_init;
mc->block_default_type = IF_IDE;
mc->units_per_default_bus = 1;
}
DEFINE_MACHINE("xlnx-ep108", xlnx_ep108_machine_init)
@@ -126,6 +130,8 @@ static void xlnx_zcu102_machine_init(MachineClass *mc)
{
mc->desc = "Xilinx ZynqMP ZCU102 board";
mc->init = xlnx_ep108_init;
mc->block_default_type = IF_IDE;
mc->units_per_default_bus = 1;
}
DEFINE_MACHINE("xlnx-zcu102", xlnx_zcu102_machine_init)


@@ -147,7 +147,7 @@ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s)
g_free(s);
}
- static void virtio_blk_data_plane_handle_output(VirtIODevice *vdev,
+ static bool virtio_blk_data_plane_handle_output(VirtIODevice *vdev,
VirtQueue *vq)
{
VirtIOBlock *s = (VirtIOBlock *)vdev;
@@ -155,7 +155,7 @@ static void virtio_blk_data_plane_handle_output(VirtIODevice *vdev,
assert(s->dataplane);
assert(s->dataplane_started);
- virtio_blk_handle_vq(s, vq);
+ return virtio_blk_handle_vq(s, vq);
}
/* Context: QEMU global mutex held */


@@ -89,7 +89,9 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
static void virtio_blk_rw_complete(void *opaque, int ret)
{
VirtIOBlockReq *next = opaque;
VirtIOBlock *s = next->dev;
aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
while (next) {
VirtIOBlockReq *req = next;
next = req->mr_next;
@@ -122,21 +124,27 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
block_acct_done(blk_get_stats(req->dev->blk), &req->acct);
virtio_blk_free_request(req);
}
aio_context_release(blk_get_aio_context(s->conf.conf.blk));
}
static void virtio_blk_flush_complete(void *opaque, int ret)
{
VirtIOBlockReq *req = opaque;
VirtIOBlock *s = req->dev;
aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
if (ret) {
if (virtio_blk_handle_rw_error(req, -ret, 0)) {
- return;
+ goto out;
}
}
virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
block_acct_done(blk_get_stats(req->dev->blk), &req->acct);
virtio_blk_free_request(req);
out:
aio_context_release(blk_get_aio_context(s->conf.conf.blk));
}
#ifdef __linux__
@@ -150,7 +158,8 @@ static void virtio_blk_ioctl_complete(void *opaque, int status)
{
VirtIOBlockIoctlReq *ioctl_req = opaque;
VirtIOBlockReq *req = ioctl_req->req;
- VirtIODevice *vdev = VIRTIO_DEVICE(req->dev);
+ VirtIOBlock *s = req->dev;
+ VirtIODevice *vdev = VIRTIO_DEVICE(s);
struct virtio_scsi_inhdr *scsi;
struct sg_io_hdr *hdr;
@@ -182,8 +191,10 @@ static void virtio_blk_ioctl_complete(void *opaque, int status)
virtio_stl_p(vdev, &scsi->data_len, hdr->dxfer_len);
out:
aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
virtio_blk_req_complete(req, status);
virtio_blk_free_request(req);
aio_context_release(blk_get_aio_context(s->conf.conf.blk));
g_free(ioctl_req);
}
@@ -581,17 +592,20 @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
return 0;
}
- void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
+ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
{
VirtIOBlockReq *req;
MultiReqBuffer mrb = {};
bool progress = false;
aio_context_acquire(blk_get_aio_context(s->blk));
blk_io_plug(s->blk);
do {
virtio_queue_set_notification(vq, 0);
while ((req = virtio_blk_get_request(s, vq))) {
progress = true;
if (virtio_blk_handle_request(req, &mrb)) {
virtqueue_detach_element(req->vq, &req->elem, 0);
virtio_blk_free_request(req);
@@ -607,6 +621,13 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
}
blk_io_unplug(s->blk);
aio_context_release(blk_get_aio_context(s->blk));
return progress;
}
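The new contract is that virtqueue handlers report whether they made progress, which the event-loop polling code can use to decide whether polling is still worthwhile. A reduced sketch of the loop above; names mirror the virtio-blk code, error handling elided:

    static bool example_handle_vq(VirtIOBlock *s, VirtQueue *vq)
    {
        VirtIOBlockReq *req;
        bool progress = false;

        while ((req = virtio_blk_get_request(s, vq))) {
            progress = true;    /* at least one request was popped */
            /* ... submit or fail the request ... */
        }
        return progress;    /* feeds the AioContext polling logic */
    }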
static void virtio_blk_handle_output_do(VirtIOBlock *s, VirtQueue *vq)
{
virtio_blk_handle_vq(s, vq);
}
static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
@@ -622,7 +643,7 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
return;
}
}
- virtio_blk_handle_vq(s, vq);
+ virtio_blk_handle_output_do(s, vq);
}
static void virtio_blk_dma_restart_bh(void *opaque)
@@ -636,6 +657,7 @@ static void virtio_blk_dma_restart_bh(void *opaque)
s->rq = NULL;
aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
while (req) {
VirtIOBlockReq *next = req->next;
if (virtio_blk_handle_request(req, &mrb)) {
@@ -656,6 +678,7 @@ static void virtio_blk_dma_restart_bh(void *opaque)
if (mrb.num_reqs) {
virtio_blk_submit_multireq(s->blk, &mrb);
}
aio_context_release(blk_get_aio_context(s->conf.conf.blk));
}
static void virtio_blk_dma_restart_cb(void *opaque, int running,


@@ -13,6 +13,7 @@ common-obj-$(CONFIG_PTIMER) += ptimer.o
common-obj-$(CONFIG_SOFTMMU) += sysbus.o
common-obj-$(CONFIG_SOFTMMU) += machine.o
common-obj-$(CONFIG_SOFTMMU) += loader.o
common-obj-$(CONFIG_FITLOADER) += loader-fit.o
common-obj-$(CONFIG_SOFTMMU) += qdev-properties-system.o
common-obj-$(CONFIG_SOFTMMU) += register.o
common-obj-$(CONFIG_SOFTMMU) += or-irq.o

hw/core/loader-fit.c (new file)

@@ -0,0 +1,325 @@
/*
* Flattened Image Tree loader.
*
* Copyright (c) 2016 Imagination Technologies
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "exec/address-spaces.h"
#include "exec/memory.h"
#include "hw/loader.h"
#include "hw/loader-fit.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "sysemu/device_tree.h"
#include "sysemu/sysemu.h"
#include <libfdt.h>
#include <zlib.h>
#define FIT_LOADER_MAX_PATH (128)
static const void *fit_load_image_alloc(const void *itb, const char *name,
int *poff, size_t *psz)
{
const void *data;
const char *comp;
void *uncomp_data;
char path[FIT_LOADER_MAX_PATH];
int off, sz;
ssize_t uncomp_len;
snprintf(path, sizeof(path), "/images/%s", name);
off = fdt_path_offset(itb, path);
if (off < 0) {
return NULL;
}
if (poff) {
*poff = off;
}
data = fdt_getprop(itb, off, "data", &sz);
if (!data) {
return NULL;
}
comp = fdt_getprop(itb, off, "compression", NULL);
if (!comp || !strcmp(comp, "none")) {
if (psz) {
*psz = sz;
}
uncomp_data = g_malloc(sz);
memmove(uncomp_data, data, sz);
return uncomp_data;
}
if (!strcmp(comp, "gzip")) {
uncomp_len = UBOOT_MAX_GUNZIP_BYTES;
uncomp_data = g_malloc(uncomp_len);
uncomp_len = gunzip(uncomp_data, uncomp_len, (void *) data, sz);
if (uncomp_len < 0) {
error_printf("unable to decompress %s image\n", name);
g_free(uncomp_data);
return NULL;
}
data = g_realloc(uncomp_data, uncomp_len);
if (psz) {
*psz = uncomp_len;
}
return data;
}
error_printf("unknown compression '%s'\n", comp);
return NULL;
}
static int fit_image_addr(const void *itb, int img, const char *name,
hwaddr *addr)
{
const void *prop;
int len;
prop = fdt_getprop(itb, img, name, &len);
if (!prop) {
return -ENOENT;
}
switch (len) {
case 4:
*addr = fdt32_to_cpu(*(fdt32_t *)prop);
return 0;
case 8:
*addr = fdt64_to_cpu(*(fdt64_t *)prop);
return 0;
default:
error_printf("invalid %s address length %d\n", name, len);
return -EINVAL;
}
}
static int fit_load_kernel(const struct fit_loader *ldr, const void *itb,
int cfg, void *opaque, hwaddr *pend)
{
const char *name;
const void *data;
const void *load_data;
hwaddr load_addr, entry_addr;
int img_off, err;
size_t sz;
int ret;
name = fdt_getprop(itb, cfg, "kernel", NULL);
if (!name) {
error_printf("no kernel specified by FIT configuration\n");
return -EINVAL;
}
load_data = data = fit_load_image_alloc(itb, name, &img_off, &sz);
if (!data) {
error_printf("unable to load kernel image from FIT\n");
return -EINVAL;
}
err = fit_image_addr(itb, img_off, "load", &load_addr);
if (err) {
error_printf("unable to read kernel load address from FIT\n");
ret = err;
goto out;
}
err = fit_image_addr(itb, img_off, "entry", &entry_addr);
if (err) {
error_printf("unable to read kernel entry address from FIT\n");
ret = err;
goto out;
}
if (ldr->kernel_filter) {
load_data = ldr->kernel_filter(opaque, data, &load_addr, &entry_addr);
}
if (pend) {
*pend = load_addr + sz;
}
load_addr = ldr->addr_to_phys(opaque, load_addr);
rom_add_blob_fixed(name, load_data, sz, load_addr);
ret = 0;
out:
g_free((void *) data);
if (data != load_data) {
g_free((void *) load_data);
}
return ret;
}
static int fit_load_fdt(const struct fit_loader *ldr, const void *itb,
int cfg, void *opaque, const void *match_data,
hwaddr kernel_end)
{
const char *name;
const void *data;
const void *load_data;
hwaddr load_addr;
int img_off, err;
size_t sz;
int ret;
name = fdt_getprop(itb, cfg, "fdt", NULL);
if (!name) {
return 0;
}
load_data = data = fit_load_image_alloc(itb, name, &img_off, &sz);
if (!data) {
error_printf("unable to load FDT image from FIT\n");
return -EINVAL;
}
err = fit_image_addr(itb, img_off, "load", &load_addr);
if (err == -ENOENT) {
load_addr = ROUND_UP(kernel_end, 64 * K_BYTE) + (10 * M_BYTE);
} else if (err) {
ret = err;
goto out;
}
if (ldr->fdt_filter) {
load_data = ldr->fdt_filter(opaque, data, match_data, &load_addr);
}
load_addr = ldr->addr_to_phys(opaque, load_addr);
sz = fdt_totalsize(load_data);
rom_add_blob_fixed(name, load_data, sz, load_addr);
ret = 0;
out:
g_free((void *) data);
if (data != load_data) {
g_free((void *) load_data);
}
return ret;
}
static bool fit_cfg_compatible(const void *itb, int cfg, const char *compat)
{
const void *fdt;
const char *fdt_name;
bool ret;
fdt_name = fdt_getprop(itb, cfg, "fdt", NULL);
if (!fdt_name) {
return false;
}
fdt = fit_load_image_alloc(itb, fdt_name, NULL, NULL);
if (!fdt) {
return false;
}
if (fdt_check_header(fdt)) {
ret = false;
goto out;
}
if (fdt_node_check_compatible(fdt, 0, compat)) {
ret = false;
goto out;
}
ret = true;
out:
g_free((void *) fdt);
return ret;
}
int load_fit(const struct fit_loader *ldr, const char *filename, void *opaque)
{
const struct fit_loader_match *match;
const void *itb, *match_data = NULL;
const char *def_cfg_name;
char path[FIT_LOADER_MAX_PATH];
int itb_size, configs, cfg_off, off, err;
hwaddr kernel_end;
int ret;
itb = load_device_tree(filename, &itb_size);
if (!itb) {
return -EINVAL;
}
configs = fdt_path_offset(itb, "/configurations");
if (configs < 0) {
ret = configs;
goto out;
}
cfg_off = -FDT_ERR_NOTFOUND;
if (ldr->matches) {
for (match = ldr->matches; match->compatible; match++) {
off = fdt_first_subnode(itb, configs);
while (off >= 0) {
if (fit_cfg_compatible(itb, off, match->compatible)) {
cfg_off = off;
match_data = match->data;
break;
}
off = fdt_next_subnode(itb, off);
}
if (cfg_off >= 0) {
break;
}
}
}
if (cfg_off < 0) {
def_cfg_name = fdt_getprop(itb, configs, "default", NULL);
if (def_cfg_name) {
snprintf(path, sizeof(path), "/configurations/%s", def_cfg_name);
cfg_off = fdt_path_offset(itb, path);
}
}
if (cfg_off < 0) {
/* couldn't find a configuration to use */
ret = cfg_off;
goto out;
}
err = fit_load_kernel(ldr, itb, cfg_off, opaque, &kernel_end);
if (err) {
ret = err;
goto out;
}
err = fit_load_fdt(ldr, itb, cfg_off, opaque, match_data, kernel_end);
if (err) {
ret = err;
goto out;
}
ret = 0;
out:
g_free((void *) itb);
return ret;
}
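A board consumes this loader by filling in a struct fit_loader with its compatible strings and address fixups, then calling load_fit(). The sketch below is hypothetical: the exact field and callback types live in hw/loader-fit.h, and the address mask is made up for illustration:

    static const struct fit_loader_match example_matches[] = {
        { .compatible = "example,board" },
        { /* sentinel */ },
    };

    /* Map a FIT load address to a physical address for this board. */
    static hwaddr example_addr_to_phys(void *opaque, hwaddr addr)
    {
        return addr & 0x7fffffffull;   /* e.g. strip a kseg0-style offset */
    }

    static const struct fit_loader example_loader = {
        .matches      = example_matches,
        .addr_to_phys = example_addr_to_phys,
    };

    /* In the board init code:
     *     load_fit(&example_loader, machine->kernel_filename, s);
     */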


@@ -527,12 +527,7 @@ static void zfree(void *x, void *addr)
#define DEFLATED 8
/* This is the usual maximum in uboot, so if a uImage overflows this, it would
* overflow on real hardware too. */
#define UBOOT_MAX_GUNZIP_BYTES (64 << 20)
- static ssize_t gunzip(void *dst, size_t dstlen, uint8_t *src,
- size_t srclen)
+ ssize_t gunzip(void *dst, size_t dstlen, uint8_t *src, size_t srclen)
{
z_stream s;
ssize_t dstbytes;


@@ -900,6 +900,10 @@ static int cirrus_bitblt_cputovideo(CirrusVGAState * s)
{
int w;
if (blit_is_unsafe(s, true)) {
return 0;
}
s->cirrus_blt_mode &= ~CIRRUS_BLTMODE_MEMSYSSRC;
s->cirrus_srcptr = &s->cirrus_bltbuf[0];
s->cirrus_srcptr_end = &s->cirrus_bltbuf[0];
@@ -925,6 +929,10 @@ static int cirrus_bitblt_cputovideo(CirrusVGAState * s)
}
s->cirrus_srccounter = s->cirrus_blt_srcpitch * s->cirrus_blt_height;
}
/* the blit_is_unsafe call above should catch this */
assert(s->cirrus_blt_srcpitch <= CIRRUS_BLTBUFSIZE);
s->cirrus_srcptr = s->cirrus_bltbuf;
s->cirrus_srcptr_end = s->cirrus_bltbuf + s->cirrus_blt_srcpitch;
cirrus_update_memory_access(s);


@@ -35,6 +35,7 @@
#include "sysemu/kvm.h"
#include "hw/i386/apic_internal.h"
#include "kvm_i386.h"
#include "trace.h"
/*#define DEBUG_INTEL_IOMMU*/
#ifdef DEBUG_INTEL_IOMMU
@@ -167,6 +168,7 @@ static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
/* The shift of an addr for a certain level of paging structure */
static inline uint32_t vtd_slpt_level_shift(uint32_t level)
{
assert(level != 0);
return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_SL_LEVEL_BITS;
}
@@ -259,11 +261,9 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
uint64_t *key = g_malloc(sizeof(*key));
uint64_t gfn = vtd_get_iotlb_gfn(addr, level);
- VTD_DPRINTF(CACHE, "update iotlb sid 0x%"PRIx16 " gpa 0x%"PRIx64
- " slpte 0x%"PRIx64 " did 0x%"PRIx16, source_id, addr, slpte,
- domain_id);
+ trace_vtd_iotlb_page_update(source_id, addr, slpte, domain_id);
if (g_hash_table_size(s->iotlb) >= VTD_IOTLB_MAX_SIZE) {
- VTD_DPRINTF(CACHE, "iotlb exceeds size limit, forced to reset");
+ trace_vtd_iotlb_reset("iotlb exceeds size limit");
vtd_reset_iotlb(s);
}
@@ -474,22 +474,19 @@ static void vtd_handle_inv_queue_error(IntelIOMMUState *s)
/* Set the IWC field and try to generate an invalidation completion interrupt */
static void vtd_generate_completion_event(IntelIOMMUState *s)
{
- VTD_DPRINTF(INV, "completes an invalidation wait command with "
- "Interrupt Flag");
if (vtd_get_long_raw(s, DMAR_ICS_REG) & VTD_ICS_IWC) {
- VTD_DPRINTF(INV, "there is a previous interrupt condition to be "
- "serviced by software, "
- "new invalidation event is not generated");
+ trace_vtd_inv_desc_wait_irq("One pending, skip current");
return;
}
vtd_set_clear_mask_long(s, DMAR_ICS_REG, 0, VTD_ICS_IWC);
vtd_set_clear_mask_long(s, DMAR_IECTL_REG, 0, VTD_IECTL_IP);
if (vtd_get_long_raw(s, DMAR_IECTL_REG) & VTD_IECTL_IM) {
- VTD_DPRINTF(INV, "IM filed in IECTL_REG is set, new invalidation "
- "event is not generated");
+ trace_vtd_inv_desc_wait_irq("IM in IECTL_REG is set, "
+ "new event not generated");
return;
} else {
/* Generate the interrupt event */
trace_vtd_inv_desc_wait_irq("Generating complete event");
vtd_generate_interrupt(s, DMAR_IEADDR_REG, DMAR_IEDATA_REG);
vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0);
}
@@ -507,8 +504,7 @@ static int vtd_get_root_entry(IntelIOMMUState *s, uint8_t index,
addr = s->root + index * sizeof(*re);
if (dma_memory_read(&address_space_memory, addr, re, sizeof(*re))) {
- VTD_DPRINTF(GENERAL, "error: fail to access root-entry at 0x%"PRIx64
- " + %"PRIu8, s->root, index);
+ trace_vtd_re_invalid(re->rsvd, re->val);
re->val = 0;
return -VTD_FR_ROOT_TABLE_INV;
}
@@ -526,15 +522,10 @@ static int vtd_get_context_entry_from_root(VTDRootEntry *root, uint8_t index,
{
dma_addr_t addr;
if (!vtd_root_entry_present(root)) {
- VTD_DPRINTF(GENERAL, "error: root-entry is not present");
return -VTD_FR_ROOT_ENTRY_P;
}
/* we have checked that root entry is present */
addr = (root->val & VTD_ROOT_ENTRY_CTP) + index * sizeof(*ce);
if (dma_memory_read(&address_space_memory, addr, ce, sizeof(*ce))) {
- VTD_DPRINTF(GENERAL, "error: fail to access context-entry at 0x%"PRIx64
- " + %"PRIu8,
- (uint64_t)(root->val & VTD_ROOT_ENTRY_CTP), index);
+ trace_vtd_re_invalid(root->rsvd, root->val);
return -VTD_FR_CONTEXT_TABLE_INV;
}
ce->lo = le64_to_cpu(ce->lo);
@@ -575,12 +566,12 @@ static uint64_t vtd_get_slpte(dma_addr_t base_addr, uint32_t index)
return slpte;
}
- /* Given a gpa and the level of paging structure, return the offset of current
- * level.
+ /* Given an iova and the level of paging structure, return the offset
+ * of current level.
*/
- static inline uint32_t vtd_gpa_level_offset(uint64_t gpa, uint32_t level)
+ static inline uint32_t vtd_iova_level_offset(uint64_t iova, uint32_t level)
{
- return (gpa >> vtd_slpt_level_shift(level)) &
+ return (iova >> vtd_slpt_level_shift(level)) &
((1ULL << VTD_SL_LEVEL_BITS) - 1);
}
@@ -628,12 +619,12 @@ static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level)
}
}
- /* Given the @gpa, get relevant @slptep. @slpte_level will be the last level
+ /* Given the @iova, get relevant @slptep. @slpte_level will be the last level
* of the translation, can be used for deciding the size of large page.
*/
- static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t gpa, bool is_write,
- uint64_t *slptep, uint32_t *slpte_level,
- bool *reads, bool *writes)
+ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
+ uint64_t *slptep, uint32_t *slpte_level,
+ bool *reads, bool *writes)
{
dma_addr_t addr = vtd_get_slpt_base_from_context(ce);
uint32_t level = vtd_get_level_from_context_entry(ce);
@@ -642,11 +633,11 @@ static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t gpa, bool is_write,
uint32_t ce_agaw = vtd_get_agaw_from_context_entry(ce);
uint64_t access_right_check;
- /* Check if @gpa is above 2^X-1, where X is the minimum of MGAW in CAP_REG
- * and AW in context-entry.
+ /* Check if @iova is above 2^X-1, where X is the minimum of MGAW
+ * in CAP_REG and AW in context-entry.
*/
- if (gpa & ~((1ULL << MIN(ce_agaw, VTD_MGAW)) - 1)) {
- VTD_DPRINTF(GENERAL, "error: gpa 0x%"PRIx64 " exceeds limits", gpa);
+ if (iova & ~((1ULL << MIN(ce_agaw, VTD_MGAW)) - 1)) {
+ VTD_DPRINTF(GENERAL, "error: iova 0x%"PRIx64 " exceeds limits", iova);
return -VTD_FR_ADDR_BEYOND_MGAW;
}
@@ -654,13 +645,13 @@ static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t gpa, bool is_write,
access_right_check = is_write ? VTD_SL_W : VTD_SL_R;
while (true) {
- offset = vtd_gpa_level_offset(gpa, level);
+ offset = vtd_iova_level_offset(iova, level);
slpte = vtd_get_slpte(addr, offset);
if (slpte == (uint64_t)-1) {
VTD_DPRINTF(GENERAL, "error: fail to access second-level paging "
- "entry at level %"PRIu32 " for gpa 0x%"PRIx64,
- level, gpa);
+ "entry at level %"PRIu32 " for iova 0x%"PRIx64,
+ level, iova);
if (level == vtd_get_level_from_context_entry(ce)) {
/* Invalid programming of context-entry */
return -VTD_FR_CONTEXT_ENTRY_INV;
@@ -672,8 +663,8 @@ static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t gpa, bool is_write,
*writes = (*writes) && (slpte & VTD_SL_W);
if (!(slpte & access_right_check)) {
VTD_DPRINTF(GENERAL, "error: lack of %s permission for "
- "gpa 0x%"PRIx64 " slpte 0x%"PRIx64,
- (is_write ? "write" : "read"), gpa, slpte);
+ "iova 0x%"PRIx64 " slpte 0x%"PRIx64,
+ (is_write ? "write" : "read"), iova, slpte);
return is_write ? -VTD_FR_WRITE : -VTD_FR_READ;
}
if (vtd_slpte_nonzero_rsvd(slpte, level)) {
@@ -706,12 +697,11 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
}
if (!vtd_root_entry_present(&re)) {
- VTD_DPRINTF(GENERAL, "error: root-entry #%"PRIu8 " is not present",
- bus_num);
+ /* Not error - it's okay we don't have root entry. */
+ trace_vtd_re_not_present(bus_num);
return -VTD_FR_ROOT_ENTRY_P;
} else if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD)) {
- VTD_DPRINTF(GENERAL, "error: non-zero reserved field in root-entry "
- "hi 0x%"PRIx64 " lo 0x%"PRIx64, re.rsvd, re.val);
+ trace_vtd_re_invalid(re.rsvd, re.val);
return -VTD_FR_ROOT_ENTRY_RSVD;
}
@@ -721,22 +711,17 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
}
if (!vtd_context_entry_present(ce)) {
- VTD_DPRINTF(GENERAL,
- "error: context-entry #%"PRIu8 "(bus #%"PRIu8 ") "
- "is not present", devfn, bus_num);
+ /* Not error - it's okay we don't have context entry. */
+ trace_vtd_ce_not_present(bus_num, devfn);
return -VTD_FR_CONTEXT_ENTRY_P;
} else if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
(ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) {
- VTD_DPRINTF(GENERAL,
- "error: non-zero reserved field in context-entry "
- "hi 0x%"PRIx64 " lo 0x%"PRIx64, ce->hi, ce->lo);
+ trace_vtd_ce_invalid(ce->hi, ce->lo);
return -VTD_FR_CONTEXT_ENTRY_RSVD;
}
/* Check if the programming of context-entry is valid */
if (!vtd_is_level_supported(s, vtd_get_level_from_context_entry(ce))) {
- VTD_DPRINTF(GENERAL, "error: unsupported Address Width value in "
- "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
- ce->hi, ce->lo);
+ trace_vtd_ce_invalid(ce->hi, ce->lo);
return -VTD_FR_CONTEXT_ENTRY_INV;
} else {
switch (ce->lo & VTD_CONTEXT_ENTRY_TT) {
@@ -745,9 +730,7 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
case VTD_CONTEXT_TT_DEV_IOTLB:
break;
default:
- VTD_DPRINTF(GENERAL, "error: unsupported Translation Type in "
- "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
- ce->hi, ce->lo);
+ trace_vtd_ce_invalid(ce->hi, ce->lo);
return -VTD_FR_CONTEXT_ENTRY_INV;
}
}
@@ -818,34 +801,17 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
bool writes = true;
VTDIOTLBEntry *iotlb_entry;
- /* Check if the request is in interrupt address range */
- if (vtd_is_interrupt_addr(addr)) {
- if (is_write) {
- /* FIXME: since we don't know the length of the access here, we
- * treat Non-DWORD length write requests without PASID as
- * interrupt requests, too. Withoud interrupt remapping support,
- * we just use 1:1 mapping.
- */
- VTD_DPRINTF(MMU, "write request to interrupt address "
- "gpa 0x%"PRIx64, addr);
- entry->iova = addr & VTD_PAGE_MASK_4K;
- entry->translated_addr = addr & VTD_PAGE_MASK_4K;
- entry->addr_mask = ~VTD_PAGE_MASK_4K;
- entry->perm = IOMMU_WO;
- return;
- } else {
- VTD_DPRINTF(GENERAL, "error: read request from interrupt address "
- "gpa 0x%"PRIx64, addr);
- vtd_report_dmar_fault(s, source_id, addr, VTD_FR_READ, is_write);
- return;
- }
- }
+ /*
+ * We have standalone memory region for interrupt addresses, we
+ * should never receive translation requests in this region.
+ */
+ assert(!vtd_is_interrupt_addr(addr));
/* Try to fetch slpte form IOTLB */
iotlb_entry = vtd_lookup_iotlb(s, source_id, addr);
if (iotlb_entry) {
- VTD_DPRINTF(CACHE, "hit iotlb sid 0x%"PRIx16 " gpa 0x%"PRIx64
- " slpte 0x%"PRIx64 " did 0x%"PRIx16, source_id, addr,
- iotlb_entry->slpte, iotlb_entry->domain_id);
+ trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte,
+ iotlb_entry->domain_id);
slpte = iotlb_entry->slpte;
reads = iotlb_entry->read_flags;
writes = iotlb_entry->write_flags;
@@ -854,10 +820,9 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
}
/* Try to fetch context-entry from cache first */
if (cc_entry->context_cache_gen == s->context_cache_gen) {
- VTD_DPRINTF(CACHE, "hit context-cache bus %d devfn %d "
- "(hi %"PRIx64 " lo %"PRIx64 " gen %"PRIu32 ")",
- bus_num, devfn, cc_entry->context_entry.hi,
- cc_entry->context_entry.lo, cc_entry->context_cache_gen);
+ trace_vtd_iotlb_cc_hit(bus_num, devfn, cc_entry->context_entry.hi,
+ cc_entry->context_entry.lo,
+ cc_entry->context_cache_gen);
ce = cc_entry->context_entry;
is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
} else {
@@ -866,30 +831,26 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
if (ret_fr) {
ret_fr = -ret_fr;
if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
- VTD_DPRINTF(FLOG, "fault processing is disabled for DMA "
- "requests through this context-entry "
- "(with FPD Set)");
+ trace_vtd_fault_disabled();
} else {
vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
}
return;
}
/* Update context-cache */
- VTD_DPRINTF(CACHE, "update context-cache bus %d devfn %d "
- "(hi %"PRIx64 " lo %"PRIx64 " gen %"PRIu32 "->%"PRIu32 ")",
- bus_num, devfn, ce.hi, ce.lo,
- cc_entry->context_cache_gen, s->context_cache_gen);
+ trace_vtd_iotlb_cc_update(bus_num, devfn, ce.hi, ce.lo,
+ cc_entry->context_cache_gen,
+ s->context_cache_gen);
cc_entry->context_entry = ce;
cc_entry->context_cache_gen = s->context_cache_gen;
}
- ret_fr = vtd_gpa_to_slpte(&ce, addr, is_write, &slpte, &level,
- &reads, &writes);
+ ret_fr = vtd_iova_to_slpte(&ce, addr, is_write, &slpte, &level,
+ &reads, &writes);
if (ret_fr) {
ret_fr = -ret_fr;
if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
- VTD_DPRINTF(FLOG, "fault processing is disabled for DMA requests "
- "through this context-entry (with FPD Set)");
+ trace_vtd_fault_disabled();
} else {
vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
}
@@ -939,6 +900,7 @@ static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s)
static void vtd_context_global_invalidate(IntelIOMMUState *s)
{
trace_vtd_inv_desc_cc_global();
s->context_cache_gen++;
if (s->context_cache_gen == VTD_CONTEXT_CACHE_GEN_MAX) {
vtd_reset_context_cache(s);
@@ -978,9 +940,11 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
uint16_t mask;
VTDBus *vtd_bus;
VTDAddressSpace *vtd_as;
- uint16_t devfn;
+ uint8_t bus_n, devfn;
uint16_t devfn_it;
trace_vtd_inv_desc_cc_devices(source_id, func_mask);
switch (func_mask & 3) {
case 0:
mask = 0; /* No bits in the SID field masked */
@@ -996,16 +960,16 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
break;
}
mask = ~mask;
- VTD_DPRINTF(INV, "device-selective invalidation source 0x%"PRIx16
- " mask %"PRIu16, source_id, mask);
- vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id));
+ bus_n = VTD_SID_TO_BUS(source_id);
+ vtd_bus = vtd_find_as_from_bus_num(s, bus_n);
if (vtd_bus) {
devfn = VTD_SID_TO_DEVFN(source_id);
for (devfn_it = 0; devfn_it < X86_IOMMU_PCI_DEVFN_MAX; ++devfn_it) {
vtd_as = vtd_bus->dev_as[devfn_it];
if (vtd_as && ((devfn_it & mask) == (devfn & mask))) {
- VTD_DPRINTF(INV, "invalidate context-cahce of devfn 0x%"PRIx16,
- devfn_it);
+ trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it),
+ VTD_PCI_FUNC(devfn_it));
vtd_as->context_cache_entry.context_cache_gen = 0;
}
}
@@ -1046,6 +1010,7 @@ static uint64_t vtd_context_cache_invalidate(IntelIOMMUState *s, uint64_t val)
static void vtd_iotlb_global_invalidate(IntelIOMMUState *s)
{
trace_vtd_iotlb_reset("global invalidation recved");
vtd_reset_iotlb(s);
}
@@ -1318,9 +1283,7 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
{
if ((inv_desc->hi & VTD_INV_DESC_WAIT_RSVD_HI) ||
(inv_desc->lo & VTD_INV_DESC_WAIT_RSVD_LO)) {
- VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Invalidation "
- "Wait Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
- inv_desc->hi, inv_desc->lo);
+ trace_vtd_inv_desc_wait_invalid(inv_desc->hi, inv_desc->lo);
return false;
}
if (inv_desc->lo & VTD_INV_DESC_WAIT_SW) {
@@ -1332,21 +1295,18 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
/* FIXME: need to be masked with HAW? */
dma_addr_t status_addr = inv_desc->hi;
- VTD_DPRINTF(INV, "status data 0x%x, status addr 0x%"PRIx64,
- status_data, status_addr);
+ trace_vtd_inv_desc_wait_sw(status_addr, status_data);
status_data = cpu_to_le32(status_data);
if (dma_memory_write(&address_space_memory, status_addr, &status_data,
sizeof(status_data))) {
- VTD_DPRINTF(GENERAL, "error: fail to perform a coherent write");
+ trace_vtd_inv_desc_wait_write_fail(inv_desc->hi, inv_desc->lo);
return false;
}
} else if (inv_desc->lo & VTD_INV_DESC_WAIT_IF) {
/* Interrupt flag */
- VTD_DPRINTF(INV, "Invalidation Wait Descriptor interrupt completion");
vtd_generate_completion_event(s);
} else {
- VTD_DPRINTF(GENERAL, "error: invalid Invalidation Wait Descriptor: "
- "hi 0x%"PRIx64 " lo 0x%"PRIx64, inv_desc->hi, inv_desc->lo);
+ trace_vtd_inv_desc_wait_invalid(inv_desc->hi, inv_desc->lo);
return false;
}
return true;
@@ -1355,30 +1315,29 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
static bool vtd_process_context_cache_desc(IntelIOMMUState *s,
VTDInvDesc *inv_desc)
{
uint16_t sid, fmask;
if ((inv_desc->lo & VTD_INV_DESC_CC_RSVD) || inv_desc->hi) {
- VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Context-cache "
- "Invalidate Descriptor");
+ trace_vtd_inv_desc_cc_invalid(inv_desc->hi, inv_desc->lo);
return false;
}
switch (inv_desc->lo & VTD_INV_DESC_CC_G) {
case VTD_INV_DESC_CC_DOMAIN:
- VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16,
- (uint16_t)VTD_INV_DESC_CC_DID(inv_desc->lo));
+ trace_vtd_inv_desc_cc_domain(
+ (uint16_t)VTD_INV_DESC_CC_DID(inv_desc->lo));
/* Fall through */
case VTD_INV_DESC_CC_GLOBAL:
- VTD_DPRINTF(INV, "global invalidation");
vtd_context_global_invalidate(s);
break;
case VTD_INV_DESC_CC_DEVICE:
- vtd_context_device_invalidate(s, VTD_INV_DESC_CC_SID(inv_desc->lo),
- VTD_INV_DESC_CC_FM(inv_desc->lo));
+ sid = VTD_INV_DESC_CC_SID(inv_desc->lo);
+ fmask = VTD_INV_DESC_CC_FM(inv_desc->lo);
+ vtd_context_device_invalidate(s, sid, fmask);
break;
default:
- VTD_DPRINTF(GENERAL, "error: invalid granularity in Context-cache "
- "Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
- inv_desc->hi, inv_desc->lo);
+ trace_vtd_inv_desc_cc_invalid(inv_desc->hi, inv_desc->lo);
return false;
}
return true;
@@ -1392,22 +1351,19 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
if ((inv_desc->lo & VTD_INV_DESC_IOTLB_RSVD_LO) ||
(inv_desc->hi & VTD_INV_DESC_IOTLB_RSVD_HI)) {
- VTD_DPRINTF(GENERAL, "error: non-zero reserved field in IOTLB "
- "Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
- inv_desc->hi, inv_desc->lo);
+ trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
return false;
}
switch (inv_desc->lo & VTD_INV_DESC_IOTLB_G) {
case VTD_INV_DESC_IOTLB_GLOBAL:
- VTD_DPRINTF(INV, "global invalidation");
+ trace_vtd_inv_desc_iotlb_global();
vtd_iotlb_global_invalidate(s);
break;
case VTD_INV_DESC_IOTLB_DOMAIN:
domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo);
- VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16,
- domain_id);
+ trace_vtd_inv_desc_iotlb_domain(domain_id);
vtd_iotlb_domain_invalidate(s, domain_id);
break;
@@ -1415,20 +1371,16 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo);
addr = VTD_INV_DESC_IOTLB_ADDR(inv_desc->hi);
am = VTD_INV_DESC_IOTLB_AM(inv_desc->hi);
- VTD_DPRINTF(INV, "page-selective invalidation domain 0x%"PRIx16
- " addr 0x%"PRIx64 " mask %"PRIu8, domain_id, addr, am);
+ trace_vtd_inv_desc_iotlb_pages(domain_id, addr, am);
if (am > VTD_MAMV) {
- VTD_DPRINTF(GENERAL, "error: supported max address mask value is "
- "%"PRIu8, (uint8_t)VTD_MAMV);
+ trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
return false;
}
vtd_iotlb_page_invalidate(s, domain_id, addr, am);
break;
default:
- VTD_DPRINTF(GENERAL, "error: invalid granularity in IOTLB Invalidate "
- "Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
- inv_desc->hi, inv_desc->lo);
+ trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
return false;
}
return true;
@@ -1527,33 +1479,28 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
switch (desc_type) {
case VTD_INV_DESC_CC:
- VTD_DPRINTF(INV, "Context-cache Invalidate Descriptor hi 0x%"PRIx64
- " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo);
+ trace_vtd_inv_desc("context-cache", inv_desc.hi, inv_desc.lo);
if (!vtd_process_context_cache_desc(s, &inv_desc)) {
return false;
}
break;
case VTD_INV_DESC_IOTLB:
- VTD_DPRINTF(INV, "IOTLB Invalidate Descriptor hi 0x%"PRIx64
- " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo);
+ trace_vtd_inv_desc("iotlb", inv_desc.hi, inv_desc.lo);
if (!vtd_process_iotlb_desc(s, &inv_desc)) {
return false;
}
break;
case VTD_INV_DESC_WAIT:
- VTD_DPRINTF(INV, "Invalidation Wait Descriptor hi 0x%"PRIx64
- " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo);
+ trace_vtd_inv_desc("wait", inv_desc.hi, inv_desc.lo);
if (!vtd_process_wait_desc(s, &inv_desc)) {
return false;
}
break;
case VTD_INV_DESC_IEC:
- VTD_DPRINTF(INV, "Invalidation Interrupt Entry Cache "
- "Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
- inv_desc.hi, inv_desc.lo);
+ trace_vtd_inv_desc("iec", inv_desc.hi, inv_desc.lo);
if (!vtd_process_inv_iec_desc(s, &inv_desc)) {
return false;
}
@@ -1568,9 +1515,7 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
break;
default:
- VTD_DPRINTF(GENERAL, "error: unkonw Invalidation Descriptor type "
- "hi 0x%"PRIx64 " lo 0x%"PRIx64 " type %"PRIu8,
- inv_desc.hi, inv_desc.lo, desc_type);
+ trace_vtd_inv_desc_invalid(inv_desc.hi, inv_desc.lo);
return false;
}
s->iq_head++;
@@ -2049,7 +1994,7 @@ static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr,
is_write, &ret);
VTD_DPRINTF(MMU,
"bus %"PRIu8 " slot %"PRIu8 " func %"PRIu8 " devfn %"PRIu8
" gpa 0x%"PRIx64 " hpa 0x%"PRIx64, pci_bus_num(vtd_as->bus),
" iova 0x%"PRIx64 " hpa 0x%"PRIx64, pci_bus_num(vtd_as->bus),
VTD_PCI_SLOT(vtd_as->devfn), VTD_PCI_FUNC(vtd_as->devfn),
vtd_as->devfn, addr, ret.translated_addr);
return ret;
@@ -2115,6 +2060,7 @@ static Property vtd_properties[] = {
DEFINE_PROP_ON_OFF_AUTO("eim", IntelIOMMUState, intr_eim,
ON_OFF_AUTO_AUTO),
DEFINE_PROP_BOOL("x-buggy-eim", IntelIOMMUState, buggy_eim, false),
DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE),
DEFINE_PROP_END_OF_LIST(),
};
@@ -2496,6 +2442,10 @@ static void vtd_init(IntelIOMMUState *s)
s->ecap |= VTD_ECAP_DT;
}
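/*
 * Caching Mode (CM): with this capability bit set, the guest must
 * invalidate the IOTLB and context cache even for not-present ->
 * present updates, so the emulated IOMMU observes every mapping change.
 */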
if (s->caching_mode) {
s->cap |= VTD_CAP_CM;
}
vtd_reset_context_cache(s);
vtd_reset_iotlb(s);

View File

@@ -202,6 +202,7 @@
#define VTD_CAP_MAMV (VTD_MAMV << 48)
#define VTD_CAP_PSI (1ULL << 39)
#define VTD_CAP_SLLPS ((1ULL << 34) | (1ULL << 35))
#define VTD_CAP_CM (1ULL << 7)
/* Supported Adjusted Guest Address Widths */
#define VTD_CAP_SAGAW_SHIFT 8

View File

@@ -1650,9 +1650,15 @@ void pc_pci_device_init(PCIBus *pci_bus)
int max_bus;
int bus;
/* Note: if=scsi is deprecated with PC machine types */
max_bus = drive_get_max_bus(IF_SCSI);
for (bus = 0; bus <= max_bus; bus++) {
pci_create_simple(pci_bus, -1, "lsi53c895a");
/*
* By not creating frontends here, we make
* scsi_legacy_handle_cmdline() create them, and warn that
* this usage is deprecated.
*/
}
}
@@ -2339,6 +2345,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
mc->query_hotpluggable_cpus = pc_query_hotpluggable_cpus;
mc->default_boot_order = "cad";
mc->hot_add_cpu = pc_hot_add_cpu;
mc->block_default_type = IF_IDE;
mc->max_cpus = 255;
mc->reset = pc_machine_reset;
hc->pre_plug = pc_machine_device_pre_plug_cb;

View File

@@ -3,6 +3,34 @@
# hw/i386/x86-iommu.c
x86_iommu_iec_notify(bool global, uint32_t index, uint32_t mask) "Notify IEC invalidation: global=%d index=%" PRIu32 " mask=%" PRIu32
# hw/i386/intel_iommu.c
vtd_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)"
vtd_inv_desc(const char *type, uint64_t hi, uint64_t lo) "invalidate desc type %s high 0x%"PRIx64" low 0x%"PRIx64
vtd_inv_desc_invalid(uint64_t hi, uint64_t lo) "invalid inv desc hi 0x%"PRIx64" lo 0x%"PRIx64
vtd_inv_desc_cc_domain(uint16_t domain) "context invalidate domain 0x%"PRIx16
vtd_inv_desc_cc_global(void) "context invalidate globally"
vtd_inv_desc_cc_device(uint8_t bus, uint8_t dev, uint8_t fn) "context invalidate device %02"PRIx8":%02"PRIx8".%02"PRIx8
vtd_inv_desc_cc_devices(uint16_t sid, uint16_t fmask) "context invalidate devices sid 0x%"PRIx16" fmask 0x%"PRIx16
vtd_inv_desc_cc_invalid(uint64_t hi, uint64_t lo) "invalid context-cache desc hi 0x%"PRIx64" lo 0x%"PRIx64
vtd_inv_desc_iotlb_global(void) "iotlb invalidate global"
vtd_inv_desc_iotlb_domain(uint16_t domain) "iotlb invalidate whole domain 0x%"PRIx16
vtd_inv_desc_iotlb_pages(uint16_t domain, uint64_t addr, uint8_t mask) "iotlb invalidate domain 0x%"PRIx16" addr 0x%"PRIx64" mask 0x%"PRIx8
vtd_inv_desc_iotlb_invalid(uint64_t hi, uint64_t lo) "invalid iotlb desc hi 0x%"PRIx64" lo 0x%"PRIx64
vtd_inv_desc_wait_sw(uint64_t addr, uint32_t data) "wait invalidate status write addr 0x%"PRIx64" data 0x%"PRIx32
vtd_inv_desc_wait_irq(const char *msg) "%s"
vtd_inv_desc_wait_invalid(uint64_t hi, uint64_t lo) "invalid wait desc hi 0x%"PRIx64" lo 0x%"PRIx64
vtd_inv_desc_wait_write_fail(uint64_t hi, uint64_t lo) "write fail for wait desc hi 0x%"PRIx64" lo 0x%"PRIx64
vtd_re_not_present(uint8_t bus) "Root entry bus %"PRIu8" not present"
vtd_re_invalid(uint64_t hi, uint64_t lo) "invalid root entry hi 0x%"PRIx64" lo 0x%"PRIx64
vtd_ce_not_present(uint8_t bus, uint8_t devfn) "Context entry bus %"PRIu8" devfn %"PRIu8" not present"
vtd_ce_invalid(uint64_t hi, uint64_t lo) "invalid context entry hi 0x%"PRIx64" lo 0x%"PRIx64
vtd_iotlb_page_hit(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain) "IOTLB page hit sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64" domain 0x%"PRIx16
vtd_iotlb_page_update(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain) "IOTLB page update sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64" domain 0x%"PRIx16
vtd_iotlb_cc_hit(uint8_t bus, uint8_t devfn, uint64_t high, uint64_t low, uint32_t gen) "IOTLB context hit bus 0x%"PRIx8" devfn 0x%"PRIx8" high 0x%"PRIx64" low 0x%"PRIx64" gen %"PRIu32
vtd_iotlb_cc_update(uint8_t bus, uint8_t devfn, uint64_t high, uint64_t low, uint32_t gen1, uint32_t gen2) "IOTLB context update bus 0x%"PRIx8" devfn 0x%"PRIx8" high 0x%"PRIx64" low 0x%"PRIx64" gen %"PRIu32" -> gen %"PRIu32
vtd_iotlb_reset(const char *reason) "IOTLB reset (reason: %s)"
vtd_fault_disabled(void) "Fault processing disabled for context entry"
# hw/i386/amd_iommu.c
amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" + offset 0x%"PRIx32
amdvi_cache_update(uint16_t domid, uint8_t bus, uint8_t slot, uint8_t func, uint64_t gpa, uint64_t txaddr) " update iotlb domid 0x%"PRIx16" devid: %02x:%02x.%x gpa 0x%"PRIx64" hpa 0x%"PRIx64

View File

@@ -2840,23 +2840,6 @@ const VMStateDescription vmstate_ide_bus = {
void ide_drive_get(DriveInfo **hd, int n)
{
int i;
int highest_bus = drive_get_max_bus(IF_IDE) + 1;
int max_devs = drive_get_max_devs(IF_IDE);
int n_buses = max_devs ? (n / max_devs) : n;
/*
* Note: The number of actual buses available is not known.
* We compute this based on the size of the DriveInfo* array, n.
* If it is less than max_devs * <num_real_buses>,
* we will stop looking for drives prematurely instead of overfilling
* the array.
*/
if (highest_bus > n_buses) {
error_report("Too many IDE buses defined (%d > %d)",
highest_bus, n_buses);
exit(1);
}
for (i = 0; i < n; i++) {
hd[i] = drive_get_by_index(IF_IDE, i);

View File

@@ -20,31 +20,29 @@
#include "kvm_mips.h"
#include "hw/intc/mips_gic.h"
static void mips_gic_set_vp_irq(MIPSGICState *gic, int vp, int pin, int level)
static void mips_gic_set_vp_irq(MIPSGICState *gic, int vp, int pin)
{
int ored_level = level;
int ored_level = 0;
int i;
/* ORing pending registers sharing the same pin */
if (!ored_level) {
for (i = 0; i < gic->num_irq; i++) {
if ((gic->irq_state[i].map_pin & GIC_MAP_MSK) == pin &&
gic->irq_state[i].map_vp == vp &&
gic->irq_state[i].enabled) {
ored_level |= gic->irq_state[i].pending;
}
if (ored_level) {
/* no need to iterate all interrupts */
break;
}
for (i = 0; i < gic->num_irq; i++) {
if ((gic->irq_state[i].map_pin & GIC_MAP_MSK) == pin &&
gic->irq_state[i].map_vp == vp &&
gic->irq_state[i].enabled) {
ored_level |= gic->irq_state[i].pending;
}
if (((gic->vps[vp].compare_map & GIC_MAP_MSK) == pin) &&
(gic->vps[vp].mask & GIC_VP_MASK_CMP_MSK)) {
/* ORing with local pending register (count/compare) */
ored_level |= (gic->vps[vp].pend & GIC_VP_MASK_CMP_MSK) >>
GIC_VP_MASK_CMP_SHF;
if (ored_level) {
/* no need to iterate all interrupts */
break;
}
}
if (((gic->vps[vp].compare_map & GIC_MAP_MSK) == pin) &&
(gic->vps[vp].mask & GIC_VP_MASK_CMP_MSK)) {
/* ORing with local pending register (count/compare) */
ored_level |= (gic->vps[vp].pend & GIC_VP_MASK_CMP_MSK) >>
GIC_VP_MASK_CMP_SHF;
}
if (kvm_enabled()) {
kvm_mips_set_ipi_interrupt(mips_env_get_cpu(gic->vps[vp].env),
pin + GIC_CPU_PIN_OFFSET,
@@ -55,21 +53,27 @@ static void mips_gic_set_vp_irq(MIPSGICState *gic, int vp, int pin, int level)
}
}
static void gic_update_pin_for_irq(MIPSGICState *gic, int n_IRQ)
{
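/*
 * Recompute the level of the pin this IRQ is mapped to. Called on
 * both pending and enable/mask changes, so masking an interrupt can
 * also deassert the line.
 */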
int vp = gic->irq_state[n_IRQ].map_vp;
int pin = gic->irq_state[n_IRQ].map_pin & GIC_MAP_MSK;
if (vp < 0 || vp >= gic->num_vps) {
return;
}
mips_gic_set_vp_irq(gic, vp, pin);
}
static void gic_set_irq(void *opaque, int n_IRQ, int level)
{
MIPSGICState *gic = (MIPSGICState *) opaque;
int vp = gic->irq_state[n_IRQ].map_vp;
int pin = gic->irq_state[n_IRQ].map_pin & GIC_MAP_MSK;
gic->irq_state[n_IRQ].pending = (uint8_t) level;
if (!gic->irq_state[n_IRQ].enabled) {
/* GIC interrupt source disabled */
return;
}
if (vp < 0 || vp >= gic->num_vps) {
return;
}
mips_gic_set_vp_irq(gic, vp, pin, level);
gic_update_pin_for_irq(gic, n_IRQ);
}
#define OFFSET_CHECK(c) \
@@ -209,7 +213,7 @@ static void gic_timer_store_vp_compare(MIPSGICState *gic, uint32_t vp_index,
gic->vps[vp_index].pend &= ~(1 << GIC_LOCAL_INT_COMPARE);
if (gic->vps[vp_index].compare_map & GIC_MAP_TO_PIN_MSK) {
uint32_t pin = (gic->vps[vp_index].compare_map & GIC_MAP_MSK);
mips_gic_set_vp_irq(gic, vp_index, pin, 0);
mips_gic_set_vp_irq(gic, vp_index, pin);
}
mips_gictimer_store_vp_compare(gic->gic_timer, vp_index, compare);
}
@@ -286,6 +290,7 @@ static void gic_write(void *opaque, hwaddr addr, uint64_t data, unsigned size)
OFFSET_CHECK((base + size * 8) <= gic->num_irq);
for (i = 0; i < size * 8; i++) {
gic->irq_state[base + i].enabled &= !((data >> i) & 1);
gic_update_pin_for_irq(gic, base + i);
}
break;
case GIC_SH_WEDGE_OFS:
@@ -305,6 +310,7 @@ static void gic_write(void *opaque, hwaddr addr, uint64_t data, unsigned size)
OFFSET_CHECK((base + size * 8) <= gic->num_irq);
for (i = 0; i < size * 8; i++) {
gic->irq_state[base + i].enabled |= (data >> i) & 1;
gic_update_pin_for_irq(gic, base + i);
}
break;
case GIC_SH_MAP0_PIN_OFS ... GIC_SH_MAP255_PIN_OFS:

View File

@@ -1,4 +1,2 @@
obj-y += an5206.o mcf5208.o
obj-y += dummy_m68k.o
obj-y += mcf5206.o mcf_intc.o

View File

@@ -1,84 +0,0 @@
/*
* Dummy board with just RAM and CPU for use as an ISS.
*
* Copyright (c) 2007 CodeSourcery.
*
* This code is licensed under the GPL
*/
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "cpu.h"
#include "hw/hw.h"
#include "hw/boards.h"
#include "hw/loader.h"
#include "elf.h"
#include "exec/address-spaces.h"
#define KERNEL_LOAD_ADDR 0x10000
/* Board init. */
static void dummy_m68k_init(MachineState *machine)
{
ram_addr_t ram_size = machine->ram_size;
const char *cpu_model = machine->cpu_model;
const char *kernel_filename = machine->kernel_filename;
M68kCPU *cpu;
CPUM68KState *env;
MemoryRegion *address_space_mem = get_system_memory();
MemoryRegion *ram = g_new(MemoryRegion, 1);
int kernel_size;
uint64_t elf_entry;
hwaddr entry;
if (!cpu_model)
cpu_model = "cfv4e";
cpu = cpu_m68k_init(cpu_model);
if (!cpu) {
fprintf(stderr, "Unable to find m68k CPU definition\n");
exit(1);
}
env = &cpu->env;
/* Initialize CPU registers. */
env->vbr = 0;
/* RAM at address zero */
memory_region_allocate_system_memory(ram, NULL, "dummy_m68k.ram",
ram_size);
memory_region_add_subregion(address_space_mem, 0, ram);
/* Load kernel. */
if (kernel_filename) {
kernel_size = load_elf(kernel_filename, NULL, NULL, &elf_entry,
NULL, NULL, 1, EM_68K, 0, 0);
entry = elf_entry;
if (kernel_size < 0) {
kernel_size = load_uimage(kernel_filename, &entry, NULL, NULL,
NULL, NULL);
}
if (kernel_size < 0) {
kernel_size = load_image_targphys(kernel_filename,
KERNEL_LOAD_ADDR,
ram_size - KERNEL_LOAD_ADDR);
entry = KERNEL_LOAD_ADDR;
}
if (kernel_size < 0) {
fprintf(stderr, "qemu: could not load kernel '%s'\n",
kernel_filename);
exit(1);
}
} else {
entry = 0;
}
env->pc = entry;
}
static void dummy_m68k_machine_init(MachineClass *mc)
{
mc->desc = "Dummy board";
mc->init = dummy_m68k_init;
}
DEFINE_MACHINE("dummy", dummy_m68k_machine_init)

View File

@@ -9,10 +9,16 @@
#include "qemu-common.h"
#include "cpu.h"
#include "hw/hw.h"
#include "hw/sysbus.h"
#include "hw/m68k/mcf.h"
#include "exec/address-spaces.h"
#define TYPE_MCF_INTC "mcf-intc"
#define MCF_INTC(obj) OBJECT_CHECK(mcf_intc_state, (obj), TYPE_MCF_INTC)
typedef struct {
SysBusDevice parent_obj;
MemoryRegion iomem;
uint64_t ipr;
uint64_t imr;
@@ -138,8 +144,10 @@ static void mcf_intc_set_irq(void *opaque, int irq, int level)
mcf_intc_update(s);
}
static void mcf_intc_reset(mcf_intc_state *s)
static void mcf_intc_reset(DeviceState *dev)
{
mcf_intc_state *s = MCF_INTC(dev);
s->imr = ~0ull;
s->ipr = 0;
s->ifr = 0;
@@ -154,17 +162,49 @@ static const MemoryRegionOps mcf_intc_ops = {
.endianness = DEVICE_NATIVE_ENDIAN,
};
static void mcf_intc_instance_init(Object *obj)
{
mcf_intc_state *s = MCF_INTC(obj);
memory_region_init_io(&s->iomem, obj, &mcf_intc_ops, s, "mcf", 0x100);
}
static void mcf_intc_class_init(ObjectClass *oc, void *data)
{
DeviceClass *dc = DEVICE_CLASS(oc);
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
dc->reset = mcf_intc_reset;
}
static const TypeInfo mcf_intc_gate_info = {
.name = TYPE_MCF_INTC,
.parent = TYPE_SYS_BUS_DEVICE,
.instance_size = sizeof(mcf_intc_state),
.instance_init = mcf_intc_instance_init,
.class_init = mcf_intc_class_init,
};
static void mcf_intc_register_types(void)
{
type_register_static(&mcf_intc_gate_info);
}
type_init(mcf_intc_register_types)
qemu_irq *mcf_intc_init(MemoryRegion *sysmem,
hwaddr base,
M68kCPU *cpu)
{
DeviceState *dev;
mcf_intc_state *s;
s = g_malloc0(sizeof(mcf_intc_state));
s->cpu = cpu;
mcf_intc_reset(s);
dev = qdev_create(NULL, TYPE_MCF_INTC);
qdev_init_nofail(dev);
s = MCF_INTC(dev);
s->cpu = cpu;
memory_region_init_io(&s->iomem, NULL, &mcf_intc_ops, s, "mcf", 0x100);
memory_region_add_subregion(sysmem, base, &s->iomem);
return qemu_allocate_irqs(mcf_intc_set_irq, s, 64);

View File

@@ -4,3 +4,4 @@ obj-$(CONFIG_JAZZ) += mips_jazz.o
obj-$(CONFIG_FULONG) += mips_fulong2e.o
obj-y += gt64xxx_pci.o
obj-$(CONFIG_MIPS_CPS) += cps.o
obj-$(CONFIG_MIPS_BOSTON) += boston.o

hw/mips/boston.c Normal file (577 lines)
View File

@@ -0,0 +1,577 @@
/*
* MIPS Boston development board emulation.
*
* Copyright (c) 2016 Imagination Technologies
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "exec/address-spaces.h"
#include "hw/boards.h"
#include "hw/char/serial.h"
#include "hw/hw.h"
#include "hw/ide/pci.h"
#include "hw/ide/ahci.h"
#include "hw/loader.h"
#include "hw/loader-fit.h"
#include "hw/mips/cps.h"
#include "hw/mips/cpudevs.h"
#include "hw/pci-host/xilinx-pcie.h"
#include "qapi/error.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "qemu/log.h"
#include "sysemu/char.h"
#include "sysemu/device_tree.h"
#include "sysemu/sysemu.h"
#include "sysemu/qtest.h"
#include <libfdt.h>
#define TYPE_MIPS_BOSTON "mips-boston"
#define BOSTON(obj) OBJECT_CHECK(BostonState, (obj), TYPE_MIPS_BOSTON)
typedef struct {
SysBusDevice parent_obj;
MachineState *mach;
MIPSCPSState *cps;
SerialState *uart;
CharBackend lcd_display;
char lcd_content[8];
bool lcd_inited;
hwaddr kernel_entry;
hwaddr fdt_base;
} BostonState;
enum boston_plat_reg {
PLAT_FPGA_BUILD = 0x00,
PLAT_CORE_CL = 0x04,
PLAT_WRAPPER_CL = 0x08,
PLAT_SYSCLK_STATUS = 0x0c,
PLAT_SOFTRST_CTL = 0x10,
#define PLAT_SOFTRST_CTL_SYSRESET (1 << 4)
PLAT_DDR3_STATUS = 0x14,
#define PLAT_DDR3_STATUS_LOCKED (1 << 0)
#define PLAT_DDR3_STATUS_CALIBRATED (1 << 2)
PLAT_PCIE_STATUS = 0x18,
#define PLAT_PCIE_STATUS_PCIE0_LOCKED (1 << 0)
#define PLAT_PCIE_STATUS_PCIE1_LOCKED (1 << 8)
#define PLAT_PCIE_STATUS_PCIE2_LOCKED (1 << 16)
PLAT_FLASH_CTL = 0x1c,
PLAT_SPARE0 = 0x20,
PLAT_SPARE1 = 0x24,
PLAT_SPARE2 = 0x28,
PLAT_SPARE3 = 0x2c,
PLAT_MMCM_DIV = 0x30,
#define PLAT_MMCM_DIV_CLK0DIV_SHIFT 0
#define PLAT_MMCM_DIV_INPUT_SHIFT 8
#define PLAT_MMCM_DIV_MUL_SHIFT 16
#define PLAT_MMCM_DIV_CLK1DIV_SHIFT 24
PLAT_BUILD_CFG = 0x34,
#define PLAT_BUILD_CFG_IOCU_EN (1 << 0)
#define PLAT_BUILD_CFG_PCIE0_EN (1 << 1)
#define PLAT_BUILD_CFG_PCIE1_EN (1 << 2)
#define PLAT_BUILD_CFG_PCIE2_EN (1 << 3)
PLAT_DDR_CFG = 0x38,
#define PLAT_DDR_CFG_SIZE (0xf << 0)
#define PLAT_DDR_CFG_MHZ (0xfff << 4)
PLAT_NOC_PCIE0_ADDR = 0x3c,
PLAT_NOC_PCIE1_ADDR = 0x40,
PLAT_NOC_PCIE2_ADDR = 0x44,
PLAT_SYS_CTL = 0x48,
};
static void boston_lcd_event(void *opaque, int event)
{
BostonState *s = opaque;
if (event == CHR_EVENT_OPENED && !s->lcd_inited) {
qemu_chr_fe_printf(&s->lcd_display, " ");
s->lcd_inited = true;
}
}
static uint64_t boston_lcd_read(void *opaque, hwaddr addr,
unsigned size)
{
BostonState *s = opaque;
uint64_t val = 0;
switch (size) {
case 8:
val |= (uint64_t)s->lcd_content[(addr + 7) & 0x7] << 56;
val |= (uint64_t)s->lcd_content[(addr + 6) & 0x7] << 48;
val |= (uint64_t)s->lcd_content[(addr + 5) & 0x7] << 40;
val |= (uint64_t)s->lcd_content[(addr + 4) & 0x7] << 32;
/* fall through */
case 4:
val |= (uint64_t)s->lcd_content[(addr + 3) & 0x7] << 24;
val |= (uint64_t)s->lcd_content[(addr + 2) & 0x7] << 16;
/* fall through */
case 2:
val |= (uint64_t)s->lcd_content[(addr + 1) & 0x7] << 8;
/* fall through */
case 1:
val |= (uint64_t)s->lcd_content[(addr + 0) & 0x7];
break;
}
return val;
}
static void boston_lcd_write(void *opaque, hwaddr addr,
uint64_t val, unsigned size)
{
BostonState *s = opaque;
switch (size) {
case 8:
s->lcd_content[(addr + 7) & 0x7] = val >> 56;
s->lcd_content[(addr + 6) & 0x7] = val >> 48;
s->lcd_content[(addr + 5) & 0x7] = val >> 40;
s->lcd_content[(addr + 4) & 0x7] = val >> 32;
/* fall through */
case 4:
s->lcd_content[(addr + 3) & 0x7] = val >> 24;
s->lcd_content[(addr + 2) & 0x7] = val >> 16;
/* fall through */
case 2:
s->lcd_content[(addr + 1) & 0x7] = val >> 8;
/* fall through */
case 1:
s->lcd_content[(addr + 0) & 0x7] = val;
break;
}
qemu_chr_fe_printf(&s->lcd_display,
"\r%-8.8s", s->lcd_content);
}
static const MemoryRegionOps boston_lcd_ops = {
.read = boston_lcd_read,
.write = boston_lcd_write,
.endianness = DEVICE_NATIVE_ENDIAN,
};
static uint64_t boston_platreg_read(void *opaque, hwaddr addr,
unsigned size)
{
BostonState *s = opaque;
uint32_t gic_freq, val;
if (size != 4) {
qemu_log_mask(LOG_UNIMP, "%uB platform register read", size);
return 0;
}
switch (addr & 0xffff) {
case PLAT_FPGA_BUILD:
case PLAT_CORE_CL:
case PLAT_WRAPPER_CL:
return 0;
case PLAT_DDR3_STATUS:
return PLAT_DDR3_STATUS_LOCKED | PLAT_DDR3_STATUS_CALIBRATED;
case PLAT_MMCM_DIV:
gic_freq = mips_gictimer_get_freq(s->cps->gic.gic_timer) / 1000000;
val = gic_freq << PLAT_MMCM_DIV_INPUT_SHIFT;
val |= 1 << PLAT_MMCM_DIV_MUL_SHIFT;
val |= 1 << PLAT_MMCM_DIV_CLK0DIV_SHIFT;
val |= 1 << PLAT_MMCM_DIV_CLK1DIV_SHIFT;
return val;
case PLAT_BUILD_CFG:
val = PLAT_BUILD_CFG_PCIE0_EN;
val |= PLAT_BUILD_CFG_PCIE1_EN;
val |= PLAT_BUILD_CFG_PCIE2_EN;
return val;
case PLAT_DDR_CFG:
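/* The size field encodes DDR capacity in whole gigabytes; the MHz
 * field is reported with all bits set. */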
val = s->mach->ram_size / G_BYTE;
assert(!(val & ~PLAT_DDR_CFG_SIZE));
val |= PLAT_DDR_CFG_MHZ;
return val;
default:
qemu_log_mask(LOG_UNIMP, "Read platform register 0x%" HWADDR_PRIx,
addr & 0xffff);
return 0;
}
}
static void boston_platreg_write(void *opaque, hwaddr addr,
uint64_t val, unsigned size)
{
if (size != 4) {
qemu_log_mask(LOG_UNIMP, "%uB platform register write", size);
return;
}
switch (addr & 0xffff) {
case PLAT_FPGA_BUILD:
case PLAT_CORE_CL:
case PLAT_WRAPPER_CL:
case PLAT_DDR3_STATUS:
case PLAT_PCIE_STATUS:
case PLAT_MMCM_DIV:
case PLAT_BUILD_CFG:
case PLAT_DDR_CFG:
/* read only */
break;
case PLAT_SOFTRST_CTL:
if (val & PLAT_SOFTRST_CTL_SYSRESET) {
qemu_system_reset_request();
}
break;
default:
qemu_log_mask(LOG_UNIMP, "Write platform register 0x%" HWADDR_PRIx
" = 0x%" PRIx64, addr & 0xffff, val);
break;
}
}
static const MemoryRegionOps boston_platreg_ops = {
.read = boston_platreg_read,
.write = boston_platreg_write,
.endianness = DEVICE_NATIVE_ENDIAN,
};
static void boston_flash_write(void *opaque, hwaddr addr,
uint64_t val, unsigned size)
{
}
static const MemoryRegionOps boston_flash_ops = {
.write = boston_flash_write,
.endianness = DEVICE_NATIVE_ENDIAN,
};
static const TypeInfo boston_device = {
.name = TYPE_MIPS_BOSTON,
.parent = TYPE_SYS_BUS_DEVICE,
.instance_size = sizeof(BostonState),
};
static void boston_register_types(void)
{
type_register_static(&boston_device);
}
type_init(boston_register_types)
static void gen_firmware(uint32_t *p, hwaddr kernel_entry, hwaddr fdt_addr,
bool is_64b)
{
const uint32_t cm_base = 0x16100000;
const uint32_t gic_base = 0x16120000;
const uint32_t cpc_base = 0x16200000;
/* Move CM GCRs */
if (is_64b) {
stl_p(p++, 0x40287803); /* dmfc0 $8, CMGCRBase */
stl_p(p++, 0x00084138); /* dsll $8, $8, 4 */
} else {
stl_p(p++, 0x40087803); /* mfc0 $8, CMGCRBase */
stl_p(p++, 0x00084100); /* sll $8, $8, 4 */
}
stl_p(p++, 0x3c09a000); /* lui $9, 0xa000 */
stl_p(p++, 0x01094025); /* or $8, $9 */
stl_p(p++, 0x3c0a0000 | (cm_base >> 16)); /* lui $10, cm_base >> 16 */
if (is_64b) {
stl_p(p++, 0xfd0a0008); /* sd $10, 0x8($8) */
} else {
stl_p(p++, 0xad0a0008); /* sw $10, 0x8($8) */
}
stl_p(p++, 0x012a4025); /* or $8, $10 */
/* Move & enable GIC GCRs */
stl_p(p++, 0x3c090000 | (gic_base >> 16)); /* lui $9, gic_base >> 16 */
stl_p(p++, 0x35290001); /* ori $9, 0x1 */
if (is_64b) {
stl_p(p++, 0xfd090080); /* sd $9, 0x80($8) */
} else {
stl_p(p++, 0xad090080); /* sw $9, 0x80($8) */
}
/* Move & enable CPC GCRs */
stl_p(p++, 0x3c090000 | (cpc_base >> 16)); /* lui $9, cpc_base >> 16 */
stl_p(p++, 0x35290001); /* ori $9, 0x1 */
if (is_64b) {
stl_p(p++, 0xfd090088); /* sd $9, 0x88($8) */
} else {
stl_p(p++, 0xad090088); /* sw $9, 0x88($8) */
}
/*
* Setup argument registers to follow the UHI boot protocol:
*
* a0/$4 = -2
* a1/$5 = virtual address of FDT
* a2/$6 = 0
* a3/$7 = 0
*/
stl_p(p++, 0x2404fffe); /* li $4, -2 */
/* lui $5, hi(fdt_addr) */
stl_p(p++, 0x3c050000 | ((fdt_addr >> 16) & 0xffff));
if (fdt_addr & 0xffff) { /* ori $5, lo(fdt_addr) */
stl_p(p++, 0x34a50000 | (fdt_addr & 0xffff));
}
stl_p(p++, 0x34060000); /* li $6, 0 */
stl_p(p++, 0x34070000); /* li $7, 0 */
/* Load kernel entry address & jump to it */
/* lui $25, hi(kernel_entry) */
stl_p(p++, 0x3c190000 | ((kernel_entry >> 16) & 0xffff));
/* ori $25, lo(kernel_entry) */
stl_p(p++, 0x37390000 | (kernel_entry & 0xffff));
stl_p(p++, 0x03200009); /* jr $25 */
}
static const void *boston_fdt_filter(void *opaque, const void *fdt_orig,
const void *match_data, hwaddr *load_addr)
{
BostonState *s = BOSTON(opaque);
MachineState *machine = s->mach;
const char *cmdline;
int err;
void *fdt;
size_t fdt_sz, ram_low_sz, ram_high_sz;
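/* Double the original size so fdt_open_into() has room for the
 * properties added below. */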
fdt_sz = fdt_totalsize(fdt_orig) * 2;
fdt = g_malloc0(fdt_sz);
err = fdt_open_into(fdt_orig, fdt, fdt_sz);
if (err) {
fprintf(stderr, "unable to open FDT\n");
return NULL;
}
cmdline = (machine->kernel_cmdline && machine->kernel_cmdline[0])
? machine->kernel_cmdline : " ";
err = qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", cmdline);
if (err < 0) {
fprintf(stderr, "couldn't set /chosen/bootargs\n");
return NULL;
}
ram_low_sz = MIN(256 * M_BYTE, machine->ram_size);
ram_high_sz = machine->ram_size - ram_low_sz;
qemu_fdt_setprop_sized_cells(fdt, "/memory@0", "reg",
1, 0x00000000, 1, ram_low_sz,
1, 0x90000000, 1, ram_high_sz);
fdt = g_realloc(fdt, fdt_totalsize(fdt));
qemu_fdt_dumpdtb(fdt, fdt_sz);
s->fdt_base = *load_addr;
return fdt;
}
static const void *boston_kernel_filter(void *opaque, const void *kernel,
hwaddr *load_addr, hwaddr *entry_addr)
{
BostonState *s = BOSTON(opaque);
s->kernel_entry = *entry_addr;
return kernel;
}
static const struct fit_loader_match boston_matches[] = {
{ "img,boston" },
{ NULL },
};
static const struct fit_loader boston_fit_loader = {
.matches = boston_matches,
.addr_to_phys = cpu_mips_kseg0_to_phys,
.fdt_filter = boston_fdt_filter,
.kernel_filter = boston_kernel_filter,
};
static inline XilinxPCIEHost *
xilinx_pcie_init(MemoryRegion *sys_mem, uint32_t bus_nr,
hwaddr cfg_base, uint64_t cfg_size,
hwaddr mmio_base, uint64_t mmio_size,
qemu_irq irq, bool link_up)
{
DeviceState *dev;
MemoryRegion *cfg, *mmio;
dev = qdev_create(NULL, TYPE_XILINX_PCIE_HOST);
qdev_prop_set_uint32(dev, "bus_nr", bus_nr);
qdev_prop_set_uint64(dev, "cfg_base", cfg_base);
qdev_prop_set_uint64(dev, "cfg_size", cfg_size);
qdev_prop_set_uint64(dev, "mmio_base", mmio_base);
qdev_prop_set_uint64(dev, "mmio_size", mmio_size);
qdev_prop_set_bit(dev, "link_up", link_up);
qdev_init_nofail(dev);
cfg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
memory_region_add_subregion_overlap(sys_mem, cfg_base, cfg, 0);
mmio = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 1);
memory_region_add_subregion_overlap(sys_mem, 0, mmio, 0);
qdev_connect_gpio_out_named(dev, "interrupt_out", 0, irq);
return XILINX_PCIE_HOST(dev);
}
static void boston_mach_init(MachineState *machine)
{
DeviceState *dev;
BostonState *s;
Error *err = NULL;
const char *cpu_model;
MemoryRegion *flash, *ddr, *ddr_low_alias, *lcd, *platreg;
MemoryRegion *sys_mem = get_system_memory();
XilinxPCIEHost *pcie2;
PCIDevice *ahci;
DriveInfo *hd[6];
Chardev *chr;
int fw_size, fit_err;
bool is_64b;
if ((machine->ram_size % G_BYTE) ||
(machine->ram_size > (2 * G_BYTE))) {
error_report("Memory size must be 1GB or 2GB");
exit(1);
}
cpu_model = machine->cpu_model ?: "I6400";
dev = qdev_create(NULL, TYPE_MIPS_BOSTON);
qdev_init_nofail(dev);
s = BOSTON(dev);
s->mach = machine;
s->cps = g_new0(MIPSCPSState, 1);
if (!cpu_supports_cps_smp(cpu_model)) {
error_report("Boston requires CPUs which support CPS");
exit(1);
}
is_64b = cpu_supports_isa(cpu_model, ISA_MIPS64);
object_initialize(s->cps, sizeof(MIPSCPSState), TYPE_MIPS_CPS);
qdev_set_parent_bus(DEVICE(s->cps), sysbus_get_default());
object_property_set_str(OBJECT(s->cps), cpu_model, "cpu-model", &err);
object_property_set_int(OBJECT(s->cps), smp_cpus, "num-vp", &err);
object_property_set_bool(OBJECT(s->cps), true, "realized", &err);
if (err != NULL) {
error_report("%s", error_get_pretty(err));
exit(1);
}
sysbus_mmio_map_overlap(SYS_BUS_DEVICE(s->cps), 0, 0, 1);
flash = g_new(MemoryRegion, 1);
memory_region_init_rom_device(flash, NULL, &boston_flash_ops, s,
"boston.flash", 128 * M_BYTE, &err);
memory_region_add_subregion_overlap(sys_mem, 0x18000000, flash, 0);
ddr = g_new(MemoryRegion, 1);
memory_region_allocate_system_memory(ddr, NULL, "boston.ddr",
machine->ram_size);
memory_region_add_subregion_overlap(sys_mem, 0x80000000, ddr, 0);
ddr_low_alias = g_new(MemoryRegion, 1);
memory_region_init_alias(ddr_low_alias, NULL, "boston_low.ddr",
ddr, 0, MIN(machine->ram_size, (256 * M_BYTE)));
memory_region_add_subregion_overlap(sys_mem, 0, ddr_low_alias, 0);
xilinx_pcie_init(sys_mem, 0,
0x10000000, 32 * M_BYTE,
0x40000000, 1 * G_BYTE,
get_cps_irq(s->cps, 2), false);
xilinx_pcie_init(sys_mem, 1,
0x12000000, 32 * M_BYTE,
0x20000000, 512 * M_BYTE,
get_cps_irq(s->cps, 1), false);
pcie2 = xilinx_pcie_init(sys_mem, 2,
0x14000000, 32 * M_BYTE,
0x16000000, 1 * M_BYTE,
get_cps_irq(s->cps, 0), true);
platreg = g_new(MemoryRegion, 1);
memory_region_init_io(platreg, NULL, &boston_platreg_ops, s,
"boston-platregs", 0x1000);
memory_region_add_subregion_overlap(sys_mem, 0x17ffd000, platreg, 0);
if (!serial_hds[0]) {
serial_hds[0] = qemu_chr_new("serial0", "null");
}
s->uart = serial_mm_init(sys_mem, 0x17ffe000, 2,
get_cps_irq(s->cps, 3), 10000000,
serial_hds[0], DEVICE_NATIVE_ENDIAN);
lcd = g_new(MemoryRegion, 1);
memory_region_init_io(lcd, NULL, &boston_lcd_ops, s, "boston-lcd", 0x8);
memory_region_add_subregion_overlap(sys_mem, 0x17fff000, lcd, 0);
chr = qemu_chr_new("lcd", "vc:320x240");
qemu_chr_fe_init(&s->lcd_display, chr, NULL);
qemu_chr_fe_set_handlers(&s->lcd_display, NULL, NULL,
boston_lcd_event, s, NULL, true);
ahci = pci_create_simple_multifunction(&PCI_BRIDGE(&pcie2->root)->sec_bus,
PCI_DEVFN(0, 0),
true, TYPE_ICH9_AHCI);
g_assert(ARRAY_SIZE(hd) == ICH_AHCI(ahci)->ahci.ports);
ide_drive_get(hd, ICH_AHCI(ahci)->ahci.ports);
ahci_ide_create_devs(ahci, hd);
if (machine->firmware) {
fw_size = load_image_targphys(machine->firmware,
0x1fc00000, 4 * M_BYTE);
if (fw_size == -1) {
error_printf("unable to load firmware image '%s'\n",
machine->firmware);
exit(1);
}
} else if (machine->kernel_filename) {
fit_err = load_fit(&boston_fit_loader, machine->kernel_filename, s);
if (fit_err) {
error_printf("unable to load FIT image\n");
exit(1);
}
gen_firmware(memory_region_get_ram_ptr(flash) + 0x7c00000,
s->kernel_entry, s->fdt_base, is_64b);
} else if (!qtest_enabled()) {
error_printf("Please provide either a -kernel or -bios argument\n");
exit(1);
}
}
static void boston_mach_class_init(MachineClass *mc)
{
mc->desc = "MIPS Boston";
mc->init = boston_mach_init;
mc->block_default_type = IF_IDE;
mc->default_ram_size = 2 * G_BYTE;
mc->max_cpus = 16;
}
DEFINE_MACHINE("boston", boston_mach_class_init)

View File

@@ -387,6 +387,7 @@ static void mips_fulong2e_machine_init(MachineClass *mc)
{
mc->desc = "Fulong 2e mini pc";
mc->init = mips_fulong2e_init;
mc->block_default_type = IF_IDE;
}
DEFINE_MACHINE("fulong2e", mips_fulong2e_machine_init)

View File

@@ -291,10 +291,6 @@ static void mips_jazz_init(MachineState *machine,
qdev_get_gpio_in(rc4030, 5), &esp_reset, &dma_enable);
/* Floppy */
if (drive_get_max_bus(IF_FLOPPY) >= MAX_FD) {
fprintf(stderr, "qemu: too many floppy drives\n");
exit(1);
}
for (n = 0; n < MAX_FD; n++) {
fds[n] = drive_get(IF_FLOPPY, 0, n);
}

View File

@@ -1264,6 +1264,7 @@ static void mips_malta_machine_init(MachineClass *mc)
{
mc->desc = "MIPS Malta Core LV";
mc->init = mips_malta_init;
mc->block_default_type = IF_IDE;
mc->max_cpus = 16;
mc->is_default = 1;
}

View File

@@ -306,6 +306,7 @@ static void mips_machine_init(MachineClass *mc)
{
mc->desc = "mips r4k platform";
mc->init = mips_r4k_init;
mc->block_default_type = IF_IDE;
}
DEFINE_MACHINE("mips", mips_machine_init)

View File

@@ -29,6 +29,20 @@ static inline bool is_gic_connected(MIPSGCRState *s)
return s->gic_mr != NULL;
}
static inline void update_gcr_base(MIPSGCRState *gcr, uint64_t val)
{
CPUState *cpu;
MIPSCPU *mips_cpu;
gcr->gcr_base = val & GCR_BASE_GCRBASE_MSK;
memory_region_set_address(&gcr->iomem, gcr->gcr_base);
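/* Mirror the new base into each CPU's CP0 CMGCRBase register (stored
 * shifted right by 4) so guest reads stay consistent with the
 * relocated region. */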
CPU_FOREACH(cpu) {
mips_cpu = MIPS_CPU(cpu);
mips_cpu->env.CP0_CMGCRBase = gcr->gcr_base >> 4;
}
}
static inline void update_cpc_base(MIPSGCRState *gcr, uint64_t val)
{
if (is_cpc_connected(gcr)) {
@@ -117,6 +131,9 @@ static void gcr_write(void *opaque, hwaddr addr, uint64_t data, unsigned size)
MIPSGCRVPState *other_vps = &gcr->vps[current_vps->other];
switch (addr) {
case GCR_BASE_OFS:
update_gcr_base(gcr, data);
break;
case GCR_GIC_BASE_OFS:
update_gic_base(gcr, data);
break;

View File

@@ -1130,7 +1130,8 @@ static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
return 0;
}
static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size)
static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
size_t size)
{
VirtIONet *n = qemu_get_nic_opaque(nc);
VirtIONetQueue *q = virtio_net_get_subqueue(nc);
@@ -1233,6 +1234,17 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
return size;
}
static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
size_t size)
{
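/* Thin wrapper: hold the RCU read lock across the receive path so
 * RCU-protected state read there stays valid for the duration. */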
ssize_t r;
rcu_read_lock();
r = virtio_net_receive_rcu(nc, buf, size);
rcu_read_unlock();
return r;
}
static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)

View File

@@ -16,3 +16,4 @@ common-obj-$(CONFIG_FULONG) += bonito.o
common-obj-$(CONFIG_PCI_PIIX) += piix.o
common-obj-$(CONFIG_PCI_Q35) += q35.o
common-obj-$(CONFIG_PCI_GENERIC) += gpex.o
common-obj-$(CONFIG_PCI_XILINX) += xilinx-pcie.o

hw/pci-host/xilinx-pcie.c Normal file (328 lines)
View File

@@ -0,0 +1,328 @@
/*
* Xilinx PCIe host controller emulation.
*
* Copyright (c) 2016 Imagination Technologies
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "hw/pci/pci_bridge.h"
#include "hw/pci-host/xilinx-pcie.h"
enum root_cfg_reg {
/* Interrupt Decode Register */
ROOTCFG_INTDEC = 0x138,
/* Interrupt Mask Register */
ROOTCFG_INTMASK = 0x13c,
/* INTx Interrupt Received */
#define ROOTCFG_INTMASK_INTX (1 << 16)
/* MSI Interrupt Received */
#define ROOTCFG_INTMASK_MSI (1 << 17)
/* PHY Status/Control Register */
ROOTCFG_PSCR = 0x144,
/* Link Up */
#define ROOTCFG_PSCR_LINK_UP (1 << 11)
/* Root Port Status/Control Register */
ROOTCFG_RPSCR = 0x148,
/* Bridge Enable */
#define ROOTCFG_RPSCR_BRIDGEEN (1 << 0)
/* Interrupt FIFO Not Empty */
#define ROOTCFG_RPSCR_INTNEMPTY (1 << 18)
/* Interrupt FIFO Overflow */
#define ROOTCFG_RPSCR_INTOVF (1 << 19)
/* Root Port Interrupt FIFO Read Register 1 */
ROOTCFG_RPIFR1 = 0x158,
#define ROOTCFG_RPIFR1_INT_LANE_SHIFT 27
#define ROOTCFG_RPIFR1_INT_ASSERT_SHIFT 29
#define ROOTCFG_RPIFR1_INT_VALID_SHIFT 31
/* Root Port Interrupt FIFO Read Register 2 */
ROOTCFG_RPIFR2 = 0x15c,
};
static void xilinx_pcie_update_intr(XilinxPCIEHost *s,
uint32_t set, uint32_t clear)
{
int level;
s->intr |= set;
s->intr &= ~clear;
if (s->intr_fifo_r != s->intr_fifo_w) {
s->intr |= ROOTCFG_INTMASK_INTX;
}
level = !!(s->intr & s->intr_mask);
qemu_set_irq(s->irq, level);
}
static void xilinx_pcie_queue_intr(XilinxPCIEHost *s,
uint32_t fifo_reg1, uint32_t fifo_reg2)
{
XilinxPCIEInt *intr;
unsigned int new_w;
new_w = (s->intr_fifo_w + 1) % ARRAY_SIZE(s->intr_fifo);
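/* One slot is left unused: the FIFO is full when advancing the write
 * index would make it equal the read index. */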
if (new_w == s->intr_fifo_r) {
s->rpscr |= ROOTCFG_RPSCR_INTOVF;
return;
}
intr = &s->intr_fifo[s->intr_fifo_w];
s->intr_fifo_w = new_w;
intr->fifo_reg1 = fifo_reg1;
intr->fifo_reg2 = fifo_reg2;
xilinx_pcie_update_intr(s, ROOTCFG_INTMASK_INTX, 0);
}
static void xilinx_pcie_set_irq(void *opaque, int irq_num, int level)
{
XilinxPCIEHost *s = XILINX_PCIE_HOST(opaque);
xilinx_pcie_queue_intr(s,
(irq_num << ROOTCFG_RPIFR1_INT_LANE_SHIFT) |
(level << ROOTCFG_RPIFR1_INT_ASSERT_SHIFT) |
(1 << ROOTCFG_RPIFR1_INT_VALID_SHIFT),
0);
}
static void xilinx_pcie_host_realize(DeviceState *dev, Error **errp)
{
PCIHostState *pci = PCI_HOST_BRIDGE(dev);
XilinxPCIEHost *s = XILINX_PCIE_HOST(dev);
SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
PCIExpressHost *pex = PCIE_HOST_BRIDGE(dev);
snprintf(s->name, sizeof(s->name), "pcie%u", s->bus_nr);
/* PCI configuration space */
pcie_host_mmcfg_init(pex, s->cfg_size);
/* MMIO region */
memory_region_init(&s->mmio, OBJECT(s), "mmio", UINT64_MAX);
memory_region_set_enabled(&s->mmio, false);
/* dummy I/O region */
memory_region_init_ram(&s->io, OBJECT(s), "io", 16, NULL);
memory_region_set_enabled(&s->io, false);
/* interrupt out */
qdev_init_gpio_out_named(dev, &s->irq, "interrupt_out", 1);
sysbus_init_mmio(sbd, &pex->mmio);
sysbus_init_mmio(sbd, &s->mmio);
pci->bus = pci_register_bus(dev, s->name, xilinx_pcie_set_irq,
pci_swizzle_map_irq_fn, s, &s->mmio,
&s->io, 0, 4, TYPE_PCIE_BUS);
qdev_set_parent_bus(DEVICE(&s->root), BUS(pci->bus));
qdev_init_nofail(DEVICE(&s->root));
}
static const char *xilinx_pcie_host_root_bus_path(PCIHostState *host_bridge,
PCIBus *rootbus)
{
return "0000:00";
}
static void xilinx_pcie_host_init(Object *obj)
{
XilinxPCIEHost *s = XILINX_PCIE_HOST(obj);
XilinxPCIERoot *root = &s->root;
object_initialize(root, sizeof(*root), TYPE_XILINX_PCIE_ROOT);
object_property_add_child(obj, "root", OBJECT(root), NULL);
qdev_prop_set_uint32(DEVICE(root), "addr", PCI_DEVFN(0, 0));
qdev_prop_set_bit(DEVICE(root), "multifunction", false);
}
static Property xilinx_pcie_host_props[] = {
DEFINE_PROP_UINT32("bus_nr", XilinxPCIEHost, bus_nr, 0),
DEFINE_PROP_SIZE("cfg_base", XilinxPCIEHost, cfg_base, 0),
DEFINE_PROP_SIZE("cfg_size", XilinxPCIEHost, cfg_size, 32 << 20),
DEFINE_PROP_SIZE("mmio_base", XilinxPCIEHost, mmio_base, 0),
DEFINE_PROP_SIZE("mmio_size", XilinxPCIEHost, mmio_size, 1 << 20),
DEFINE_PROP_BOOL("link_up", XilinxPCIEHost, link_up, true),
DEFINE_PROP_END_OF_LIST(),
};
static void xilinx_pcie_host_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
hc->root_bus_path = xilinx_pcie_host_root_bus_path;
dc->realize = xilinx_pcie_host_realize;
set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
dc->fw_name = "pci";
dc->props = xilinx_pcie_host_props;
}
static const TypeInfo xilinx_pcie_host_info = {
.name = TYPE_XILINX_PCIE_HOST,
.parent = TYPE_PCIE_HOST_BRIDGE,
.instance_size = sizeof(XilinxPCIEHost),
.instance_init = xilinx_pcie_host_init,
.class_init = xilinx_pcie_host_class_init,
};
static uint32_t xilinx_pcie_root_config_read(PCIDevice *d,
uint32_t address, int len)
{
XilinxPCIEHost *s = XILINX_PCIE_HOST(OBJECT(d)->parent);
uint32_t val;
switch (address) {
case ROOTCFG_INTDEC:
val = s->intr;
break;
case ROOTCFG_INTMASK:
val = s->intr_mask;
break;
case ROOTCFG_PSCR:
val = s->link_up ? ROOTCFG_PSCR_LINK_UP : 0;
break;
case ROOTCFG_RPSCR:
if (s->intr_fifo_r != s->intr_fifo_w) {
s->rpscr &= ~ROOTCFG_RPSCR_INTNEMPTY;
} else {
s->rpscr |= ROOTCFG_RPSCR_INTNEMPTY;
}
val = s->rpscr;
break;
case ROOTCFG_RPIFR1:
if (s->intr_fifo_w == s->intr_fifo_r) {
/* FIFO empty */
val = 0;
} else {
val = s->intr_fifo[s->intr_fifo_r].fifo_reg1;
}
break;
case ROOTCFG_RPIFR2:
if (s->intr_fifo_w == s->intr_fifo_r) {
/* FIFO empty */
val = 0;
} else {
val = s->intr_fifo[s->intr_fifo_r].fifo_reg2;
}
break;
default:
val = pci_default_read_config(d, address, len);
break;
}
return val;
}
static void xilinx_pcie_root_config_write(PCIDevice *d, uint32_t address,
uint32_t val, int len)
{
XilinxPCIEHost *s = XILINX_PCIE_HOST(OBJECT(d)->parent);
switch (address) {
case ROOTCFG_INTDEC:
xilinx_pcie_update_intr(s, 0, val);
break;
case ROOTCFG_INTMASK:
s->intr_mask = val;
xilinx_pcie_update_intr(s, 0, 0);
break;
case ROOTCFG_RPSCR:
s->rpscr &= ~ROOTCFG_RPSCR_BRIDGEEN;
s->rpscr |= val & ROOTCFG_RPSCR_BRIDGEEN;
memory_region_set_enabled(&s->mmio, val & ROOTCFG_RPSCR_BRIDGEEN);
if (val & ROOTCFG_INTMASK_INTX) {
s->rpscr &= ~ROOTCFG_INTMASK_INTX;
}
break;
case ROOTCFG_RPIFR1:
case ROOTCFG_RPIFR2:
if (s->intr_fifo_w == s->intr_fifo_r) {
/* FIFO empty */
return;
} else {
s->intr_fifo_r = (s->intr_fifo_r + 1) % ARRAY_SIZE(s->intr_fifo);
}
break;
default:
pci_default_write_config(d, address, val, len);
break;
}
}
static int xilinx_pcie_root_init(PCIDevice *dev)
{
BusState *bus = qdev_get_parent_bus(DEVICE(dev));
XilinxPCIEHost *s = XILINX_PCIE_HOST(bus->parent);
pci_set_word(dev->config + PCI_COMMAND,
PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
pci_set_word(dev->config + PCI_MEMORY_BASE, s->mmio_base >> 16);
pci_set_word(dev->config + PCI_MEMORY_LIMIT,
((s->mmio_base + s->mmio_size - 1) >> 16) & 0xfff0);
pci_bridge_initfn(dev, TYPE_PCI_BUS);
if (pcie_endpoint_cap_v1_init(dev, 0x80) < 0) {
hw_error("Failed to initialize PCIe capability");
}
return 0;
}
static void xilinx_pcie_root_class_init(ObjectClass *klass, void *data)
{
PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
DeviceClass *dc = DEVICE_CLASS(klass);
set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
dc->desc = "Xilinx AXI-PCIe Host Bridge";
k->vendor_id = PCI_VENDOR_ID_XILINX;
k->device_id = 0x7021;
k->revision = 0;
k->class_id = PCI_CLASS_BRIDGE_HOST;
k->is_express = true;
k->is_bridge = true;
k->init = xilinx_pcie_root_init;
k->exit = pci_bridge_exitfn;
dc->reset = pci_bridge_reset;
k->config_read = xilinx_pcie_root_config_read;
k->config_write = xilinx_pcie_root_config_write;
/*
* PCI-facing part of the host bridge, not usable without the
* host-facing part, which can't be device_add'ed, yet.
*/
dc->cannot_instantiate_with_device_add_yet = true;
}
static const TypeInfo xilinx_pcie_root_info = {
.name = TYPE_XILINX_PCIE_ROOT,
.parent = TYPE_PCI_BRIDGE,
.instance_size = sizeof(XilinxPCIERoot),
.class_init = xilinx_pcie_root_class_init,
};
static void xilinx_pcie_register(void)
{
type_register_static(&xilinx_pcie_root_info);
type_register_static(&xilinx_pcie_host_info);
}
type_init(xilinx_pcie_register)

View File

@@ -610,7 +610,8 @@ bool pcie_cap_is_arifwd_enabled(const PCIDevice *dev)
* uint16_t ext_cap_size
*/
static uint16_t pcie_find_capability_list(PCIDevice *dev, uint16_t cap_id,
/* Passing a cap_id value > 0xffff will return 0 and put end of list in prev */
static uint16_t pcie_find_capability_list(PCIDevice *dev, uint32_t cap_id,
uint16_t *prev_p)
{
uint16_t prev = 0;
@@ -664,30 +665,24 @@ void pcie_add_capability(PCIDevice *dev,
uint16_t cap_id, uint8_t cap_ver,
uint16_t offset, uint16_t size)
{
uint32_t header;
uint16_t next;
assert(offset >= PCI_CONFIG_SPACE_SIZE);
assert(offset < offset + size);
assert(offset + size <= PCIE_CONFIG_SPACE_SIZE);
assert(size >= 8);
assert(pci_is_express(dev));
if (offset == PCI_CONFIG_SPACE_SIZE) {
header = pci_get_long(dev->config + offset);
next = PCI_EXT_CAP_NEXT(header);
} else {
if (offset != PCI_CONFIG_SPACE_SIZE) {
uint16_t prev;
/* 0 is reserved cap id. use internally to find the last capability
in the linked list */
next = pcie_find_capability_list(dev, 0, &prev);
/*
* 0xffffffff is not a valid cap id (it's a 16-bit field); it is used
* internally to find the last capability in the linked list.
*/
pcie_find_capability_list(dev, 0xffffffff, &prev);
assert(prev >= PCI_CONFIG_SPACE_SIZE);
assert(next == 0);
pcie_ext_cap_set_next(dev, prev, offset);
}
pci_set_long(dev->config + offset, PCI_EXT_CAP(cap_id, cap_ver, next));
pci_set_long(dev->config + offset, PCI_EXT_CAP(cap_id, cap_ver, 0));
/* Make capability read-only by default */
memset(dev->wmask + offset, 0, size);

View File

@@ -518,6 +518,7 @@ static void core99_machine_class_init(ObjectClass *oc, void *data)
mc->desc = "Mac99 based PowerMAC";
mc->init = ppc_core99_init;
mc->block_default_type = IF_IDE;
mc->max_cpus = MAX_CPUS;
mc->default_boot_order = "cd";
mc->kvm_type = core99_kvm_type;

View File

@@ -368,6 +368,7 @@ static void heathrow_machine_init(MachineClass *mc)
{
mc->desc = "Heathrow based PowerMAC";
mc->init = ppc_heathrow_init;
mc->block_default_type = IF_IDE;
mc->max_cpus = MAX_CPUS;
#ifndef TARGET_PPC64
mc->is_default = 1;

View File

@@ -684,6 +684,7 @@ static void prep_machine_init(MachineClass *mc)
{
mc->desc = "PowerPC PREP platform";
mc->init = ppc_prep_init;
mc->block_default_type = IF_IDE;
mc->max_cpus = MAX_CPUS;
mc->default_boot_order = "cad";
}

View File

@@ -367,9 +367,6 @@ static void esp_pci_scsi_realize(PCIDevice *dev, Error **errp)
s->irq = pci_allocate_irq(dev);
scsi_bus_new(&s->bus, sizeof(s->bus), d, &esp_pci_scsi_info, NULL);
if (!d->hotplugged) {
scsi_bus_legacy_handle_cmdline(&s->bus, errp);
}
}
static void esp_pci_scsi_uninit(PCIDevice *d)

View File

@@ -690,7 +690,6 @@ static void sysbus_esp_realize(DeviceState *dev, Error **errp)
SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
SysBusESPState *sysbus = ESP(dev);
ESPState *s = &sysbus->esp;
Error *err = NULL;
sysbus_init_irq(sbd, &s->irq);
assert(sysbus->it_shift != -1);
@@ -703,11 +702,6 @@ static void sysbus_esp_realize(DeviceState *dev, Error **errp)
qdev_init_gpio_in(dev, sysbus_esp_gpio_demux, 2);
scsi_bus_new(&s->bus, sizeof(s->bus), dev, &esp_scsi_info, NULL);
scsi_bus_legacy_handle_cmdline(&s->bus, &err);
if (err != NULL) {
error_propagate(errp, err);
return;
}
}
static void sysbus_esp_hard_reset(DeviceState *dev)

View File

@@ -2216,9 +2216,6 @@ static void lsi_scsi_realize(PCIDevice *dev, Error **errp)
QTAILQ_INIT(&s->queue);
scsi_bus_new(&s->bus, sizeof(s->bus), d, &lsi_scsi_info, NULL);
if (!d->hotplugged) {
scsi_bus_legacy_handle_cmdline(&s->bus, errp);
}
}
static void lsi_scsi_unrealize(DeviceState *dev, Error **errp)
@@ -2271,3 +2268,10 @@ static void lsi53c895a_register_types(void)
}
type_init(lsi53c895a_register_types)
void lsi53c895a_create(PCIBus *bus)
{
LSIState *s = LSI53C895A(pci_create_simple(bus, -1, "lsi53c895a"));
scsi_bus_legacy_handle_cmdline(&s->bus, false);
}

View File

@@ -2325,7 +2325,6 @@ static const struct SCSIBusInfo megasas_scsi_info = {
static void megasas_scsi_realize(PCIDevice *dev, Error **errp)
{
DeviceState *d = DEVICE(dev);
MegasasState *s = MEGASAS(dev);
MegasasBaseClass *b = MEGASAS_DEVICE_GET_CLASS(s);
uint8_t *pci_conf;
@@ -2426,9 +2425,6 @@ static void megasas_scsi_realize(PCIDevice *dev, Error **errp)
scsi_bus_new(&s->bus, sizeof(s->bus), DEVICE(dev),
&megasas_scsi_info, NULL);
if (!d->hotplugged) {
scsi_bus_legacy_handle_cmdline(&s->bus, errp);
}
}
static Property megasas_properties_gen1[] = {

View File

@@ -1271,7 +1271,6 @@ static const struct SCSIBusInfo mptsas_scsi_info = {
static void mptsas_scsi_realize(PCIDevice *dev, Error **errp)
{
DeviceState *d = DEVICE(dev);
MPTSASState *s = MPT_SAS(dev);
Error *err = NULL;
int ret;
@@ -1326,9 +1325,6 @@ static void mptsas_scsi_realize(PCIDevice *dev, Error **errp)
QTAILQ_INIT(&s->pending);
scsi_bus_new(&s->bus, sizeof(s->bus), &dev->qdev, &mptsas_scsi_info, NULL);
if (!d->hotplugged) {
scsi_bus_legacy_handle_cmdline(&s->bus, errp);
}
}
static void mptsas_scsi_uninit(PCIDevice *dev)

View File

@@ -105,6 +105,7 @@ static void scsi_dma_restart_bh(void *opaque)
qemu_bh_delete(s->bh);
s->bh = NULL;
aio_context_acquire(blk_get_aio_context(s->conf.blk));
QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
scsi_req_ref(req);
if (req->retry) {
@@ -122,6 +123,7 @@ static void scsi_dma_restart_bh(void *opaque)
}
scsi_req_unref(req);
}
aio_context_release(blk_get_aio_context(s->conf.blk));
}
void scsi_req_retry(SCSIRequest *req)
@@ -261,12 +263,11 @@ SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk,
return SCSI_DEVICE(dev);
}
void scsi_bus_legacy_handle_cmdline(SCSIBus *bus, Error **errp)
void scsi_bus_legacy_handle_cmdline(SCSIBus *bus, bool deprecated)
{
Location loc;
DriveInfo *dinfo;
int unit;
Error *err = NULL;
loc_push_none(&loc);
for (unit = 0; unit <= bus->info->max_target; unit++) {
@@ -275,16 +276,59 @@ void scsi_bus_legacy_handle_cmdline(SCSIBus *bus, Error **errp)
continue;
}
qemu_opts_loc_restore(dinfo->opts);
scsi_bus_legacy_add_drive(bus, blk_by_legacy_dinfo(dinfo),
unit, false, -1, NULL, &err);
if (err != NULL) {
error_propagate(errp, err);
break;
if (deprecated) {
/* Handling -drive not claimed by machine initialization */
if (blk_get_attached_dev(blk_by_legacy_dinfo(dinfo))) {
continue; /* claimed */
}
if (!dinfo->is_default) {
error_report("warning: bus=%d,unit=%d is deprecated with this"
" machine type",
bus->busnr, unit);
}
}
scsi_bus_legacy_add_drive(bus, blk_by_legacy_dinfo(dinfo),
unit, false, -1, NULL, &error_fatal);
}
loc_pop(&loc);
}
static bool is_scsi_hba_with_legacy_magic(Object *obj)
{
static const char *magic[] = {
"am53c974", "dc390", "esp", "lsi53c810", "lsi53c895a",
"megasas", "megasas-gen2", "mptsas1068", "spapr-vscsi",
"virtio-scsi-device",
NULL
};
const char *typename = object_get_typename(obj);
int i;
for (i = 0; magic[i]; i++) {
if (!strcmp(typename, magic[i])) {
return true;
}
}
return false;
}
static int scsi_legacy_handle_cmdline_cb(Object *obj, void *opaque)
{
SCSIBus *bus = (SCSIBus *)object_dynamic_cast(obj, TYPE_SCSI_BUS);
if (bus && is_scsi_hba_with_legacy_magic(OBJECT(bus->qbus.parent))) {
scsi_bus_legacy_handle_cmdline(bus, true);
}
return 0;
}
void scsi_legacy_handle_cmdline(void)
{
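/*
 * Walk the QOM composition tree and let each legacy SCSI HBA create
 * frontends for any -drive if=scsi not claimed during machine init;
 * the deprecated=true path above warns about such usage.
 */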
object_child_foreach_recursive(object_get_root(),
scsi_legacy_handle_cmdline_cb, NULL);
}
static int32_t scsi_invalid_field(SCSIRequest *req, uint8_t *buf)
{
scsi_req_build_sense(req, SENSE_CODE(INVALID_FIELD));

View File

@@ -207,6 +207,7 @@ static void scsi_aio_complete(void *opaque, int ret)
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
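/* Completions may run outside the BlockBackend's AioContext, so take
 * it before touching request state and release it on every exit path. */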
aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
if (scsi_disk_req_check_error(r, ret, true)) {
goto done;
}
@@ -215,6 +216,7 @@ static void scsi_aio_complete(void *opaque, int ret)
scsi_req_complete(&r->req, GOOD);
done:
aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
scsi_req_unref(&r->req);
}
@@ -290,12 +292,14 @@ static void scsi_dma_complete(void *opaque, int ret)
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
if (ret < 0) {
block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
} else {
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
}
scsi_dma_complete_noio(r, ret);
aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
}
static void scsi_read_complete(void * opaque, int ret)
@@ -306,6 +310,7 @@ static void scsi_read_complete(void * opaque, int ret)
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
if (scsi_disk_req_check_error(r, ret, true)) {
goto done;
}
@@ -320,6 +325,7 @@ static void scsi_read_complete(void * opaque, int ret)
done:
scsi_req_unref(&r->req);
aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
}
/* Actually issue a read to the block device. */
@@ -364,12 +370,14 @@ static void scsi_do_read_cb(void *opaque, int ret)
assert (r->req.aiocb != NULL);
r->req.aiocb = NULL;
aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
if (ret < 0) {
block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
} else {
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
}
scsi_do_read(opaque, ret);
aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
}
/* Read more data from scsi device into buffer. */
@@ -489,12 +497,14 @@ static void scsi_write_complete(void * opaque, int ret)
assert (r->req.aiocb != NULL);
r->req.aiocb = NULL;
aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
if (ret < 0) {
block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
} else {
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
}
scsi_write_complete_noio(r, ret);
aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
}
static void scsi_write_data(SCSIRequest *req)
@@ -1625,11 +1635,14 @@ static void scsi_unmap_complete(void *opaque, int ret)
{
UnmapCBData *data = opaque;
SCSIDiskReq *r = data->r;
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
scsi_unmap_complete_noio(data, ret);
aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
}
static void scsi_disk_emulate_unmap(SCSIDiskReq *r, uint8_t *inbuf)
@@ -1696,6 +1709,7 @@ static void scsi_write_same_complete(void *opaque, int ret)
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
if (scsi_disk_req_check_error(r, ret, true)) {
goto done;
}
@@ -1724,6 +1738,7 @@ done:
scsi_req_unref(&r->req);
qemu_vfree(data->iov.iov_base);
g_free(data);
aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
}
static void scsi_disk_emulate_write_same(SCSIDiskReq *r, uint8_t *inbuf)

View File

@@ -143,10 +143,14 @@ done:
static void scsi_command_complete(void *opaque, int ret)
{
SCSIGenericReq *r = (SCSIGenericReq *)opaque;
SCSIDevice *s = r->req.dev;
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
aio_context_acquire(blk_get_aio_context(s->conf.blk));
scsi_command_complete_noio(r, ret);
aio_context_release(blk_get_aio_context(s->conf.blk));
}
static int execute_command(BlockBackend *blk,
@@ -182,9 +186,11 @@ static void scsi_read_complete(void * opaque, int ret)
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
aio_context_acquire(blk_get_aio_context(s->conf.blk));
if (ret || r->req.io_canceled) {
scsi_command_complete_noio(r, ret);
return;
goto done;
}
len = r->io_header.dxfer_len - r->io_header.resid;
@@ -193,7 +199,7 @@ static void scsi_read_complete(void * opaque, int ret)
r->len = -1;
if (len == 0) {
scsi_command_complete_noio(r, 0);
return;
goto done;
}
/* Snoop READ CAPACITY output to set the blocksize. */
@@ -237,6 +243,9 @@ static void scsi_read_complete(void * opaque, int ret)
}
scsi_req_data(&r->req, len);
scsi_req_unref(&r->req);
done:
aio_context_release(blk_get_aio_context(s->conf.blk));
}
/* Read more data from scsi device into buffer. */
@@ -272,9 +281,11 @@ static void scsi_write_complete(void * opaque, int ret)
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
aio_context_acquire(blk_get_aio_context(s->conf.blk));
if (ret || r->req.io_canceled) {
scsi_command_complete_noio(r, ret);
return;
goto done;
}
if (r->req.cmd.buf[0] == MODE_SELECT && r->req.cmd.buf[4] == 12 &&
@@ -284,6 +295,9 @@ static void scsi_write_complete(void * opaque, int ret)
}
scsi_command_complete_noio(r, ret);
done:
aio_context_release(blk_get_aio_context(s->conf.blk));
}
/* Write data to a scsi device. Returns nonzero on failure.

View File

@@ -1206,9 +1206,6 @@ static void spapr_vscsi_realize(VIOsPAPRDevice *dev, Error **errp)
scsi_bus_new(&s->bus, sizeof(s->bus), DEVICE(dev),
&vscsi_scsi_info, NULL);
if (!dev->qdev.hotplugged) {
scsi_bus_legacy_handle_cmdline(&s->bus, errp);
}
}
void spapr_vscsi_create(VIOsPAPRBus *bus)
@@ -1218,6 +1215,8 @@ void spapr_vscsi_create(VIOsPAPRBus *bus)
dev = qdev_create(&bus->bus, "spapr-vscsi");
qdev_init_nofail(dev);
scsi_bus_legacy_handle_cmdline(&VIO_SPAPR_VSCSI_DEVICE(dev)->bus,
false);
}
static int spapr_vscsi_devnode(VIOsPAPRDevice *dev, void *fdt, int node_off)

View File

@@ -49,35 +49,35 @@ void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp)
}
}
static void virtio_scsi_data_plane_handle_cmd(VirtIODevice *vdev,
static bool virtio_scsi_data_plane_handle_cmd(VirtIODevice *vdev,
VirtQueue *vq)
{
VirtIOSCSI *s = (VirtIOSCSI *)vdev;
assert(s->ctx && s->dataplane_started);
virtio_scsi_handle_cmd_vq(s, vq);
return virtio_scsi_handle_cmd_vq(s, vq);
}
static void virtio_scsi_data_plane_handle_ctrl(VirtIODevice *vdev,
static bool virtio_scsi_data_plane_handle_ctrl(VirtIODevice *vdev,
VirtQueue *vq)
{
VirtIOSCSI *s = VIRTIO_SCSI(vdev);
assert(s->ctx && s->dataplane_started);
virtio_scsi_handle_ctrl_vq(s, vq);
return virtio_scsi_handle_ctrl_vq(s, vq);
}
static void virtio_scsi_data_plane_handle_event(VirtIODevice *vdev,
static bool virtio_scsi_data_plane_handle_event(VirtIODevice *vdev,
VirtQueue *vq)
{
VirtIOSCSI *s = VIRTIO_SCSI(vdev);
assert(s->ctx && s->dataplane_started);
virtio_scsi_handle_event_vq(s, vq);
return virtio_scsi_handle_event_vq(s, vq);
}
static int virtio_scsi_vring_init(VirtIOSCSI *s, VirtQueue *vq, int n,
void (*fn)(VirtIODevice *vdev, VirtQueue *vq))
VirtIOHandleAIOOutput fn)
{
BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s)));
int rc;

View File

@@ -436,13 +436,18 @@ static inline void virtio_scsi_release(VirtIOSCSI *s)
}
}
void virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq)
bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq)
{
VirtIOSCSIReq *req;
bool progress = false;
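/* The return value feeds the dataplane handlers above, which must
 * report whether polling the queue made progress. */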
virtio_scsi_acquire(s);
while ((req = virtio_scsi_pop_req(s, vq))) {
progress = true;
virtio_scsi_handle_ctrl_req(s, req);
}
virtio_scsi_release(s);
return progress;
}
static void virtio_scsi_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
@@ -591,17 +596,20 @@ static void virtio_scsi_handle_cmd_req_submit(VirtIOSCSI *s, VirtIOSCSIReq *req)
scsi_req_unref(sreq);
}
void virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
{
VirtIOSCSIReq *req, *next;
int ret = 0;
bool progress = false;
QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs);
virtio_scsi_acquire(s);
do {
virtio_queue_set_notification(vq, 0);
while ((req = virtio_scsi_pop_req(s, vq))) {
progress = true;
ret = virtio_scsi_handle_cmd_req_prepare(s, req);
if (!ret) {
QTAILQ_INSERT_TAIL(&reqs, req, next);
@@ -624,6 +632,8 @@ void virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
QTAILQ_FOREACH_SAFE(req, &reqs, next, next) {
virtio_scsi_handle_cmd_req_submit(s, req);
}
virtio_scsi_release(s);
return progress;
}
static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq)
@@ -752,11 +762,16 @@ out:
virtio_scsi_release(s);
}
void virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq)
bool virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq)
{
virtio_scsi_acquire(s);
if (s->events_dropped) {
virtio_scsi_push_event(s, NULL, VIRTIO_SCSI_T_NO_EVENT, 0);
virtio_scsi_release(s);
return true;
}
virtio_scsi_release(s);
return false;
}
static void virtio_scsi_handle_event(VirtIODevice *vdev, VirtQueue *vq)
@@ -889,14 +904,6 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp)
/* override default SCSI bus hotplug-handler, with virtio-scsi's one */
qbus_set_hotplug_handler(BUS(&s->bus), dev, &error_abort);
if (!dev->hotplugged) {
scsi_bus_legacy_handle_cmdline(&s->bus, &err);
if (err != NULL) {
error_propagate(errp, err);
return;
}
}
virtio_scsi_dataplane_setup(s, errp);
}

View File

@@ -362,6 +362,7 @@ static void r2d_machine_init(MachineClass *mc)
{
mc->desc = "r2d-plus board";
mc->init = r2d_init;
mc->block_default_type = IF_IDE;
}
DEFINE_MACHINE("r2d", r2d_machine_init)

View File

@@ -989,11 +989,6 @@ static void sun4m_hw_init(const struct sun4m_hwdef *hwdef,
     slavio_misc_init(hwdef->slavio_base, hwdef->aux1_base, hwdef->aux2_base,
                      slavio_irq[30], fdc_tc);

-    if (drive_get_max_bus(IF_SCSI) > 0) {
-        fprintf(stderr, "qemu: too many SCSI bus\n");
-        exit(1);
-    }
-
     esp_init(hwdef->esp_base, 2,
              espdma_memory_read, espdma_memory_write,
              espdma, espdma_irq, &esp_reset, &dma_enable);

hw/sparc64/sun4u.c

@@ -579,6 +579,7 @@ static void sun4u_class_init(ObjectClass *oc, void *data)
     mc->desc = "Sun4u platform";
     mc->init = sun4u_init;
+    mc->block_default_type = IF_IDE;
     mc->max_cpus = 1; /* XXX for now */
     mc->is_default = 1;
     mc->default_boot_order = "c";
@@ -596,6 +597,7 @@ static void sun4v_class_init(ObjectClass *oc, void *data)
     mc->desc = "Sun4v platform";
     mc->init = sun4v_init;
+    mc->block_default_type = IF_IDE;
     mc->max_cpus = 1; /* XXX for now */
     mc->default_boot_order = "c";
 }
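The r2d hunk above and these sun4u/sun4v hunks all make the board's default drive interface explicit. A toy model of what the field controls, with an invented enum and struct rather than QEMU's definitions: a bare -drive resolves its bus against the machine class.

/* Toy model of a per-machine default drive bus (invented types). */
#include <stdio.h>

typedef enum { IF_NONE, IF_IDE, IF_SCSI } BlockInterfaceType;

typedef struct MachineClass {
    const char *desc;
    BlockInterfaceType block_default_type;  /* what a bare -drive gets */
} MachineClass;

int main(void)
{
    MachineClass r2d = { "r2d-plus board", IF_IDE };
    MachineClass generic = { "some board", IF_NONE };

    printf("%s: %s\n", r2d.desc,
           r2d.block_default_type == IF_IDE ? "if=ide" : "if=none");
    printf("%s: %s\n", generic.desc,
           generic.block_default_type == IF_IDE ? "if=ide" : "if=none");
    return 0;
}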

hw/timer/mips_gictimer.c

@@ -14,6 +14,11 @@

 #define TIMER_PERIOD 10 /* 10 ns period for 100 Mhz frequency */

+uint32_t mips_gictimer_get_freq(MIPSGICTimerState *gic)
+{
+    return NANOSECONDS_PER_SECOND / TIMER_PERIOD;
+}
+
 static void gic_vptimer_update(MIPSGICTimerState *gictimer,
                                uint32_t vp_index, uint64_t now)
 {
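The new helper is a one-line derivation: with a 10 ns tick, frequency = 1,000,000,000 / 10 = 100,000,000 Hz, i.e. the 100 MHz the comment mentions. A standalone check of the arithmetic (not QEMU code):

/* Standalone check of the frequency derivation. */
#include <stdio.h>

#define NANOSECONDS_PER_SECOND 1000000000
#define TIMER_PERIOD 10   /* ns, as in the patch */

int main(void)
{
    unsigned freq = NANOSECONDS_PER_SECOND / TIMER_PERIOD;
    printf("%u Hz\n", freq);   /* prints 100000000, i.e. 100 MHz */
    return 0;
}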

hw/usb/dev-smartcard-reader.c

@@ -1001,80 +1001,92 @@ static void ccid_handle_bulk_out(USBCCIDState *s, USBPacket *p)
     CCID_Header *ccid_header;

     if (p->iov.size + s->bulk_out_pos > BULK_OUT_DATA_SIZE) {
-        p->status = USB_RET_STALL;
-        return;
+        goto err;
     }
-    ccid_header = (CCID_Header *)s->bulk_out_data;
     usb_packet_copy(p, s->bulk_out_data + s->bulk_out_pos, p->iov.size);
     s->bulk_out_pos += p->iov.size;
-    if (p->iov.size == CCID_MAX_PACKET_SIZE) {
+    if (s->bulk_out_pos < 10) {
+        DPRINTF(s, 1, "%s: header incomplete\n", __func__);
+        goto err;
+    }
+
+    ccid_header = (CCID_Header *)s->bulk_out_data;
+    if ((s->bulk_out_pos - 10 < ccid_header->dwLength) &&
+        (p->iov.size == CCID_MAX_PACKET_SIZE)) {
         DPRINTF(s, D_VERBOSE,
-                "usb-ccid: bulk_in: expecting more packets (%zd/%d)\n",
-                p->iov.size, ccid_header->dwLength);
+                "usb-ccid: bulk_in: expecting more packets (%d/%d)\n",
+                s->bulk_out_pos - 10, ccid_header->dwLength);
         return;
     }
-    if (s->bulk_out_pos < 10) {
+    if (s->bulk_out_pos - 10 != ccid_header->dwLength) {
         DPRINTF(s, 1,
-                "%s: bad USB_TOKEN_OUT length, should be at least 10 bytes\n",
-                __func__);
-    } else {
-        DPRINTF(s, D_MORE_INFO, "%s %x %s\n", __func__,
-                ccid_header->bMessageType,
-                ccid_message_type_to_str(ccid_header->bMessageType));
-        switch (ccid_header->bMessageType) {
-        case CCID_MESSAGE_TYPE_PC_to_RDR_GetSlotStatus:
-            ccid_write_slot_status(s, ccid_header);
-            break;
-        case CCID_MESSAGE_TYPE_PC_to_RDR_IccPowerOn:
-            DPRINTF(s, 1, "%s: PowerOn: %d\n", __func__,
-                ((CCID_IccPowerOn *)(ccid_header))->bPowerSelect);
-            s->powered = true;
-            if (!ccid_card_inserted(s)) {
-                ccid_report_error_failed(s, ERROR_ICC_MUTE);
-            }
-            /* atr is written regardless of error. */
-            ccid_write_data_block_atr(s, ccid_header);
-            break;
-        case CCID_MESSAGE_TYPE_PC_to_RDR_IccPowerOff:
-            ccid_reset_error_status(s);
-            s->powered = false;
-            ccid_write_slot_status(s, ccid_header);
-            break;
-        case CCID_MESSAGE_TYPE_PC_to_RDR_XfrBlock:
-            ccid_on_apdu_from_guest(s, (CCID_XferBlock *)s->bulk_out_data);
-            break;
-        case CCID_MESSAGE_TYPE_PC_to_RDR_SetParameters:
-            ccid_reset_error_status(s);
-            ccid_set_parameters(s, ccid_header);
-            ccid_write_parameters(s, ccid_header);
-            break;
-        case CCID_MESSAGE_TYPE_PC_to_RDR_ResetParameters:
-            ccid_reset_error_status(s);
-            ccid_reset_parameters(s);
-            ccid_write_parameters(s, ccid_header);
-            break;
-        case CCID_MESSAGE_TYPE_PC_to_RDR_GetParameters:
-            ccid_reset_error_status(s);
-            ccid_write_parameters(s, ccid_header);
-            break;
-        case CCID_MESSAGE_TYPE_PC_to_RDR_Mechanical:
-            ccid_report_error_failed(s, 0);
-            ccid_write_slot_status(s, ccid_header);
-            break;
-        default:
-            DPRINTF(s, 1,
-                "handle_data: ERROR: unhandled message type %Xh\n",
-                ccid_header->bMessageType);
-            /*
-             * The caller is expecting the device to respond, tell it we
-             * don't support the operation.
-             */
-            ccid_report_error_failed(s, ERROR_CMD_NOT_SUPPORTED);
-            ccid_write_slot_status(s, ccid_header);
-            break;
-        }
+        DPRINTF(s, 1,
+                "usb-ccid: bulk_in: message size mismatch (got %d, expected %d)\n",
+                s->bulk_out_pos - 10, ccid_header->dwLength);
+        goto err;
+    }
+
+    DPRINTF(s, D_MORE_INFO, "%s %x %s\n", __func__,
+            ccid_header->bMessageType,
+            ccid_message_type_to_str(ccid_header->bMessageType));
+    switch (ccid_header->bMessageType) {
+    case CCID_MESSAGE_TYPE_PC_to_RDR_GetSlotStatus:
+        ccid_write_slot_status(s, ccid_header);
+        break;
+    case CCID_MESSAGE_TYPE_PC_to_RDR_IccPowerOn:
+        DPRINTF(s, 1, "%s: PowerOn: %d\n", __func__,
+                ((CCID_IccPowerOn *)(ccid_header))->bPowerSelect);
+        s->powered = true;
+        if (!ccid_card_inserted(s)) {
+            ccid_report_error_failed(s, ERROR_ICC_MUTE);
+        }
+        /* atr is written regardless of error. */
+        ccid_write_data_block_atr(s, ccid_header);
+        break;
+    case CCID_MESSAGE_TYPE_PC_to_RDR_IccPowerOff:
+        ccid_reset_error_status(s);
+        s->powered = false;
+        ccid_write_slot_status(s, ccid_header);
+        break;
+    case CCID_MESSAGE_TYPE_PC_to_RDR_XfrBlock:
+        ccid_on_apdu_from_guest(s, (CCID_XferBlock *)s->bulk_out_data);
+        break;
+    case CCID_MESSAGE_TYPE_PC_to_RDR_SetParameters:
+        ccid_reset_error_status(s);
+        ccid_set_parameters(s, ccid_header);
+        ccid_write_parameters(s, ccid_header);
+        break;
+    case CCID_MESSAGE_TYPE_PC_to_RDR_ResetParameters:
+        ccid_reset_error_status(s);
+        ccid_reset_parameters(s);
+        ccid_write_parameters(s, ccid_header);
+        break;
+    case CCID_MESSAGE_TYPE_PC_to_RDR_GetParameters:
+        ccid_reset_error_status(s);
+        ccid_write_parameters(s, ccid_header);
+        break;
+    case CCID_MESSAGE_TYPE_PC_to_RDR_Mechanical:
+        ccid_report_error_failed(s, 0);
+        ccid_write_slot_status(s, ccid_header);
+        break;
+    default:
+        DPRINTF(s, 1,
+                "handle_data: ERROR: unhandled message type %Xh\n",
+                ccid_header->bMessageType);
+        /*
+         * The caller is expecting the device to respond, tell it we
+         * don't support the operation.
+         */
+        ccid_report_error_failed(s, ERROR_CMD_NOT_SUPPORTED);
+        ccid_write_slot_status(s, ccid_header);
+        break;
     }
     s->bulk_out_pos = 0;
+    return;
+
+err:
+    p->status = USB_RET_STALL;
+    s->bulk_out_pos = 0;
+    return;
 }

 static void ccid_bulk_in_copy_to_guest(USBCCIDState *s, USBPacket *p)
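The rewritten bulk-out path enforces a single framing rule: a message is complete only once a full 10-byte CCID header is buffered and the accumulated payload length equals the header's dwLength; overflow, a short header, or a length mismatch now all stall the endpoint through one err: exit. The sketch below replays that rule on a plain buffer; the names and return-code convention are invented, and the full-size-packet continuation condition is simplified away.

/* Sketch of the framing check (invented names, simplified): -1 = stall,
 * 0 = wait for more data, 1 = complete message. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define CCID_HEADER_SIZE 10
#define BUF_MAX 64

static uint32_t hdr_dwlength(const uint8_t *b)   /* bytes 1..4, LE */
{
    return b[1] | b[2] << 8 | b[3] << 16 | (uint32_t)b[4] << 24;
}

static int feed(uint8_t *acc, size_t *pos, const uint8_t *d, size_t n)
{
    if (*pos + n > BUF_MAX) {
        return -1;                     /* overflow: stall */
    }
    memcpy(acc + *pos, d, n);
    *pos += n;
    if (*pos < CCID_HEADER_SIZE) {
        return -1;                     /* header incomplete: stall */
    }
    uint32_t want = hdr_dwlength(acc);
    if (*pos - CCID_HEADER_SIZE < want) {
        return 0;                      /* expecting more packets */
    }
    if (*pos - CCID_HEADER_SIZE > want) {
        return -1;                     /* size mismatch: stall */
    }
    return 1;                          /* exactly header + dwLength */
}

int main(void)
{
    uint8_t acc[BUF_MAX], msg[12] = { 0x65, 2, 0, 0, 0 };  /* dwLength=2 */
    size_t pos = 0;
    printf("%d\n", feed(acc, &pos, msg, sizeof(msg)));     /* prints 1 */
    return 0;
}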

hw/usb/hcd-ehci-pci.c

@@ -89,6 +89,14 @@ static void usb_ehci_pci_init(Object *obj)
     usb_ehci_init(s, DEVICE(obj));
 }

+static void usb_ehci_pci_finalize(Object *obj)
+{
+    EHCIPCIState *i = PCI_EHCI(obj);
+    EHCIState *s = &i->ehci;
+
+    usb_ehci_finalize(s);
+}
+
 static void usb_ehci_pci_exit(PCIDevice *dev)
 {
     EHCIPCIState *i = PCI_EHCI(dev);
@@ -159,6 +167,7 @@ static const TypeInfo ehci_pci_type_info = {
     .parent = TYPE_PCI_DEVICE,
     .instance_size = sizeof(EHCIPCIState),
     .instance_init = usb_ehci_pci_init,
+    .instance_finalize = usb_ehci_pci_finalize,
     .abstract = true,
     .class_init = ehci_class_init,
 };

hw/usb/hcd-ehci.c

@@ -2545,6 +2545,11 @@ void usb_ehci_init(EHCIState *s, DeviceState *dev)
                          &s->mem_ports);
 }

+void usb_ehci_finalize(EHCIState *s)
+{
+    usb_packet_cleanup(&s->ipacket);
+}
+
 /*
  * vim: expandtab ts=4
  */

hw/usb/hcd-ehci.h

@@ -323,6 +323,7 @@ struct EHCIState {
 extern const VMStateDescription vmstate_ehci;

 void usb_ehci_init(EHCIState *s, DeviceState *dev);
+void usb_ehci_finalize(EHCIState *s);
 void usb_ehci_realize(EHCIState *s, DeviceState *dev, Error **errp);
 void usb_ehci_unrealize(EHCIState *s, DeviceState *dev, Error **errp);
 void ehci_reset(void *opaque);
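The three EHCI hunks are one change seen from three files: initialization sets up packet state, so the PCI wrapper gains a matching instance_finalize that releases it when the object is destroyed. A generic sketch of the init/finalize pairing with an invented ops table (not the QOM API):

/* Generic init/finalize pairing (invented types, not the QOM API). */
#include <stdlib.h>

typedef struct Obj {
    void *ipacket;                    /* stands in for s->ipacket */
} Obj;

typedef struct TypeOps {
    void (*instance_init)(Obj *o);
    void (*instance_finalize)(Obj *o);
} TypeOps;

static void ehci_like_init(Obj *o)     { o->ipacket = malloc(64); }
static void ehci_like_finalize(Obj *o) { free(o->ipacket); }

static const TypeOps ops = {
    .instance_init     = ehci_like_init,
    .instance_finalize = ehci_like_finalize,   /* the newly added hook */
};

int main(void)
{
    Obj o;
    ops.instance_init(&o);
    ops.instance_finalize(&o);        /* every init path now has an undo */
    return 0;
}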

hw/usb/hcd-ohci.c

@@ -42,6 +42,8 @@
 #define OHCI_MAX_PORTS 15

+#define ED_LINK_LIMIT 4
+
 static int64_t usb_frame_time;
 static int64_t usb_bit_time;
@@ -725,7 +727,7 @@ static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed,
     if (ohci_read_iso_td(ohci, addr, &iso_td)) {
         trace_usb_ohci_iso_td_read_failed(addr);
         ohci_die(ohci);
-        return 0;
+        return 1;
     }

     starting_frame = OHCI_BM(iso_td.flags, TD_SF);
@@ -1184,7 +1186,7 @@ static int ohci_service_ed_list(OHCIState *ohci, uint32_t head, int completion)
     uint32_t next_ed;
     uint32_t cur;
     int active;
+    uint32_t link_cnt = 0;

     active = 0;

     if (head == 0)
@@ -1199,6 +1201,11 @@ static int ohci_service_ed_list(OHCIState *ohci, uint32_t head, int completion)

         next_ed = ed.next & OHCI_DPTR_MASK;

+        if (++link_cnt > ED_LINK_LIMIT) {
+            ohci_die(ohci);
+            return 0;
+        }
+
         if ((ed.head & OHCI_ED_H) || (ed.flags & OHCI_ED_K)) {
             uint32_t addr;
             /* Cancel pending packets for ED that have been paused. */
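ED_LINK_LIMIT exists because the endpoint-descriptor list lives in guest memory: a guest can build a cycle, and an unbounded walk would hang the device model. A standalone sketch of the capped traversal (plain C with invented names, not the QEMU code):

/* Cap link hops so a circular guest-built list terminates. */
#include <stdio.h>

#define ED_LINK_LIMIT 4

struct ed { struct ed *next; };

static int service_ed_list(struct ed *head)
{
    unsigned link_cnt = 0;
    for (struct ed *cur = head; cur; cur = cur->next) {
        if (++link_cnt > ED_LINK_LIMIT) {
            return -1;                /* real code calls ohci_die() */
        }
        /* ... service one endpoint descriptor ... */
    }
    return 0;
}

int main(void)
{
    struct ed a, b;
    a.next = &b;
    b.next = &a;                      /* malicious cycle */
    printf("%d\n", service_ed_list(&a));  /* -1, not an endless loop */
    return 0;
}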

hw/usb/hcd-xhci.c

@@ -49,11 +49,10 @@
 /* Very pessimistic, let's hope it's enough for all cases */
 #define EV_QUEUE (((3 * 24) + 16) * MAXSLOTS)

-/* Do not deliver ER Full events. NEC's driver does some things not bound
- * to the specs when it gets them */
-#define ER_FULL_HACK
 #define TRB_LINK_LIMIT 4
+#define COMMAND_LIMIT 256
+#define TRANSFER_LIMIT 256

 #define LEN_CAP 0x40
 #define LEN_OPER (0x400 + 0x10 * MAXPORTS)
@@ -199,7 +198,6 @@ typedef enum TRBType {
     ER_DEVICE_NOTIFICATION,
     ER_MFINDEX_WRAP,
     /* vendor specific bits */
-    CR_VENDOR_VIA_CHALLENGE_RESPONSE = 48,
     CR_VENDOR_NEC_FIRMWARE_REVISION = 49,
     CR_VENDOR_NEC_CHALLENGE_RESPONSE = 50,
 } TRBType;
@@ -431,12 +429,14 @@ typedef struct XHCIInterrupter {
     uint32_t erdp_low;
     uint32_t erdp_high;

-    bool msix_used, er_pcs, er_full;
+    bool msix_used, er_pcs;

     dma_addr_t er_start;
     uint32_t er_size;
     unsigned int er_ep_idx;

+    /* kept for live migration compat only */
+    bool er_full_unused;
     XHCIEvent ev_buffer[EV_QUEUE];
     unsigned int ev_buffer_put;
     unsigned int ev_buffer_get;
@@ -486,9 +486,13 @@ struct XHCIState {
     XHCIInterrupter intr[MAXINTRS];

     XHCIRing cmd_ring;
+
+    bool nec_quirks;
 };

-#define TYPE_XHCI "nec-usb-xhci"
+#define TYPE_XHCI "base-xhci"
+#define TYPE_NEC_XHCI "nec-usb-xhci"
+#define TYPE_QEMU_XHCI "qemu-xhci"

 #define XHCI(obj) \
     OBJECT_CHECK(XHCIState, (obj), TYPE_XHCI)
@@ -549,7 +553,6 @@ static const char *TRBType_names[] = {
     [ER_HOST_CONTROLLER] = "ER_HOST_CONTROLLER",
     [ER_DEVICE_NOTIFICATION] = "ER_DEVICE_NOTIFICATION",
     [ER_MFINDEX_WRAP] = "ER_MFINDEX_WRAP",
-    [CR_VENDOR_VIA_CHALLENGE_RESPONSE] = "CR_VENDOR_VIA_CHALLENGE_RESPONSE",
     [CR_VENDOR_NEC_FIRMWARE_REVISION] = "CR_VENDOR_NEC_FIRMWARE_REVISION",
     [CR_VENDOR_NEC_CHALLENGE_RESPONSE] = "CR_VENDOR_NEC_CHALLENGE_RESPONSE",
 };
@@ -826,7 +829,7 @@ static void xhci_intr_raise(XHCIState *xhci, int v)

 static inline int xhci_running(XHCIState *xhci)
 {
-    return !(xhci->usbsts & USBSTS_HCH) && !xhci->intr[0].er_full;
+    return !(xhci->usbsts & USBSTS_HCH);
 }

 static void xhci_die(XHCIState *xhci)
@@ -865,74 +868,6 @@ static void xhci_write_event(XHCIState *xhci, XHCIEvent *event, int v)
     }
 }

-static void xhci_events_update(XHCIState *xhci, int v)
-{
-    XHCIInterrupter *intr = &xhci->intr[v];
-    dma_addr_t erdp;
-    unsigned int dp_idx;
-    bool do_irq = 0;
-
-    if (xhci->usbsts & USBSTS_HCH) {
-        return;
-    }
-
-    erdp = xhci_addr64(intr->erdp_low, intr->erdp_high);
-    if (erdp < intr->er_start ||
-        erdp >= (intr->er_start + TRB_SIZE*intr->er_size)) {
-        DPRINTF("xhci: ERDP out of bounds: "DMA_ADDR_FMT"\n", erdp);
-        DPRINTF("xhci: ER[%d] at "DMA_ADDR_FMT" len %d\n",
-                v, intr->er_start, intr->er_size);
-        xhci_die(xhci);
-        return;
-    }
-    dp_idx = (erdp - intr->er_start) / TRB_SIZE;
-    assert(dp_idx < intr->er_size);
-
-    /* NEC didn't read section 4.9.4 of the spec (v1.0 p139 top Note) and thus
-     * deadlocks when the ER is full. Hack it by holding off events until
-     * the driver decides to free at least half of the ring */
-    if (intr->er_full) {
-        int er_free = dp_idx - intr->er_ep_idx;
-        if (er_free <= 0) {
-            er_free += intr->er_size;
-        }
-        if (er_free < (intr->er_size/2)) {
-            DPRINTF("xhci_events_update(): event ring still "
-                    "more than half full (hack)\n");
-            return;
-        }
-    }
-
-    while (intr->ev_buffer_put != intr->ev_buffer_get) {
-        assert(intr->er_full);
-        if (((intr->er_ep_idx+1) % intr->er_size) == dp_idx) {
-            DPRINTF("xhci_events_update(): event ring full again\n");
-#ifndef ER_FULL_HACK
-            XHCIEvent full = {ER_HOST_CONTROLLER, CC_EVENT_RING_FULL_ERROR};
-            xhci_write_event(xhci, &full, v);
-#endif
-            do_irq = 1;
-            break;
-        }
-        XHCIEvent *event = &intr->ev_buffer[intr->ev_buffer_get];
-        xhci_write_event(xhci, event, v);
-        intr->ev_buffer_get++;
-        do_irq = 1;
-        if (intr->ev_buffer_get == EV_QUEUE) {
-            intr->ev_buffer_get = 0;
-        }
-    }
-
-    if (do_irq) {
-        xhci_intr_raise(xhci, v);
-    }
-
-    if (intr->er_full && intr->ev_buffer_put == intr->ev_buffer_get) {
-        DPRINTF("xhci_events_update(): event ring no longer full\n");
-        intr->er_full = 0;
-    }
-}
-
 static void xhci_event(XHCIState *xhci, XHCIEvent *event, int v)
 {
     XHCIInterrupter *intr;
@@ -945,19 +880,6 @@ static void xhci_event(XHCIState *xhci, XHCIEvent *event, int v)
     }
     intr = &xhci->intr[v];

-    if (intr->er_full) {
-        DPRINTF("xhci_event(): ER full, queueing\n");
-        if (((intr->ev_buffer_put+1) % EV_QUEUE) == intr->ev_buffer_get) {
-            DPRINTF("xhci: event queue full, dropping event!\n");
-            return;
-        }
-        intr->ev_buffer[intr->ev_buffer_put++] = *event;
-        if (intr->ev_buffer_put == EV_QUEUE) {
-            intr->ev_buffer_put = 0;
-        }
-        return;
-    }
-
     erdp = xhci_addr64(intr->erdp_low, intr->erdp_high);
     if (erdp < intr->er_start ||
         erdp >= (intr->er_start + TRB_SIZE*intr->er_size)) {
@@ -971,21 +893,12 @@ static void xhci_event(XHCIState *xhci, XHCIEvent *event, int v)
     dp_idx = (erdp - intr->er_start) / TRB_SIZE;
     assert(dp_idx < intr->er_size);

-    if ((intr->er_ep_idx+1) % intr->er_size == dp_idx) {
-        DPRINTF("xhci_event(): ER full, queueing\n");
-#ifndef ER_FULL_HACK
+    if ((intr->er_ep_idx + 2) % intr->er_size == dp_idx) {
+        DPRINTF("xhci: ER %d full, send ring full error\n", v);
         XHCIEvent full = {ER_HOST_CONTROLLER, CC_EVENT_RING_FULL_ERROR};
-        xhci_write_event(xhci, &full);
-#endif
-        intr->er_full = 1;
-        if (((intr->ev_buffer_put+1) % EV_QUEUE) == intr->ev_buffer_get) {
-            DPRINTF("xhci: event queue full, dropping event!\n");
-            return;
-        }
-        intr->ev_buffer[intr->ev_buffer_put++] = *event;
-        if (intr->ev_buffer_put == EV_QUEUE) {
-            intr->ev_buffer_put = 0;
-        }
+        xhci_write_event(xhci, &full, v);
+    } else if ((intr->er_ep_idx + 1) % intr->er_size == dp_idx) {
+        DPRINTF("xhci: ER %d full, drop event\n", v);
     } else {
         xhci_write_event(xhci, event, v);
     }
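With the software event queue gone, event delivery becomes a pure ring-index policy: two slots short of the driver's dequeue pointer the controller posts a single CC_EVENT_RING_FULL_ERROR, and one slot short it drops events outright. A toy ring that reproduces just the index arithmetic (invented names, not the QEMU code):

/* Toy reproduction of the new ring-full policy (indices only). */
#include <stdio.h>

#define ER_SIZE 8

static unsigned ep_idx;               /* enqueue index (device side) */
static unsigned dp_idx;               /* dequeue index (driver side) */

static void post_event(int ev)
{
    if ((ep_idx + 2) % ER_SIZE == dp_idx) {
        printf("slot %u: RING_FULL error instead of event %d\n", ep_idx, ev);
        ep_idx = (ep_idx + 1) % ER_SIZE;
    } else if ((ep_idx + 1) % ER_SIZE == dp_idx) {
        printf("ring full: event %d dropped\n", ev);
    } else {
        printf("slot %u: event %d written\n", ep_idx, ev);
        ep_idx = (ep_idx + 1) % ER_SIZE;
    }
}

int main(void)
{
    for (int i = 0; i < 9; i++) {
        post_event(i);                /* dp_idx never moves: guest stalled */
    }
    return 0;
}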
@@ -1032,6 +945,7 @@ static TRBType xhci_ring_fetch(XHCIState *xhci, XHCIRing *ring, XHCITRB *trb,
         return type;
     } else {
         if (++link_cnt > TRB_LINK_LIMIT) {
+            trace_usb_xhci_enforced_limit("trb-link");
             return 0;
         }
         ring->dequeue = xhci_mask64(trb->parameter);
@@ -1124,7 +1038,6 @@ static void xhci_er_reset(XHCIState *xhci, int v)
     intr->er_ep_idx = 0;
     intr->er_pcs = 1;
-    intr->er_full = 0;

     DPRINTF("xhci: event ring[%d]:" DMA_ADDR_FMT " [%d]\n",
             v, intr->er_start, intr->er_size);
@@ -2150,6 +2063,7 @@ static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid)
     XHCIRing *ring;
     USBEndpoint *ep = NULL;
     uint64_t mfindex;
+    unsigned int count = 0;
     int length;
     int i;
@@ -2262,6 +2176,10 @@ static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid)
             epctx->retry = xfer;
             break;
         }
+        if (count++ > TRANSFER_LIMIT) {
+            trace_usb_xhci_enforced_limit("transfers");
+            break;
+        }
     }
     epctx->kick_active--;
@@ -2702,39 +2620,13 @@ static uint32_t xhci_nec_challenge(uint32_t hi, uint32_t lo)
     return ~val;
 }

-static void xhci_via_challenge(XHCIState *xhci, uint64_t addr)
-{
-    PCIDevice *pci_dev = PCI_DEVICE(xhci);
-    uint32_t buf[8];
-    uint32_t obuf[8];
-    dma_addr_t paddr = xhci_mask64(addr);
-
-    pci_dma_read(pci_dev, paddr, &buf, 32);
-
-    memcpy(obuf, buf, sizeof(obuf));
-
-    if ((buf[0] & 0xff) == 2) {
-        obuf[0] = 0x49932000 + 0x54dc200 * buf[2] + 0x7429b578 * buf[3];
-        obuf[0] |= (buf[2] * buf[3]) & 0xff;
-        obuf[1] = 0x0132bb37 + 0xe89 * buf[2] + 0xf09 * buf[3];
-        obuf[2] = 0x0066c2e9 + 0x2091 * buf[2] + 0x19bd * buf[3];
-        obuf[3] = 0xd5281342 + 0x2cc9691 * buf[2] + 0x2367662 * buf[3];
-        obuf[4] = 0x0123c75c + 0x1595 * buf[2] + 0x19ec * buf[3];
-        obuf[5] = 0x00f695de + 0x26fd * buf[2] + 0x3e9 * buf[3];
-        obuf[6] = obuf[2] ^ obuf[3] ^ 0x29472956;
-        obuf[7] = obuf[2] ^ obuf[3] ^ 0x65866593;
-    }
-
-    pci_dma_write(pci_dev, paddr, &obuf, 32);
-}
-
 static void xhci_process_commands(XHCIState *xhci)
 {
     XHCITRB trb;
     TRBType type;
     XHCIEvent event = {ER_COMMAND_COMPLETE, CC_SUCCESS};
     dma_addr_t addr;
-    unsigned int i, slotid = 0;
+    unsigned int i, slotid = 0, count = 0;

     DPRINTF("xhci_process_commands()\n");
     if (!xhci_running(xhci)) {
@@ -2823,24 +2715,27 @@ static void xhci_process_commands(XHCIState *xhci)
         case CR_GET_PORT_BANDWIDTH:
             event.ccode = xhci_get_port_bandwidth(xhci, trb.parameter);
             break;
-        case CR_VENDOR_VIA_CHALLENGE_RESPONSE:
-            xhci_via_challenge(xhci, trb.parameter);
-            break;
         case CR_VENDOR_NEC_FIRMWARE_REVISION:
-            event.type = 48; /* NEC reply */
-            event.length = 0x3025;
+            if (xhci->nec_quirks) {
+                event.type = 48; /* NEC reply */
+                event.length = 0x3025;
+            } else {
+                event.ccode = CC_TRB_ERROR;
+            }
             break;
         case CR_VENDOR_NEC_CHALLENGE_RESPONSE:
-        {
-            uint32_t chi = trb.parameter >> 32;
-            uint32_t clo = trb.parameter;
-            uint32_t val = xhci_nec_challenge(chi, clo);
-            event.length = val & 0xFFFF;
-            event.epid = val >> 16;
-            slotid = val >> 24;
-            event.type = 48; /* NEC reply */
-        }
-        break;
+            if (xhci->nec_quirks) {
+                uint32_t chi = trb.parameter >> 32;
+                uint32_t clo = trb.parameter;
+                uint32_t val = xhci_nec_challenge(chi, clo);
+                event.length = val & 0xFFFF;
+                event.epid = val >> 16;
+                slotid = val >> 24;
+                event.type = 48; /* NEC reply */
+            } else {
+                event.ccode = CC_TRB_ERROR;
+            }
+            break;
         default:
             trace_usb_xhci_unimplemented("command", type);
             event.ccode = CC_TRB_ERROR;
@@ -2848,6 +2743,11 @@ static void xhci_process_commands(XHCIState *xhci)
         }
         event.slotid = slotid;
         xhci_event(xhci, &event, 0);
+
+        if (count++ > COMMAND_LIMIT) {
+            trace_usb_xhci_enforced_limit("commands");
+            return;
+        }
     }
 }
@@ -2978,7 +2878,6 @@ static void xhci_reset(DeviceState *dev)
         xhci->intr[i].er_ep_idx = 0;
         xhci->intr[i].er_pcs = 1;
-        xhci->intr[i].er_full = 0;
         xhci->intr[i].ev_buffer_put = 0;
         xhci->intr[i].ev_buffer_get = 0;
     }
@@ -3343,9 +3242,12 @@ static void xhci_runtime_write(void *ptr, hwaddr reg,
         intr->erstsz = val & 0xffff;
         break;
     case 0x10: /* ERSTBA low */
-        /* XXX NEC driver bug: it doesn't align this to 64 bytes
-           intr->erstba_low = val & 0xffffffc0; */
-        intr->erstba_low = val & 0xfffffff0;
+        if (xhci->nec_quirks) {
+            /* NEC driver bug: it doesn't align this to 64 bytes */
+            intr->erstba_low = val & 0xfffffff0;
+        } else {
+            intr->erstba_low = val & 0xffffffc0;
+        }
         break;
     case 0x14: /* ERSTBA high */
         intr->erstba_high = val;
@@ -3368,7 +3270,6 @@ static void xhci_runtime_write(void *ptr, hwaddr reg,
         break;
     case 0x1c: /* ERDP high */
         intr->erdp_high = val;
-        xhci_events_update(xhci, v);
         break;
     default:
         trace_usb_xhci_unimplemented("oper write", reg);
@@ -3641,6 +3542,9 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp)
     dev->config[PCI_CACHE_LINE_SIZE] = 0x10;
     dev->config[0x60] = 0x30; /* release number */

+    if (strcmp(object_get_typename(OBJECT(dev)), TYPE_NEC_XHCI) == 0) {
+        xhci->nec_quirks = true;
+    }
     if (xhci->numintrs > MAXINTRS) {
         xhci->numintrs = MAXINTRS;
     }
@@ -3866,8 +3770,7 @@ static const VMStateDescription vmstate_xhci_event = {
 static bool xhci_er_full(void *opaque, int version_id)
 {
-    struct XHCIInterrupter *intr = opaque;
-    return intr->er_full;
+    return false;
 }

 static const VMStateDescription vmstate_xhci_intr = {
@@ -3891,7 +3794,7 @@ static const VMStateDescription vmstate_xhci_intr = {
         VMSTATE_UINT32(er_ep_idx, XHCIInterrupter),

         /* event queue (used if ring is full) */
-        VMSTATE_BOOL(er_full, XHCIInterrupter),
+        VMSTATE_BOOL(er_full_unused, XHCIInterrupter),
         VMSTATE_UINT32_TEST(ev_buffer_put, XHCIInterrupter, xhci_er_full),
         VMSTATE_UINT32_TEST(ev_buffer_get, XHCIInterrupter, xhci_er_full),
         VMSTATE_STRUCT_ARRAY_TEST(ev_buffer, XHCIInterrupter, EV_QUEUE,
@@ -3963,10 +3866,7 @@ static void xhci_class_init(ObjectClass *klass, void *data)
     set_bit(DEVICE_CATEGORY_USB, dc->categories);
     k->realize = usb_xhci_realize;
     k->exit = usb_xhci_exit;
-    k->vendor_id = PCI_VENDOR_ID_NEC;
-    k->device_id = PCI_DEVICE_ID_NEC_UPD720200;
     k->class_id = PCI_CLASS_SERIAL_USB;
-    k->revision = 0x03;
     k->is_express = 1;
 }
@@ -3975,11 +3875,44 @@ static const TypeInfo xhci_info = {
     .parent = TYPE_PCI_DEVICE,
     .instance_size = sizeof(XHCIState),
     .class_init = xhci_class_init,
+    .abstract = true,
 };

+static void nec_xhci_class_init(ObjectClass *klass, void *data)
+{
+    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+
+    k->vendor_id = PCI_VENDOR_ID_NEC;
+    k->device_id = PCI_DEVICE_ID_NEC_UPD720200;
+    k->revision = 0x03;
+}
+
+static const TypeInfo nec_xhci_info = {
+    .name = TYPE_NEC_XHCI,
+    .parent = TYPE_XHCI,
+    .class_init = nec_xhci_class_init,
+};
+
+static void qemu_xhci_class_init(ObjectClass *klass, void *data)
+{
+    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+
+    k->vendor_id = PCI_VENDOR_ID_REDHAT;
+    k->device_id = PCI_DEVICE_ID_REDHAT_XHCI;
+    k->revision = 0x01;
+}
+
+static const TypeInfo qemu_xhci_info = {
+    .name = TYPE_QEMU_XHCI,
+    .parent = TYPE_XHCI,
+    .class_init = qemu_xhci_class_init,
+};
+
 static void xhci_register_types(void)
 {
     type_register_static(&xhci_info);
+    type_register_static(&nec_xhci_info);
+    type_register_static(&qemu_xhci_info);
 }

 type_init(xhci_register_types)
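The registration rework turns TYPE_XHCI into an abstract base: nec-usb-xhci keeps the NEC PCI identity (and, through usb_xhci_realize above, the nec_quirks behaviour), while qemu-xhci presents a Red Hat ID with none of the quirks. A struct-copy sketch of that inheritance; it mimics the QOM pattern rather than using it, and the Red Hat device ID shown is an assumption:

/* Struct-copy sketch of the base/variant split (not the QOM API). */
#include <stdint.h>
#include <stdio.h>

typedef struct PCIClass {
    const char *name;
    uint16_t vendor_id, device_id;
    uint8_t revision;
} PCIClass;

static const PCIClass base_xhci = { .name = "base-xhci" };  /* abstract */

int main(void)
{
    PCIClass nec = base_xhci;         /* "inherit", then override identity */
    nec.name = "nec-usb-xhci";
    nec.vendor_id = 0x1033;           /* PCI_VENDOR_ID_NEC */
    nec.device_id = 0x0194;           /* uPD720200 */
    nec.revision = 0x03;

    PCIClass qemu_xhci = base_xhci;
    qemu_xhci.name = "qemu-xhci";
    qemu_xhci.vendor_id = 0x1b36;     /* PCI_VENDOR_ID_REDHAT */
    qemu_xhci.device_id = 0x000d;     /* assumed PCI_DEVICE_ID_REDHAT_XHCI */
    qemu_xhci.revision = 0x01;

    printf("%s %04x:%04x\n", nec.name, nec.vendor_id, nec.device_id);
    printf("%s %04x:%04x\n", qemu_xhci.name, qemu_xhci.vendor_id,
           qemu_xhci.device_id);
    return 0;
}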

hw/usb/trace-events

@@ -174,6 +174,7 @@ usb_xhci_xfer_retry(void *xfer) "%p"
 usb_xhci_xfer_success(void *xfer, uint32_t bytes) "%p: len %d"
 usb_xhci_xfer_error(void *xfer, uint32_t ret) "%p: ret %d"
 usb_xhci_unimplemented(const char *item, int nr) "%s (0x%x)"
+usb_xhci_enforced_limit(const char *item) "%s"

 # hw/usb/desc.c
 usb_desc_device(int addr, int len, int ret) "dev %d query device, len %d, ret %d"

hw/vfio/common.c

@@ -294,18 +294,55 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
            section->offset_within_address_space & (1ULL << 63);
 }

+/* Called with rcu_read_lock held. */
+static bool vfio_get_vaddr(IOMMUTLBEntry *iotlb, void **vaddr,
+                           bool *read_only)
+{
+    MemoryRegion *mr;
+    hwaddr xlat;
+    hwaddr len = iotlb->addr_mask + 1;
+    bool writable = iotlb->perm & IOMMU_WO;
+
+    /*
+     * The IOMMU TLB entry we have just covers translation through
+     * this IOMMU to its immediate target. We need to translate
+     * it the rest of the way through to memory.
+     */
+    mr = address_space_translate(&address_space_memory,
+                                 iotlb->translated_addr,
+                                 &xlat, &len, writable);
+    if (!memory_region_is_ram(mr)) {
+        error_report("iommu map to non memory area %"HWADDR_PRIx"",
+                     xlat);
+        return false;
+    }
+
+    /*
+     * Translation truncates length to the IOMMU page size,
+     * check that it did not truncate too much.
+     */
+    if (len & iotlb->addr_mask) {
+        error_report("iommu has granularity incompatible with target AS");
+        return false;
+    }
+
+    *vaddr = memory_region_get_ram_ptr(mr) + xlat;
+    *read_only = !writable || mr->readonly;
+
+    return true;
+}
+
 static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
 {
     VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
     VFIOContainer *container = giommu->container;
     hwaddr iova = iotlb->iova + giommu->iommu_offset;
-    MemoryRegion *mr;
-    hwaddr xlat;
-    hwaddr len = iotlb->addr_mask + 1;
+    bool read_only;
     void *vaddr;
     int ret;

-    trace_vfio_iommu_map_notify(iova, iova + iotlb->addr_mask);
+    trace_vfio_iommu_map_notify(iotlb->perm == IOMMU_NONE ? "UNMAP" : "MAP",
+                                iova, iova + iotlb->addr_mask);

     if (iotlb->target_as != &address_space_memory) {
         error_report("Wrong target AS \"%s\", only system memory is allowed",
@@ -313,34 +350,22 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
         return;
     }

-    /*
-     * The IOMMU TLB entry we have just covers translation through
-     * this IOMMU to its immediate target. We need to translate
-     * it the rest of the way through to memory.
-     */
     rcu_read_lock();
-    mr = address_space_translate(&address_space_memory,
-                                 iotlb->translated_addr,
-                                 &xlat, &len, iotlb->perm & IOMMU_WO);
-    if (!memory_region_is_ram(mr)) {
-        error_report("iommu map to non memory area %"HWADDR_PRIx"",
-                     xlat);
-        goto out;
-    }
-
-    /*
-     * Translation truncates length to the IOMMU page size,
-     * check that it did not truncate too much.
-     */
-    if (len & iotlb->addr_mask) {
-        error_report("iommu has granularity incompatible with target AS");
-        goto out;
-    }

     if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
-        vaddr = memory_region_get_ram_ptr(mr) + xlat;
+        if (!vfio_get_vaddr(iotlb, &vaddr, &read_only)) {
+            goto out;
+        }
+        /*
+         * vaddr is only valid until rcu_read_unlock(). But after
+         * vfio_dma_map has set up the mapping the pages will be
+         * pinned by the kernel. This makes sure that the RAM backend
+         * of vaddr will always be there, even if the memory object is
+         * destroyed and its backing memory munmap-ed.
+         */
         ret = vfio_dma_map(container, iova,
                            iotlb->addr_mask + 1, vaddr,
-                           !(iotlb->perm & IOMMU_WO) || mr->readonly);
+                           read_only);
         if (ret) {
             error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
                          "0x%"HWADDR_PRIx", %p) = %d (%m)",

Some files were not shown because too many files have changed in this diff.