memory: prevent dma-reentrancy issues

Git-commit: a2e1753b80 References: bsc#1190011 (CVE-2021-3750) Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA. This flag is set/checked prior to calling a device's MemoryRegion handlers, and set when device code initiates DMA. The purpose of this flag is to prevent two types of DMA-based reentrancy issues: 1.) mmio -> dma -> mmio case 2.) bh -> dma write -> mmio case These issues have led to problems such as stack-exhaustion and use-after-frees. Summary of the problem from Peter Maydell: https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62 Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540 Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541 Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556 Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557 Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827 Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1282 Resolves: CVE-2023-0330 Signed-off-by: Alexander Bulekov <alxndr@bu.edu> Reviewed-by: Thomas Huth <thuth@redhat.com> Message-Id: <20230427211013.2994127-2-alxndr@bu.edu> [thuth: Replace warn_report() with warn_report_once()] Signed-off-by: Thomas Huth <thuth@redhat.com> Signed-off-by: Dario Faggioli <dfaggioli@suse.com>
io: remove io watch if TLS channel is closed during handshake
2023-10-20 14:57:22 +02:00 · 2023-10-20 14:19:10 +02:00 · 2023-10-20 14:19:01 +02:00 · 2023-10-20 14:18:43 +02:00 · 2023-04-20 16:18:30 +02:00 · 2023-04-20 16:05:53 +02:00
356 changed files with 13553 additions and 3221 deletions
--- a/2
+++ b/2
@@ -831,6 +831,7 @@ M: Paolo Bonzini <pbonzini@redhat.com>
 S: Supported
 F: include/hw/scsi*
 F: hw/scsi/*
+F: tests/scsi-disk-test.c
 T: git git://github.com/bonzini/qemu.git scsi-next

 LSI53C895A
@@ -1211,6 +1212,7 @@ F: qom/
 X: qom/cpu.c
 F: tests/check-qom-interface.c
 F: tests/check-qom-proplist.c
+F: tests/check-qom-props.c
 F: tests/qom-test.c

 QMP
--- a/Makefile.target
+++ b/Makefile.target
@@ -36,6 +36,10 @@ endif
 PROGS=$(QEMU_PROG) $(QEMU_PROGW)
 STPFILES=

+ifdef CONFIG_LINUX_USER
+PROGS+=$(QEMU_PROG)-binfmt
+endif
+
 config-target.h: config-target.h-timestamp
 config-target.h-timestamp: config-target.mak

@@ -113,6 +117,8 @@ QEMU_CFLAGS+=-I$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR) -I$(SRC_PATH)/linux-user
 obj-y += linux-user/
 obj-y += gdbstub.o thunk.o user-exec.o

+obj-binfmt-y += linux-user/
+
 endif #CONFIG_LINUX_USER

 #########################################################
@@ -161,7 +167,11 @@ endif # CONFIG_SOFTMMU
 # Workaround for http://gcc.gnu.org/PR55489, see configure.
 %/translate.o: QEMU_CFLAGS += $(TRANSLATE_OPT_CFLAGS)

+ifdef CONFIG_LINUX_USER
+dummy := $(call unnest-vars,,obj-y obj-binfmt-y)
+else
 dummy := $(call unnest-vars,,obj-y)
+endif
 all-obj-y := $(obj-y)

 target-obj-y :=
@@ -198,6 +208,9 @@ ifdef CONFIG_DARWIN
 	$(call quiet-command,SetFile -a C $@,"  SETFILE $(TARGET_DIR)$@")
 endif

+$(QEMU_PROG)-binfmt: $(obj-binfmt-y)
+	$(call LINK,$^)
+
 gdbstub-xml.c: $(TARGET_XML_FILES) $(SRC_PATH)/scripts/feature_to_c.sh
 	$(call quiet-command,rm -f $@ && $(SHELL) $(SRC_PATH)/scripts/feature_to_c.sh $@ $(TARGET_XML_FILES),"  GEN   $(TARGET_DIR)$@")

--- a/2
+++ b/2
@@ -1 +1 @@
-2.6.0
+2.6.2
--- a/arch_init.c
+++ b/arch_init.c
@@ -233,25 +233,6 @@ void audio_init(void)
    }
 }

-int qemu_uuid_parse(const char *str, uint8_t *uuid)
-{
-    int ret;
-
-    if (strlen(str) != 36) {
-        return -1;
-    }
-
-    ret = sscanf(str, UUID_FMT, &uuid[0], &uuid[1], &uuid[2], &uuid[3],
-                 &uuid[4], &uuid[5], &uuid[6], &uuid[7], &uuid[8], &uuid[9],
-                 &uuid[10], &uuid[11], &uuid[12], &uuid[13], &uuid[14],
-                 &uuid[15]);
-
-    if (ret != 16) {
-        return -1;
-    }
-    return 0;
-}
-
 void do_acpitable_option(const QemuOpts *opts)
 {
 #ifdef TARGET_I386
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -2019,6 +2019,8 @@ void AUD_del_capture (CaptureVoiceOut *cap, void *cb_opaque)
                    sw = sw1;
                }
                QLIST_REMOVE (cap, entries);
+                g_free (cap->hw.mix_buf);
+                g_free (cap->buf);
                g_free (cap);
            }
            return;
--- a/audio/mixeng.c
+++ b/audio/mixeng.c
@@ -270,7 +270,7 @@ f_sample *mixeng_clip[2][2][2][3] = {
 * August 21, 1998
 * Copyright 1998 Fabrice Bellard.
 *
- * [Rewrote completly the code of Lance Norskog And Sundry
+ * [Rewrote completely the code of Lance Norskog And Sundry
 * Contributors with a more efficient algorithm.]
 *
 * This source code is freely redistributable and may be used for
--- a/audio/ossaudio.c
+++ b/audio/ossaudio.c
@@ -898,7 +898,7 @@ static struct audio_option oss_options[] = {
        .name  = "EXCLUSIVE",
        .tag   = AUD_OPT_BOOL,
        .valp  = &glob_conf.exclusive,
-        .descr = "Open device in exclusive mode (vmix wont work)"
+        .descr = "Open device in exclusive mode (vmix won't work)"
    },
 #ifdef USE_DSP_POLICY
    {
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -21,6 +21,8 @@ block-obj-$(CONFIG_GLUSTERFS) += gluster.o
 block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
 block-obj-$(CONFIG_LIBSSH2) += ssh.o
 block-obj-y += accounting.o dirty-bitmap.o
+block-obj-y += dictzip.o
+block-obj-y += tar.o
 block-obj-y += write-threshold.o

 block-obj-y += crypto.o
--- a/block/backup.c
+++ b/block/backup.c
@@ -504,6 +504,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
 {
    int64_t len;
    BlockDriverInfo bdi;
+    BackupBlockJob *job = NULL;
    int ret;

    assert(bs);
@@ -568,8 +569,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
        goto error;
    }

-    BackupBlockJob *job = block_job_create(&backup_job_driver, bs, speed,
-                                           cb, opaque, errp);
+    job = block_job_create(&backup_job_driver, bs, speed, cb, opaque, errp);
    if (!job) {
        goto error;
    }
@@ -610,4 +610,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
    if (sync_bitmap) {
        bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL);
    }
+    if (job) {
+        block_job_unref(&job->common);
+    }
 }
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1,7 +1,7 @@
 /*
 * QEMU Block backends
 *
- * Copyright (C) 2014 Red Hat, Inc.
+ * Copyright (C) 2014-2016 Red Hat, Inc.
 *
 * Authors:
 *  Markus Armbruster <armbru@redhat.com>,
@@ -36,6 +36,7 @@ struct BlockBackend {
    QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */

    void *dev;                  /* attached device model, if any */
+    bool xen_dev;               /* true if dev is a Xen disk */
    /* TODO change to DeviceState when all users are qdevified */
    const BlockDevOps *dev_ops;
    void *dev_opaque;
@@ -460,11 +461,12 @@ int blk_attach_dev(BlockBackend *blk, void *dev)
 * @blk must not have a device model attached already.
 * TODO qdevified devices don't use this, remove when devices are qdevified
 */
-void blk_attach_dev_nofail(BlockBackend *blk, void *dev)
+void blk_attach_dev_nofail(BlockBackend *blk, void *dev, bool xen_dev)
 {
    if (blk_attach_dev(blk, dev) < 0) {
        abort();
    }
+    blk->xen_dev = xen_dev;
 }

 /*
@@ -790,21 +792,21 @@ int blk_read(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
    return blk_rw(blk, sector_num, buf, nb_sectors, blk_read_entry, 0);
 }

-int blk_read_unthrottled(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
-                         int nb_sectors)
+int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
+                          int count)
 {
    BlockDriverState *bs = blk_bs(blk);
    bool enabled;
    int ret;

-    ret = blk_check_request(blk, sector_num, nb_sectors);
+    ret = blk_check_byte_request(blk, offset, count);
    if (ret < 0) {
        return ret;
    }

    enabled = bs->io_limits_enabled;
    bs->io_limits_enabled = false;
-    ret = blk_read(blk, sector_num, buf, nb_sectors);
+    ret = blk_pread(blk, offset, buf, count);
    bs->io_limits_enabled = enabled;
    return ret;
 }
@@ -816,11 +818,11 @@ int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf,
                  blk_write_entry, 0);
 }

-int blk_write_zeroes(BlockBackend *blk, int64_t sector_num,
-                     int nb_sectors, BdrvRequestFlags flags)
+int blk_write_zeroes(BlockBackend *blk, int64_t offset,
+                     int count, BdrvRequestFlags flags)
 {
-    return blk_rw(blk, sector_num, NULL, nb_sectors, blk_write_entry,
-                  flags | BDRV_REQ_ZERO_WRITE);
+    return blk_prw(blk, offset, NULL, count, blk_write_entry,
+                   flags | BDRV_REQ_ZERO_WRITE);
 }

 static void error_callback_bh(void *opaque)
@@ -932,18 +934,12 @@ static void blk_aio_write_entry(void *opaque)
    blk_aio_complete(acb);
 }

-BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t sector_num,
-                                 int nb_sectors, BdrvRequestFlags flags,
+BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t offset,
+                                 int count, BdrvRequestFlags flags,
                                 BlockCompletionFunc *cb, void *opaque)
 {
-    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
-        return blk_abort_aio_request(blk, cb, opaque, -EINVAL);
-    }
-
-    return blk_aio_prwv(blk, sector_num << BDRV_SECTOR_BITS,
-                        nb_sectors << BDRV_SECTOR_BITS, NULL,
-                        blk_aio_write_entry, flags | BDRV_REQ_ZERO_WRITE,
-                        cb, opaque);
+    return blk_aio_prwv(blk, offset, count, NULL, blk_aio_write_entry,
+                        flags | BDRV_REQ_ZERO_WRITE, cb, opaque);
 }

 int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count)
@@ -955,9 +951,11 @@ int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count)
    return count;
 }

-int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count)
+int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count,
+               BdrvRequestFlags flags)
 {
-    int ret = blk_prw(blk, offset, (void*) buf, count, blk_write_entry, 0);
+    int ret = blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
+                      flags);
    if (ret < 0) {
        return ret;
    }
@@ -1004,6 +1002,14 @@ BlockAIOCB *blk_aio_readv(BlockBackend *blk, int64_t sector_num,
                        blk_aio_read_entry, 0, cb, opaque);
 }

+/*
+ * Byte-based vectored read: forwards to blk_aio_prwv() with
+ * blk_aio_read_entry as the entry point.  The request length is the total
+ * size of @qiov.
+ */
+BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
+                           QEMUIOVector *qiov, BdrvRequestFlags flags,
+                           BlockCompletionFunc *cb, void *opaque)
+{
+    BlockAIOCB *acb;
+
+    acb = blk_aio_prwv(blk, offset, qiov->size, qiov, blk_aio_read_entry,
+                       flags, cb, opaque);
+    return acb;
+}
+
 BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num,
                           QEMUIOVector *iov, int nb_sectors,
                           BlockCompletionFunc *cb, void *opaque)
@@ -1017,6 +1023,14 @@ BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num,
                        blk_aio_write_entry, 0, cb, opaque);
 }

+/*
+ * Byte-based vectored write: forwards to blk_aio_prwv() with
+ * blk_aio_write_entry as the entry point.  The request length is the total
+ * size of @qiov.
+ */
+BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
+                            QEMUIOVector *qiov, BdrvRequestFlags flags,
+                            BlockCompletionFunc *cb, void *opaque)
+{
+    BlockAIOCB *acb;
+
+    acb = blk_aio_prwv(blk, offset, qiov->size, qiov, blk_aio_write_entry,
+                       flags, cb, opaque);
+    return acb;
+}
+
 BlockAIOCB *blk_aio_flush(BlockBackend *blk,
                          BlockCompletionFunc *cb, void *opaque)
 {
@@ -1444,15 +1458,10 @@ void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
    return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
 }

-int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t sector_num,
-                                     int nb_sectors, BdrvRequestFlags flags)
+int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t offset,
+                                     int count, BdrvRequestFlags flags)
 {
-    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
-        return -EINVAL;
-    }
-
-    return blk_co_pwritev(blk, sector_num << BDRV_SECTOR_BITS,
-                          nb_sectors << BDRV_SECTOR_BITS, NULL,
+    return blk_co_pwritev(blk, offset, count, NULL,
                          flags | BDRV_REQ_ZERO_WRITE);
 }

@@ -1476,6 +1485,13 @@ int blk_truncate(BlockBackend *blk, int64_t offset)
    return bdrv_truncate(blk_bs(blk), offset);
 }

+/*
+ * Resize notification for devices attached through the legacy path: only
+ * backends flagged as Xen disks get xen_blk_resize_cb() called on their
+ * attached device; every other backend is left alone.
+ */
+void blk_legacy_resize_cb(BlockBackend *blk)
+{
+    if (!blk->xen_dev) {
+        return;
+    }
+
+    xen_blk_resize_cb(blk->dev);
+}
+
 int blk_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
 {
    int ret = blk_check_request(blk, sector_num, nb_sectors);
@@ -1611,25 +1627,3 @@ int blk_commit_all(void)
    }
    return 0;
 }
-
-int blk_flush_all(void)
-{
-    BlockBackend *blk = NULL;
-    int result = 0;
-
-    while ((blk = blk_all_next(blk)) != NULL) {
-        AioContext *aio_context = blk_get_aio_context(blk);
-        int ret;
-
-        aio_context_acquire(aio_context);
-        if (blk_is_inserted(blk)) {
-            ret = blk_flush(blk);
-            if (ret < 0 && !result) {
-                result = ret;
-            }
-        }
-        aio_context_release(aio_context);
-    }
-
-    return result;
-}
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -91,7 +91,7 @@ static ssize_t block_crypto_write_func(QCryptoBlock *block,
    struct BlockCryptoCreateData *data = opaque;
    ssize_t ret;

-    ret = blk_pwrite(data->blk, offset, buf, buflen);
+    ret = blk_pwrite(data->blk, offset, buf, buflen, 0);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not write encryption header");
        return ret;
--- a/block/curl.c
+++ b/block/curl.c
@@ -67,7 +67,6 @@ static CURLMcode __curl_multi_socket_action(CURLM *multi_handle,

 #define CURL_NUM_STATES 8
 #define CURL_NUM_ACB    8
-#define SECTOR_SIZE     512
 #define READ_AHEAD_DEFAULT (256 * 1024)
 #define CURL_TIMEOUT_DEFAULT 5
 #define CURL_TIMEOUT_MAX 10000
@@ -100,12 +99,17 @@ typedef struct CURLAIOCB {
    size_t end;
 } CURLAIOCB;

+/* One socket descriptor tracked for a CURLState (see CURLState.sockets). */
+typedef struct CURLSocket {
+    int fd;                       /* descriptor reported to curl_sock_cb() */
+    QLIST_ENTRY(CURLSocket) next; /* linkage in the owning state's list */
+} CURLSocket;
+
 typedef struct CURLState
 {
    struct BDRVCURLState *s;
    CURLAIOCB *acb[CURL_NUM_ACB];
    CURL *curl;
-    curl_socket_t sock_fd;
+    QLIST_HEAD(, CURLSocket) sockets;
    char *orig_buf;
    size_t buf_start;
    size_t buf_off;
@@ -159,11 +163,28 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
 {
    BDRVCURLState *s;
    CURLState *state = NULL;
+    CURLSocket *socket;
+
    curl_easy_getinfo(curl, CURLINFO_PRIVATE, (char **)&state);
-    state->sock_fd = fd;
    s = state->s;

-    DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd);
+    QLIST_FOREACH(socket, &state->sockets, next) {
+        if (socket->fd == fd) {
+            if (action == CURL_POLL_REMOVE) {
+                QLIST_REMOVE(socket, next);
+                g_free(socket);
+            }
+            break;
+        }
+    }
+    if (!socket) {
+        socket = g_new0(CURLSocket, 1);
+        socket->fd = fd;
+        QLIST_INSERT_HEAD(&state->sockets, socket, next);
+    }
+    socket = NULL;
+
+    DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, (int)fd);
    switch (action) {
        case CURL_POLL_IN:
            aio_set_fd_handler(s->aio_context, fd, false,
@@ -208,12 +229,13 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)

    DPRINTF("CURL: Just reading %zd bytes\n", realsize);

-    if (!s || !s->orig_buf)
-        return 0;
+    if (!s || !s->orig_buf) {
+        goto read_end;
+    }

    if (s->buf_off >= s->buf_len) {
        /* buffer full, read nothing */
-        return 0;
+        goto read_end;
    }
    realsize = MIN(realsize, s->buf_len - s->buf_off);
    memcpy(s->orig_buf + s->buf_off, ptr, realsize);
@@ -226,15 +248,26 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
            continue;

        if ((s->buf_off >= acb->end)) {
+            size_t request_length = acb->nb_sectors * BDRV_SECTOR_SIZE;
+
            qemu_iovec_from_buf(acb->qiov, 0, s->orig_buf + acb->start,
                                acb->end - acb->start);
+
+            if (acb->end - acb->start < request_length) {
+                size_t offset = acb->end - acb->start;
+                qemu_iovec_memset(acb->qiov, offset, 0,
+                                  request_length - offset);
+            }
+
            acb->common.cb(acb->common.opaque, 0);
            qemu_aio_unref(acb);
            s->acb[i] = NULL;
        }
    }

-    return realsize;
+read_end:
+    /* curl will error out if we do not return this value */
+    return size * nmemb;
 }

 static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
@@ -242,6 +275,8 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
 {
    int i;
    size_t end = start + len;
+    size_t clamped_end = MIN(end, s->len);
+    size_t clamped_len = clamped_end - start;

    for (i=0; i<CURL_NUM_STATES; i++) {
        CURLState *state = &s->states[i];
@@ -256,12 +291,15 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
        // Does the existing buffer cover our section?
        if ((start >= state->buf_start) &&
            (start <= buf_end) &&
-            (end >= state->buf_start) &&
-            (end <= buf_end))
+            (clamped_end >= state->buf_start) &&
+            (clamped_end <= buf_end))
        {
            char *buf = state->orig_buf + (start - state->buf_start);

-            qemu_iovec_from_buf(acb->qiov, 0, buf, len);
+            qemu_iovec_from_buf(acb->qiov, 0, buf, clamped_len);
+            if (clamped_len < len) {
+                qemu_iovec_memset(acb->qiov, clamped_len, 0, len - clamped_len);
+            }
            acb->common.cb(acb->common.opaque, 0);

            return FIND_RET_OK;
@@ -271,13 +309,13 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
        if (state->in_use &&
            (start >= state->buf_start) &&
            (start <= buf_fend) &&
-            (end >= state->buf_start) &&
-            (end <= buf_fend))
+            (clamped_end >= state->buf_start) &&
+            (clamped_end <= buf_fend))
        {
            int j;

            acb->start = start - state->buf_start;
-            acb->end = acb->start + len;
+            acb->end = acb->start + clamped_len;

            for (j=0; j<CURL_NUM_ACB; j++) {
                if (!state->acb[j]) {
@@ -347,6 +385,7 @@ static void curl_multi_check_completion(BDRVCURLState *s)
 static void curl_multi_do(void *arg)
 {
    CURLState *s = (CURLState *)arg;
+    CURLSocket *socket, *next_socket;
    int running;
    int r;

@@ -354,10 +393,13 @@ static void curl_multi_do(void *arg)
        return;
    }

-    do {
-        r = curl_multi_socket_action(s->s->multi, s->sock_fd, 0, &running);
-    } while(r == CURLM_CALL_MULTI_PERFORM);
-
+    /* Need to use _SAFE because curl_multi_socket_action() may trigger
+     * curl_sock_cb() which might modify this list */
+    QLIST_FOREACH_SAFE(socket, &s->sockets, next, next_socket) {
+        do {
+            r = curl_multi_socket_action(s->s->multi, socket->fd, 0, &running);
+        } while (r == CURLM_CALL_MULTI_PERFORM);
+    }
 }

 static void curl_multi_read(void *arg)
@@ -461,6 +503,7 @@ static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s)
 #endif
    }

+    QLIST_INIT(&state->sockets);
    state->s = s;

    return state;
@@ -470,6 +513,14 @@ static void curl_clean_state(CURLState *s)
 {
    if (s->s->multi)
        curl_multi_remove_handle(s->s->multi, s->curl);
+
+    while (!QLIST_EMPTY(&s->sockets)) {
+        CURLSocket *socket = QLIST_FIRST(&s->sockets);
+
+        QLIST_REMOVE(socket, next);
+        g_free(socket);
+    }
+
    s->in_use = 0;
 }

@@ -719,12 +770,12 @@ static void curl_readv_bh_cb(void *p)
    qemu_bh_delete(acb->bh);
    acb->bh = NULL;

-    size_t start = acb->sector_num * SECTOR_SIZE;
+    size_t start = acb->sector_num * BDRV_SECTOR_SIZE;
    size_t end;

    // In case we have the requested data already (e.g. read-ahead),
    // we can just call the callback and be done.
-    switch (curl_find_buf(s, start, acb->nb_sectors * SECTOR_SIZE, acb)) {
+    switch (curl_find_buf(s, start, acb->nb_sectors * BDRV_SECTOR_SIZE, acb)) {
        case FIND_RET_OK:
            qemu_aio_unref(acb);
            // fall through
@@ -743,13 +794,13 @@ static void curl_readv_bh_cb(void *p)
    }

    acb->start = 0;
-    acb->end = (acb->nb_sectors * SECTOR_SIZE);
+    acb->end = MIN(acb->nb_sectors * BDRV_SECTOR_SIZE, s->len - start);

    state->buf_off = 0;
    g_free(state->orig_buf);
    state->buf_start = start;
-    state->buf_len = acb->end + s->readahead_size;
-    end = MIN(start + state->buf_len, s->len) - 1;
+    state->buf_len = MIN(acb->end + s->readahead_size, s->len - start);
+    end = start + state->buf_len - 1;
    state->orig_buf = g_try_malloc(state->buf_len);
    if (state->buf_len && state->orig_buf == NULL) {
        curl_clean_state(state);
@@ -760,8 +811,8 @@ static void curl_readv_bh_cb(void *p)
    state->acb[0] = acb;

    snprintf(state->range, 127, "%zd-%zd", start, end);
-    DPRINTF("CURL (AIO): Reading %d at %zd (%s)\n",
-            (acb->nb_sectors * SECTOR_SIZE), start, state->range);
+    DPRINTF("CURL (AIO): Reading %llu at %zd (%s)\n",
+            (acb->nb_sectors * BDRV_SECTOR_SIZE), start, state->range);
    curl_easy_setopt(state->curl, CURLOPT_RANGE, state->range);

    curl_multi_add_handle(s->multi, state->curl);
--- a/block/dictzip.c
+++ b/block/dictzip.c
@@ -0,0 +1,586 @@
+/*
+ * DictZip Block driver for dictzip enabled gzip files
+ *
+ * Use the "dictzip" tool from the "dictd" package to create gzip files that
+ * contain the extra DictZip headers.
+ *
+ * dictzip(1) is a compression program which creates compressed files in the
+ * gzip format (see RFC 1952). However, unlike gzip(1), dictzip(1) compresses
+ * the file in pieces and stores an index to the pieces in the gzip header.
+ * This allows random access to the file at the granularity of the compressed
+ * pieces (currently about 64kB) while maintaining good compression ratios
+ * (within 5% of the expected ratio for dictionary data).
+ * dictd(8) uses files stored in this format.
+ *
+ * For details on DictZip see http://dict.org/.
+ *
+ * Copyright (c) 2009 Alexander Graf <agraf@suse.de>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include <zlib.h>
+
+/* Uncomment to turn the dprintf() calls in this file into real output. */
+// #define DEBUG
+
+/*
+ * dprintf(): printf() with a "dzip: " prefix when DEBUG is defined,
+ * otherwise an empty statement.
+ * NOTE(review): the name matches POSIX dprintf(3); confirm no header in
+ * scope declares it in a way that conflicts with this macro.
+ */
+#ifdef DEBUG
+#define dprintf(fmt, ...) do { printf("dzip: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define dprintf(fmt, ...) do { } while (0)
+#endif
+
+#define SECTOR_SIZE 512
+#define Z_STREAM_COUNT 4
+#define CACHE_COUNT 20
+
+/* magic values */
+
+#define GZ_MAGIC1     0x1f
+#define GZ_MAGIC2     0x8b
+#define DZ_MAGIC1      'R'
+#define DZ_MAGIC2      'A'
+
+#define GZ_FEXTRA     0x04      /* Optional field (random access index)    */
+#define GZ_FNAME      0x08      /* Original name                           */
+#define GZ_COMMENT    0x10      /* Zero-terminated, human-readable comment */
+#define GZ_FHCRC      0x02      /* Header CRC16                            */
+
+/* offsets */
+
+#define GZ_ID            0      /* GZ_MAGIC (16bit)                        */
+#define GZ_FLG           3      /* FLaGs (see above)                       */
+#define GZ_XLEN         10      /* eXtra LENgth (16bit)                    */
+#define GZ_SI           12      /* Subfield ID (16bit)                     */
+#define GZ_VERSION      16      /* Version for subfield format             */
+#define GZ_CHUNKSIZE    18      /* Chunk size (16bit)                      */
+#define GZ_CHUNKCNT     20      /* Number of chunks (16bit)                */
+#define GZ_RNDDATA      22      /* Random access data (16bit)              */
+
+#define GZ_99_CHUNKSIZE 18      /* Chunk size (32bit)                      */
+#define GZ_99_CHUNKCNT  22      /* Number of chunks (32bit)                */
+#define GZ_99_FILESIZE  26      /* Size of unpacked file (64bit)           */
+#define GZ_99_RNDDATA   34      /* Random access data (32bit)              */
+
+struct BDRVDictZipState;
+
+/*
+ * State carried by one dictzip AIO request; see the per-field notes.
+ * The code that fills these fields in is outside this view.
+ */
+typedef struct DictZipAIOCB {
+    BlockAIOCB common;
+    struct BDRVDictZipState *s;
+    QEMUIOVector *qiov;          /* QIOV of the original request */
+    QEMUIOVector *qiov_gz;       /* QIOV of the gz subrequest */
+    QEMUBH *bh;                  /* BH for cache */
+    z_stream *zStream;           /* stream to use for decoding */
+    int zStream_id;              /* stream id of the above pointer */
+    size_t start;                /* offset into the uncompressed file */
+    size_t len;                  /* uncompressed bytes to read */
+    uint8_t *gzipped;            /* the gzipped data */
+    uint8_t *buf;                /* cached result */
+    size_t gz_len;               /* amount of gzip data */
+    size_t gz_start;             /* uncompressed starting point of gzip data */
+    uint64_t offset;             /* offset for "start" into the uncompressed chunk */
+    int chunks_len;              /* amount of uncompressed data in all gzip data */
+} DictZipAIOCB;
+
+/*
+ * One entry of BDRVDictZipState.cache.
+ * NOTE(review): the cache users are not visible here; the field meanings
+ * below are assumptions to confirm against them.
+ */
+typedef struct dict_cache {
+    size_t start;   /* presumably the uncompressed offset the data starts at */
+    size_t len;     /* presumably the number of bytes held in buf */
+    uint8_t *buf;   /* presumably the decoded data itself */
+} DictCache;
+
+/* Per-image driver state (bs->opaque), filled in by dictzip_open(). */
+typedef struct BDRVDictZipState {
+    BlockDriverState *hd;             /* underlying file opened by dictzip_open() */
+    z_stream zStream[Z_STREAM_COUNT]; /* inflate streams set up via start_zStream() */
+    DictCache cache[CACHE_COUNT];     /* NOTE(review): users not visible here */
+    int cache_index;                  /* NOTE(review): presumably next cache slot; confirm */
+    uint8_t  stream_in_use;           /* NOTE(review): presumably marks busy zStream slots; confirm */
+    uint64_t chunk_len;               /* chunk size read from the dictzip header */
+    uint32_t chunk_cnt;               /* chunk count read from the dictzip header */
+    uint16_t *chunks;                 /* random access data as read from the file (16-bit entries, version 1) */
+    uint32_t *chunks32;               /* same buffer as chunks, viewed as 32-bit entries (version 99) */
+    uint64_t *offsets;                /* file offset at which each compressed chunk starts */
+    int64_t file_len;                 /* length of the uncompressed file */
+} BDRVDictZipState;
+
+/*
+ * Put @zStream into a pristine state (default allocators, no input or output
+ * buffers attached) and initialize it for inflating.  The negative window
+ * size handed to inflateInit2() selects raw deflate data, i.e. no zlib or
+ * gzip wrapper is expected around the compressed bytes.
+ *
+ * Returns the inflateInit2() result.
+ */
+static int start_zStream(z_stream *zStream)
+{
+    /* nothing queued in either direction yet */
+    zStream->next_in = Z_NULL;
+    zStream->avail_in = 0;
+    zStream->next_out = Z_NULL;
+    zStream->avail_out = 0;
+
+    /* let zlib fall back to its built-in allocator */
+    zStream->zalloc = Z_NULL;
+    zStream->zfree = Z_NULL;
+    zStream->opaque = Z_NULL;
+
+    return inflateInit2(zStream, -15);
+}
+
+/*
+ * Options absorbed by dictzip_open().  The only one is "filename", which may
+ * carry a "dzip://" or "dzip:" prefix that dictzip_open() strips.
+ */
+static QemuOptsList runtime_opts = {
+    .name = "dzip",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+    .desc = {
+        {
+            .name = "filename",
+            .type = QEMU_OPT_STRING,
+            .help = "URL to the dictzip file",
+        },
+        { /* end of list */ }
+    },
+};
+
+/*
+ * Open a dictzip image.
+ *
+ * Absorbs the "filename" option (an optional "dzip://" or "dzip:" prefix is
+ * stripped), opens the underlying file into s->hd, initializes the inflate
+ * streams and then parses the gzip header together with the dictzip "RA"
+ * extra field.  On success the chunk size, chunk count, the per-chunk
+ * compressed sizes (s->chunks / s->chunks32), the file offset of every chunk
+ * (s->offsets) and the uncompressed length (s->file_len) are filled in.
+ *
+ * headerLength always holds the index of the last header byte seen so far,
+ * so the next header element and finally the compressed data start at
+ * headerLength + 1.
+ *
+ * Returns 0 on success.  If the underlying file cannot be opened the
+ * bdrv_open() error is returned and propagated to @errp; every other failure
+ * prints a message to stderr, releases what was set up and returns -EINVAL.
+ */
+static int dictzip_open(BlockDriverState *bs, QDict *options, int flags, Error **errp)
+{
+    BDRVDictZipState *s = bs->opaque;
+    const char *err = "Unknown (read error?)";
+    uint8_t magic[2];
+    char buf[100];
+    uint8_t header_flags;
+    uint16_t chunk_len16;
+    uint16_t chunk_cnt16;
+    uint32_t chunk_len32;
+    uint16_t header_ver;
+    uint16_t tmp_short;
+    uint64_t offset;
+    size_t entry_size;
+    int chunks_len;
+    int headerLength = GZ_XLEN - 1;
+    int rnd_offs;
+    int streams_ready = 0;
+    int ret;
+    int i;
+    QemuOpts *opts;
+    Error *local_err = NULL;
+    const char *filename;
+
+    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (local_err != NULL) {
+        error_propagate(errp, local_err);
+        err = "Invalid options";
+        goto fail;
+    }
+
+    filename = qemu_opt_get(opts, "filename");
+    if (!filename) {
+        /* the prefix checks below must not be handed a NULL string */
+        err = "No filename given";
+        goto fail;
+    }
+
+    if (!strncmp(filename, "dzip://", 7)) {
+        filename += 7;
+    } else if (!strncmp(filename, "dzip:", 5)) {
+        filename += 5;
+    }
+
+    ret = bdrv_open(&s->hd, filename, NULL, NULL, flags | BDRV_O_PROTOCOL, &local_err);
+    if (ret < 0) {
+        error_propagate(errp, local_err);
+        qemu_opts_del(opts);
+        return ret;
+    }
+
+    /* initialize zlib streams */
+    for (i = 0; i < Z_STREAM_COUNT; i++) {
+        if (start_zStream(&s->zStream[i]) != Z_OK) {
+            /* zlib does not always provide a message */
+            err = s->zStream[i].msg ? s->zStream[i].msg
+                                    : "Could not initialize zlib";
+            goto fail;
+        }
+        streams_ready++;
+    }
+
+    /* gzip header */
+    if (bdrv_pread(s->hd, GZ_ID, &magic, sizeof(magic)) != sizeof(magic)) {
+        goto fail;
+    }
+
+    if (!((magic[0] == GZ_MAGIC1) && (magic[1] == GZ_MAGIC2))) {
+        err = "No gzip file";
+        goto fail;
+    }
+
+    /* dzip header */
+    if (bdrv_pread(s->hd, GZ_FLG, &header_flags, 1) != 1) {
+        goto fail;
+    }
+
+    if (!(header_flags & GZ_FEXTRA)) {
+        err = "Not a dictzip file (wrong flags)";
+        goto fail;
+    }
+
+    /* extra length */
+    if (bdrv_pread(s->hd, GZ_XLEN, &tmp_short, 2) != 2) {
+        goto fail;
+    }
+
+    headerLength += le16_to_cpu(tmp_short) + 2;
+
+    /* DictZip magic */
+    if (bdrv_pread(s->hd, GZ_SI, &magic, 2) != 2) {
+        goto fail;
+    }
+
+    if (magic[0] != DZ_MAGIC1 || magic[1] != DZ_MAGIC2) {
+        err = "Not a dictzip file (missing extra magic)";
+        goto fail;
+    }
+
+    /* DictZip version */
+    if (bdrv_pread(s->hd, GZ_VERSION, &header_ver, 2) != 2) {
+        goto fail;
+    }
+
+    header_ver = le16_to_cpu(header_ver);
+
+    switch (header_ver) {
+        case 1: /* Normal DictZip */
+            /* chunk size */
+            if (bdrv_pread(s->hd, GZ_CHUNKSIZE, &chunk_len16, 2) != 2) {
+                goto fail;
+            }
+
+            s->chunk_len = le16_to_cpu(chunk_len16);
+
+            /* chunk count */
+            if (bdrv_pread(s->hd, GZ_CHUNKCNT, &chunk_cnt16, 2) != 2) {
+                goto fail;
+            }
+
+            s->chunk_cnt = le16_to_cpu(chunk_cnt16);
+            entry_size = sizeof(uint16_t);
+            rnd_offs = GZ_RNDDATA;
+            break;
+        case 99: /* Special Alex pigz version */
+            /* chunk size */
+            if (bdrv_pread(s->hd, GZ_99_CHUNKSIZE, &chunk_len32, 4) != 4) {
+                goto fail;
+            }
+
+            s->chunk_len = le32_to_cpu(chunk_len32);
+            dprintf("chunk len [%#x] = %u\n", GZ_99_CHUNKSIZE,
+                    le32_to_cpu(chunk_len32));
+
+            /* chunk count */
+            if (bdrv_pread(s->hd, GZ_99_CHUNKCNT, &s->chunk_cnt, 4) != 4) {
+                goto fail;
+            }
+
+            s->chunk_cnt = le32_to_cpu(s->chunk_cnt);
+
+            dprintf("chunk len | count = %"PRId64" | %d\n", s->chunk_len, s->chunk_cnt);
+
+            /* file size */
+            if (bdrv_pread(s->hd, GZ_99_FILESIZE, &s->file_len, 8) != 8) {
+                goto fail;
+            }
+
+            s->file_len = le64_to_cpu(s->file_len);
+            entry_size = sizeof(uint32_t);
+            rnd_offs = GZ_99_RNDDATA;
+            break;
+        default:
+            err = "Invalid DictZip version";
+            goto fail;
+    }
+
+    /*
+     * The chunk count comes straight from the file.  Bound it so that neither
+     * the int-sized table length nor the offsets allocation further down can
+     * overflow.
+     */
+    if (s->chunk_cnt > INT_MAX / sizeof(*s->offsets)) {
+        err = "Too many chunks";
+        goto fail;
+    }
+    chunks_len = entry_size * s->chunk_cnt;
+
+    /* random access data */
+    s->chunks = g_try_malloc(chunks_len);
+    if (chunks_len && !s->chunks) {
+        err = "Out of memory";
+        goto fail;
+    }
+    if (header_ver == 99) {
+        s->chunks32 = (uint32_t *)s->chunks;
+    }
+
+    if (bdrv_pread(s->hd, rnd_offs, s->chunks, chunks_len) != chunks_len) {
+        goto fail;
+    }
+
+    /* orig filename */
+    if (header_flags & GZ_FNAME) {
+        if (bdrv_pread(s->hd, headerLength + 1, buf, sizeof(buf)) != sizeof(buf)) {
+            goto fail;
+        }
+
+        /* give up if the terminator is not within the bytes we read */
+        if (!memchr(buf, '\0', sizeof(buf))) {
+            goto fail;
+        }
+
+        headerLength += strlen(buf) + 1;
+
+        dprintf("filename: %s\n", buf);
+    }
+
+    /* comment field */
+    if (header_flags & GZ_COMMENT) {
+        /* like the filename, the comment starts right after the last header byte */
+        if (bdrv_pread(s->hd, headerLength + 1, buf, sizeof(buf)) != sizeof(buf)) {
+            goto fail;
+        }
+
+        /* give up if the terminator is not within the bytes we read */
+        if (!memchr(buf, '\0', sizeof(buf))) {
+            goto fail;
+        }
+
+        headerLength += strlen(buf) + 1;
+
+        dprintf("comment: %s\n", buf);
+    }
+
+    if (header_flags & GZ_FHCRC) {
+        headerLength += 2;
+    }
+
+    /* uncompressed file length*/
+    if (!s->file_len) {
+        uint32_t file_len;
+
+        if (bdrv_pread(s->hd, bdrv_getlength(s->hd) - 4, &file_len, 4) != 4) {
+            goto fail;
+        }
+
+        s->file_len = le32_to_cpu(file_len);
+    }
+
+    /* compute offsets */
+    s->offsets = g_try_malloc(sizeof(*s->offsets) * s->chunk_cnt);
+    if (s->chunk_cnt && !s->offsets) {
+        err = "Out of memory";
+        goto fail;
+    }
+
+    for (offset = headerLength + 1, i = 0; i < s->chunk_cnt; i++) {
+        s->offsets[i] = offset;
+        switch (header_ver) {
+        case 1:
+            offset += le16_to_cpu(s->chunks[i]);
+            break;
+        case 99:
+            offset += le32_to_cpu(s->chunks32[i]);
+            break;
+        }
+
+        dprintf("chunk %#"PRIx64" - %#"PRIx64" = offset %#"PRIx64" -> %#"PRIx64"\n", i * s->chunk_len, (i+1) * s->chunk_len, s->offsets[i], offset);
+    }
+    qemu_opts_del(opts);
+
+    return 0;
+
+fail:
+    fprintf(stderr, "DictZip: Error opening file: %s\n", err);
+    /* release only the zlib streams that were successfully initialized */
+    for (i = 0; i < streams_ready; i++) {
+        inflateEnd(&s->zStream[i]);
+    }
+    bdrv_unref(s->hd);
+    s->hd = NULL;
+    /* do not leave pointers to freed memory behind in the driver state */
+    g_free(s->chunks);
+    s->chunks = NULL;
+    s->chunks32 = NULL;
+    qemu_opts_del(opts);
+    return -EINVAL;
+}
+
+/* Bottom half scheduled by dictzip_aio_readv() when the data is already cached */
+static void dictzip_cache_cb(void *opaque)
+{
+    DictZipAIOCB *req = opaque;
+
+    qemu_iovec_from_buf(req->qiov, 0, req->buf, req->len); /* copy cached bytes */
+    req->common.cb(req->common.opaque, 0);                 /* complete with 0 */
+    qemu_bh_delete(req->bh);
+    qemu_aio_unref(req);
+}
+
+/* Called when the read of the compressed chunks completes (ret < 0: failure,
+ * passed on to the requester). Inflates, completes the request, fills cache. */
+static void dictzip_read_cb(void *opaque, int ret)
+{
+    DictZipAIOCB *acb = (DictZipAIOCB *)opaque;
+    struct BDRVDictZipState *s = acb->s;
+    uint8_t *buf;
+    DictCache *cache;
+    int r, i;
+
+    /* nothing to inflate if the compressed data could not be read */
+    if (ret < 0) {
+        acb->common.cb(acb->common.opaque, ret);
+        goto out;
+    }
+    buf = g_malloc(acb->chunks_len);
+
+    /* try to find zlib stream for decoding */
+    acb->zStream = NULL; /* the loop condition reads it, so start from NULL */
+    do {
+        for (i = 0; i < Z_STREAM_COUNT; i++) {
+            if (!(s->stream_in_use & (1 << i))) {
+                s->stream_in_use |= (1 << i);
+                acb->zStream_id = i;
+                acb->zStream = &s->zStream[i];
+                break;
+            }
+        }
+    } while(!acb->zStream);
+
+    /* sure, we could handle more streams, but this callback should be single
+       threaded and when it's not, we really want to know! */
+    assert(i == 0);
+
+    /* uncompress the chunk */
+    acb->zStream->next_in   = acb->gzipped;
+    acb->zStream->avail_in  = acb->gz_len;
+    acb->zStream->next_out  = buf;
+    acb->zStream->avail_out = acb->chunks_len;
+    r = inflate( acb->zStream,  Z_PARTIAL_FLUSH );
+    if ( (r != Z_OK) && (r != Z_STREAM_END) ) /* NOTE(review): only logged */
+        fprintf(stderr, "Error inflating: [%d] %s\n", r, acb->zStream->msg);
+    if ( r == Z_STREAM_END )
+        inflateReset(acb->zStream);
+
+    dprintf("inflating [%d] left: %d | %d bytes\n", r, acb->zStream->avail_in, acb->zStream->avail_out);
+    s->stream_in_use &= ~(1 << acb->zStream_id);
+
+    /* notify the caller */
+    qemu_iovec_from_buf(acb->qiov, 0, buf + acb->offset, acb->len);
+    acb->common.cb(acb->common.opaque, 0);
+
+    /* fill the cache: the slot takes over buf, its previous buffer is freed */
+    cache = &s->cache[s->cache_index];
+    s->cache_index = (s->cache_index + 1) % CACHE_COUNT;
+    cache->len = 0;
+    g_free(cache->buf); /* g_free(NULL) is a no-op */
+    cache->start = acb->gz_start;
+    cache->buf = buf;
+    cache->len = acb->chunks_len;
+out:
+    /* free occupied resources */
+    g_free(acb->qiov_gz);
+    qemu_aio_unref(acb);
+}
+
+static const AIOCBInfo dictzip_aiocb_info = { /* given to qemu_aio_get() below */
+    .aiocb_size         = sizeof(DictZipAIOCB),
+};
+
+/* Read entry point: answer from the cache, else read and inflate the chunks */
+static BlockAIOCB *dictzip_aio_readv(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockCompletionFunc *cb, void *opaque)
+{
+    BDRVDictZipState *s = bs->opaque;
+    DictZipAIOCB *acb;
+    QEMUIOVector *qiov_gz;
+    struct iovec *iov;
+    uint8_t *buf;
+    size_t  start = sector_num * SECTOR_SIZE; /* requested range, in bytes */
+    size_t  len = nb_sectors * SECTOR_SIZE;
+    size_t  end = start + len;
+    size_t  gz_start;                         /* compressed range, in bytes */
+    size_t  gz_len;
+    int64_t gz_sector_num;
+    int     gz_nb_sectors;
+    int     first_chunk, last_chunk;
+    int     first_offset;
+    int     i;
+
+    acb = qemu_aio_get(&dictzip_aiocb_info, bs, cb, opaque);
+    if (!acb)
+        return NULL;
+
+    /* Search Cache */
+    for (i = 0; i < CACHE_COUNT; i++) {
+        if (!s->cache[i].len)
+            continue;
+
+        if ((start >= s->cache[i].start) &&
+            (end <= (s->cache[i].start + s->cache[i].len))) {
+            acb->buf = s->cache[i].buf + (start - s->cache[i].start);
+            acb->len = len;
+            acb->qiov = qiov;
+            acb->bh = qemu_bh_new(dictzip_cache_cb, acb);
+            qemu_bh_schedule(acb->bh); /* completion happens in the bottom half */
+
+            return &acb->common;
+        }
+    }
+
+    /* No cache hit, so read and decode the chunks first_chunk..last_chunk */
+    /* NOTE(review): the chunk indexes are not checked against s->chunk_cnt */
+    first_chunk  = start / s->chunk_len;
+    first_offset = start - first_chunk * s->chunk_len;
+    last_chunk   = end / s->chunk_len;
+
+    gz_start = s->offsets[first_chunk];
+    gz_len = 0;
+    for (i = first_chunk; i <= last_chunk; i++) {
+        if (s->chunks32)
+            gz_len += le32_to_cpu(s->chunks32[i]);
+        else
+            gz_len += le16_to_cpu(s->chunks[i]);
+    }
+
+    gz_sector_num = gz_start / SECTOR_SIZE;
+    gz_nb_sectors = (gz_len / SECTOR_SIZE);
+
+    /* account for tail and heads */
+    while ((gz_start + gz_len) > ((gz_sector_num + gz_nb_sectors) * SECTOR_SIZE))
+        gz_nb_sectors++;
+
+    /* Allocate qiov, iov and buf in one chunk so we only need to free qiov */
+    qiov_gz = g_malloc0(sizeof(QEMUIOVector) + sizeof(struct iovec) +
+                           (gz_nb_sectors * SECTOR_SIZE));
+    iov = (struct iovec *)(((char *)qiov_gz) + sizeof(QEMUIOVector));
+    buf = ((uint8_t *)iov) + sizeof(struct iovec); /* data follows the whole iovec */
+
+    /* Kick off the read by the backing file, so we can start decompressing */
+    iov->iov_base = (void *)buf;
+    iov->iov_len = gz_nb_sectors * SECTOR_SIZE; /* same size as allocated above */
+    qemu_iovec_init_external(qiov_gz, iov, 1);
+
+    dprintf("read %zd - %zd => %zd - %zd\n", start, end, gz_start, gz_start + gz_len);
+
+    acb->s = s;
+    acb->qiov = qiov;
+    acb->qiov_gz = qiov_gz;
+    acb->start = start;
+    acb->len = len;
+    acb->gzipped = buf + (gz_start % SECTOR_SIZE); /* skip the sector head */
+    acb->gz_len = gz_len;
+    acb->gz_start = first_chunk * s->chunk_len;
+    acb->offset = first_offset;
+    acb->chunks_len = (last_chunk - first_chunk + 1) * s->chunk_len;
+
+    return bdrv_aio_readv(s->hd, gz_sector_num, qiov_gz, gz_nb_sectors,
+                          dictzip_read_cb, acb);
+}
+
+/* Close hook: frees the cache buffers, the zlib streams and the chunk tables */
+static void dictzip_close(BlockDriverState *bs)
+{
+    BDRVDictZipState *state = bs->opaque;
+    int n;
+
+    /* free the buffer of every cache entry that has a non-zero length */
+    for (n = 0; n < CACHE_COUNT; n++) {
+        if (state->cache[n].len) {
+            g_free(state->cache[n].buf);
+        }
+    }
+
+    /* end every zlib inflate stream */
+    for (n = 0; n < Z_STREAM_COUNT; n++) {
+        inflateEnd(&state->zStream[n]);
+    }
+
+    /* g_free() ignores NULL, so the tables need no guard */
+    g_free(state->chunks);
+    g_free(state->offsets);
+
+    dprintf("Close\n");
+}
+
+static int64_t dictzip_getlength(BlockDriverState *bs)
+{
+    BDRVDictZipState *s = bs->opaque;
+    dprintf("getlength -> %" PRId64 "\n", (int64_t)s->file_len); /* 64-bit safe */
+    return s->file_len; /* uncompressed file length determined at open time */
+}
+
+static BlockDriver bdrv_dictzip = {
+    .format_name     = "dzip",
+    .protocol_name   = "dzip",
+    /* per-image state and the open/close/length hooks defined in this file */
+    .instance_size   = sizeof(BDRVDictZipState),
+    .bdrv_file_open  = dictzip_open,
+    .bdrv_close      = dictzip_close,
+    .bdrv_getlength  = dictzip_getlength,
+    /* the only I/O handler set in this table is the asynchronous read */
+    .bdrv_aio_readv  = dictzip_aio_readv,
+};
+
+static void dictzip_block_init(void)
+{
+    bdrv_register(&bdrv_dictzip); /* hand the driver table above to bdrv_register() */
+}
+
+block_init(dictzip_block_init);
--- a/block/io.c
+++ b/block/io.c
@@ -1481,6 +1481,30 @@ int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
                             BDRV_REQ_ZERO_WRITE | flags);
 }

+/*
+ * Flush every BDS, whether or not a BlkBackend refers to it; all of them are
+ * tried and the first error seen (or 0) is returned.
+ */
+int bdrv_flush_all(void)
+{
+    BlockDriverState *cur;
+    int first_err = 0;
+
+    for (cur = bdrv_next(NULL); cur; cur = bdrv_next(cur)) {
+        AioContext *ctx = bdrv_get_aio_context(cur);
+        int rc;
+
+        aio_context_acquire(ctx);
+        rc = bdrv_flush(cur);
+        aio_context_release(ctx);
+        if (first_err == 0 && rc < 0) {
+            first_err = rc;
+        }
+    }
+    return first_err;
+}
+
+
 typedef struct BdrvCoGetBlockStatusData {
    BlockDriverState *bs;
    BlockDriverState *base;
@@ -2487,7 +2511,7 @@ static void coroutine_fn bdrv_discard_co_entry(void *opaque)
    rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
 }

-int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
+static int __bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
                                 int nb_sectors)
 {
    BdrvTrackedRequest req;
@@ -2569,6 +2593,26 @@ out:
    return ret;
 }

+/* Discard a sector range in pieces of at most BDRV_REQUEST_MAX_SECTORS. */
+int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
+                                 int nb_sectors)
+{
+    const int limit = BDRV_REQUEST_MAX_SECTORS;
+    int64_t pos = sector_num;
+    int left = nb_sectors;
+    int ret;
+
+    /* the helper runs at least once; the first failure ends the loop */
+    do {
+        int step = left > limit ? limit : left;
+
+        ret = __bdrv_co_discard(bs, pos, step);
+        pos += step;
+        left -= step;
+    } while (ret >= 0 && left > 0);
+    return ret;
+}
+
 int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
 {
    Coroutine *co;
@@ -2595,19 +2639,6 @@ int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
    return rwco.ret;
 }

-typedef struct {
-    CoroutineIOCompletion *co;
-    QEMUBH *bh;
-} BdrvIoctlCompletionData;
-
-static void bdrv_ioctl_bh_cb(void *opaque)
-{
-    BdrvIoctlCompletionData *data = opaque;
-
-    bdrv_co_io_em_complete(data->co, -ENOTSUP);
-    qemu_bh_delete(data->bh);
-}
-
 static int bdrv_co_do_ioctl(BlockDriverState *bs, int req, void *buf)
 {
    BlockDriver *drv = bs->drv;
@@ -2625,11 +2656,8 @@ static int bdrv_co_do_ioctl(BlockDriverState *bs, int req, void *buf)

    acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co);
    if (!acb) {
-        BdrvIoctlCompletionData *data = g_new(BdrvIoctlCompletionData, 1);
-        data->bh = aio_bh_new(bdrv_get_aio_context(bs),
-                                bdrv_ioctl_bh_cb, data);
-        data->co = &co;
-        qemu_bh_schedule(data->bh);
+        co.ret = -ENOTSUP;
+        goto out;
    }
    qemu_coroutine_yield();
 out:
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -36,7 +36,7 @@
 #include "block/block_int.h"
 #include "block/scsi.h"
 #include "qemu/iov.h"
-#include "sysemu/sysemu.h"
+#include "qemu/uuid.h"
 #include "qmp-commands.h"
 #include "qapi/qmp/qstring.h"
 #include "crypto/secret.h"
@@ -425,12 +425,14 @@ static unsigned long *iscsi_allocationmap_init(IscsiLun *iscsilun)
 static void iscsi_allocationmap_set(IscsiLun *iscsilun, int64_t sector_num,
                                    int nb_sectors)
 {
+    int64_t cluster_num, nb_clusters;
    if (iscsilun->allocationmap == NULL) {
        return;
    }
-    bitmap_set(iscsilun->allocationmap,
-               sector_num / iscsilun->cluster_sectors,
-               DIV_ROUND_UP(nb_sectors, iscsilun->cluster_sectors));
+    cluster_num = sector_num / iscsilun->cluster_sectors;
+    nb_clusters = DIV_ROUND_UP(sector_num + nb_sectors,
+                               iscsilun->cluster_sectors) - cluster_num;
+    bitmap_set(iscsilun->allocationmap, cluster_num, nb_clusters);
 }

 static void iscsi_allocationmap_clear(IscsiLun *iscsilun, int64_t sector_num,
@@ -542,7 +544,7 @@ static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
    struct scsi_get_lba_status *lbas = NULL;
    struct scsi_lba_status_descriptor *lbasd = NULL;
    struct IscsiTask iTask;
-    int64_t ret;
+    int64_t ret, max_sector;

    iscsi_co_init_iscsitask(iscsilun, &iTask);

@@ -561,6 +563,7 @@ static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
        goto out;
    }

+    max_sector = iscsilun->num_blocks - sector_num;
 retry:
    if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
                                  sector_qemu2lun(sector_num, iscsilun),
@@ -605,7 +608,7 @@ retry:
        goto out;
    }

-    *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
+    *pnum = MIN(sector_lun2qemu(lbasd->num_blocks, iscsilun), max_sector);

    if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
        lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
@@ -766,6 +769,7 @@ iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
    acb->ioh->driver_status = 0;
    acb->ioh->host_status   = 0;
    acb->ioh->resid         = 0;
+    acb->ioh->status        = status;

 #define SG_ERR_DRIVER_SENSE    0x08

@@ -775,8 +779,7 @@ iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
        acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;

        acb->ioh->sb_len_wr = acb->task->datain.size - 2;
-        ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ?
-             acb->ioh->mx_sb_len : acb->ioh->sb_len_wr;
+        ss = MIN(acb->ioh->mx_sb_len, acb->ioh->sb_len_wr);
        memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
    }

@@ -837,6 +840,13 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
        return &acb->common;
    }

+    if (acb->ioh->cmd_len > SCSI_CDB_MAX_SIZE) {
+        error_report("iSCSI: ioctl error CDB exceeds max size (%d > %d)",
+                     acb->ioh->cmd_len, SCSI_CDB_MAX_SIZE);
+        qemu_aio_unref(acb);
+        return NULL;
+    }
+
    acb->task = malloc(sizeof(struct scsi_task));
    if (acb->task == NULL) {
        error_report("iSCSI: Failed to allocate task for scsi command. %s",
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -506,6 +506,7 @@ static void coroutine_fn mirror_run(void *opaque)
    MirrorBlockJob *s = opaque;
    MirrorExitData *data;
    BlockDriverState *bs = s->common.bs;
+    bool need_drain = true;
    int64_t sector_num, end, length;
    uint64_t last_pause_ns;
    BlockDriverInfo bdi;
@@ -667,11 +668,26 @@ static void coroutine_fn mirror_run(void *opaque)
             * source has dirty data to copy!
             *
             * Note that I/O can be submitted by the guest while
-             * mirror_populate runs.
+             * mirror_populate runs, so pause it now.  Before deciding
+             * whether to switch to target check one last time if I/O has
+             * come in the meanwhile, and if not flush the data to disk.
             */
            trace_mirror_before_drain(s, cnt);
-            bdrv_co_drain(bs);
+
+            bdrv_drained_begin(bs);
            cnt = bdrv_get_dirty_count(s->dirty_bitmap);
+            if (cnt > 0) {
+                bdrv_drained_end(bs);
+                continue;
+            }
+
+            /* The two disks are in sync.  Exit and report successful
+             * completion.
+             */
+            assert(QLIST_EMPTY(&bs->tracked_requests));
+            s->common.cancelled = false;
+            need_drain = false;
+            break;
        }

        ret = 0;
@@ -684,13 +700,6 @@ static void coroutine_fn mirror_run(void *opaque)
        } else if (!should_complete) {
            delay_ns = (s->in_flight == 0 && cnt == 0 ? SLICE_TIME : 0);
            block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
-        } else if (cnt == 0) {
-            /* The two disks are in sync.  Exit and report successful
-             * completion.
-             */
-            assert(QLIST_EMPTY(&bs->tracked_requests));
-            s->common.cancelled = false;
-            break;
        }
        last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    }
@@ -702,6 +711,7 @@ immediate_exit:
         * the target is a copy of the source.
         */
        assert(ret < 0 || (!s->synced && block_job_is_cancelled(&s->common)));
+        assert(need_drain);
        mirror_drain(s);
    }

@@ -716,9 +726,10 @@ immediate_exit:

    data = g_malloc(sizeof(*data));
    data->ret = ret;
-    /* Before we switch to target in mirror_exit, make sure data doesn't
-     * change. */
-    bdrv_drained_begin(s->common.bs);
+
+    if (need_drain) {
+        bdrv_drained_begin(s->common.bs);
+    }
    if (qemu_get_aio_context() == bdrv_get_aio_context(bs)) {
        /* FIXME: virtio host notifiers run on iohandler_ctx, therefore the
         * above bdrv_drained_end isn't enough to quiesce it. This is ugly, we
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -269,10 +269,6 @@ static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num,
    return -reply.error;
 }

-/* qemu-nbd has a limit of slightly less than 1M per request.  Try to
- * remain aligned to 4K. */
-#define NBD_MAX_SECTORS 2040
-
 int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num,
                        int nb_sectors, QEMUIOVector *qiov)
 {
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -20,7 +20,7 @@
 typedef struct NbdClientSession {
    QIOChannelSocket *sioc; /* The master data channel */
    QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
-    uint32_t nbdflags;
+    uint16_t nbdflags;
    off_t size;

    CoMutex send_mutex;
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -1,7 +1,7 @@
 /*
 * QEMU Block driver for native access to files on NFS shares
 *
- * Copyright (c) 2014 Peter Lieven <pl@kamp.de>
+ * Copyright (c) 2014-2016 Peter Lieven <pl@kamp.de>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -47,6 +47,7 @@ typedef struct NFSClient {
    bool has_zero_init;
    AioContext *aio_context;
    blkcnt_t st_blocks;
+    bool cache_used;
 } NFSClient;

 typedef struct NFSRPC {
@@ -278,7 +279,7 @@ static void nfs_file_close(BlockDriverState *bs)
 }

 static int64_t nfs_client_open(NFSClient *client, const char *filename,
-                               int flags, Error **errp)
+                               int flags, Error **errp, int open_flags)
 {
    int ret = -EINVAL, i;
    struct stat st;
@@ -330,12 +331,18 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename,
            nfs_set_tcp_syncnt(client->context, val);
 #ifdef LIBNFS_FEATURE_READAHEAD
        } else if (!strcmp(qp->p[i].name, "readahead")) {
+            if (open_flags & BDRV_O_NOCACHE) {
+                error_setg(errp, "Cannot enable NFS readahead "
+                                 "if cache.direct = on");
+                goto fail;
+            }
            if (val > QEMU_NFS_MAX_READAHEAD_SIZE) {
                error_report("NFS Warning: Truncating NFS readahead"
                             " size to %d", QEMU_NFS_MAX_READAHEAD_SIZE);
                val = QEMU_NFS_MAX_READAHEAD_SIZE;
            }
            nfs_set_readahead(client->context, val);
+            client->cache_used = true;
 #endif
 #ifdef LIBNFS_FEATURE_DEBUG
        } else if (!strcmp(qp->p[i].name, "debug")) {
@@ -418,7 +425,7 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
    }
    ret = nfs_client_open(client, qemu_opt_get(opts, "filename"),
                          (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
-                          errp);
+                          errp, bs->open_flags);
    if (ret < 0) {
        goto out;
    }
@@ -454,7 +461,7 @@ static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp)
    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
                          BDRV_SECTOR_SIZE);

-    ret = nfs_client_open(client, url, O_CREAT, errp);
+    ret = nfs_client_open(client, url, O_CREAT, errp, 0);
    if (ret < 0) {
        goto out;
    }
@@ -516,6 +523,11 @@ static int nfs_reopen_prepare(BDRVReopenState *state,
        return -EACCES;
    }

+    if ((state->flags & BDRV_O_NOCACHE) && client->cache_used) {
+        error_setg(errp, "Cannot disable cache if libnfs readahead is enabled");
+        return -EINVAL;
+    }
+
    /* Update cache for read-only reopens */
    if (!(state->flags & BDRV_O_RDWR)) {
        ret = nfs_fstat(client->context, client->fh, &st);
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -512,11 +512,12 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
    memset(tmp, 0, sizeof(tmp));
    memcpy(tmp, &header, sizeof(header));

-    ret = blk_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE);
+    ret = blk_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE, 0);
    if (ret < 0) {
        goto exit;
    }
-    ret = blk_write_zeroes(file, 1, bat_sectors - 1, 0);
+    ret = blk_write_zeroes(file, BDRV_SECTOR_SIZE,
+                           (bat_sectors - 1) << BDRV_SECTOR_BITS, 0);
    if (ret < 0) {
        goto exit;
    }
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -853,14 +853,14 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
    }

    /* write all the data */
-    ret = blk_pwrite(qcow_blk, 0, &header, sizeof(header));
+    ret = blk_pwrite(qcow_blk, 0, &header, sizeof(header), 0);
    if (ret != sizeof(header)) {
        goto exit;
    }

    if (backing_file) {
        ret = blk_pwrite(qcow_blk, sizeof(header),
-            backing_file, backing_filename_len);
+                         backing_file, backing_filename_len, 0);
        if (ret != backing_filename_len) {
            goto exit;
        }
@@ -869,8 +869,8 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
    tmp = g_malloc0(BDRV_SECTOR_SIZE);
    for (i = 0; i < ((sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/
        BDRV_SECTOR_SIZE); i++) {
-        ret = blk_pwrite(qcow_blk, header_size +
-            BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE);
+        ret = blk_pwrite(qcow_blk, header_size + BDRV_SECTOR_SIZE * i,
+                         tmp, BDRV_SECTOR_SIZE, 0);
        if (ret != BDRV_SECTOR_SIZE) {
            g_free(tmp);
            goto exit;
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -226,7 +226,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
    return 0;
 }

-int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
+int qcow2_cache_write(BlockDriverState *bs, Qcow2Cache *c)
 {
    BDRVQcow2State *s = bs->opaque;
    int result = 0;
@@ -242,8 +242,15 @@ int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
        }
    }

+    return result;
+}
+
+int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
+{
+    int result = qcow2_cache_write(bs, c);
+
    if (result == 0) {
-        ret = bdrv_flush(bs->file->bs);
+        int ret = bdrv_flush(bs->file->bs);
        if (ret < 0) {
            result = ret;
        }
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -64,7 +64,8 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
        }
    }

-    if (new_l1_size > INT_MAX / sizeof(uint64_t)) {
+    QEMU_BUILD_BUG_ON(QCOW_MAX_L1_SIZE > INT_MAX);
+    if (new_l1_size > QCOW_MAX_L1_SIZE / sizeof(uint64_t)) {
        return -EFBIG;
    }

--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2207,7 +2207,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
            cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
    }

-    ret = blk_pwrite(blk, 0, header, cluster_size);
+    ret = blk_pwrite(blk, 0, header, cluster_size, 0);
    g_free(header);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not write qcow2 header");
@@ -2217,7 +2217,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
    /* Write a refcount table with one refcount block */
    refcount_table = g_malloc0(2 * cluster_size);
    refcount_table[0] = cpu_to_be64(2 * cluster_size);
-    ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size);
+    ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size, 0);
    g_free(refcount_table);

    if (ret < 0) {
@@ -2774,14 +2774,14 @@ static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
    int ret;

    qemu_co_mutex_lock(&s->lock);
-    ret = qcow2_cache_flush(bs, s->l2_table_cache);
+    ret = qcow2_cache_write(bs, s->l2_table_cache);
    if (ret < 0) {
        qemu_co_mutex_unlock(&s->lock);
        return ret;
    }

    if (qcow2_need_accurate_refcounts(s)) {
-        ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+        ret = qcow2_cache_write(bs, s->refcount_block_cache);
        if (ret < 0) {
            qemu_co_mutex_unlock(&s->lock);
            return ret;
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -583,6 +583,7 @@ int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c);
 void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
     void *table);
 int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c);
+int qcow2_cache_write(BlockDriverState *bs, Qcow2Cache *c);
 int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
    Qcow2Cache *dependency);
 void qcow2_cache_depends_on_flush(Qcow2Cache *c);
--- a/block/qed.c
+++ b/block/qed.c
@@ -601,18 +601,18 @@ static int qed_create(const char *filename, uint32_t cluster_size,
    }

    qed_header_cpu_to_le(&header, &le_header);
-    ret = blk_pwrite(blk, 0, &le_header, sizeof(le_header));
+    ret = blk_pwrite(blk, 0, &le_header, sizeof(le_header), 0);
    if (ret < 0) {
        goto out;
    }
    ret = blk_pwrite(blk, sizeof(le_header), backing_file,
-                     header.backing_filename_size);
+                     header.backing_filename_size, 0);
    if (ret < 0) {
        goto out;
    }

    l1_table = g_malloc0(l1_size);
-    ret = blk_pwrite(blk, header.l1_table_offset, l1_table, l1_size);
+    ret = blk_pwrite(blk, header.l1_table_offset, l1_table, l1_size, 0);
    if (ret < 0) {
        goto out;
    }
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -641,9 +641,9 @@ static int rbd_aio_flush_wrapper(rbd_image_t image,
 }

 static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
-                                 int64_t sector_num,
+                                 int64_t off,
                                 QEMUIOVector *qiov,
-                                 int nb_sectors,
+                                 int64_t size,
                                 BlockCompletionFunc *cb,
                                 void *opaque,
                                 RBDAIOCmd cmd)
@@ -651,7 +651,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
    RBDAIOCB *acb;
    RADOSCB *rcb = NULL;
    rbd_completion_t c;
-    int64_t off, size;
    char *buf;
    int r;

@@ -660,6 +659,7 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
    acb = qemu_aio_get(&rbd_aiocb_info, bs, cb, opaque);
    acb->cmd = cmd;
    acb->qiov = qiov;
+    assert(!qiov || qiov->size == size);
    if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
        acb->bounce = NULL;
    } else {
@@ -679,9 +679,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,

    buf = acb->bounce;

-    off = sector_num * BDRV_SECTOR_SIZE;
-    size = nb_sectors * BDRV_SECTOR_SIZE;
-
    rcb = g_new(RADOSCB, 1);
    rcb->acb = acb;
    rcb->buf = buf;
@@ -731,7 +728,8 @@ static BlockAIOCB *qemu_rbd_aio_readv(BlockDriverState *bs,
                                      BlockCompletionFunc *cb,
                                      void *opaque)
 {
-    return rbd_start_aio(bs, sector_num, qiov, nb_sectors, cb, opaque,
+    return rbd_start_aio(bs, sector_num << BDRV_SECTOR_BITS, qiov,
+                         (int64_t) nb_sectors << BDRV_SECTOR_BITS, cb, opaque,
                         RBD_AIO_READ);
 }

@@ -742,7 +740,8 @@ static BlockAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs,
                                       BlockCompletionFunc *cb,
                                       void *opaque)
 {
-    return rbd_start_aio(bs, sector_num, qiov, nb_sectors, cb, opaque,
+    return rbd_start_aio(bs, sector_num << BDRV_SECTOR_BITS, qiov,
+                         (int64_t) nb_sectors << BDRV_SECTOR_BITS, cb, opaque,
                         RBD_AIO_WRITE);
 }

@@ -931,7 +930,8 @@ static BlockAIOCB* qemu_rbd_aio_discard(BlockDriverState *bs,
                                        BlockCompletionFunc *cb,
                                        void *opaque)
 {
-    return rbd_start_aio(bs, sector_num, NULL, nb_sectors, cb, opaque,
+    return rbd_start_aio(bs, sector_num << BDRV_SECTOR_BITS, NULL,
+                         nb_sectors << BDRV_SECTOR_BITS, cb, opaque,
                         RBD_AIO_DISCARD);
 }
 #endif
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -1678,7 +1678,7 @@ static int sd_prealloc(const char *filename, Error **errp)
        if (ret < 0) {
            goto out;
        }
-        ret = blk_pwrite(blk, idx * buf_size, buf, buf_size);
+        ret = blk_pwrite(blk, idx * buf_size, buf, buf_size, 0);
        if (ret < 0) {
            goto out;
        }
--- a/block/tar.c
+++ b/block/tar.c
@@ -0,0 +1,379 @@
+/*
+ * Tar block driver
+ *
+ * Copyright (c) 2009 Alexander Graf <agraf@suse.de>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "block/block_int.h"
+
+// #define DEBUG
+
+#ifdef DEBUG
+#define dprintf(fmt, ...) do { printf("tar: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define dprintf(fmt, ...) do { } while (0)
+#endif
+
+#define SECTOR_SIZE      512
+
+#define POSIX_TAR_MAGIC  "ustar"
+#define OFFS_LENGTH      0x7c
+#define OFFS_TYPE        0x9c
+#define OFFS_MAGIC       0x101
+
+#define OFFS_S_SP        0x182
+#define OFFS_S_EXT       0x1e2
+#define OFFS_S_LENGTH    0x1e3
+#define OFFS_SX_EXT      0x1f8
+
+/*
+ * One hole of a sparse tar member.  tar_sparse() fills an entry with the end
+ * of the previous data chunk (start) and the offset of the next data chunk
+ * (end); tar_aio_readv() compares both against byte offsets of the request.
+ */
+typedef struct SparseCache {
+    uint64_t start;
+    uint64_t end;
+} SparseCache;
+
+/* Per-device state of the tar driver, filled in by tar_open(). */
+typedef struct BDRVTarState {
+    /* archive file, opened with bdrv_open() in tar_open() */
+    BlockDriverState *hd;
+    /* sector inside hd that tar_aio_readv() adds to each request's sector */
+    size_t file_sec;
+    /* length reported by tar_getlength() */
+    uint64_t file_len;
+    /* array of holes collected by tar_sparse(), sparse_num entries long */
+    SparseCache *sparse;
+    int sparse_num;
+    /* end of the last data chunk handed to tar_sparse() */
+    uint64_t last_end;
+    /* member name read from a type 'L' entry; empty when none is pending */
+    char longfile[2048];
+} BDRVTarState;
+
+/*
+ * Tell whether the string @str finishes with the suffix @end.
+ *
+ * Returns 1 when it does and 0 when it does not (including the case where
+ * @str is shorter than @end).
+ */
+static int str_ends(char *str, const char *end)
+{
+    int suffix_len = strlen(end);
+    int total_len = strlen(str);
+    const char *tail;
+
+    if (total_len < suffix_len) {
+        return 0;
+    }
+
+    /* The tail holds exactly suffix_len characters, so compare it in full */
+    tail = str + (total_len - suffix_len);
+    return strcmp(tail, end) == 0;
+}
+
+/*
+ * Decide whether the tar member described by @header and named @filename is
+ * an image this driver should expose.
+ *
+ * The name has to end in one of the known image suffixes and the header type
+ * byte has to be '0' or 'S'.  @filename is emptied before returning so the
+ * same name is not matched a second time.  @bs is not used.
+ *
+ * Returns 1 for a match, 0 otherwise.
+ */
+static int is_target_file(BlockDriverState *bs, char *filename,
+                          char *header)
+{
+    static const char *const image_suffixes[] = {
+        ".raw", ".qcow", ".qcow2", ".vmdk",
+    };
+    int matched = 0;
+    size_t n;
+
+    for (n = 0; n < sizeof(image_suffixes) / sizeof(image_suffixes[0]); n++) {
+        if (str_ends(filename, image_suffixes[n])) {
+            matched = 1;
+            break;
+        }
+    }
+
+    /* Only the header types '0' and 'S' are accepted as images */
+    if (matched && header[OFFS_TYPE] != '0' && header[OFFS_TYPE] != 'S') {
+        matched = 0;
+    }
+
+    dprintf("does filename %s match? %s\n", filename, matched ? "yes" : "no");
+
+    /* make sure we're not using this name again */
+    filename[0] = '\0';
+
+    return matched;
+}
+
+/*
+ * Decode the 12-byte numeric tar header field starting at @ptr.
+ *
+ * When the top bit of the first byte is set, the last eight bytes of the
+ * field are taken as a big-endian binary number; otherwise the field is
+ * parsed as an octal string.  The byte following the field (ptr[12]) is
+ * overwritten with a terminator while parsing and restored afterwards, so
+ * @ptr must point at writable memory of at least 13 bytes.
+ *
+ * Returns the decoded value.
+ */
+static uint64_t tar2u64(char *ptr)
+{
+    uint64_t retval;
+    char oldend = ptr[12];
+
+    ptr[12] = '\0';
+    if (*ptr & 0x80) {
+        /* XXX we only support files up to 64 bit length */
+        uint64_t raw;
+
+        /* ptr + 4 carries no alignment guarantee: copy rather than cast */
+        memcpy(&raw, ptr + 4, sizeof(raw));
+        retval = be64_to_cpu(raw);
+        dprintf("Convert %" PRIx64 " -> %#" PRIx64 "\n", raw, retval);
+    } else {
+        /* An 11-digit octal field needs 33 bits, more than a 32-bit long */
+        retval = strtoull(ptr, NULL, 8);
+        dprintf("Convert %s -> %#" PRIx64 "\n", ptr, retval);
+    }
+
+    ptr[12] = oldend;
+
+    return retval;
+}
+
+/*
+ * Feed one data chunk (@offs, @len) of a sparse member into the hole map.
+ *
+ * An empty chunk is ignored.  A chunk that starts exactly at s->last_end
+ * only extends the current data run.  A chunk that starts before
+ * s->last_end is ignored.  Any other chunk appends a hole covering
+ * [s->last_end, @offs) to s->sparse and moves s->last_end past the chunk.
+ */
+static void tar_sparse(BDRVTarState *s, uint64_t offs, uint64_t len)
+{
+    SparseCache *hole;
+    int idx;
+
+    if (len == 0) {
+        return;
+    }
+    if (offs == s->last_end) {
+        s->last_end += len;
+        return;
+    }
+    if (offs < s->last_end) {
+        return;
+    }
+
+    dprintf("Last chunk until %lx new chunk at %lx\n", s->last_end, offs);
+
+    /* Grow the array by one entry and fill the new slot */
+    idx = s->sparse_num++;
+    s->sparse = g_realloc(s->sparse, (idx + 1) * sizeof(SparseCache));
+    hole = &s->sparse[idx];
+    hole->start = s->last_end;
+    hole->end = offs;
+    s->last_end = offs + len;
+    dprintf("Sparse at %lx end=%lx\n", hole->start,
+                                       hole->end);
+}
+
+/*
+ * Options accepted by tar_open(): a single string option, "filename", which
+ * tar_open() reads with qemu_opt_get().
+ */
+static QemuOptsList runtime_opts = {
+    .name = "tar",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+    .desc = {
+        {
+            .name = "filename",
+            .type = QEMU_OPT_STRING,
+            .help = "URL to the tar file",
+        },
+        { /* end of list */ }
+    },
+};
+
+/*
+ * Open a tar archive and select the disk image stored inside it.
+ *
+ * The "filename" option names the archive; a leading "tar://" or "tar:" is
+ * stripped before the archive is opened with bdrv_open().  Member headers
+ * are then read one after another until is_target_file() accepts one.  On
+ * success s->file_sec is the sector that follows the last header read for
+ * the image and s->file_len is its length; for a type 'S' member the holes
+ * are additionally recorded through tar_sparse().
+ *
+ * Returns 0 on success, the bdrv_open() result when the archive cannot be
+ * opened, and -EINVAL on any other failure.  On failure the archive
+ * reference and any partially built hole map are released.
+ */
+static int tar_open(BlockDriverState *bs, QDict *options, int flags, Error **errp)
+{
+    BDRVTarState *s = bs->opaque;
+    char header[SECTOR_SIZE];
+    char *real_file = header;
+    char *magic;
+    /* 64 bit so that offsets past 4 GiB do not wrap where size_t is 32 bit */
+    uint64_t header_offs = 0;
+    int ret;
+    QemuOpts *opts;
+    Error *local_err = NULL;
+    const char *filename;
+
+    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (local_err != NULL) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    filename = qemu_opt_get(opts, "filename");
+    if (!filename) {
+        /* Without this check the strncmp() calls below would read NULL */
+        error_setg(errp, "tar: the 'filename' option is required");
+        goto fail;
+    }
+
+    if (!strncmp(filename, "tar://", 6))
+        filename += 6;
+    else if (!strncmp(filename, "tar:", 4))
+        filename += 4;
+
+    ret = bdrv_open(&s->hd, filename, NULL, NULL, flags | BDRV_O_PROTOCOL, &local_err);
+    if (ret < 0) {
+        error_propagate(errp, local_err);
+        qemu_opts_del(opts);
+        return ret;
+    }
+
+    /* Search the file for an image */
+
+    do {
+        /* tar header */
+        if (bdrv_pread(s->hd, header_offs, header, SECTOR_SIZE) != SECTOR_SIZE)
+            goto fail;
+
+        if ((header_offs > 1) && !header[0]) {
+            fprintf(stderr, "Tar: No image file found in archive\n");
+            goto fail;
+        }
+
+        magic = &header[OFFS_MAGIC];
+        if (strncmp(magic, POSIX_TAR_MAGIC, 5)) {
+            /* The field need not be NUL-terminated: print 5 bytes at most */
+            fprintf(stderr, "Tar: Invalid magic: %.5s\n", magic);
+            goto fail;
+        }
+
+        dprintf("file type: %c\n", header[OFFS_TYPE]);
+
+        /* file length*/
+        s->file_len = (tar2u64(&header[OFFS_LENGTH]) + (SECTOR_SIZE - 1)) &
+                      ~(SECTOR_SIZE - 1);
+        s->file_sec = (header_offs / SECTOR_SIZE) + 1;
+
+        header_offs += s->file_len + SECTOR_SIZE;
+
+        if (header[OFFS_TYPE] == 'L') {
+            /* The data of an 'L' entry is the name of the following member */
+            if (bdrv_pread(s->hd, header_offs - s->file_len, s->longfile,
+                           sizeof(s->longfile)) < 0) {
+                /* Never match against whatever a failed read left behind */
+                s->longfile[0] = '\0';
+            }
+            s->longfile[sizeof(s->longfile)-1] = '\0';
+            real_file = header;
+        } else if (s->longfile[0]) {
+            real_file = s->longfile;
+        } else {
+            real_file = header;
+        }
+    } while(!is_target_file(bs, real_file, header));
+
+    /* We found an image! */
+
+    if (header[OFFS_TYPE] == 'S') {
+        uint8_t isextended;
+        int i;
+
+        /* Four (offset, length) pairs live in the member header itself */
+        for (i = OFFS_S_SP; i < (OFFS_S_SP + (4 * 24)); i += 24)
+            tar_sparse(s, tar2u64(&header[i]), tar2u64(&header[i+12]));
+
+        s->file_len = tar2u64(&header[OFFS_S_LENGTH]);
+        isextended = header[OFFS_S_EXT];
+
+        /* Each extension sector carries 21 more pairs and a "more" flag */
+        while (isextended) {
+            if (bdrv_pread(s->hd, (uint64_t)s->file_sec * SECTOR_SIZE, header,
+                           SECTOR_SIZE) != SECTOR_SIZE)
+                goto fail;
+
+            for (i = 0; i < (21 * 24); i += 24)
+                tar_sparse(s, tar2u64(&header[i]), tar2u64(&header[i+12]));
+            isextended = header[OFFS_SX_EXT];
+            s->file_sec++;
+        }
+        tar_sparse(s, s->file_len, 1);
+    }
+    qemu_opts_del(opts);
+
+    return 0;
+
+fail:
+    fprintf(stderr, "Tar: Error opening file\n");
+    bdrv_unref(s->hd);
+    s->hd = NULL;
+    /* Drop a hole map that was only partially collected */
+    g_free(s->sparse);
+    s->sparse = NULL;
+    s->sparse_num = 0;
+    qemu_opts_del(opts);
+    return -EINVAL;
+}
+
+/*
+ * Request object used by tar_aio_readv() when it completes a read itself:
+ * the bottom half bh runs tar_sparse_cb(), which invokes the caller's
+ * completion callback stored in common.
+ */
+typedef struct TarAIOCB {
+    BlockAIOCB common;
+    QEMUBH *bh;
+} TarAIOCB;
+
+/*
+ * Bottom-half handler for requests that tar_aio_readv() answered without
+ * forwarding them: report success (0) to the caller's completion callback,
+ * then dispose of the bottom half and of the request itself.
+ */
+static void tar_sparse_cb(void *opaque)
+{
+    TarAIOCB *req = opaque;
+    BlockAIOCB *common = &req->common;
+
+    common->cb(common->opaque, 0);
+    qemu_bh_delete(req->bh);
+    qemu_aio_unref(req);
+}
+
+/* Passed to qemu_aio_get() in tar_aio_readv() so requests are TarAIOCB-sized */
+static AIOCBInfo tar_aiocb_info = {
+    .aiocb_size         = sizeof(TarAIOCB),
+};
+
+/*
+ * Finish a request whose data is already in the caller's iovec: allocate a
+ * TarAIOCB and schedule tar_sparse_cb() to invoke the completion callback.
+ */
+static BlockAIOCB *tar_complete_in_bh(BlockDriverState *bs,
+                                      BlockCompletionFunc *cb, void *opaque)
+{
+    TarAIOCB *acb = qemu_aio_get(&tar_aiocb_info, bs, cb, opaque);
+
+    acb->bh = qemu_bh_new(tar_sparse_cb, acb);
+    qemu_bh_schedule(acb->bh);
+
+    return &acb->common;
+}
+
+/*
+ * This is where we get a request from a caller to read something.
+ *
+ * The request covers @nb_sectors sectors starting at @sector_num of the
+ * image.  The hole map is scanned to decide how to serve it:
+ *  - holes that end at or before the request shift the sector that is read
+ *    from the archive;
+ *  - a request lying completely inside a hole is answered with zeroes;
+ *  - a request that overlaps a hole only partially is split into
+ *    single-sector synchronous reads through this same device;
+ *  - anything else is forwarded to the archive with bdrv_aio_readv().
+ *
+ * Returns the request handle, or NULL if one of the synchronous reads
+ * failed.
+ */
+static BlockAIOCB *tar_aio_readv(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockCompletionFunc *cb, void *opaque)
+{
+    BDRVTarState *s = bs->opaque;
+    SparseCache *sparse;
+    int64_t sec_file = sector_num + s->file_sec;
+    int64_t start = sector_num * SECTOR_SIZE;
+    int64_t end = start + (nb_sectors * SECTOR_SIZE);
+    int i;
+
+    for (i = 0; i < s->sparse_num; i++) {
+        sparse = &s->sparse[i];
+        if (sparse->start > end) {
+            /* Entries are expected in increasing start order: stop here */
+            break;
+        } else if ((sparse->start < start) && (sparse->end <= start)) {
+            /* sparse before our offset */
+            sec_file -= (sparse->end - sparse->start) / SECTOR_SIZE;
+        } else if ((sparse->start <= start) && (sparse->end >= end)) {
+            /* all our sectors are sparse */
+            char *buf = g_malloc0(nb_sectors * SECTOR_SIZE);
+
+            qemu_iovec_from_buf(qiov, 0, buf, nb_sectors * SECTOR_SIZE);
+            g_free(buf);
+
+            return tar_complete_in_bh(bs, cb, opaque);
+        } else if (((sparse->start >= start) && (sparse->start < end)) ||
+                   ((sparse->end >= start) && (sparse->end < end))) {
+            /* we're semi-sparse (worst case) */
+            /* let's go synchronous and read all sectors individually */
+            char *buf = g_malloc(nb_sectors * SECTOR_SIZE);
+            uint64_t offs;
+
+            for (offs = 0; offs < (nb_sectors * SECTOR_SIZE);
+                 offs += SECTOR_SIZE) {
+                if (bdrv_pread(bs, (sector_num * SECTOR_SIZE) + offs,
+                               buf + offs, SECTOR_SIZE) < 0) {
+                    /*
+                     * Do not hand back a partly filled buffer as success.
+                     * NOTE(review): relies on the block layer treating a
+                     * NULL request handle as an I/O error - confirm for
+                     * this tree.
+                     */
+                    g_free(buf);
+                    return NULL;
+                }
+            }
+
+            qemu_iovec_from_buf(qiov, 0, buf, nb_sectors * SECTOR_SIZE);
+            /* The data now lives in qiov; the bounce buffer is done */
+            g_free(buf);
+
+            return tar_complete_in_bh(bs, cb, opaque);
+        }
+    }
+
+    return bdrv_aio_readv(s->hd, sec_file, qiov, nb_sectors,
+                          cb, opaque);
+}
+
+/*
+ * Close the device: drop the reference to the archive that tar_open() took
+ * with bdrv_open() and free the hole map built by tar_sparse().
+ */
+static void tar_close(BlockDriverState *bs)
+{
+    BDRVTarState *s = bs->opaque;
+
+    dprintf("Close\n");
+
+    bdrv_unref(s->hd);
+    s->hd = NULL;
+
+    g_free(s->sparse);
+    s->sparse = NULL;
+    s->sparse_num = 0;
+}
+
+/* Report the image length that tar_open() stored in the driver state. */
+static int64_t tar_getlength(BlockDriverState *bs)
+{
+    BDRVTarState *state = bs->opaque;
+    int64_t length = state->file_len;
+
+    dprintf("getlength -> %ld\n", state->file_len);
+    return length;
+}
+
+/*
+ * Driver description registered by tar_block_init().  Only open, close,
+ * length query and asynchronous read callbacks are provided.
+ */
+static BlockDriver bdrv_tar = {
+    .format_name     = "tar",
+    .protocol_name   = "tar",
+
+    .instance_size   = sizeof(BDRVTarState),
+    .bdrv_file_open  = tar_open,
+    .bdrv_close      = tar_close,
+    .bdrv_getlength  = tar_getlength,
+
+    .bdrv_aio_readv  = tar_aio_readv,
+};
+
+/* Initialisation hook (see block_init() below): registers the tar driver. */
+static void tar_block_init(void)
+{
+    bdrv_register(&bdrv_tar);
+}
+
+block_init(tar_block_init);
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -57,6 +57,7 @@
 #include "migration/migration.h"
 #include "qemu/coroutine.h"
 #include "qemu/cutils.h"
+#include "qemu/uuid.h"

 #if defined(CONFIG_UUID)
 #include <uuid/uuid.h>
@@ -808,7 +809,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
    vdi_header_print(&header);
 #endif
    vdi_header_to_le(&header);
-    ret = blk_pwrite(blk, offset, &header, sizeof(header));
+    ret = blk_pwrite(blk, offset, &header, sizeof(header), 0);
    if (ret < 0) {
        error_setg(errp, "Error writing header to %s", filename);
        goto exit;
@@ -829,7 +830,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
                bmap[i] = VDI_UNALLOCATED;
            }
        }
-        ret = blk_pwrite(blk, offset, bmap, bmap_size);
+        ret = blk_pwrite(blk, offset, bmap, bmap_size, 0);
        if (ret < 0) {
            error_setg(errp, "Error writing bmap to %s", filename);
            goto exit;
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -1856,13 +1856,14 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
    creator = g_utf8_to_utf16("QEMU v" QEMU_VERSION, -1, NULL,
                              &creator_items, NULL);
    signature = cpu_to_le64(VHDX_FILE_SIGNATURE);
-    ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature));
+    ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature),
+                     0);
    if (ret < 0) {
        goto delete_and_exit;
    }
    if (creator) {
        ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET + sizeof(signature),
-                         creator, creator_items * sizeof(gunichar2));
+                         creator, creator_items * sizeof(gunichar2), 0);
        if (ret < 0) {
            goto delete_and_exit;
        }
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1728,12 +1728,12 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
    header.check_bytes[3] = 0xa;

    /* write all the data */
-    ret = blk_pwrite(blk, 0, &magic, sizeof(magic));
+    ret = blk_pwrite(blk, 0, &magic, sizeof(magic), 0);
    if (ret < 0) {
        error_setg(errp, QERR_IO_ERROR);
        goto exit;
    }
-    ret = blk_pwrite(blk, sizeof(magic), &header, sizeof(header));
+    ret = blk_pwrite(blk, sizeof(magic), &header, sizeof(header), 0);
    if (ret < 0) {
        error_setg(errp, QERR_IO_ERROR);
        goto exit;
@@ -1753,7 +1753,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
        gd_buf[i] = cpu_to_le32(tmp);
    }
    ret = blk_pwrite(blk, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE,
-                     gd_buf, gd_buf_size);
+                     gd_buf, gd_buf_size, 0);
    if (ret < 0) {
        error_setg(errp, QERR_IO_ERROR);
        goto exit;
@@ -1765,7 +1765,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
        gd_buf[i] = cpu_to_le32(tmp);
    }
    ret = blk_pwrite(blk, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE,
-                     gd_buf, gd_buf_size);
+                     gd_buf, gd_buf_size, 0);
    if (ret < 0) {
        error_setg(errp, QERR_IO_ERROR);
        goto exit;
@@ -1881,13 +1881,16 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
    if (qemu_opt_get_bool_del(opts, BLOCK_OPT_COMPAT6, false)) {
        flags |= BLOCK_FLAG_COMPAT6;
    }
+    if (qemu_opt_get_bool_del(opts, BLOCK_OPT_SCSI, false)) {
+        flags |= BLOCK_FLAG_SCSI;
+    }
    fmt = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
    if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ZEROED_GRAIN, false)) {
        zeroed_grain = true;
    }

    if (!adapter_type) {
-        adapter_type = g_strdup("ide");
+        adapter_type = g_strdup(flags & BLOCK_FLAG_SCSI ? "lsilogic" : "ide");
    } else if (strcmp(adapter_type, "ide") &&
               strcmp(adapter_type, "buslogic") &&
               strcmp(adapter_type, "lsilogic") &&
@@ -2028,7 +2031,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)

    blk_set_allow_write_beyond_eof(new_blk, true);

-    ret = blk_pwrite(new_blk, desc_offset, desc, desc_len);
+    ret = blk_pwrite(new_blk, desc_offset, desc, desc_len, 0);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not write description");
        goto exit;
@@ -2310,6 +2313,12 @@ static QemuOptsList vmdk_create_opts = {
            .help = "Enable efficient zero writes "
                    "using the zeroed-grain GTE feature"
        },
+        {
+            .name = BLOCK_OPT_SCSI,
+            .type = QEMU_OPT_BOOL,
+            .help = "SCSI image",
+            .def_value_str = "off"
+        },
        { /* end of list */ }
    }
 };
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -783,13 +783,13 @@ static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
    block_size = 0x200000;
    num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);

-    ret = blk_pwrite(blk, offset, buf, HEADER_SIZE);
+    ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
    if (ret < 0) {
        goto fail;
    }

    offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
-    ret = blk_pwrite(blk, offset, buf, HEADER_SIZE);
+    ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
    if (ret < 0) {
        goto fail;
    }
@@ -799,7 +799,7 @@ static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,

    memset(buf, 0xFF, 512);
    for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) {
-        ret = blk_pwrite(blk, offset, buf, 512);
+        ret = blk_pwrite(blk, offset, buf, 512, 0);
        if (ret < 0) {
            goto fail;
        }
@@ -826,7 +826,7 @@ static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
    /* Write the header */
    offset = 512;

-    ret = blk_pwrite(blk, offset, buf, 1024);
+    ret = blk_pwrite(blk, offset, buf, 1024, 0);
    if (ret < 0) {
        goto fail;
    }
@@ -848,7 +848,7 @@ static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
        return ret;
    }

-    ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE);
+    ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE, 0);
    if (ret < 0) {
        return ret;
    }
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -28,6 +28,10 @@ typedef struct NBDServerData {

 static NBDServerData *nbd_server;

+/*
+ * Close callback handed to nbd_client_new() by nbd_accept(): releases the
+ * client with nbd_client_put().  The boolean argument is not used.
+ */
+static void nbd_blockdev_client_closed(NBDClient *client, bool ignored)
+{
+    nbd_client_put(client);
+}

 static gboolean nbd_accept(QIOChannel *ioc, GIOCondition condition,
                           gpointer opaque)
@@ -46,7 +50,7 @@ static gboolean nbd_accept(QIOChannel *ioc, GIOCondition condition,

    nbd_client_new(NULL, cioc,
                   nbd_server->tlscreds, NULL,
-                   nbd_client_put);
+                   nbd_blockdev_client_closed);
    object_unref(OBJECT(cioc));
    return TRUE;
 }
--- a/blockdev.c
+++ b/blockdev.c
@@ -481,6 +481,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
    const char *id;
    BlockdevDetectZeroesOptions detect_zeroes =
        BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF;
+    const char *blk_id;
    const char *throttling_group = NULL;

    /* Check common options by copying from bs_opts to opts, all other options
@@ -510,6 +511,8 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,

    writethrough = !qemu_opt_get_bool(opts, BDRV_OPT_CACHE_WB, true);

+    blk_id = qemu_opts_id(opts);
+
    qdict_extract_subqdict(bs_opts, &interval_dict, "stats-intervals.");
    qdict_array_split(interval_dict, &interval_list);

@@ -579,7 +582,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,

        if (throttle_enabled(&cfg)) {
            if (!throttling_group) {
-                throttling_group = blk_name(blk);
+                throttling_group = blk_id;
            }
            blk_rs->throttle_group = g_strdup(throttling_group);
            blk_rs->throttle_state = throttle_group_incref(throttling_group);
@@ -614,7 +617,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
        /* disk I/O throttling */
        if (throttle_enabled(&cfg)) {
            if (!throttling_group) {
-                throttling_group = blk_name(blk);
+                throttling_group = blk_id;
            }
            bdrv_io_limits_enable(bs, throttling_group);
            bdrv_set_io_limits(bs, &cfg);
@@ -636,7 +639,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
    blk_set_enable_write_cache(blk, !writethrough);
    blk_set_on_error(blk, on_read_error, on_write_error);

-    if (!monitor_add_blk(blk, qemu_opts_id(opts), errp)) {
+    if (!monitor_add_blk(blk, blk_id, errp)) {
        blk_unref(blk);
        blk = NULL;
        goto err_no_bs_opts;
@@ -2904,6 +2907,7 @@ void qmp_block_resize(bool has_device, const char *device,
 {
    Error *local_err = NULL;
    BlockDriverState *bs;
+    BlockBackend *cb_blk = NULL;
    AioContext *aio_context;
    int ret;

@@ -2915,6 +2919,10 @@ void qmp_block_resize(bool has_device, const char *device,
        return;
    }

+    if (has_device) {
+        cb_blk = blk_by_name(device);
+    }
+
    aio_context = bdrv_get_aio_context(bs);
    aio_context_acquire(aio_context);

@@ -2939,6 +2947,9 @@ void qmp_block_resize(bool has_device, const char *device,
    ret = bdrv_truncate(bs, size);
    switch (ret) {
    case 0:
+        if (cb_blk) {
+            blk_legacy_resize_cb(cb_blk);
+        }
        break;
    case -ENOMEDIUM:
        error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
--- a/50
+++ b/50
@@ -1537,7 +1537,7 @@ fi

 if test "$pie" = ""; then
  case "$cpu-$targetos" in
-    i386-Linux|x86_64-Linux|x32-Linux|i386-OpenBSD|x86_64-OpenBSD)
+    i386-Linux|x86_64-Linux|x32-Linux|ppc*-Linux|i386-OpenBSD|x86_64-OpenBSD)
      ;;
    *)
      pie="no"
@@ -1577,7 +1577,7 @@ EOF

  if compile_prog "-Werror -fno-pie" "-nopie"; then
    CFLAGS_NOPIE="-fno-pie"
-    LDFLAGS_NOPIE="-nopie"
+    LDFLAGS_NOPIE="-no-pie"
  fi
 fi

@@ -1868,13 +1868,10 @@ fi
 ##########################################
 # libseccomp check

+libseccomp_minver="2.2.0"
 if test "$seccomp" != "no" ; then
    case "$cpu" in
-    i386|x86_64)
-        libseccomp_minver="2.1.0"
-        ;;
-    mips)
-        libseccomp_minver="2.2.0"
+    i386|x86_64|mips)
        ;;
    arm|aarch64)
        libseccomp_minver="2.2.3"
@@ -2342,6 +2339,7 @@ if test "$nettle" != "no"; then
        nettle="yes"

        cat > $TMPC << EOF
+#include <stddef.h>
 #include <nettle/pbkdf2.h>
 int main(void) {
     pbkdf2_hmac_sha256(8, NULL, 1000, 8, NULL, 8, NULL);
@@ -2392,20 +2390,25 @@ fi

 if test "$vte" != "no"; then
    if test "$gtkabi" = "3.0"; then
-      vtepackage="vte-2.90"
-      vteversion="0.32.0"
+      vteminversion="0.32.0"
+      if $pkg_config --exists "vte-2.91"; then
+        vtepackage="vte-2.91"
+      else
+        vtepackage="vte-2.90"
+      fi
    else
      vtepackage="vte"
-      vteversion="0.24.0"
+      vteminversion="0.24.0"
    fi
-    if $pkg_config --exists "$vtepackage >= $vteversion"; then
+    if $pkg_config --exists "$vtepackage >= $vteminversion"; then
        vte_cflags=`$pkg_config --cflags $vtepackage`
        vte_libs=`$pkg_config --libs $vtepackage`
+        vteversion=`$pkg_config --modversion $vtepackage`
        libs_softmmu="$vte_libs $libs_softmmu"
        vte="yes"
    elif test "$vte" = "yes"; then
        if test "$gtkabi" = "3.0"; then
-            feature_not_found "vte" "Install libvte-2.90 devel"
+            feature_not_found "vte" "Install libvte-2.90/2.91 devel"
        else
            feature_not_found "vte" "Install libvte devel"
        fi
@@ -2967,7 +2970,7 @@ int main(void) {
 }
 EOF

-if ! compile_prog "-Werror $CFLAGS" "$LIBS" ; then
+if ! compile_prog "$CFLAGS" "$LIBS" ; then
    error_exit "sizeof(size_t) doesn't match GLIB_SIZEOF_SIZE_T."\
               "You probably need to set PKG_CONFIG_LIBDIR"\
 	       "to point to the right pkg-config files for your"\
@@ -4718,6 +4721,12 @@ EOF
  fi
 fi

+echo_version() {
+    if test "$1" = "yes" ; then
+        echo "($2)"
+    fi
+}
+
 # prepend pixman and ftd flags after all config tests are done
 QEMU_CFLAGS="$pixman_cflags $fdt_cflags $QEMU_CFLAGS"
 libs_softmmu="$pixman_libs $libs_softmmu"
@@ -4770,19 +4779,15 @@ echo "pixman            $pixman"
 echo "SDL support       $sdl"
 echo "GTK support       $gtk"
 echo "GTK GL support    $gtk_gl"
+echo "VTE support       $vte `echo_version $vte $vteversion`"
 echo "GNUTLS support    $gnutls"
 echo "GNUTLS hash       $gnutls_hash"
 echo "GNUTLS rnd        $gnutls_rnd"
 echo "libgcrypt         $gcrypt"
 echo "libgcrypt kdf     $gcrypt_kdf"
-if test "$nettle" = "yes"; then
-    echo "nettle            $nettle ($nettle_version)"
-else
-    echo "nettle            $nettle"
-fi
+echo "nettle            $nettle `echo_version $nettle $nettle_version`"
 echo "nettle kdf        $nettle_kdf"
 echo "libtasn1          $tasn1"
-echo "VTE support       $vte"
 echo "curses support    $curses"
 echo "virgl support     $virglrenderer"
 echo "curl support      $curl"
@@ -4831,11 +4836,7 @@ echo "Trace backends    $trace_backends"
 if have_backend "simple"; then
 echo "Trace output file $trace_file-<pid>"
 fi
-if test "$spice" = "yes"; then
-echo "spice support     $spice ($spice_protocol_version/$spice_server_version)"
-else
-echo "spice support     $spice"
-fi
+echo "spice support     $spice `echo_version $spice $spice_protocol_version/$spice_server_version`"
 echo "rbd support       $rbd"
 echo "xfsctl support    $xfs"
 echo "smartcard support $smartcard"
@@ -5512,7 +5513,6 @@ else
  echo "AUTOCONF_HOST := "                             >> $config_host_mak
 fi
 echo "LDFLAGS=$LDFLAGS" >> $config_host_mak
-echo "LDFLAGS_NOPIE=$LDFLAGS_NOPIE" >> $config_host_mak
 echo "LIBS+=$LIBS" >> $config_host_mak
 echo "LIBS_TOOLS+=$libs_tools" >> $config_host_mak
 echo "EXESUF=$EXESUF" >> $config_host_mak
--- a/contrib/ivshmem-server/ivshmem-server.h
+++ b/contrib/ivshmem-server/ivshmem-server.h
@@ -15,7 +15,7 @@
 * unix socket. For each client, the server will create some eventfd
 * (see EVENTFD(2)), one per vector. These fd are transmitted to all
 * clients using the SCM_RIGHTS cmsg message. Therefore, each client is
- * able to send a notification to another client without beeing
+ * able to send a notification to another client without being
 * "profixied" by the server.
 *
 * We use this mechanism to send interruptions between guests.
--- a/cpus.c
+++ b/cpus.c
@@ -743,7 +743,7 @@ static int do_vm_stop(RunState state)
    }

    bdrv_drain_all();
-    ret = blk_flush_all();
+    ret = bdrv_flush_all();

    return ret;
 }
@@ -1442,7 +1442,7 @@ int vm_stop_force_state(RunState state)
        bdrv_drain_all();
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
-        return blk_flush_all();
+        return bdrv_flush_all();
    }
 }

@@ -1689,6 +1689,18 @@ exit:
    fclose(f);
 }

+/*
+ * Report whether the current CPU holds a non-zero spec_ctrl value although
+ * its CPUID_7_0_EDX_SPEC_CTRL feature bit is clear.  Always false when not
+ * built for TARGET_I386 or when there is no current CPU.
+ */
+bool spec_ctrl_is_inconsistent(void)
+{
+#if defined(TARGET_I386)
+    X86CPU *cpu = X86_CPU(current_cpu);
+
+    if (cpu != NULL) {
+        CPUX86State *env = &cpu->env;
+
+        if (env->spec_ctrl &&
+            !(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_SPEC_CTRL)) {
+            return true;
+        }
+    }
+#endif
+    return false;
+}
+
 void qmp_inject_nmi(Error **errp)
 {
 #if defined(TARGET_I386)
--- a/crypto/cipher-gcrypt.c
+++ b/crypto/cipher-gcrypt.c
@@ -192,6 +192,12 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
    }

    if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
+        if (ctx->blocksize != XTS_BLOCK_SIZE) {
+            error_setg(errp,
+                       "Cipher block size %zu must equal XTS block size %d",
+                       ctx->blocksize, XTS_BLOCK_SIZE);
+            goto error;
+        }
        ctx->iv = g_new0(uint8_t, ctx->blocksize);
    }

--- a/crypto/cipher-nettle.c
+++ b/crypto/cipher-nettle.c
@@ -361,6 +361,13 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
        goto error;
    }

+    if (mode == QCRYPTO_CIPHER_MODE_XTS &&
+        ctx->blocksize != XTS_BLOCK_SIZE) {
+        error_setg(errp, "Cipher block size %zu must equal XTS block size %d",
+                   ctx->blocksize, XTS_BLOCK_SIZE);
+        goto error;
+    }
+
    ctx->iv = g_new0(uint8_t, ctx->blocksize);
    cipher->opaque = ctx;

@@ -456,11 +463,6 @@ int qcrypto_cipher_decrypt(QCryptoCipher *cipher,
        break;

    case QCRYPTO_CIPHER_MODE_XTS:
-        if (ctx->blocksize != XTS_BLOCK_SIZE) {
-            error_setg(errp, "Block size must be %d not %zu",
-                       XTS_BLOCK_SIZE, ctx->blocksize);
-            return -1;
-        }
        xts_decrypt(ctx->ctx, ctx->ctx_tweak,
                    ctx->alg_encrypt_wrapper, ctx->alg_decrypt_wrapper,
                    ctx->iv, len, out, in);
--- a/device_tree.c
+++ b/device_tree.c
@@ -90,7 +90,7 @@ void *load_device_tree(const char *filename_path, int *sizep)
    /* First allocate space in qemu for device tree */
    fdt = g_malloc0(dt_size);

-    dt_file_load_size = load_image(filename_path, fdt);
+    dt_file_load_size = load_image_size(filename_path, fdt, dt_size);
    if (dt_file_load_size < 0) {
        error_report("Unable to open device tree file '%s'",
                     filename_path);
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -70,16 +70,17 @@ void qemu_sglist_destroy(QEMUSGList *qsg)

 typedef struct {
    BlockAIOCB common;
-    BlockBackend *blk;
+    AioContext *ctx;
    BlockAIOCB *acb;
    QEMUSGList *sg;
-    uint64_t sector_num;
+    uint64_t offset;
    DMADirection dir;
    int sg_cur_index;
    dma_addr_t sg_cur_byte;
    QEMUIOVector iov;
    QEMUBH *bh;
    DMAIOFunc *io_func;
+    void *io_func_opaque;
 } DMAAIOCB;

 static void dma_blk_cb(void *opaque, int ret);
@@ -130,7 +131,7 @@ static void dma_blk_cb(void *opaque, int ret)
    trace_dma_blk_cb(dbs, ret);

    dbs->acb = NULL;
-    dbs->sector_num += dbs->iov.size / 512;
+    dbs->offset += dbs->iov.size;

    if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
        dma_complete(dbs, ret);
@@ -154,8 +155,7 @@ static void dma_blk_cb(void *opaque, int ret)

    if (dbs->iov.size == 0) {
        trace_dma_map_wait(dbs);
-        dbs->bh = aio_bh_new(blk_get_aio_context(dbs->blk),
-                             reschedule_dma, dbs);
+        dbs->bh = aio_bh_new(dbs->ctx, reschedule_dma, dbs);
        cpu_register_map_client(dbs->bh);
        return;
    }
@@ -164,8 +164,8 @@ static void dma_blk_cb(void *opaque, int ret)
        qemu_iovec_discard_back(&dbs->iov, dbs->iov.size & ~BDRV_SECTOR_MASK);
    }

-    dbs->acb = dbs->io_func(dbs->blk, dbs->sector_num, &dbs->iov,
-                            dbs->iov.size / 512, dma_blk_cb, dbs);
+    dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
+                            dma_blk_cb, dbs, dbs->io_func_opaque);
    assert(dbs->acb);
 }

@@ -191,23 +191,25 @@ static const AIOCBInfo dma_aiocb_info = {
    .cancel_async       = dma_aio_cancel,
 };

-BlockAIOCB *dma_blk_io(
-    BlockBackend *blk, QEMUSGList *sg, uint64_t sector_num,
-    DMAIOFunc *io_func, BlockCompletionFunc *cb,
+BlockAIOCB *dma_blk_io(AioContext *ctx,
+    QEMUSGList *sg, uint64_t offset,
+    DMAIOFunc *io_func, void *io_func_opaque,
+    BlockCompletionFunc *cb,
    void *opaque, DMADirection dir)
 {
-    DMAAIOCB *dbs = blk_aio_get(&dma_aiocb_info, blk, cb, opaque);
+    DMAAIOCB *dbs = qemu_aio_get(&dma_aiocb_info, NULL, cb, opaque);

-    trace_dma_blk_io(dbs, blk, sector_num, (dir == DMA_DIRECTION_TO_DEVICE));
+    trace_dma_blk_io(dbs, io_func_opaque, offset, (dir == DMA_DIRECTION_TO_DEVICE));

    dbs->acb = NULL;
-    dbs->blk = blk;
    dbs->sg = sg;
-    dbs->sector_num = sector_num;
+    dbs->ctx = ctx;
+    dbs->offset = offset;
    dbs->sg_cur_index = 0;
    dbs->sg_cur_byte = 0;
    dbs->dir = dir;
    dbs->io_func = io_func;
+    dbs->io_func_opaque = io_func_opaque;
    dbs->bh = NULL;
    qemu_iovec_init(&dbs->iov, sg->nsg);
    dma_blk_cb(dbs, 0);
@@ -215,19 +217,39 @@ BlockAIOCB *dma_blk_io(
 }


+static
+BlockAIOCB *dma_blk_read_io_func(int64_t offset, QEMUIOVector *iov,
+                                 BlockCompletionFunc *cb, void *cb_opaque,
+                                 void *opaque)
+{
+    BlockBackend *blk = opaque;
+    return blk_aio_preadv(blk, offset, iov, 0, cb, cb_opaque);
+}
+
 BlockAIOCB *dma_blk_read(BlockBackend *blk,
-                         QEMUSGList *sg, uint64_t sector,
+                         QEMUSGList *sg, uint64_t offset,
                         void (*cb)(void *opaque, int ret), void *opaque)
 {
-    return dma_blk_io(blk, sg, sector, blk_aio_readv, cb, opaque,
+    return dma_blk_io(blk_get_aio_context(blk),
+                      sg, offset, dma_blk_read_io_func, blk, cb, opaque,
                      DMA_DIRECTION_FROM_DEVICE);
 }

+static
+BlockAIOCB *dma_blk_write_io_func(int64_t offset, QEMUIOVector *iov,
+                                  BlockCompletionFunc *cb, void *cb_opaque,
+                                  void *opaque)
+{
+    BlockBackend *blk = opaque;
+    return blk_aio_pwritev(blk, offset, iov, 0, cb, cb_opaque);
+}
+
 BlockAIOCB *dma_blk_write(BlockBackend *blk,
-                          QEMUSGList *sg, uint64_t sector,
+                          QEMUSGList *sg, uint64_t offset,
                          void (*cb)(void *opaque, int ret), void *opaque)
 {
-    return dma_blk_io(blk, sg, sector, blk_aio_writev, cb, opaque,
+    return dma_blk_io(blk_get_aio_context(blk),
+                      sg, offset, dma_blk_write_io_func, blk, cb, opaque,
                      DMA_DIRECTION_TO_DEVICE);
 }

--- a/docs/specs/rocker.txt
+++ b/docs/specs/rocker.txt
@@ -303,7 +303,7 @@ Endianness
 ----------

 Device registers are hard-coded to little-endian (LE).  The driver should
-convert to/from host endianess to LE for device register accesses.
+convert to/from host endianness to LE for device register accesses.

 Descriptors are LE.  Descriptor buffer TLVs will have LE type and length
 fields, but the value field can either be LE or network-byte-order, depending
--- a/docs/throttle.txt
+++ b/docs/throttle.txt
@@ -10,7 +10,7 @@ Introduction
 ------------
 QEMU includes a throttling module that can be used to set limits to
 I/O operations. The code itself is generic and independent of the I/O
-units, but it is currenly used to limit the number of bytes per second
+units, but it is currently used to limit the number of bytes per second
 and operations per second (IOPS) when performing disk I/O.

 This document explains how to use the throttling code in QEMU, and how
--- a/exec.c
+++ b/exec.c
@@ -36,6 +36,7 @@
 #include "qemu/timer.h"
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
+#include "qemu/log.h"
 #include "exec/memory.h"
 #include "sysemu/dma.h"
 #include "exec/address-spaces.h"
@@ -178,6 +179,12 @@ struct CPUAddressSpace {
    MemoryListener tcg_as_listener;
 };

+struct DirtyBitmapSnapshot {
+    ram_addr_t start;
+    ram_addr_t end;
+    unsigned long dirty[];
+};
+
 #endif

 #if !defined(CONFIG_USER_ONLY)
@@ -1030,6 +1037,75 @@ bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
    return dirty;
 }

+DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
+     (ram_addr_t start, ram_addr_t length, unsigned client)
+{
+    DirtyMemoryBlocks *blocks;
+    unsigned long align = 1UL << (TARGET_PAGE_BITS + BITS_PER_LEVEL);
+    ram_addr_t first = QEMU_ALIGN_DOWN(start, align);
+    ram_addr_t last  = QEMU_ALIGN_UP(start + length, align);
+    DirtyBitmapSnapshot *snap;
+    unsigned long page, end, dest;
+
+    snap = g_malloc0(sizeof(*snap) +
+                     ((last - first) >> (TARGET_PAGE_BITS + 3)));
+    snap->start = first;
+    snap->end   = last;
+
+    page = first >> TARGET_PAGE_BITS;
+    end  = last  >> TARGET_PAGE_BITS;
+    dest = 0;
+
+    rcu_read_lock();
+
+    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
+
+    while (page < end) {
+        unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
+        unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
+        unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
+
+        assert(QEMU_IS_ALIGNED(offset, (1 << BITS_PER_LEVEL)));
+        assert(QEMU_IS_ALIGNED(num,    (1 << BITS_PER_LEVEL)));
+        offset >>= BITS_PER_LEVEL;
+
+        bitmap_copy_and_clear_atomic(snap->dirty + dest,
+                                     blocks->blocks[idx] + offset,
+                                     num);
+        page += num;
+        dest += num >> BITS_PER_LEVEL;
+    }
+
+    rcu_read_unlock();
+
+    if (tcg_enabled()) {
+        tlb_reset_dirty_range_all(start, length);
+    }
+
+    return snap;
+}
+
+bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
+                                            ram_addr_t start,
+                                            ram_addr_t length)
+{
+    unsigned long page, end;
+
+    assert(start >= snap->start);
+    assert(start + length <= snap->end);
+
+    end = TARGET_PAGE_ALIGN(start + length - snap->start) >> TARGET_PAGE_BITS;
+    page = (start - snap->start) >> TARGET_PAGE_BITS;
+
+    while (page < end) {
+        if (test_bit(page, snap->dirty)) {
+            return true;
+        }
+        page++;
+    }
+    return false;
+}
+
 /* Called from RCU critical section */
 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
                                       MemoryRegionSection *section,
@@ -1242,11 +1318,13 @@ static void *file_ram_alloc(RAMBlock *block,
    int fd = -1;
    int64_t page_size;

+#ifndef TARGET_PPC
    if (kvm_enabled() && !kvm_has_sync_mmu()) {
        error_setg(errp,
                   "host lacks kvm mmu notifiers, -mem-path unsupported");
        return NULL;
    }
+#endif

    for (;;) {
        fd = open(path, O_RDWR);
@@ -1296,7 +1374,7 @@ static void *file_ram_alloc(RAMBlock *block,
    }

    page_size = qemu_fd_getpagesize(fd);
-    block->mr->align = page_size;
+    block->mr->align = MAX(page_size, QEMU_VMALLOC_ALIGN);

    if (memory < page_size) {
        error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
@@ -1317,7 +1395,8 @@ static void *file_ram_alloc(RAMBlock *block,
        perror("ftruncate");
    }

-    area = qemu_ram_mmap(fd, memory, page_size, block->flags & RAM_SHARED);
+    area = qemu_ram_mmap(fd, memory, block->mr->align,
+                         block->flags & RAM_SHARED);
    if (area == MAP_FAILED) {
        error_setg_errno(errp, errno,
                         "unable to map backing store for guest RAM");
@@ -1891,10 +1970,10 @@ void *qemu_get_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
         * In that case just map until the end of the page.
         */
        if (block->offset == 0) {
-            return xen_map_cache(addr, 0, 0);
+            return xen_map_cache(addr, 0, 0, false);
        }

-        block->host = xen_map_cache(block->offset, block->max_length, 1);
+        block->host = xen_map_cache(block->offset, block->max_length, 1, false);
    }
    return ramblock_ptr(block, addr - block->offset);
 }
@@ -1905,7 +1984,7 @@ void *qemu_get_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
 * Called within RCU critical section.
 */
 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
-                                 hwaddr *size)
+                                 hwaddr *size, bool lock)
 {
    RAMBlock *block = ram_block;
    ram_addr_t offset_inside_block;
@@ -1925,10 +2004,10 @@ static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
         * In that case just map the requested area.
         */
        if (block->offset == 0) {
-            return xen_map_cache(addr, *size, 1);
+            return xen_map_cache(addr, *size, lock, lock);
        }

-        block->host = xen_map_cache(block->offset, block->max_length, 1);
+        block->host = xen_map_cache(block->offset, block->max_length, 1, lock);
    }

    return ramblock_ptr(block, offset_inside_block);
@@ -2576,6 +2655,33 @@ static bool prepare_mmio_access(MemoryRegion *mr)
    return release_lock;
 }

+/**
+ * address_space_access_allowed
+ * @mr: #MemoryRegion to be accessed
+ * @attrs: memory transaction attributes
+ * @addr: address within that memory region
+ * @len: the number of bytes to access
+ *
+ * Check if a memory transaction is allowed.
+ *
+ * Returns: true if transaction is allowed, false if denied.
+ */
+static bool address_space_access_allowed(MemoryRegion *mr, MemTxAttrs attrs,
+                                         hwaddr addr, hwaddr len)
+{
+    if (likely(!attrs.memory)) {
+        return true;
+    }
+    if (memory_region_is_ram(mr)) {
+        return true;
+    }
+    qemu_log_mask(LOG_GUEST_ERROR,
+                  "Invalid access to non-RAM device at "
+                  "addr 0x%" HWADDR_PRIX ", size %" HWADDR_PRIu ", "
+                  "region '%s'\n", addr, len, memory_region_name(mr));
+    return false;
+}
+
 /* Called within RCU critical section.  */
 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
                                                MemTxAttrs attrs,
@@ -2589,7 +2695,10 @@ static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
    bool release_lock = false;

    for (;;) {
-        if (!memory_access_is_direct(mr, true)) {
+        if (!address_space_access_allowed(mr, attrs, addr1, l)) {
+            result |= MEMTX_ACCESS_ERROR;
+            /* Keep going. */
+        } else if (!memory_access_is_direct(mr, true)) {
            release_lock |= prepare_mmio_access(mr);
            l = memory_access_size(mr, l, addr1);
            /* XXX: could force current_cpu to NULL to avoid
@@ -2625,7 +2734,7 @@ static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
        } else {
            addr1 += memory_region_get_ram_addr(mr);
            /* RAM case */
-            ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
+            ptr = qemu_ram_ptr_length(mr->ram_block, addr1, &l, false);
            memcpy(ptr, buf, l);
            invalidate_and_set_dirty(mr, addr1, l);
        }
@@ -2662,8 +2771,12 @@ MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
        rcu_read_lock();
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, true);
-        result = address_space_write_continue(as, addr, attrs, buf, len,
-                                              addr1, l, mr);
+        if (!address_space_access_allowed(mr, attrs, addr, len)) {
+            result = MEMTX_ACCESS_ERROR;
+        } else {
+            result = address_space_write_continue(as, addr, attrs, buf, len,
+                                                  addr1, l, mr);
+        }
        rcu_read_unlock();
    }

@@ -2682,7 +2795,10 @@ MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
    bool release_lock = false;

    for (;;) {
-        if (!memory_access_is_direct(mr, false)) {
+        if (!address_space_access_allowed(mr, attrs, addr1, l)) {
+            result |= MEMTX_ACCESS_ERROR;
+            /* Keep going. */
+        } else if (!memory_access_is_direct(mr, false)) {
            /* I/O case */
            release_lock |= prepare_mmio_access(mr);
            l = memory_access_size(mr, l, addr1);
@@ -2716,8 +2832,9 @@ MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
            }
        } else {
            /* RAM case */
-            ptr = qemu_get_ram_ptr(mr->ram_block,
-                                   memory_region_get_ram_addr(mr) + addr1);
+            ptr = qemu_ram_ptr_length(mr->ram_block,
+                                      memory_region_get_ram_addr(mr) + addr1,
+                                      &l, false);
            memcpy(buf, ptr, l);
        }

@@ -2753,8 +2870,12 @@ MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
        rcu_read_lock();
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, false);
-        result = address_space_read_continue(as, addr, attrs, buf, len,
-                                             addr1, l, mr);
+        if (!address_space_access_allowed(mr, attrs, addr, len)) {
+            result = MEMTX_ACCESS_ERROR;
+        } else {
+            result = address_space_read_continue(as, addr, attrs, buf, len,
+                                                 addr1, l, mr);
+        }
        rcu_read_unlock();
    }

@@ -2974,6 +3095,7 @@ void *address_space_map(AddressSpace *as,
    if (!memory_access_is_direct(mr, is_write)) {
        if (atomic_xchg(&bounce.in_use, true)) {
            rcu_read_unlock();
+            *plen = 0;
            return NULL;
        }
        /* Avoid unbounded allocations */
@@ -3014,7 +3136,7 @@ void *address_space_map(AddressSpace *as,

    memory_region_ref(mr);
    *plen = done;
-    ptr = qemu_ram_ptr_length(mr->ram_block, raddr + base, plen);
+    ptr = qemu_ram_ptr_length(mr->ram_block, raddr + base, plen, true);
    rcu_read_unlock();

    return ptr;
--- a/fsdev/9p-iov-marshal.c
+++ b/fsdev/9p-iov-marshal.c
@@ -127,7 +127,7 @@ ssize_t v9fs_iov_vunmarshal(struct iovec *out_sg, int out_num, size_t offset,
                str->data = g_malloc(str->size + 1);
                copied = v9fs_unpack(str->data, out_sg, out_num, offset,
                                     str->size);
-                if (copied > 0) {
+                if (copied >= 0) {
                    str->data[str->size] = 0;
                } else {
                    v9fs_string_free(str);
--- a/fsdev/file-op-9p.h
+++ b/fsdev/file-op-9p.h
@@ -99,6 +99,7 @@ struct FileOperations
 {
    int (*parse_opts)(QemuOpts *, struct FsDriverEntry *);
    int (*init)(struct FsContext *);
+    void (*cleanup)(struct FsContext *);
    int (*lstat)(FsContext *, V9fsPath *, struct stat *);
    ssize_t (*readlink)(FsContext *, V9fsPath *, char *, size_t);
    int (*chmod)(FsContext *, V9fsPath *, FsCred *);
--- a/fsdev/virtfs-proxy-helper.c
+++ b/fsdev/virtfs-proxy-helper.c
@@ -9,6 +9,13 @@
 * the COPYING file in the top-level directory.
 */

+/* work around a broken sys/capability.h */
+#if defined(__i386__)
+typedef unsigned long long __u64;
+#endif
+#if defined(__powerpc64__)
+#include <asm/types.h>
+#endif
 #include "qemu/osdep.h"
 #include <sys/resource.h>
 #include <getopt.h>
@@ -26,6 +33,7 @@
 #include "qemu/xattr.h"
 #include "9p-iov-marshal.h"
 #include "hw/9pfs/9p-proxy.h"
+#include "hw/9pfs/9p-util.h"
 #include "fsdev/9p-iov-marshal.h"

 #define PROGNAME "virtfs-proxy-helper"
@@ -349,6 +357,28 @@ static void resetugid(int suid, int sgid)
    }
 }

+/*
+ * Open regular file or directory. Attempts to open any special file are
+ * rejected.
+ *
+ * returns file descriptor or -1 on error
+ */
+static int open_regular(const char *pathname, int flags, mode_t mode)
+{
+    int fd;
+
+    fd = open(pathname, flags, mode);
+    if (fd < 0) {
+        return fd;
+    }
+
+    if (close_if_special_file(fd) < 0) {
+        return -1;
+    }
+
+    return fd;
+}
+
 /*
 * send response in two parts
 * 1) ProxyHeader
@@ -693,7 +723,7 @@ static int do_create(struct iovec *iovec)
    if (ret < 0) {
        goto unmarshal_err_out;
    }
-    ret = open(path.data, flags, mode);
+    ret = open_regular(path.data, flags, mode);
    if (ret < 0) {
        ret = -errno;
    }
@@ -718,7 +748,7 @@ static int do_open(struct iovec *iovec)
    if (ret < 0) {
        goto err_out;
    }
-    ret = open(path.data, flags);
+    ret = open_regular(path.data, flags, 0);
    if (ret < 0) {
        ret = -errno;
    }
--- a/hw/9pfs/9p-handle.c
+++ b/hw/9pfs/9p-handle.c
@@ -112,7 +112,7 @@ static int handle_close(FsContext *ctx, V9fsFidOpenState *fs)

 static int handle_closedir(FsContext *ctx, V9fsFidOpenState *fs)
 {
-    return closedir(fs->dir);
+    return closedir(fs->dir.stream);
 }

 static int handle_open(FsContext *ctx, V9fsPath *fs_path,
@@ -132,8 +132,8 @@ static int handle_opendir(FsContext *ctx,
    if (ret < 0) {
        return -1;
    }
-    fs->dir = fdopendir(ret);
-    if (!fs->dir) {
+    fs->dir.stream = fdopendir(ret);
+    if (!fs->dir.stream) {
        return -1;
    }
    return 0;
@@ -141,24 +141,24 @@ static int handle_opendir(FsContext *ctx,

 static void handle_rewinddir(FsContext *ctx, V9fsFidOpenState *fs)
 {
-    rewinddir(fs->dir);
+    rewinddir(fs->dir.stream);
 }

 static off_t handle_telldir(FsContext *ctx, V9fsFidOpenState *fs)
 {
-    return telldir(fs->dir);
+    return telldir(fs->dir.stream);
 }

 static int handle_readdir_r(FsContext *ctx, V9fsFidOpenState *fs,
                            struct dirent *entry,
                            struct dirent **result)
 {
-    return readdir_r(fs->dir, entry, result);
+    return readdir_r(fs->dir.stream, entry, result);
 }

 static void handle_seekdir(FsContext *ctx, V9fsFidOpenState *fs, off_t off)
 {
-    seekdir(fs->dir, off);
+    seekdir(fs->dir.stream, off);
 }

 static ssize_t handle_preadv(FsContext *ctx, V9fsFidOpenState *fs,
@@ -262,7 +262,7 @@ static int handle_fstat(FsContext *fs_ctx, int fid_type,
    int fd;

    if (fid_type == P9_FID_DIR) {
-        fd = dirfd(fs->dir);
+        fd = dirfd(fs->dir.stream);
    } else {
        fd = fs->fd;
    }
@@ -409,7 +409,7 @@ static int handle_fsync(FsContext *ctx, int fid_type,
    int fd;

    if (fid_type == P9_FID_DIR) {
-        fd = dirfd(fs->dir);
+        fd = dirfd(fs->dir.stream);
    } else {
        fd = fs->fd;
    }
@@ -651,6 +651,14 @@ out:
    return ret;
 }

+static void handle_cleanup(FsContext *ctx)
+{
+    struct handle_data *data = ctx->private;
+
+    close(data->mountfd);
+    g_free(data);
+}
+
 static int handle_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
 {
    const char *sec_model = qemu_opt_get(opts, "security_model");
@@ -673,6 +681,7 @@ static int handle_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
 FileOperations handle_ops = {
    .parse_opts   = handle_parse_opts,
    .init         = handle_init,
+    .cleanup      = handle_cleanup,
    .lstat        = handle_lstat,
    .readlink     = handle_readlink,
    .close        = handle_close,
--- a/hw/9pfs/9p-local.c
+++ b/hw/9pfs/9p-local.c
--- a/hw/9pfs/9p-local.h
+++ b/hw/9pfs/9p-local.h
@@ -0,0 +1,20 @@
+/*
+ * 9p local backend utilities
+ *
+ * Copyright IBM, Corp. 2017
+ *
+ * Authors:
+ *  Greg Kurz <groug@kaod.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_9P_LOCAL_H
+#define QEMU_9P_LOCAL_H
+
+int local_open_nofollow(FsContext *fs_ctx, const char *path, int flags,
+                        mode_t mode);
+int local_opendir_nofollow(FsContext *fs_ctx, const char *path);
+
+#endif
--- a/hw/9pfs/9p-posix-acl.c
+++ b/hw/9pfs/9p-posix-acl.c
@@ -25,13 +25,7 @@
 static ssize_t mp_pacl_getxattr(FsContext *ctx, const char *path,
                                const char *name, void *value, size_t size)
 {
-    char *buffer;
-    ssize_t ret;
-
-    buffer = rpath(ctx, path);
-    ret = lgetxattr(buffer, MAP_ACL_ACCESS, value, size);
-    g_free(buffer);
-    return ret;
+    return local_getxattr_nofollow(ctx, path, MAP_ACL_ACCESS, value, size);
 }

 static ssize_t mp_pacl_listxattr(FsContext *ctx, const char *path,
@@ -56,23 +50,16 @@ static ssize_t mp_pacl_listxattr(FsContext *ctx, const char *path,
 static int mp_pacl_setxattr(FsContext *ctx, const char *path, const char *name,
                            void *value, size_t size, int flags)
 {
-    char *buffer;
-    int ret;
-
-    buffer = rpath(ctx, path);
-    ret = lsetxattr(buffer, MAP_ACL_ACCESS, value, size, flags);
-    g_free(buffer);
-    return ret;
+    return local_setxattr_nofollow(ctx, path, MAP_ACL_ACCESS, value, size,
+                                   flags);
 }

 static int mp_pacl_removexattr(FsContext *ctx,
                               const char *path, const char *name)
 {
    int ret;
-    char *buffer;

-    buffer = rpath(ctx, path);
-    ret  = lremovexattr(buffer, MAP_ACL_ACCESS);
+    ret = local_removexattr_nofollow(ctx, path, MAP_ACL_ACCESS);
    if (ret == -1 && errno == ENODATA) {
        /*
         * We don't get ENODATA error when trying to remove a
@@ -82,20 +69,13 @@ static int mp_pacl_removexattr(FsContext *ctx,
        errno = 0;
        ret = 0;
    }
-    g_free(buffer);
    return ret;
 }

 static ssize_t mp_dacl_getxattr(FsContext *ctx, const char *path,
                                const char *name, void *value, size_t size)
 {
-    char *buffer;
-    ssize_t ret;
-
-    buffer = rpath(ctx, path);
-    ret = lgetxattr(buffer, MAP_ACL_DEFAULT, value, size);
-    g_free(buffer);
-    return ret;
+    return local_getxattr_nofollow(ctx, path, MAP_ACL_DEFAULT, value, size);
 }

 static ssize_t mp_dacl_listxattr(FsContext *ctx, const char *path,
@@ -120,23 +100,16 @@ static ssize_t mp_dacl_listxattr(FsContext *ctx, const char *path,
 static int mp_dacl_setxattr(FsContext *ctx, const char *path, const char *name,
                            void *value, size_t size, int flags)
 {
-    char *buffer;
-    int ret;
-
-    buffer = rpath(ctx, path);
-    ret = lsetxattr(buffer, MAP_ACL_DEFAULT, value, size, flags);
-    g_free(buffer);
-    return ret;
+    return local_setxattr_nofollow(ctx, path, MAP_ACL_DEFAULT, value, size,
+                                   flags);
 }

 static int mp_dacl_removexattr(FsContext *ctx,
                               const char *path, const char *name)
 {
    int ret;
-    char *buffer;

-    buffer = rpath(ctx, path);
-    ret  = lremovexattr(buffer, MAP_ACL_DEFAULT);
+    ret = local_removexattr_nofollow(ctx, path, MAP_ACL_DEFAULT);
    if (ret == -1 && errno == ENODATA) {
        /*
         * We don't get ENODATA error when trying to remove a
@@ -146,7 +119,6 @@ static int mp_dacl_removexattr(FsContext *ctx,
        errno = 0;
        ret = 0;
    }
-    g_free(buffer);
    return ret;
 }

--- a/hw/9pfs/9p-proxy.c
+++ b/hw/9pfs/9p-proxy.c
@@ -633,7 +633,7 @@ static int proxy_close(FsContext *ctx, V9fsFidOpenState *fs)

 static int proxy_closedir(FsContext *ctx, V9fsFidOpenState *fs)
 {
-    return closedir(fs->dir);
+    return closedir(fs->dir.stream);
 }

 static int proxy_open(FsContext *ctx, V9fsPath *fs_path,
@@ -652,14 +652,14 @@ static int proxy_opendir(FsContext *ctx,
 {
    int serrno, fd;

-    fs->dir = NULL;
+    fs->dir.stream = NULL;
    fd = v9fs_request(ctx->private, T_OPEN, NULL, "sd", fs_path, O_DIRECTORY);
    if (fd < 0) {
        errno = -fd;
        return -1;
    }
-    fs->dir = fdopendir(fd);
-    if (!fs->dir) {
+    fs->dir.stream = fdopendir(fd);
+    if (!fs->dir.stream) {
        serrno = errno;
        close(fd);
        errno = serrno;
@@ -670,24 +670,24 @@ static int proxy_opendir(FsContext *ctx,

 static void proxy_rewinddir(FsContext *ctx, V9fsFidOpenState *fs)
 {
-    rewinddir(fs->dir);
+    rewinddir(fs->dir.stream);
 }

 static off_t proxy_telldir(FsContext *ctx, V9fsFidOpenState *fs)
 {
-    return telldir(fs->dir);
+    return telldir(fs->dir.stream);
 }

 static int proxy_readdir_r(FsContext *ctx, V9fsFidOpenState *fs,
                           struct dirent *entry,
                           struct dirent **result)
 {
-    return readdir_r(fs->dir, entry, result);
+    return readdir_r(fs->dir.stream, entry, result);
 }

 static void proxy_seekdir(FsContext *ctx, V9fsFidOpenState *fs, off_t off)
 {
-    seekdir(fs->dir, off);
+    seekdir(fs->dir.stream, off);
 }

 static ssize_t proxy_preadv(FsContext *ctx, V9fsFidOpenState *fs,
@@ -791,7 +791,7 @@ static int proxy_fstat(FsContext *fs_ctx, int fid_type,
    int fd;

    if (fid_type == P9_FID_DIR) {
-        fd = dirfd(fs->dir);
+        fd = dirfd(fs->dir.stream);
    } else {
        fd = fs->fd;
    }
@@ -936,7 +936,7 @@ static int proxy_fsync(FsContext *ctx, int fid_type,
    int fd;

    if (fid_type == P9_FID_DIR) {
-        fd = dirfd(fs->dir);
+        fd = dirfd(fs->dir.stream);
    } else {
        fd = fs->fd;
    }
@@ -1033,13 +1033,10 @@ static int proxy_name_to_path(FsContext *ctx, V9fsPath *dir_path,
                              const char *name, V9fsPath *target)
 {
    if (dir_path) {
-        v9fs_string_sprintf((V9fsString *)target, "%s/%s",
-                            dir_path->data, name);
+        v9fs_path_sprintf(target, "%s/%s", dir_path->data, name);
    } else {
-        v9fs_string_sprintf((V9fsString *)target, "%s", name);
+        v9fs_path_sprintf(target, "%s", name);
    }
-    /* Bump the size for including terminating NULL */
-    target->size++;
    return 0;
 }

@@ -1181,9 +1178,22 @@ static int proxy_init(FsContext *ctx)
    return 0;
 }

+static void proxy_cleanup(FsContext *ctx)
+{
+    V9fsProxy *proxy = ctx->private;
+
+    g_free(proxy->out_iovec.iov_base);
+    g_free(proxy->in_iovec.iov_base);
+    if (ctx->export_flags & V9FS_PROXY_SOCK_NAME) {
+        close(proxy->sockfd);
+    }
+    g_free(proxy);
+}
+
 FileOperations proxy_ops = {
    .parse_opts   = proxy_parse_opts,
    .init         = proxy_init,
+    .cleanup      = proxy_cleanup,
    .lstat        = proxy_lstat,
    .readlink     = proxy_readlink,
    .close        = proxy_close,
--- a/hw/9pfs/9p-util.c
+++ b/hw/9pfs/9p-util.c
@@ -0,0 +1,69 @@
+/*
+ * 9p utilities
+ *
+ * Copyright IBM, Corp. 2017
+ *
+ * Authors:
+ *  Greg Kurz <groug@kaod.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/xattr.h"
+#include "9p-util.h"
+
+int relative_openat_nofollow(int dirfd, const char *path, int flags,
+                             mode_t mode)
+{
+    int fd;
+
+    fd = dup(dirfd);
+    if (fd == -1) {
+        return -1;
+    }
+
+    while (*path) {
+        const char *c;
+        int next_fd;
+        char *head;
+
+        /* Only relative paths without consecutive slashes */
+        assert(path[0] != '/');
+
+        head = g_strdup(path);
+        c = strchr(path, '/');
+        if (c) {
+            head[c - path] = 0;
+            next_fd = openat_dir(fd, head);
+        } else {
+            next_fd = openat_file(fd, head, flags, mode);
+        }
+        g_free(head);
+        if (next_fd == -1) {
+            close_preserve_errno(fd);
+            return -1;
+        }
+        close(fd);
+        fd = next_fd;
+
+        if (!c) {
+            break;
+        }
+        path = c + 1;
+    }
+
+    return fd;
+}
+
+ssize_t fgetxattrat_nofollow(int dirfd, const char *filename, const char *name,
+                             void *value, size_t size)
+{
+    /* Address @filename relative to @dirfd through its /proc/self/fd link. */
+    char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
+    /* ssize_t matches lgetxattr(); an int would narrow its result. */
+    ssize_t ret = lgetxattr(proc_path, name, value, size);
+    g_free(proc_path);
+    return ret;
+}
--- a/hw/9pfs/9p-util.h
+++ b/hw/9pfs/9p-util.h
@@ -0,0 +1,98 @@
+/*
+ * 9p utilities
+ *
+ * Copyright IBM, Corp. 2017
+ *
+ * Authors:
+ *  Greg Kurz <groug@kaod.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_9P_UTIL_H
+#define QEMU_9P_UTIL_H
+
+#include "qemu/error-report.h"
+
+static inline void close_preserve_errno(int fd)
+{
+    int serrno = errno;
+    close(fd);
+    errno = serrno;
+}
+
+/**
+ * close_if_special_file() - Close @fd if neither regular file nor directory.
+ *
+ * @fd: file descriptor of open file
+ * Return: 0 on regular file or directory, -1 otherwise
+ *
+ * CVE-2023-2861: Prohibit opening any special file directly on host
+ * (especially device files), as a compromised client could potentially gain
+ * access outside exported tree under certain, unsafe setups. We expect
+ * client to handle I/O on special files exclusively on guest side.
+ */
+static inline int close_if_special_file(int fd)
+{
+    struct stat stbuf;
+
+    if (fstat(fd, &stbuf) < 0) {
+        close_preserve_errno(fd);
+        return -1;
+    }
+    if (!S_ISREG(stbuf.st_mode) && !S_ISDIR(stbuf.st_mode)) {
+        error_report_once(
+            "9p: broken or compromised client detected; attempt to open "
+            "special file (i.e. neither regular file, nor directory)"
+        );
+        close(fd);
+        errno = ENXIO;
+        return -1;
+    }
+
+    return 0;
+}
+
+static inline int openat_dir(int dirfd, const char *name)
+{
+#ifdef O_PATH
+#define OPENAT_DIR_O_PATH O_PATH
+#else
+#define OPENAT_DIR_O_PATH 0
+#endif
+    return openat(dirfd, name,
+                  O_DIRECTORY | O_RDONLY | O_NOFOLLOW | OPENAT_DIR_O_PATH);
+}
+
+static inline int openat_file(int dirfd, const char *name, int flags,
+                              mode_t mode)
+{
+    int fd, serrno, ret;
+
+    fd = openat(dirfd, name, flags | O_NOFOLLOW | O_NOCTTY | O_NONBLOCK,
+                mode);
+    if (fd == -1) {
+        return -1;
+    }
+
+    if (close_if_special_file(fd) < 0) {
+        return -1;
+    }
+
+    serrno = errno;
+    /* O_NONBLOCK was only needed to open the file. Let's drop it. */
+    ret = fcntl(fd, F_SETFL, flags);
+    assert(!ret);
+    errno = serrno;
+    return fd;
+}
+
+int relative_openat_nofollow(int dirfd, const char *path, int flags,
+                             mode_t mode);
+ssize_t fgetxattrat_nofollow(int dirfd, const char *path, const char *name,
+                             void *value, size_t size);
+int fsetxattrat_nofollow(int dirfd, const char *path, const char *name,
+                         void *value, size_t size, int flags);
+
+#endif
--- a/hw/9pfs/9p-xattr-user.c
+++ b/hw/9pfs/9p-xattr-user.c
@@ -20,9 +20,6 @@
 static ssize_t mp_user_getxattr(FsContext *ctx, const char *path,
                                const char *name, void *value, size_t size)
 {
-    char *buffer;
-    ssize_t ret;
-
    if (strncmp(name, "user.virtfs.", 12) == 0) {
        /*
         * Don't allow fetch of user.virtfs namesapce
@@ -31,10 +28,7 @@ static ssize_t mp_user_getxattr(FsContext *ctx, const char *path,
        errno = ENOATTR;
        return -1;
    }
-    buffer = rpath(ctx, path);
-    ret = lgetxattr(buffer, name, value, size);
-    g_free(buffer);
-    return ret;
+    return local_getxattr_nofollow(ctx, path, name, value, size);
 }

 static ssize_t mp_user_listxattr(FsContext *ctx, const char *path,
@@ -73,9 +67,6 @@ static ssize_t mp_user_listxattr(FsContext *ctx, const char *path,
 static int mp_user_setxattr(FsContext *ctx, const char *path, const char *name,
                            void *value, size_t size, int flags)
 {
-    char *buffer;
-    int ret;
-
    if (strncmp(name, "user.virtfs.", 12) == 0) {
        /*
         * Don't allow fetch of user.virtfs namesapce
@@ -84,18 +75,12 @@ static int mp_user_setxattr(FsContext *ctx, const char *path, const char *name,
        errno = EACCES;
        return -1;
    }
-    buffer = rpath(ctx, path);
-    ret = lsetxattr(buffer, name, value, size, flags);
-    g_free(buffer);
-    return ret;
+    return local_setxattr_nofollow(ctx, path, name, value, size, flags);
 }

 static int mp_user_removexattr(FsContext *ctx,
                               const char *path, const char *name)
 {
-    char *buffer;
-    int ret;
-
    if (strncmp(name, "user.virtfs.", 12) == 0) {
        /*
         * Don't allow fetch of user.virtfs namesapce
@@ -104,10 +89,7 @@ static int mp_user_removexattr(FsContext *ctx,
        errno = EACCES;
        return -1;
    }
-    buffer = rpath(ctx, path);
-    ret = lremovexattr(buffer, name);
-    g_free(buffer);
-    return ret;
+    return local_removexattr_nofollow(ctx, path, name);
 }

 XattrOperations mapped_user_xattr = {
--- a/hw/9pfs/9p-xattr.c
+++ b/hw/9pfs/9p-xattr.c
@@ -15,6 +15,8 @@
 #include "9p.h"
 #include "fsdev/file-op-9p.h"
 #include "9p-xattr.h"
+#include "9p-util.h"
+#include "9p-local.h"


 static XattrOperations *get_xattr_operations(XattrOperations **h,
@@ -58,6 +60,16 @@ ssize_t pt_listxattr(FsContext *ctx, const char *path,
    return name_size;
 }

+/* llistxattr() @filename below @dirfd, addressed via /proc/self/fd. */
+static ssize_t flistxattrat_nofollow(int dirfd, const char *filename,
+                                     char *list, size_t size)
+{
+    char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
+    ssize_t ret = llistxattr(proc_path, list, size); /* keep full ssize_t */
+
+    g_free(proc_path);
+    return ret;
+}

 /*
 * Get the list and pass to each layer to find out whether
@@ -67,24 +79,38 @@ ssize_t v9fs_list_xattr(FsContext *ctx, const char *path,
                        void *value, size_t vsize)
 {
    ssize_t size = 0;
-    char *buffer;
    void *ovalue = value;
    XattrOperations *xops;
    char *orig_value, *orig_value_start;
    ssize_t xattr_len, parsed_len = 0, attr_len;
+    char *dirpath, *name;
+    int dirfd;

    /* Get the actual len */
-    buffer = rpath(ctx, path);
-    xattr_len = llistxattr(buffer, value, 0);
+    dirpath = g_path_get_dirname(path);
+    dirfd = local_opendir_nofollow(ctx, dirpath);
+    g_free(dirpath);
+    if (dirfd == -1) {
+        return -1;
+    }
+
+    name = g_path_get_basename(path);
+    xattr_len = flistxattrat_nofollow(dirfd, name, value, 0);
    if (xattr_len <= 0) {
-        g_free(buffer);
+        g_free(name);
+        close_preserve_errno(dirfd);
        return xattr_len;
    }

    /* Now fetch the xattr and find the actual size */
    orig_value = g_malloc(xattr_len);
-    xattr_len = llistxattr(buffer, orig_value, xattr_len);
-    g_free(buffer);
+    xattr_len = flistxattrat_nofollow(dirfd, name, orig_value, xattr_len);
+    g_free(name);
+    close_preserve_errno(dirfd);
+    if (xattr_len < 0) {
+        g_free(orig_value);
+        return -1;
+    }

    /* store the orig pointer */
    orig_value_start = orig_value;
@@ -143,6 +169,135 @@ int v9fs_remove_xattr(FsContext *ctx,

 }

+/*
+ * Get xattr @name of @path: the parent directory is opened with
+ * local_opendir_nofollow() and the basename is queried relative to it.
+ */
+ssize_t local_getxattr_nofollow(FsContext *ctx, const char *path,
+                                const char *name, void *value, size_t size)
+{
+    char *parent = g_path_get_dirname(path);
+    char *leaf = g_path_get_basename(path);
+    int parent_fd = local_opendir_nofollow(ctx, parent);
+    ssize_t len = -1;
+
+    if (parent_fd != -1) {
+        len = fgetxattrat_nofollow(parent_fd, leaf, name, value, size);
+        close_preserve_errno(parent_fd);
+    }
+    g_free(parent);
+    g_free(leaf);
+    return len;
+}
+
+ssize_t pt_getxattr(FsContext *ctx, const char *path, const char *name,
+                    void *value, size_t size)
+{
+    return local_getxattr_nofollow(ctx, path, name, value, size);
+}
+
+/* lsetxattr() @filename below @dirfd, addressed via /proc/self/fd. */
+int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name,
+                         void *value, size_t size, int flags)
+{
+    char *fd_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
+    int rc = lsetxattr(fd_path, name, value, size, flags);
+
+    g_free(fd_path);
+    return rc;
+}
+
+/*
+ * Set xattr @name of @path: the parent directory is opened with
+ * local_opendir_nofollow() and the basename is updated relative to it.
+ */
+ssize_t local_setxattr_nofollow(FsContext *ctx, const char *path,
+                                const char *name, void *value, size_t size,
+                                int flags)
+{
+    char *parent = g_path_get_dirname(path);
+    char *leaf = g_path_get_basename(path);
+    int parent_fd = local_opendir_nofollow(ctx, parent);
+    ssize_t rc = -1;
+
+    if (parent_fd != -1) {
+        rc = fsetxattrat_nofollow(parent_fd, leaf, name, value, size, flags);
+        close_preserve_errno(parent_fd);
+    }
+    g_free(parent);
+    g_free(leaf);
+    return rc;
+}
+
+int pt_setxattr(FsContext *ctx, const char *path, const char *name, void *value,
+                size_t size, int flags)
+{
+    return local_setxattr_nofollow(ctx, path, name, value, size, flags);
+}
+
+/* lremovexattr() @filename below @dirfd, addressed via /proc/self/fd. */
+static ssize_t fremovexattrat_nofollow(int dirfd, const char *filename,
+                                       const char *name)
+{
+    char *fd_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
+    int rc = lremovexattr(fd_path, name);
+
+    g_free(fd_path);
+    return rc;
+}
+
+/*
+ * Remove xattr @name of @path: the parent directory is opened with
+ * local_opendir_nofollow() and the basename is handled relative to it.
+ */
+ssize_t local_removexattr_nofollow(FsContext *ctx, const char *path,
+                                   const char *name)
+{
+    char *parent = g_path_get_dirname(path);
+    char *leaf = g_path_get_basename(path);
+    int parent_fd = local_opendir_nofollow(ctx, parent);
+    ssize_t rc = -1;
+
+    if (parent_fd != -1) {
+        rc = fremovexattrat_nofollow(parent_fd, leaf, name);
+        close_preserve_errno(parent_fd);
+    }
+    g_free(parent);
+    g_free(leaf);
+    return rc;
+}
+
+int pt_removexattr(FsContext *ctx, const char *path, const char *name)
+{
+    return local_removexattr_nofollow(ctx, path, name);
+}
+
+ssize_t notsup_getxattr(FsContext *ctx, const char *path, const char *name,
+                        void *value, size_t size)
+{
+    errno = ENOTSUP; /* unconditional: none of the arguments is examined */
+    return -1;
+}
+
+int notsup_setxattr(FsContext *ctx, const char *path, const char *name,
+                    void *value, size_t size, int flags)
+{
+    errno = ENOTSUP; /* unconditional: none of the arguments is examined */
+    return -1;
+}
+
+ssize_t notsup_listxattr(FsContext *ctx, const char *path, char *name,
+                         void *value, size_t size)
+{
+    return 0; /* no error is raised here, unlike the other notsup_* stubs */
+}
+
+int notsup_removexattr(FsContext *ctx, const char *path, const char *name)
+{
+    errno = ENOTSUP; /* unconditional: none of the arguments is examined */
+    return -1;
+}
+
 XattrOperations *mapped_xattr_ops[] = {
    &mapped_user_xattr,
    &mapped_pacl_xattr,
--- a/hw/9pfs/9p-xattr.h
+++ b/hw/9pfs/9p-xattr.h
@@ -28,6 +28,13 @@ typedef struct xattr_operations
                       const char *path, const char *name);
 } XattrOperations;

+ssize_t local_getxattr_nofollow(FsContext *ctx, const char *path,
+                                const char *name, void *value, size_t size);
+ssize_t local_setxattr_nofollow(FsContext *ctx, const char *path,
+                                const char *name, void *value, size_t size,
+                                int flags);
+ssize_t local_removexattr_nofollow(FsContext *ctx, const char *path,
+                                   const char *name);

 extern XattrOperations mapped_user_xattr;
 extern XattrOperations passthrough_user_xattr;
@@ -48,73 +55,21 @@ ssize_t v9fs_list_xattr(FsContext *ctx, const char *path, void *value,
 int v9fs_set_xattr(FsContext *ctx, const char *path, const char *name,
                          void *value, size_t size, int flags);
 int v9fs_remove_xattr(FsContext *ctx, const char *path, const char *name);
+
 ssize_t pt_listxattr(FsContext *ctx, const char *path, char *name, void *value,
                     size_t size);
+ssize_t pt_getxattr(FsContext *ctx, const char *path, const char *name,
+                    void *value, size_t size);
+int pt_setxattr(FsContext *ctx, const char *path, const char *name, void *value,
+                size_t size, int flags);
+int pt_removexattr(FsContext *ctx, const char *path, const char *name);

-static inline ssize_t pt_getxattr(FsContext *ctx, const char *path,
-                                  const char *name, void *value, size_t size)
-{
-    char *buffer;
-    ssize_t ret;
-
-    buffer = rpath(ctx, path);
-    ret = lgetxattr(buffer, name, value, size);
-    g_free(buffer);
-    return ret;
-}
-
-static inline int pt_setxattr(FsContext *ctx, const char *path,
-                              const char *name, void *value,
-                              size_t size, int flags)
-{
-    char *buffer;
-    int ret;
-
-    buffer = rpath(ctx, path);
-    ret = lsetxattr(buffer, name, value, size, flags);
-    g_free(buffer);
-    return ret;
-}
-
-static inline int pt_removexattr(FsContext *ctx,
-                                 const char *path, const char *name)
-{
-    char *buffer;
-    int ret;
-
-    buffer = rpath(ctx, path);
-    ret = lremovexattr(path, name);
-    g_free(buffer);
-    return ret;
-}
-
-static inline ssize_t notsup_getxattr(FsContext *ctx, const char *path,
-                                      const char *name, void *value,
-                                      size_t size)
-{
-    errno = ENOTSUP;
-    return -1;
-}
-
-static inline int notsup_setxattr(FsContext *ctx, const char *path,
-                                  const char *name, void *value,
-                                  size_t size, int flags)
-{
-    errno = ENOTSUP;
-    return -1;
-}
-
-static inline ssize_t notsup_listxattr(FsContext *ctx, const char *path,
-                                       char *name, void *value, size_t size)
-{
-    return 0;
-}
-
-static inline int notsup_removexattr(FsContext *ctx,
-                                     const char *path, const char *name)
-{
-    errno = ENOTSUP;
-    return -1;
-}
+ssize_t notsup_getxattr(FsContext *ctx, const char *path, const char *name,
+                        void *value, size_t size);
+int notsup_setxattr(FsContext *ctx, const char *path, const char *name,
+                    void *value, size_t size, int flags);
+ssize_t notsup_listxattr(FsContext *ctx, const char *path, char *name,
+                         void *value, size_t size);
+int notsup_removexattr(FsContext *ctx, const char *path, const char *name);

 #endif
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -12,6 +12,7 @@
 */

 #include "qemu/osdep.h"
+#include <glib/gprintf.h>
 #include "hw/virtio/virtio.h"
 #include "hw/i386/pc.h"
 #include "qapi/error.h"
@@ -180,6 +181,20 @@ void v9fs_path_free(V9fsPath *path)
    path->size = 0;
 }

+
+void GCC_FMT_ATTR(2, 3)
+v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...)
+{
+    va_list ap;
+
+    v9fs_path_free(path); /* release the previous path first */
+
+    va_start(ap, fmt);
+    /* g_vasprintf() does not count the terminating NUL byte, so add 1 */
+    path->size = g_vasprintf(&path->data, fmt, ap) + 1;
+    va_end(ap);
+}
+
 void v9fs_path_copy(V9fsPath *lhs, V9fsPath *rhs)
 {
    v9fs_path_free(lhs);
@@ -232,7 +247,7 @@ static int v9fs_reopen_fid(V9fsPDU *pdu, V9fsFidState *f)
            } while (err == -EINTR && !pdu->cancelled);
        }
    } else if (f->fid_type == P9_FID_DIR) {
-        if (f->fs.dir == NULL) {
+        if (f->fs.dir.stream == NULL) {
            do {
                err = v9fs_co_opendir(pdu, f);
            } while (err == -EINTR && !pdu->cancelled);
@@ -346,7 +361,7 @@ static int free_fid(V9fsPDU *pdu, V9fsFidState *fidp)
            retval = v9fs_co_close(pdu, &fidp->fs);
        }
    } else if (fidp->fid_type == P9_FID_DIR) {
-        if (fidp->fs.dir != NULL) {
+        if (fidp->fs.dir.stream != NULL) {
            retval = v9fs_co_closedir(pdu, &fidp->fs);
        }
    } else if (fidp->fid_type == P9_FID_XATTR) {
@@ -444,7 +459,7 @@ void v9fs_reclaim_fd(V9fsPDU *pdu)
                reclaim_count++;
            }
        } else if (f->fid_type == P9_FID_DIR) {
-            if (f->fs.dir != NULL) {
+            if (f->fs.dir.stream != NULL) {
                /*
                 * Up the reference count so that
                 * a clunk request won't free this fid
@@ -452,8 +467,8 @@ void v9fs_reclaim_fd(V9fsPDU *pdu)
                f->ref++;
                f->rclm_lst = reclaim_list;
                reclaim_list = f;
-                f->fs_reclaim.dir = f->fs.dir;
-                f->fs.dir = NULL;
+                f->fs_reclaim.dir.stream = f->fs.dir.stream;
+                f->fs.dir.stream = NULL;
                reclaim_count++;
            }
        }
@@ -486,9 +501,9 @@ static int v9fs_mark_fids_unreclaim(V9fsPDU *pdu, V9fsPath *path)
 {
    int err;
    V9fsState *s = pdu->s;
-    V9fsFidState *fidp, head_fid;
+    V9fsFidState *fidp;

-    head_fid.next = s->fid_list;
+again:
    for (fidp = s->fid_list; fidp; fidp = fidp->next) {
        if (fidp->path.size != path->size) {
            continue;
@@ -508,7 +523,7 @@ static int v9fs_mark_fids_unreclaim(V9fsPDU *pdu, V9fsPath *path)
             * switched to the worker thread
             */
            if (err == 0) {
-                fidp = &head_fid;
+                goto again;
            }
        }
    }
@@ -915,10 +930,8 @@ static void v9fs_fix_path(V9fsPath *dst, V9fsPath *src, int len)
    V9fsPath str;
    v9fs_path_init(&str);
    v9fs_path_copy(&str, dst);
-    v9fs_string_sprintf((V9fsString *)dst, "%s%s", src->data, str.data+len);
+    v9fs_path_sprintf(dst, "%s%s", src->data, str.data + len);
    v9fs_path_free(&str);
-    /* +1 to include terminating NULL */
-    dst->size++;
 }

 static inline bool is_ro_export(FsContext *ctx)
@@ -1008,6 +1021,7 @@ static void v9fs_attach(void *opaque)
        goto out;
    }
    err += offset;
+    memcpy(&s->root_qid, &qid, sizeof(qid));
    trace_v9fs_attach_return(pdu->tag, pdu->id,
                             qid.type, qid.version, qid.path);
    /*
@@ -1254,6 +1268,19 @@ static int v9fs_walk_marshal(V9fsPDU *pdu, uint16_t nwnames, V9fsQID *qids)
    return offset;
 }

+static bool name_is_illegal(const char *name)
+{
+    return name[0] == '\0' || strchr(name, '/'); /* empty or contains '/' */
+}
+
+static bool not_same_qid(const V9fsQID *qid1, const V9fsQID *qid2)
+{
+    /* De Morgan form: the QIDs differ unless all three fields agree. */
+    return !(qid1->type == qid2->type &&
+             qid1->version == qid2->version &&
+             qid1->path == qid2->path);
+}
+
 static void v9fs_walk(void *opaque)
 {
    int name_idx;
@@ -1269,6 +1296,7 @@ static void v9fs_walk(void *opaque)
    V9fsFidState *newfidp = NULL;
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;
+    V9fsQID qid;

    err = pdu_unmarshal(pdu, offset, "ddw", &fid, &newfid, &nwnames);
    if (err < 0) {
@@ -1287,6 +1315,10 @@ static void v9fs_walk(void *opaque)
            if (err < 0) {
                goto out_nofid;
            }
+            if (name_is_illegal(wnames[i].data)) {
+                err = -ENOENT;
+                goto out_nofid;
+            }
            offset += err;
        }
    } else if (nwnames > P9_MAXWELEM) {
@@ -1298,8 +1330,15 @@ static void v9fs_walk(void *opaque)
        err = -ENOENT;
        goto out_nofid;
    }
+
    v9fs_path_init(&dpath);
    v9fs_path_init(&path);
+
+    err = fid_to_qid(pdu, fidp, &qid);
+    if (err < 0) {
+        goto out;
+    }
+
    /*
     * Both dpath and path initially poin to fidp.
     * Needed to handle request with nwnames == 0
@@ -1307,20 +1346,28 @@ static void v9fs_walk(void *opaque)
    v9fs_path_copy(&dpath, &fidp->path);
    v9fs_path_copy(&path, &fidp->path);
    for (name_idx = 0; name_idx < nwnames; name_idx++) {
-        err = v9fs_co_name_to_path(pdu, &dpath, wnames[name_idx].data, &path);
-        if (err < 0) {
-            goto out;
+        if (not_same_qid(&pdu->s->root_qid, &qid) ||
+            strcmp("..", wnames[name_idx].data)) {
+            err = v9fs_co_name_to_path(pdu, &dpath, wnames[name_idx].data,
+                                       &path);
+            if (err < 0) {
+                goto out;
+            }
+
+            err = v9fs_co_lstat(pdu, &path, &stbuf);
+            if (err < 0) {
+                goto out;
+            }
+            stat_to_qid(&stbuf, &qid);
+            v9fs_path_copy(&dpath, &path);
        }
-        err = v9fs_co_lstat(pdu, &path, &stbuf);
-        if (err < 0) {
-            goto out;
-        }
-        stat_to_qid(&stbuf, &qids[name_idx]);
-        v9fs_path_copy(&dpath, &path);
+        memcpy(&qids[name_idx], &qid, sizeof(qid));
    }
    if (fid == newfid) {
        BUG_ON(fidp->fid_type != P9_FID_NONE);
+        v9fs_path_write_lock(s);
        v9fs_path_copy(&fidp->path, &path);
+        v9fs_path_unlock(s);
    } else {
        newfidp = alloc_fid(s, newfid);
        if (newfidp == NULL) {
@@ -1481,11 +1528,25 @@ static void v9fs_lcreate(void *opaque)
    }
    trace_v9fs_lcreate(pdu->tag, pdu->id, dfid, flags, mode, gid);

+    if (name_is_illegal(name.data)) {
+        err = -ENOENT;
+        goto out_nofid;
+    }
+
+    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
+        err = -EEXIST;
+        goto out_nofid;
+    }
+
    fidp = get_fid(pdu, dfid);
    if (fidp == NULL) {
        err = -ENOENT;
        goto out_nofid;
    }
+    if (fidp->fid_type != P9_FID_NONE) {
+        err = -EINVAL;
+        goto out;
+    }

    flags = get_dotl_openflags(pdu->s, flags);
    err = v9fs_co_open2(pdu, fidp, &name, gid,
@@ -1585,20 +1646,17 @@ static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
 {
    ssize_t err;
    size_t offset = 7;
-    int read_count;
-    int64_t xattr_len;
+    uint64_t read_count;
    V9fsVirtioState *v = container_of(s, V9fsVirtioState, state);
    VirtQueueElement *elem = v->elems[pdu->idx];

-    xattr_len = fidp->fs.xattr.len;
-    read_count = xattr_len - off;
+    if (fidp->fs.xattr.len < off) {
+        read_count = 0;
+    } else {
+        read_count = fidp->fs.xattr.len - off;
+    }
    if (read_count > max_count) {
        read_count = max_count;
-    } else if (read_count < 0) {
-        /*
-         * read beyond XATTR value
-         */
-        read_count = 0;
    }
    err = pdu_marshal(pdu, offset, "d", read_count);
    if (err < 0) {
@@ -1765,14 +1823,15 @@ static void v9fs_read(void *opaque)
            if (len < 0) {
                /* IO error return the error */
                err = len;
-                goto out;
+                goto out_free_iovec;
            }
        } while (count < max_count && len > 0);
        err = pdu_marshal(pdu, offset, "d", count);
        if (err < 0) {
-            goto out;
+            goto out_free_iovec;
        }
        err += offset + count;
+out_free_iovec:
        qemu_iovec_destroy(&qiov);
        qemu_iovec_destroy(&qiov_full);
    } else if (fidp->fid_type == P9_FID_XATTR) {
@@ -1884,7 +1943,7 @@ static void v9fs_readdir(void *opaque)
        retval = -EINVAL;
        goto out_nofid;
    }
-    if (!fidp->fs.dir) {
+    if (!fidp->fs.dir.stream) {
        retval = -EINVAL;
        goto out;
    }
@@ -1916,23 +1975,18 @@ static int v9fs_xattr_write(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
 {
    int i, to_copy;
    ssize_t err = 0;
-    int write_count;
-    int64_t xattr_len;
+    uint64_t write_count;
    size_t offset = 7;


-    xattr_len = fidp->fs.xattr.len;
-    write_count = xattr_len - off;
-    if (write_count > count) {
-        write_count = count;
-    } else if (write_count < 0) {
-        /*
-         * write beyond XATTR value len specified in
-         * xattrcreate
-         */
+    if (fidp->fs.xattr.len < off) {
        err = -ENOSPC;
        goto out;
    }
+    write_count = fidp->fs.xattr.len - off;
+    if (write_count > count) {
+        write_count = count;
+    }
    err = pdu_marshal(pdu, offset, "d", write_count);
    if (err < 0) {
        return err;
@@ -2027,7 +2081,7 @@ static void v9fs_write(void *opaque)
    offset = 7;
    err = pdu_marshal(pdu, offset, "d", total);
    if (err < 0) {
-        goto out;
+        goto out_qiov;
    }
    err += offset;
    trace_v9fs_write_return(pdu->tag, pdu->id, total, err);
@@ -2055,6 +2109,7 @@ static void v9fs_create(void *opaque)
    V9fsString extension;
    int iounit;
    V9fsPDU *pdu = opaque;
+    V9fsState *s = pdu->s;

    v9fs_path_init(&path);
    v9fs_string_init(&name);
@@ -2066,11 +2121,25 @@ static void v9fs_create(void *opaque)
    }
    trace_v9fs_create(pdu->tag, pdu->id, fid, name.data, perm, mode);

+    if (name_is_illegal(name.data)) {
+        err = -ENOENT;
+        goto out_nofid;
+    }
+
+    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
+        err = -EEXIST;
+        goto out_nofid;
+    }
+
    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -EINVAL;
        goto out_nofid;
    }
+    if (fidp->fid_type != P9_FID_NONE) {
+        err = -EINVAL;
+        goto out;
+    }
    if (perm & P9_STAT_MODE_DIR) {
        err = v9fs_co_mkdir(pdu, fidp, &name, perm & 0777,
                            fidp->uid, -1, &stbuf);
@@ -2081,7 +2150,9 @@ static void v9fs_create(void *opaque)
        if (err < 0) {
            goto out;
        }
+        v9fs_path_write_lock(s);
        v9fs_path_copy(&fidp->path, &path);
+        v9fs_path_unlock(s);
        err = v9fs_co_opendir(pdu, fidp);
        if (err < 0) {
            goto out;
@@ -2097,7 +2168,9 @@ static void v9fs_create(void *opaque)
        if (err < 0) {
            goto out;
        }
+        v9fs_path_write_lock(s);
        v9fs_path_copy(&fidp->path, &path);
+        v9fs_path_unlock(s);
    } else if (perm & P9_STAT_MODE_LINK) {
        int32_t ofid = atoi(extension.data);
        V9fsFidState *ofidp = get_fid(pdu, ofid);
@@ -2115,7 +2188,9 @@ static void v9fs_create(void *opaque)
            fidp->fid_type = P9_FID_NONE;
            goto out;
        }
+        v9fs_path_write_lock(s);
        v9fs_path_copy(&fidp->path, &path);
+        v9fs_path_unlock(s);
        err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
        if (err < 0) {
            fidp->fid_type = P9_FID_NONE;
@@ -2153,7 +2228,9 @@ static void v9fs_create(void *opaque)
        if (err < 0) {
            goto out;
        }
+        v9fs_path_write_lock(s);
        v9fs_path_copy(&fidp->path, &path);
+        v9fs_path_unlock(s);
    } else if (perm & P9_STAT_MODE_NAMED_PIPE) {
        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
                            0, S_IFIFO | (perm & 0777), &stbuf);
@@ -2164,7 +2241,9 @@ static void v9fs_create(void *opaque)
        if (err < 0) {
            goto out;
        }
+        v9fs_path_write_lock(s);
        v9fs_path_copy(&fidp->path, &path);
+        v9fs_path_unlock(s);
    } else if (perm & P9_STAT_MODE_SOCKET) {
        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
                            0, S_IFSOCK | (perm & 0777), &stbuf);
@@ -2175,7 +2254,9 @@ static void v9fs_create(void *opaque)
        if (err < 0) {
            goto out;
        }
+        v9fs_path_write_lock(s);
        v9fs_path_copy(&fidp->path, &path);
+        v9fs_path_unlock(s);
    } else {
        err = v9fs_co_open2(pdu, fidp, &name, -1,
                            omode_to_uflags(mode)|O_CREAT, perm, &stbuf);
@@ -2231,6 +2312,16 @@ static void v9fs_symlink(void *opaque)
    }
    trace_v9fs_symlink(pdu->tag, pdu->id, dfid, name.data, symname.data, gid);

+    if (name_is_illegal(name.data)) {
+        err = -ENOENT;
+        goto out_nofid;
+    }
+
+    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
+        err = -EEXIST;
+        goto out_nofid;
+    }
+
    dfidp = get_fid(pdu, dfid);
    if (dfidp == NULL) {
        err = -EINVAL;
@@ -2261,7 +2352,7 @@ static void v9fs_flush(void *opaque)
    ssize_t err;
    int16_t tag;
    size_t offset = 7;
-    V9fsPDU *cancel_pdu;
+    V9fsPDU *cancel_pdu = NULL;
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;

@@ -2272,9 +2363,13 @@ static void v9fs_flush(void *opaque)
    }
    trace_v9fs_flush(pdu->tag, pdu->id, tag);

-    QLIST_FOREACH(cancel_pdu, &s->active_list, next) {
-        if (cancel_pdu->tag == tag) {
-            break;
+    if (pdu->tag == tag) {
+        error_report("Warning: the guest sent a self-referencing 9P flush request");
+    } else {
+        QLIST_FOREACH(cancel_pdu, &s->active_list, next) {
+            if (cancel_pdu->tag == tag) {
+                break;
+            }
        }
    }
    if (cancel_pdu) {
@@ -2305,6 +2400,16 @@ static void v9fs_link(void *opaque)
    }
    trace_v9fs_link(pdu->tag, pdu->id, dfid, oldfid, name.data);

+    if (name_is_illegal(name.data)) {
+        err = -ENOENT;
+        goto out_nofid;
+    }
+
+    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
+        err = -EEXIST;
+        goto out_nofid;
+    }
+
    dfidp = get_fid(pdu, dfid);
    if (dfidp == NULL) {
        err = -ENOENT;
@@ -2320,6 +2425,7 @@ static void v9fs_link(void *opaque)
    if (!err) {
        err = offset;
    }
+    put_fid(pdu, oldfidp);
 out:
    put_fid(pdu, dfidp);
 out_nofid:
@@ -2387,6 +2493,22 @@ static void v9fs_unlinkat(void *opaque)
    if (err < 0) {
        goto out_nofid;
    }
+
+    if (name_is_illegal(name.data)) {
+        err = -ENOENT;
+        goto out_nofid;
+    }
+
+    if (!strcmp(".", name.data)) {
+        err = -EINVAL;
+        goto out_nofid;
+    }
+
+    if (!strcmp("..", name.data)) {
+        err = -ENOTEMPTY;
+        goto out_nofid;
+    }
+
    dfidp = get_fid(pdu, dfid);
    if (dfidp == NULL) {
        err = -EINVAL;
@@ -2493,6 +2615,17 @@ static void v9fs_rename(void *opaque)
    if (err < 0) {
        goto out_nofid;
    }
+
+    if (name_is_illegal(name.data)) {
+        err = -ENOENT;
+        goto out_nofid;
+    }
+
+    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
+        err = -EISDIR;
+        goto out_nofid;
+    }
+
    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -ENOENT;
@@ -2605,6 +2738,17 @@ static void v9fs_renameat(void *opaque)
        goto out_err;
    }

+    if (name_is_illegal(old_name.data) || name_is_illegal(new_name.data)) {
+        err = -ENOENT;
+        goto out_err;
+    }
+
+    if (!strcmp(".", old_name.data) || !strcmp("..", old_name.data) ||
+        !strcmp(".", new_name.data) || !strcmp("..", new_name.data)) {
+        err = -EISDIR;
+        goto out_err;
+    }
+
    v9fs_path_write_lock(s);
    err = v9fs_complete_renameat(pdu, olddirfid,
                                 &old_name, newdirfid, &new_name);
@@ -2629,6 +2773,7 @@ static void v9fs_wstat(void *opaque)
    struct stat stbuf;
    V9fsFidState *fidp;
    V9fsPDU *pdu = opaque;
+    V9fsState *s = pdu->s;

    v9fs_stat_init(&v9stat);
    err = pdu_unmarshal(pdu, offset, "dwS", &fid, &unused, &v9stat);
@@ -2694,7 +2839,9 @@ static void v9fs_wstat(void *opaque)
        }
    }
    if (v9stat.name.size != 0) {
+        v9fs_path_write_lock(s);
        err = v9fs_complete_rename(pdu, fidp, -1, &v9stat.name);
+        v9fs_path_unlock(s);
        if (err < 0) {
            goto out;
        }
@@ -2815,6 +2962,16 @@ static void v9fs_mknod(void *opaque)
    }
    trace_v9fs_mknod(pdu->tag, pdu->id, fid, mode, major, minor);

+    if (name_is_illegal(name.data)) {
+        err = -ENOENT;
+        goto out_nofid;
+    }
+
+    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
+        err = -EEXIST;
+        goto out_nofid;
+    }
+
    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -ENOENT;
@@ -2966,6 +3123,16 @@ static void v9fs_mkdir(void *opaque)
    }
    trace_v9fs_mkdir(pdu->tag, pdu->id, fid, name.data, mode, gid);

+    if (name_is_illegal(name.data)) {
+        err = -ENOENT;
+        goto out_nofid;
+    }
+
+    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
+        err = -EEXIST;
+        goto out_nofid;
+    }
+
    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -ENOENT;
@@ -3020,7 +3187,7 @@ static void v9fs_xattrwalk(void *opaque)
        goto out;
    }
    v9fs_path_copy(&xattr_fidp->path, &file_fidp->path);
-    if (name.data == NULL) {
+    if (!v9fs_string_size(&name)) {
        /*
         * listxattr request. Get the size first
         */
@@ -3037,7 +3204,7 @@ static void v9fs_xattrwalk(void *opaque)
        xattr_fidp->fid_type = P9_FID_XATTR;
        xattr_fidp->fs.xattr.copied_len = -1;
        if (size) {
-            xattr_fidp->fs.xattr.value = g_malloc(size);
+            xattr_fidp->fs.xattr.value = g_malloc0(size);
            err = v9fs_co_llistxattr(pdu, &xattr_fidp->path,
                                     xattr_fidp->fs.xattr.value,
                                     xattr_fidp->fs.xattr.len);
@@ -3070,7 +3237,7 @@ static void v9fs_xattrwalk(void *opaque)
        xattr_fidp->fid_type = P9_FID_XATTR;
        xattr_fidp->fs.xattr.copied_len = -1;
        if (size) {
-            xattr_fidp->fs.xattr.value = g_malloc(size);
+            xattr_fidp->fs.xattr.value = g_malloc0(size);
            err = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
                                    &name, xattr_fidp->fs.xattr.value,
                                    xattr_fidp->fs.xattr.len);
@@ -3128,7 +3295,8 @@ static void v9fs_xattrcreate(void *opaque)
    xattr_fidp->fs.xattr.flags = flags;
    v9fs_string_init(&xattr_fidp->fs.xattr.name);
    v9fs_string_copy(&xattr_fidp->fs.xattr.name, &name);
-    xattr_fidp->fs.xattr.value = g_malloc(size);
+    g_free(xattr_fidp->fs.xattr.value);
+    xattr_fidp->fs.xattr.value = g_malloc0(size);
    err = offset;
    put_fid(pdu, file_fidp);
 out_nofid:
@@ -3274,7 +3442,6 @@ void pdu_submit(V9fsPDU *pdu)
 /* Returns 0 on success, 1 on failure. */
 int v9fs_device_realize_common(V9fsState *s, Error **errp)
 {
-    V9fsVirtioState *v = container_of(s, V9fsVirtioState, state);
    int i, len;
    struct stat stat;
    FsDriverEntry *fse;
@@ -3284,10 +3451,10 @@ int v9fs_device_realize_common(V9fsState *s, Error **errp)
    /* initialize pdu allocator */
    QLIST_INIT(&s->free_list);
    QLIST_INIT(&s->active_list);
-    for (i = 0; i < (MAX_REQ - 1); i++) {
-        QLIST_INSERT_HEAD(&s->free_list, &v->pdus[i], next);
-        v->pdus[i].s = s;
-        v->pdus[i].idx = i;
+    for (i = 0; i < MAX_REQ; i++) {
+        QLIST_INSERT_HEAD(&s->free_list, &s->pdus[i], next);
+        s->pdus[i].s = s;
+        s->pdus[i].idx = i;
    }

    v9fs_path_init(&path);
@@ -3355,8 +3522,11 @@ int v9fs_device_realize_common(V9fsState *s, Error **errp)
    rc = 0;
 out:
    if (rc) {
-        g_free(s->ctx.fs_root);
+        if (s->ops && s->ops->cleanup && s->ctx.private) {
+            s->ops->cleanup(&s->ctx);
+        }
        g_free(s->tag);
+        g_free(s->ctx.fs_root);
        v9fs_path_free(&path);
    }
    return rc;
@@ -3364,8 +3534,11 @@ out:

 void v9fs_device_unrealize_common(V9fsState *s, Error **errp)
 {
-    g_free(s->ctx.fs_root);
+    if (s->ops->cleanup) {
+        s->ops->cleanup(&s->ctx);
+    }
    g_free(s->tag);
+    g_free(s->ctx.fs_root);
 }

 static void __attribute__((__constructor__)) v9fs_set_fd_limit(void)
--- a/hw/9pfs/9p.h
+++ b/hw/9pfs/9p.h
@@ -169,13 +169,17 @@ typedef struct V9fsXattr
    int flags;
 } V9fsXattr;

+typedef struct V9fsDir {
+    DIR *stream; /* NULL when no directory stream is held (see 9p.c checks) */
+} V9fsDir;
+
 /*
 * Filled by fs driver on open and other
 * calls.
 */
 union V9fsFidOpenState {
    int fd;
-    DIR *dir;
+    V9fsDir dir;
    V9fsXattr xattr;
    /*
     * private pointer for fs drivers, that
@@ -211,6 +215,7 @@ typedef struct V9fsState
    char *tag;
    enum p9_proto_version proto_version;
    int32_t msize;
+    V9fsPDU pdus[MAX_REQ];
    /*
     * lock ensuring atomic path update
     * on rename.
@@ -219,6 +224,7 @@ typedef struct V9fsState
    int32_t root_fid;
    Error *migration_blocker;
    V9fsConf fsconf;
+    V9fsQID root_qid;
 } V9fsState;

 /* 9p2000.L open flags */
@@ -309,6 +315,7 @@ static inline uint8_t v9fs_request_cancelled(V9fsPDU *pdu)
 extern void v9fs_reclaim_fd(V9fsPDU *pdu);
 extern void v9fs_path_init(V9fsPath *path);
 extern void v9fs_path_free(V9fsPath *path);
+extern void v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...);
 extern void v9fs_path_copy(V9fsPath *lhs, V9fsPath *rhs);
 extern int v9fs_name_to_path(V9fsState *s, V9fsPath *dirpath,
                             const char *name, V9fsPath *path);
--- a/hw/9pfs/Makefile.objs
+++ b/hw/9pfs/Makefile.objs
@@ -1,4 +1,4 @@
-common-obj-y  = 9p.o
+common-obj-y  = 9p.o 9p-util.o
 common-obj-y += 9p-local.o 9p-xattr.o
 common-obj-y += 9p-xattr-user.o 9p-posix-acl.o
 common-obj-y += coth.o cofs.o codir.o cofile.o
--- a/hw/9pfs/cofile.c
+++ b/hw/9pfs/cofile.c
@@ -139,10 +139,10 @@ int v9fs_co_open2(V9fsPDU *pdu, V9fsFidState *fidp, V9fsString *name, gid_t gid,
    cred.fc_gid = gid;
    /*
     * Hold the directory fid lock so that directory path name
-     * don't change. Read lock is fine because this fid cannot
-     * be used by any other operation.
+     * don't change. Take the write lock to be sure this fid
+     * cannot be used by another operation.
     */
-    v9fs_path_read_lock(s);
+    v9fs_path_write_lock(s);
    v9fs_co_run_in_worker(
        {
            err = s->ops->open2(&s->ctx, &fidp->path,
--- a/hw/9pfs/virtio-9p.h
+++ b/hw/9pfs/virtio-9p.h
@@ -10,7 +10,6 @@ typedef struct V9fsVirtioState
    VirtIODevice parent_obj;
    VirtQueue *vq;
    size_t config_size;
-    V9fsPDU pdus[MAX_REQ];
    VirtQueueElement *elems[MAX_REQ];
    V9fsState state;
 } V9fsVirtioState;
--- a/hw/acpi/core.c
+++ b/hw/acpi/core.c
@@ -462,7 +462,8 @@ static void acpi_pm_evt_write(void *opaque, hwaddr addr, uint64_t val,
 static const MemoryRegionOps acpi_pm_evt_ops = {
    .read = acpi_pm_evt_read,
    .write = acpi_pm_evt_write,
-    .valid.min_access_size = 2,
+    .impl.min_access_size = 2,
+    .valid.min_access_size = 1,
    .valid.max_access_size = 2,
    .endianness = DEVICE_LITTLE_ENDIAN,
 };
@@ -524,7 +525,8 @@ static void acpi_pm_tmr_write(void *opaque, hwaddr addr, uint64_t val,
 static const MemoryRegionOps acpi_pm_tmr_ops = {
    .read = acpi_pm_tmr_read,
    .write = acpi_pm_tmr_write,
-    .valid.min_access_size = 4,
+    .impl.min_access_size = 4,
+    .valid.min_access_size = 1,
    .valid.max_access_size = 4,
    .endianness = DEVICE_LITTLE_ENDIAN,
 };
@@ -596,7 +598,8 @@ static void acpi_pm_cnt_write(void *opaque, hwaddr addr, uint64_t val,
 static const MemoryRegionOps acpi_pm_cnt_ops = {
    .read = acpi_pm_cnt_read,
    .write = acpi_pm_cnt_write,
-    .valid.min_access_size = 2,
+    .impl.min_access_size = 2,
+    .valid.min_access_size = 1,
    .valid.max_access_size = 2,
    .endianness = DEVICE_LITTLE_ENDIAN,
 };
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -281,7 +281,7 @@ static const VMStateDescription vmstate_memhp_state = {
 static const VMStateDescription vmstate_acpi = {
    .name = "piix4_pm",
    .version_id = 3,
-    .minimum_version_id = 3,
+    .minimum_version_id = 2, /* qemu-kvm */
    .minimum_version_id_old = 1,
    .load_state_old = acpi_load_old,
    .post_load = vmstate_acpi_post_load,
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -514,6 +514,24 @@ static void create_gic(VirtBoardInfo *vbi, qemu_irq *pic, int type, bool secure)
    if (type == 2) {
        create_v2m(vbi, pic);
    }
+
+#ifdef CONFIG_KVM
+    if (kvm_enabled() && !kvm_irqchip_in_kernel()) {
+        for (i = 0; i < smp_cpus; i++) {
+            CPUState *cs = qemu_get_cpu(i);
+            int ret;
+
+            ret = kvm_vcpu_enable_cap(cs, KVM_CAP_ARM_TIMER, 0,
+                                      KVM_ARM_TIMER_VTIMER);
+
+            if (ret) {
+                error_report("KVM with user space irqchip only works when the "
+                             "host kernel supports KVM_CAP_ARM_TIMER");
+                exit(1);
+            }
+        }
+    }
+#endif
 }

 static void create_uart(const VirtBoardInfo *vbi, qemu_irq *pic, int uart,
@@ -950,6 +968,7 @@ static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic,
    qemu_fdt_setprop_cell(vbi->fdt, nodename, "#size-cells", 2);
    qemu_fdt_setprop_cells(vbi->fdt, nodename, "bus-range", 0,
                           nr_pcie_buses - 1);
+    qemu_fdt_setprop(vbi->fdt, nodename, "dma-coherent", NULL, 0);

    if (vbi->v2m_phandle) {
        qemu_fdt_setprop_cells(vbi->fdt, nodename, "msi-parent",
@@ -1114,10 +1133,14 @@ static void machvirt_init(MachineState *machine)
     * KVM is not available yet
     */
    if (!gic_version) {
+        if (!kvm_enabled()) {
+            error_report("gic-version=host requires KVM");
+            exit(1);
+        }
+
        gic_version = kvm_arm_vgic_probe();
        if (!gic_version) {
            error_report("Unable to determine GIC version supported by host");
-            error_printf("KVM acceleration is probably not supported\n");
            exit(1);
        }
    }
--- a/hw/audio/ac97.c
+++ b/hw/audio/ac97.c
@@ -1387,6 +1387,16 @@ static void ac97_realize(PCIDevice *dev, Error **errp)
    ac97_on_reset (&s->dev.qdev);
 }

+static void ac97_exit(PCIDevice *dev)
+{
+    AC97LinkState *s = DO_UPCAST(AC97LinkState, dev, dev);
+
+    AUD_close_in(&s->card, s->voice_pi);
+    AUD_close_out(&s->card, s->voice_po);
+    AUD_close_in(&s->card, s->voice_mc);
+    AUD_remove_card(&s->card);
+}
+
 static int ac97_init (PCIBus *bus)
 {
    pci_create_simple (bus, -1, "AC97");
@@ -1404,6 +1414,7 @@ static void ac97_class_init (ObjectClass *klass, void *data)
    PCIDeviceClass *k = PCI_DEVICE_CLASS (klass);

    k->realize = ac97_realize;
+    k->exit = ac97_exit;
    k->vendor_id = PCI_VENDOR_ID_INTEL;
    k->device_id = PCI_DEVICE_ID_INTEL_82801AA_5;
    k->revision = 0x01;
--- a/hw/audio/es1370.c
+++ b/hw/audio/es1370.c
@@ -788,6 +788,9 @@ static void es1370_transfer_audio (ES1370State *s, struct chan *d, int loop_sel,
    int csc_bytes = (csc + 1) << d->shift;
    int cnt = d->frame_cnt >> 16;
    int size = d->frame_cnt & 0xffff;
+    if (size < cnt) {
+        return;
+    }
    int left = ((size - cnt + 1) << 2) + d->leftover;
    int transferred = 0;
    int temp = audio_MIN (max, audio_MIN (left, csc_bytes));
@@ -796,7 +799,7 @@ static void es1370_transfer_audio (ES1370State *s, struct chan *d, int loop_sel,
    addr += (cnt << 2) + d->leftover;

    if (index == ADC_CHANNEL) {
-        while (temp) {
+        while (temp > 0) {
            int acquired, to_copy;

            to_copy = audio_MIN ((size_t) temp, sizeof (tmpbuf));
@@ -814,7 +817,7 @@ static void es1370_transfer_audio (ES1370State *s, struct chan *d, int loop_sel,
    else {
        SWVoiceOut *voice = s->dac_voice[index];

-        while (temp) {
+        while (temp > 0) {
            int copied, to_copy;

            to_copy = audio_MIN ((size_t) temp, sizeof (tmpbuf));
@@ -1041,6 +1044,19 @@ static void es1370_realize(PCIDevice *dev, Error **errp)
    es1370_reset (s);
 }

+static void es1370_exit(PCIDevice *dev)
+{
+    ES1370State *s = ES1370(dev);
+    int i;
+
+    for (i = 0; i < 2; ++i) {
+        AUD_close_out(&s->card, s->dac_voice[i]);
+    }
+
+    AUD_close_in(&s->card, s->adc_voice);
+    AUD_remove_card(&s->card);
+}
+
 static int es1370_init (PCIBus *bus)
 {
    pci_create_simple (bus, -1, TYPE_ES1370);
@@ -1053,6 +1069,7 @@ static void es1370_class_init (ObjectClass *klass, void *data)
    PCIDeviceClass *k = PCI_DEVICE_CLASS (klass);

    k->realize = es1370_realize;
+    k->exit = es1370_exit;
    k->vendor_id = PCI_VENDOR_ID_ENSONIQ;
    k->device_id = PCI_DEVICE_ID_ENSONIQ_ES1370;
    k->class_id = PCI_CLASS_MULTIMEDIA_AUDIO;
--- a/hw/audio/intel-hda.c
+++ b/hw/audio/intel-hda.c
@@ -415,7 +415,8 @@ static bool intel_hda_xfer(HDACodecDevice *dev, uint32_t stnr, bool output,
    }

    left = len;
-    while (left > 0) {
+    s = st->bentries;
+    while (left > 0 && s-- > 0) {
        copy = left;
        if (copy > st->bsize - st->lpib)
            copy = st->bsize - st->lpib;
--- a/hw/block/hd-geometry.c
+++ b/hw/block/hd-geometry.c
@@ -66,7 +66,7 @@ static int guess_disk_lchs(BlockBackend *blk,
     * but also in async I/O mode. So the I/O throttling function has to
     * be disabled temporarily here, not permanently.
     */
-    if (blk_read_unthrottled(blk, 0, buf, 1) < 0) {
+    if (blk_pread_unthrottled(blk, 0, buf, BDRV_SECTOR_SIZE) < 0) {
        return -1;
    }
    /* test msdos magic */
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -902,7 +902,7 @@ static int m25p80_init(SSISlave *ss)
    if (dinfo) {
        DB_PRINT_L(0, "Binding to IF_MTD drive\n");
        s->blk = blk_by_legacy_dinfo(dinfo);
-        blk_attach_dev_nofail(s->blk, s);
+        blk_attach_dev_nofail(s->blk, s, false);

        s->storage = blk_blockalign(s->blk, s->size);

--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -34,6 +34,24 @@

 static void nvme_process_sq(void *opaque);

+static inline bool nvme_addr_is_iomem(NvmeCtrl *n, hwaddr addr)
+{
+    PCIDevice *pci_dev = &n->parent_obj;
+    hwaddr regs_hi, regs_lo, msix_hi, msix_lo;
+
+    /*
+     * The purpose of this check is to guard against invalid "local" access
+     * to the iomem (i.e. controller registers, MSIX-related space).
+     */
+    regs_lo = n->iomem.addr;
+    regs_hi = regs_lo + int128_get64(n->iomem.size);
+    msix_lo = pci_dev->msix_exclusive_bar.addr;
+    msix_hi = msix_lo + int128_get64(pci_dev->msix_exclusive_bar.size);
+
+    return (addr >= regs_lo && addr < regs_hi) ||
+           (addr >= msix_lo && addr < msix_hi);
+}
+
 static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid)
 {
    return sqid < n->num_queues && n->sq[sqid] != NULL ? 0 : -1;
@@ -90,6 +108,9 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2,
        return NVME_INVALID_FIELD | NVME_DNR;
    }

+    if (nvme_addr_is_iomem(n, prp1)) {
+        return NVME_DATA_TRAS_ERROR;
+    }
    pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
    qemu_sglist_add(qsg, prp1, trans_len);
    len -= trans_len;
@@ -126,6 +147,9 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2,
                }

                trans_len = MIN(len, n->page_size);
+                if (nvme_addr_is_iomem(n, prp_ent)) {
+                    return NVME_DATA_TRAS_ERROR;
+                }
                qemu_sglist_add(qsg, prp_ent, trans_len);
                len -= trans_len;
                i++;
@@ -134,6 +158,9 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2,
            if (prp2 & (n->page_size - 1)) {
                goto unmap;
            }
+            if (nvme_addr_is_iomem(n, prp2)) {
+                return NVME_DATA_TRAS_ERROR;
+            }
            qemu_sglist_add(qsg, prp2, len);
        }
    }
@@ -239,7 +266,7 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
    uint8_t lba_index  = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
    uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
    uint64_t data_size = (uint64_t)nlb << data_shift;
-    uint64_t aio_slba  = slba << (data_shift - BDRV_SECTOR_BITS);
+    uint64_t data_offset = slba << data_shift;
    int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0;
    enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ;

@@ -258,8 +285,8 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
    req->has_sg = true;
    dma_acct_start(n->conf.blk, &req->acct, &req->qsg, acct);
    req->aiocb = is_write ?
-        dma_blk_write(n->conf.blk, &req->qsg, aio_slba, nvme_rw_cb, req) :
-        dma_blk_read(n->conf.blk, &req->qsg, aio_slba, nvme_rw_cb, req);
+        dma_blk_write(n->conf.blk, &req->qsg, data_offset, nvme_rw_cb, req) :
+        dma_blk_read(n->conf.blk, &req->qsg, data_offset, nvme_rw_cb, req);

    return NVME_NO_COMPLETE;
 }
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -654,15 +654,20 @@ static void virtio_blk_reset(VirtIODevice *vdev)
 {
    VirtIOBlock *s = VIRTIO_BLK(vdev);
    AioContext *ctx;
+    VirtIOBlockReq *req;

-    /*
-     * This should cancel pending requests, but can't do nicely until there
-     * are per-device request lists.
-     */
    ctx = blk_get_aio_context(s->blk);
    aio_context_acquire(ctx);
    blk_drain(s->blk);

+    /* We drop queued requests after blk_drain() because blk_drain() itself can
+     * produce them. */
+    while (s->rq) {
+        req = s->rq;
+        s->rq = req->next;
+        virtio_blk_free_request(req);
+    }
+
    if (s->dataplane) {
        virtio_blk_data_plane_stop(s->dataplane);
    }
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -112,6 +112,7 @@ struct XenBlkDev {
    int                 requests_inflight;
    int                 requests_finished;

+    gboolean            cache_unsafe;
    /* Persistent grants extension */
    gboolean            feature_discard;
    gboolean            feature_persistent;
@@ -604,31 +605,30 @@ static int blk_send_response_one(struct ioreq *ioreq)
    struct XenBlkDev  *blkdev = ioreq->blkdev;
    int               send_notify   = 0;
    int               have_requests = 0;
-    blkif_response_t  resp;
-    void              *dst;
-
-    resp.id        = ioreq->req.id;
-    resp.operation = ioreq->req.operation;
-    resp.status    = ioreq->status;
+    blkif_response_t  *resp;

    /* Place on the response ring for the relevant domain. */
    switch (blkdev->protocol) {
    case BLKIF_PROTOCOL_NATIVE:
-        dst = RING_GET_RESPONSE(&blkdev->rings.native, blkdev->rings.native.rsp_prod_pvt);
+        resp = (blkif_response_t *) RING_GET_RESPONSE(&blkdev->rings.native,
+                                 blkdev->rings.native.rsp_prod_pvt);
        break;
    case BLKIF_PROTOCOL_X86_32:
-        dst = RING_GET_RESPONSE(&blkdev->rings.x86_32_part,
-                                blkdev->rings.x86_32_part.rsp_prod_pvt);
+        resp = (blkif_response_t *) RING_GET_RESPONSE(&blkdev->rings.x86_32_part,
+                                 blkdev->rings.x86_32_part.rsp_prod_pvt);
        break;
    case BLKIF_PROTOCOL_X86_64:
-        dst = RING_GET_RESPONSE(&blkdev->rings.x86_64_part,
-                                blkdev->rings.x86_64_part.rsp_prod_pvt);
+        resp = (blkif_response_t *) RING_GET_RESPONSE(&blkdev->rings.x86_64_part,
+                                 blkdev->rings.x86_64_part.rsp_prod_pvt);
        break;
    default:
-        dst = NULL;
        return 0;
    }
-    memcpy(dst, &resp, sizeof(resp));
+
+    resp->id        = ioreq->req.id;
+    resp->operation = ioreq->req.operation;
+    resp->status    = ioreq->status;
+
    blkdev->rings.common.rsp_prod_pvt++;

    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blkdev->rings.common, send_notify);
@@ -793,6 +793,16 @@ static void blk_parse_discard(struct XenBlkDev *blkdev)
    }
 }

+static void blk_parse_cache_unsafe(struct XenBlkDev *blkdev)
+{
+    int enable;
+
+    blkdev->cache_unsafe = false;
+
+    if (xenstore_read_be_int(&blkdev->xendev, "suse-diskcache-disable-flush", &enable) == 0)
+	    blkdev->cache_unsafe = !!enable;
+}
+
 static int blk_init(struct XenDevice *xendev)
 {
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
@@ -864,6 +874,7 @@ static int blk_init(struct XenDevice *xendev)
    xenstore_write_be_int(&blkdev->xendev, "info", info);

    blk_parse_discard(blkdev);
+    blk_parse_cache_unsafe(blkdev);

    g_free(directiosafe);
    return 0;
@@ -890,6 +901,7 @@ static int blk_connect(struct XenDevice *xendev)
    int pers, index, qflags;
    bool readonly = true;
    bool writethrough = true;
+    Error *errp = NULL;

    /* read-only ? */
    if (blkdev->directiosafe) {
@@ -906,6 +918,9 @@ static int blk_connect(struct XenDevice *xendev)
        qflags |= BDRV_O_UNMAP;
    }

+    if (blkdev->cache_unsafe)
+        qflags |= BDRV_O_NO_FLUSH;
+
    /* init qemu block driver */
    index = (blkdev->xendev.dev - 202 * 256) / 16;
    blkdev->dinfo = drive_get(IF_XEN, 0, index);
@@ -942,7 +957,14 @@ static int blk_connect(struct XenDevice *xendev)
         * so we can blk_unref() unconditionally */
        blk_ref(blkdev->blk);
    }
-    blk_attach_dev_nofail(blkdev->blk, blkdev);
+    blk_attach_dev_nofail(blkdev->blk, blkdev, true);
+    if (!monitor_add_blk(blkdev->blk, g_strdup(blkdev->dev), &errp)) {
+        xen_be_printf(&blkdev->xendev, 0, "error: %s\n",
+                      error_get_pretty(errp));
+        error_free(errp);
+        return -1;
+    }
+
    blkdev->file_size = blk_getlength(blkdev->blk);
    if (blkdev->file_size < 0) {
        BlockDriverState *bs = blk_bs(blkdev->blk);
@@ -1045,6 +1067,7 @@ static void blk_disconnect(struct XenDevice *xendev)

    if (blkdev->blk) {
        blk_detach_dev(blkdev->blk, blkdev);
+        monitor_remove_blk(blkdev->blk);
        blk_unref(blkdev->blk);
        blkdev->blk = NULL;
    }
@@ -1109,6 +1132,16 @@ static void blk_event(struct XenDevice *xendev)
    qemu_bh_schedule(blkdev->bh);
 }

+extern void xen_blk_resize_update(void *dev);
+void xen_blk_resize_update(void *dev)
+{
+    struct XenBlkDev *blkdev = dev;
+    blkdev->file_size = blk_getlength(blkdev->blk);
+    xenstore_write_be_int64(&blkdev->xendev, "sectors",
+                            blkdev->file_size / blkdev->file_blk);
+    xen_be_set_state(&blkdev->xendev, blkdev->xendev.be_state);
+}
+
 struct XenDevOps xen_blkdev_ops = {
    .size       = sizeof(struct XenBlkDev),
    .flags      = DEVOPS_FLAG_NEED_GNTDEV,
--- a/hw/char/bcm2835_aux.c
+++ b/hw/char/bcm2835_aux.c
@@ -245,7 +245,9 @@ static const MemoryRegionOps bcm2835_aux_ops = {
    .read = bcm2835_aux_read,
    .write = bcm2835_aux_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
-    .valid.min_access_size = 4,
+    .impl.min_access_size = 4,
+    .impl.max_access_size = 4,
+    .valid.min_access_size = 1,
    .valid.max_access_size = 4,
 };

--- a/hw/char/serial.c
+++ b/hw/char/serial.c
@@ -152,8 +152,9 @@ static void serial_update_parameters(SerialState *s)
    int speed, parity, data_bits, stop_bits, frame_size;
    QEMUSerialSetParams ssp;

-    if (s->divider == 0)
+    if (s->divider == 0 || s->divider > s->baudbase) {
        return;
+    }

    /* Start bit. */
    frame_size = 1;
@@ -868,6 +869,16 @@ void serial_realize_core(SerialState *s, Error **errp)
 void serial_exit_core(SerialState *s)
 {
    qemu_chr_add_handlers(s->chr, NULL, NULL, NULL, NULL);
+
+    timer_del(s->modem_status_poll);
+    timer_free(s->modem_status_poll);
+
+    timer_del(s->fifo_timeout_timer);
+    timer_free(s->fifo_timeout_timer);
+
+    fifo8_destroy(&s->recv_fifo);
+    fifo8_destroy(&s->xmit_fifo);
+
    qemu_unregister_reset(serial_reset, s);
 }

--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -55,6 +55,7 @@
 #include "exec/address-spaces.h"
 #include "hw/boards.h"
 #include "qemu/cutils.h"
+#include "hw/xen/xen.h"

 #include <zlib.h>

@@ -818,7 +819,10 @@ static void *rom_set_mr(Rom *rom, Object *owner, const char *name)
    void *data;

    rom->mr = g_malloc(sizeof(*rom->mr));
-    memory_region_init_resizeable_ram(rom->mr, owner, name,
+    if (xen_enabled())
+        memory_region_init_ram(rom->mr, owner, name, rom->datasize, &error_fatal);
+    else
+        memory_region_init_resizeable_ram(rom->mr, owner, name,
                                      rom->datasize, rom->romsize,
                                      fw_cfg_resized,
                                      &error_fatal);
@@ -1111,7 +1115,7 @@ int rom_copy(uint8_t *dest, hwaddr addr, size_t size)
        if (rom->addr + rom->romsize < addr) {
            continue;
        }
-        if (rom->addr > end) {
+        if (rom->addr > end || rom->addr < addr) {
            break;
        }

--- a/hw/display/cirrus_vga.c
+++ b/hw/display/cirrus_vga.c
@@ -177,11 +177,12 @@

 struct CirrusVGAState;
 typedef void (*cirrus_bitblt_rop_t) (struct CirrusVGAState *s,
-                                     uint8_t * dst, const uint8_t * src,
+                                     uint32_t dstaddr, uint32_t srcaddr,
 				     int dstpitch, int srcpitch,
 				     int bltwidth, int bltheight);
 typedef void (*cirrus_fill_t)(struct CirrusVGAState *s,
-                              uint8_t *dst, int dst_pitch, int width, int height);
+                              uint32_t dstaddr, int dst_pitch,
+                              int width, int height);

 typedef struct CirrusVGAState {
    VGACommonState vga;
@@ -272,12 +273,14 @@ static void cirrus_update_memory_access(CirrusVGAState *s);
 static bool blit_region_is_unsafe(struct CirrusVGAState *s,
                                  int32_t pitch, int32_t addr)
 {
+    if (!pitch) {
+        return true;
+    }
    if (pitch < 0) {
        int64_t min = addr
-            + ((int64_t)s->cirrus_blt_height-1) * pitch;
-        int32_t max = addr
-            + s->cirrus_blt_width;
-        if (min < 0 || max > s->vga.vram_size) {
+            + ((int64_t)s->cirrus_blt_height - 1) * pitch
+            - s->cirrus_blt_width;
+        if (min < -1 || addr >= s->vga.vram_size) {
            return true;
        }
    } else {
@@ -291,7 +294,7 @@ static bool blit_region_is_unsafe(struct CirrusVGAState *s,
    return false;
 }

-static bool blit_is_unsafe(struct CirrusVGAState *s)
+static bool blit_is_unsafe(struct CirrusVGAState *s, bool dst_only)
 {
    /* should be the case, see cirrus_bitblt_start */
    assert(s->cirrus_blt_width > 0);
@@ -302,11 +305,14 @@ static bool blit_is_unsafe(struct CirrusVGAState *s)
    }

    if (blit_region_is_unsafe(s, s->cirrus_blt_dstpitch,
-                              s->cirrus_blt_dstaddr & s->cirrus_addr_mask)) {
+                              s->cirrus_blt_dstaddr)) {
        return true;
    }
+    if (dst_only) {
+        return false;
+    }
    if (blit_region_is_unsafe(s, s->cirrus_blt_srcpitch,
-                              s->cirrus_blt_srcaddr & s->cirrus_addr_mask)) {
+                              s->cirrus_blt_srcaddr)) {
        return true;
    }

@@ -314,18 +320,57 @@ static bool blit_is_unsafe(struct CirrusVGAState *s)
 }

 static void cirrus_bitblt_rop_nop(CirrusVGAState *s,
-                                  uint8_t *dst,const uint8_t *src,
+                                  uint32_t dstaddr, uint32_t srcaddr,
                                  int dstpitch,int srcpitch,
                                  int bltwidth,int bltheight)
 {
 }

 static void cirrus_bitblt_fill_nop(CirrusVGAState *s,
-                                   uint8_t *dst,
+                                   uint32_t dstaddr,
                                   int dstpitch, int bltwidth,int bltheight)
 {
 }

+static inline uint8_t cirrus_src(CirrusVGAState *s, uint32_t srcaddr)
+{
+    if (s->cirrus_srccounter) {
+        /* cputovideo */
+        return s->cirrus_bltbuf[srcaddr & (CIRRUS_BLTBUFSIZE - 1)];
+    } else {
+        /* videotovideo */
+        return s->vga.vram_ptr[srcaddr & s->cirrus_addr_mask];
+    }
+}
+
+static inline uint16_t cirrus_src16(CirrusVGAState *s, uint32_t srcaddr)
+{
+    uint16_t *src;
+
+    if (s->cirrus_srccounter) {
+        /* cputovideo */
+        src = (void *)&s->cirrus_bltbuf[srcaddr & (CIRRUS_BLTBUFSIZE - 1) & ~1];
+    } else {
+        /* videotovideo */
+        src = (void *)&s->vga.vram_ptr[srcaddr & s->cirrus_addr_mask & ~1];
+    }
+    return *src;
+}
+
+static inline uint32_t cirrus_src32(CirrusVGAState *s, uint32_t srcaddr)
+{
+    uint32_t *src;
+
+    if (s->cirrus_srccounter) {
+        /* cputovideo */
+        src = (void *)&s->cirrus_bltbuf[srcaddr & (CIRRUS_BLTBUFSIZE - 1) & ~3];
+    } else {
+        /* videotovideo */
+        src = (void *)&s->vga.vram_ptr[srcaddr & s->cirrus_addr_mask & ~3];
+    }
+    return *src;
+}
+
 #define ROP_NAME 0
 #define ROP_FN(d, s) 0
 #include "cirrus_vga_rop.h"
@@ -655,25 +700,50 @@ static void cirrus_invalidate_region(CirrusVGAState * s, int off_begin,
    int off_cur;
    int off_cur_end;

+    if (off_pitch < 0) {
+        off_begin -= bytesperline - 1;
+    }
+
    for (y = 0; y < lines; y++) {
 	off_cur = off_begin;
 	off_cur_end = (off_cur + bytesperline) & s->cirrus_addr_mask;
+        assert(off_cur_end >= off_cur);
        memory_region_set_dirty(&s->vga.vram, off_cur, off_cur_end - off_cur);
 	off_begin += off_pitch;
    }
 }

-static int cirrus_bitblt_common_patterncopy(CirrusVGAState * s,
-					    const uint8_t * src)
+static int cirrus_bitblt_common_patterncopy(CirrusVGAState *s)
 {
-    uint8_t *dst;
+    uint32_t patternsize;
+    bool videosrc = !s->cirrus_srccounter;

-    dst = s->vga.vram_ptr + (s->cirrus_blt_dstaddr & s->cirrus_addr_mask);
+    if (videosrc) {
+        switch (s->vga.get_bpp(&s->vga)) {
+        case 8:
+            patternsize = 64;
+            break;
+        case 15:
+        case 16:
+            patternsize = 128;
+            break;
+        case 24:
+        case 32:
+        default:
+            patternsize = 256;
+            break;
+        }
+        s->cirrus_blt_srcaddr &= ~(patternsize - 1);
+        if (s->cirrus_blt_srcaddr + patternsize > s->vga.vram_size) {
+            return 0;
+        }
+    }

-    if (blit_is_unsafe(s))
+    if (blit_is_unsafe(s, true))
        return 0;

-    (*s->cirrus_rop) (s, dst, src,
+    (*s->cirrus_rop) (s, s->cirrus_blt_dstaddr,
+                      videosrc ? s->cirrus_blt_srcaddr : 0,
                      s->cirrus_blt_dstpitch, 0,
                      s->cirrus_blt_width, s->cirrus_blt_height);
    cirrus_invalidate_region(s, s->cirrus_blt_dstaddr,
@@ -688,11 +758,11 @@ static int cirrus_bitblt_solidfill(CirrusVGAState *s, int blt_rop)
 {
    cirrus_fill_t rop_func;

-    if (blit_is_unsafe(s)) {
+    if (blit_is_unsafe(s, true)) {
        return 0;
    }
    rop_func = cirrus_fill[rop_to_index[blt_rop]][s->cirrus_blt_pixelwidth - 1];
-    rop_func(s, s->vga.vram_ptr + (s->cirrus_blt_dstaddr & s->cirrus_addr_mask),
+    rop_func(s, s->cirrus_blt_dstaddr,
             s->cirrus_blt_dstpitch,
             s->cirrus_blt_width, s->cirrus_blt_height);
    cirrus_invalidate_region(s, s->cirrus_blt_dstaddr,
@@ -710,12 +780,10 @@ static int cirrus_bitblt_solidfill(CirrusVGAState *s, int blt_rop)

 static int cirrus_bitblt_videotovideo_patterncopy(CirrusVGAState * s)
 {
-    return cirrus_bitblt_common_patterncopy(s,
-					    s->vga.vram_ptr + ((s->cirrus_blt_srcaddr & ~7) &
-                                            s->cirrus_addr_mask));
+    return cirrus_bitblt_common_patterncopy(s);
 }

-static void cirrus_do_copy(CirrusVGAState *s, int dst, int src, int w, int h)
+static int cirrus_do_copy(CirrusVGAState *s, int dst, int src, int w, int h)
 {
    int sx = 0, sy = 0;
    int dx = 0, dy = 0;
@@ -729,6 +797,9 @@ static void cirrus_do_copy(CirrusVGAState *s, int dst, int src, int w, int h)
        int width, height;

        depth = s->vga.get_bpp(&s->vga) / 8;
+        if (!depth) {
+            return 0;
+        }
        s->vga.get_resolution(&s->vga, &width, &height);

        /* extra x, y */
@@ -758,23 +829,15 @@ static void cirrus_do_copy(CirrusVGAState *s, int dst, int src, int w, int h)
        }
    }

-    /* we have to flush all pending changes so that the copy
-       is generated at the appropriate moment in time */
-    if (notify)
-        graphic_hw_update(s->vga.con);
-
-    (*s->cirrus_rop) (s, s->vga.vram_ptr +
-		      (s->cirrus_blt_dstaddr & s->cirrus_addr_mask),
-		      s->vga.vram_ptr +
-		      (s->cirrus_blt_srcaddr & s->cirrus_addr_mask),
+    (*s->cirrus_rop) (s, s->cirrus_blt_dstaddr,
+                      s->cirrus_blt_srcaddr,
 		      s->cirrus_blt_dstpitch, s->cirrus_blt_srcpitch,
 		      s->cirrus_blt_width, s->cirrus_blt_height);

    if (notify) {
-        qemu_console_copy(s->vga.con,
-			  sx, sy, dx, dy,
-			  s->cirrus_blt_width / depth,
-			  s->cirrus_blt_height);
+        dpy_gfx_update(s->vga.con, dx, dy,
+                       s->cirrus_blt_width / depth,
+                       s->cirrus_blt_height);
    }

    /* we don't have to notify the display that this portion has
@@ -783,18 +846,18 @@ static void cirrus_do_copy(CirrusVGAState *s, int dst, int src, int w, int h)
    cirrus_invalidate_region(s, s->cirrus_blt_dstaddr,
 				s->cirrus_blt_dstpitch, s->cirrus_blt_width,
 				s->cirrus_blt_height);
+
+    return 1;
 }

 static int cirrus_bitblt_videotovideo_copy(CirrusVGAState * s)
 {
-    if (blit_is_unsafe(s))
+    if (blit_is_unsafe(s, false))
        return 0;

-    cirrus_do_copy(s, s->cirrus_blt_dstaddr - s->vga.start_addr,
+    return cirrus_do_copy(s, s->cirrus_blt_dstaddr - s->vga.start_addr,
            s->cirrus_blt_srcaddr - s->vga.start_addr,
            s->cirrus_blt_width, s->cirrus_blt_height);
-
-    return 1;
 }

 /***************************************
@@ -810,16 +873,15 @@ static void cirrus_bitblt_cputovideo_next(CirrusVGAState * s)

    if (s->cirrus_srccounter > 0) {
        if (s->cirrus_blt_mode & CIRRUS_BLTMODE_PATTERNCOPY) {
-            cirrus_bitblt_common_patterncopy(s, s->cirrus_bltbuf);
+            cirrus_bitblt_common_patterncopy(s);
        the_end:
            s->cirrus_srccounter = 0;
            cirrus_bitblt_reset(s);
        } else {
            /* at least one scan line */
            do {
-                (*s->cirrus_rop)(s, s->vga.vram_ptr +
-                                 (s->cirrus_blt_dstaddr & s->cirrus_addr_mask),
-                                  s->cirrus_bltbuf, 0, 0, s->cirrus_blt_width, 1);
+                (*s->cirrus_rop)(s, s->cirrus_blt_dstaddr,
+                                 0, 0, 0, s->cirrus_blt_width, 1);
                cirrus_invalidate_region(s, s->cirrus_blt_dstaddr, 0,
                                         s->cirrus_blt_width, 1);
                s->cirrus_blt_dstaddr += s->cirrus_blt_dstpitch;
@@ -865,6 +927,10 @@ static int cirrus_bitblt_cputovideo(CirrusVGAState * s)
 {
    int w;

+    if (blit_is_unsafe(s, true)) {
+        return 0;
+    }
+
    s->cirrus_blt_mode &= ~CIRRUS_BLTMODE_MEMSYSSRC;
    s->cirrus_srcptr = &s->cirrus_bltbuf[0];
    s->cirrus_srcptr_end = &s->cirrus_bltbuf[0];
@@ -890,6 +956,10 @@ static int cirrus_bitblt_cputovideo(CirrusVGAState * s)
 	}
        s->cirrus_srccounter = s->cirrus_blt_srcpitch * s->cirrus_blt_height;
    }
+
+    /* the blit_is_unsafe call above should catch this */
+    assert(s->cirrus_blt_srcpitch <= CIRRUS_BLTBUFSIZE);
+
    s->cirrus_srcptr = s->cirrus_bltbuf;
    s->cirrus_srcptr_end = s->cirrus_bltbuf + s->cirrus_blt_srcpitch;
    cirrus_update_memory_access(s);
@@ -937,6 +1007,9 @@ static void cirrus_bitblt_start(CirrusVGAState * s)
    s->cirrus_blt_modeext = s->vga.gr[0x33];
    blt_rop = s->vga.gr[0x32];

+    s->cirrus_blt_dstaddr &= s->cirrus_addr_mask;
+    s->cirrus_blt_srcaddr &= s->cirrus_addr_mask;
+
 #ifdef DEBUG_BITBLT
    printf("rop=0x%02x mode=0x%02x modeext=0x%02x w=%d h=%d dpitch=%d spitch=%d daddr=0x%08x saddr=0x%08x writemask=0x%02x\n",
           blt_rop,
@@ -1956,15 +2029,14 @@ static void cirrus_mem_writeb_mode4and5_8bpp(CirrusVGAState * s,
    unsigned val = mem_value;
    uint8_t *dst;

-    dst = s->vga.vram_ptr + (offset &= s->cirrus_addr_mask);
    for (x = 0; x < 8; x++) {
+        dst = s->vga.vram_ptr + ((offset + x) & s->cirrus_addr_mask);
 	if (val & 0x80) {
 	    *dst = s->cirrus_shadow_gr1;
 	} else if (mode == 5) {
 	    *dst = s->cirrus_shadow_gr0;
 	}
 	val <<= 1;
-	dst++;
    }
    memory_region_set_dirty(&s->vga.vram, offset, 8);
 }
@@ -1978,8 +2050,8 @@ static void cirrus_mem_writeb_mode4and5_16bpp(CirrusVGAState * s,
    unsigned val = mem_value;
    uint8_t *dst;

-    dst = s->vga.vram_ptr + (offset &= s->cirrus_addr_mask);
    for (x = 0; x < 8; x++) {
+        dst = s->vga.vram_ptr + ((offset + 2 * x) & s->cirrus_addr_mask & ~1);
 	if (val & 0x80) {
 	    *dst = s->cirrus_shadow_gr1;
 	    *(dst + 1) = s->vga.gr[0x11];
@@ -1988,7 +2060,6 @@ static void cirrus_mem_writeb_mode4and5_16bpp(CirrusVGAState * s,
 	    *(dst + 1) = s->vga.gr[0x10];
 	}
 	val <<= 1;
-	dst += 2;
    }
    memory_region_set_dirty(&s->vga.vram, offset, 16);
 }
--- a/hw/display/cirrus_vga_rop.h
+++ b/hw/display/cirrus_vga_rop.h
@@ -22,31 +22,65 @@
 * THE SOFTWARE.
 */

-static inline void glue(rop_8_,ROP_NAME)(uint8_t *dst, uint8_t src)
+static inline void glue(rop_8_, ROP_NAME)(CirrusVGAState *s,
+                                          uint32_t dstaddr, uint8_t src)
 {
+    uint8_t *dst = &s->vga.vram_ptr[dstaddr & s->cirrus_addr_mask];
    *dst = ROP_FN(*dst, src);
 }

-static inline void glue(rop_16_,ROP_NAME)(uint16_t *dst, uint16_t src)
+static inline void glue(rop_tr_8_, ROP_NAME)(CirrusVGAState *s,
+                                             uint32_t dstaddr, uint8_t src,
+                                             uint8_t transp)
 {
+    uint8_t *dst = &s->vga.vram_ptr[dstaddr & s->cirrus_addr_mask];
+    uint8_t pixel = ROP_FN(*dst, src);
+    if (pixel != transp) {
+        *dst = pixel;
+    }
+}
+
+static inline void glue(rop_16_, ROP_NAME)(CirrusVGAState *s,
+                                           uint32_t dstaddr, uint16_t src)
+{
+    uint16_t *dst = (uint16_t *)
+        (&s->vga.vram_ptr[dstaddr & s->cirrus_addr_mask & ~1]);
    *dst = ROP_FN(*dst, src);
 }

-static inline void glue(rop_32_,ROP_NAME)(uint32_t *dst, uint32_t src)
+static inline void glue(rop_tr_16_, ROP_NAME)(CirrusVGAState *s,
+                                              uint32_t dstaddr, uint16_t src,
+                                              uint16_t transp)
 {
+    uint16_t *dst = (uint16_t *)
+        (&s->vga.vram_ptr[dstaddr & s->cirrus_addr_mask & ~1]);
+    uint16_t pixel = ROP_FN(*dst, src);
+    if (pixel != transp) {
+        *dst = pixel;
+    }
+}
+
+static inline void glue(rop_32_, ROP_NAME)(CirrusVGAState *s,
+                                           uint32_t dstaddr, uint32_t src)
+{
+    uint32_t *dst = (uint32_t *)
+        (&s->vga.vram_ptr[dstaddr & s->cirrus_addr_mask & ~3]);
    *dst = ROP_FN(*dst, src);
 }

-#define ROP_OP(d, s) glue(rop_8_,ROP_NAME)(d, s)
-#define ROP_OP_16(d, s) glue(rop_16_,ROP_NAME)(d, s)
-#define ROP_OP_32(d, s) glue(rop_32_,ROP_NAME)(d, s)
+#define ROP_OP(st, d, s)           glue(rop_8_, ROP_NAME)(st, d, s)
+#define ROP_OP_TR(st, d, s, t)     glue(rop_tr_8_, ROP_NAME)(st, d, s, t)
+#define ROP_OP_16(st, d, s)        glue(rop_16_, ROP_NAME)(st, d, s)
+#define ROP_OP_TR_16(st, d, s, t)  glue(rop_tr_16_, ROP_NAME)(st, d, s, t)
+#define ROP_OP_32(st, d, s)        glue(rop_32_, ROP_NAME)(st, d, s)
 #undef ROP_FN

 static void
 glue(cirrus_bitblt_rop_fwd_, ROP_NAME)(CirrusVGAState *s,
-                             uint8_t *dst,const uint8_t *src,
-                             int dstpitch,int srcpitch,
-                             int bltwidth,int bltheight)
+                                       uint32_t dstaddr,
+                                       uint32_t srcaddr,
+                                       int dstpitch, int srcpitch,
+                                       int bltwidth, int bltheight)
 {
    int x,y;
    dstpitch -= bltwidth;
@@ -58,134 +92,139 @@ glue(cirrus_bitblt_rop_fwd_, ROP_NAME)(CirrusVGAState *s,

    for (y = 0; y < bltheight; y++) {
        for (x = 0; x < bltwidth; x++) {
-            ROP_OP(dst, *src);
-            dst++;
-            src++;
+            ROP_OP(s, dstaddr, cirrus_src(s, srcaddr));
+            dstaddr++;
+            srcaddr++;
        }
-        dst += dstpitch;
-        src += srcpitch;
+        dstaddr += dstpitch;
+        srcaddr += srcpitch;
    }
 }

 static void
 glue(cirrus_bitblt_rop_bkwd_, ROP_NAME)(CirrusVGAState *s,
-                                        uint8_t *dst,const uint8_t *src,
-                                        int dstpitch,int srcpitch,
-                                        int bltwidth,int bltheight)
+                                        uint32_t dstaddr,
+                                        uint32_t srcaddr,
+                                        int dstpitch, int srcpitch,
+                                        int bltwidth, int bltheight)
 {
    int x,y;
    dstpitch += bltwidth;
    srcpitch += bltwidth;
    for (y = 0; y < bltheight; y++) {
        for (x = 0; x < bltwidth; x++) {
-            ROP_OP(dst, *src);
-            dst--;
-            src--;
+            ROP_OP(s, dstaddr, cirrus_src(s, srcaddr));
+            dstaddr--;
+            srcaddr--;
        }
-        dst += dstpitch;
-        src += srcpitch;
+        dstaddr += dstpitch;
+        srcaddr += srcpitch;
    }
 }

 static void
 glue(glue(cirrus_bitblt_rop_fwd_transp_, ROP_NAME),_8)(CirrusVGAState *s,
-						       uint8_t *dst,const uint8_t *src,
-						       int dstpitch,int srcpitch,
-						       int bltwidth,int bltheight)
+                                                       uint32_t dstaddr,
+                                                       uint32_t srcaddr,
+                                                       int dstpitch,
+                                                       int srcpitch,
+                                                       int bltwidth,
+                                                       int bltheight)
 {
    int x,y;
-    uint8_t p;
+    uint8_t transp = s->vga.gr[0x34];
    dstpitch -= bltwidth;
    srcpitch -= bltwidth;
+
+    if (bltheight > 1 && (dstpitch < 0 || srcpitch < 0)) {
+        return;
+    }
+
    for (y = 0; y < bltheight; y++) {
        for (x = 0; x < bltwidth; x++) {
-	    p = *dst;
-            ROP_OP(&p, *src);
-	    if (p != s->vga.gr[0x34]) *dst = p;
-            dst++;
-            src++;
+            ROP_OP_TR(s, dstaddr, cirrus_src(s, srcaddr), transp);
+            dstaddr++;
+            srcaddr++;
        }
-        dst += dstpitch;
-        src += srcpitch;
+        dstaddr += dstpitch;
+        srcaddr += srcpitch;
    }
 }

 static void
 glue(glue(cirrus_bitblt_rop_bkwd_transp_, ROP_NAME),_8)(CirrusVGAState *s,
-							uint8_t *dst,const uint8_t *src,
-							int dstpitch,int srcpitch,
-							int bltwidth,int bltheight)
+                                                        uint32_t dstaddr,
+                                                        uint32_t srcaddr,
+                                                        int dstpitch,
+                                                        int srcpitch,
+                                                        int bltwidth,
+                                                        int bltheight)
 {
    int x,y;
-    uint8_t p;
+    uint8_t transp = s->vga.gr[0x34];
    dstpitch += bltwidth;
    srcpitch += bltwidth;
    for (y = 0; y < bltheight; y++) {
        for (x = 0; x < bltwidth; x++) {
-	    p = *dst;
-            ROP_OP(&p, *src);
-	    if (p != s->vga.gr[0x34]) *dst = p;
-            dst--;
-            src--;
+            ROP_OP_TR(s, dstaddr, cirrus_src(s, srcaddr), transp);
+            dstaddr--;
+            srcaddr--;
        }
-        dst += dstpitch;
-        src += srcpitch;
+        dstaddr += dstpitch;
+        srcaddr += srcpitch;
    }
 }

 static void
 glue(glue(cirrus_bitblt_rop_fwd_transp_, ROP_NAME),_16)(CirrusVGAState *s,
-							uint8_t *dst,const uint8_t *src,
-							int dstpitch,int srcpitch,
-							int bltwidth,int bltheight)
+                                                        uint32_t dstaddr,
+                                                        uint32_t srcaddr,
+                                                        int dstpitch,
+                                                        int srcpitch,
+                                                        int bltwidth,
+                                                        int bltheight)
 {
    int x,y;
-    uint8_t p1, p2;
+    uint16_t transp = s->vga.gr[0x34] | (uint16_t)s->vga.gr[0x35] << 8;
    dstpitch -= bltwidth;
    srcpitch -= bltwidth;
+
+    if (bltheight > 1 && (dstpitch < 0 || srcpitch < 0)) {
+        return;
+    }
+
    for (y = 0; y < bltheight; y++) {
        for (x = 0; x < bltwidth; x+=2) {
-	    p1 = *dst;
-	    p2 = *(dst+1);
-            ROP_OP(&p1, *src);
-            ROP_OP(&p2, *(src + 1));
-	    if ((p1 != s->vga.gr[0x34]) || (p2 != s->vga.gr[0x35])) {
-		*dst = p1;
-		*(dst+1) = p2;
-	    }
-            dst+=2;
-            src+=2;
+            ROP_OP_TR_16(s, dstaddr, cirrus_src16(s, srcaddr), transp);
+            dstaddr += 2;
+            srcaddr += 2;
        }
-        dst += dstpitch;
-        src += srcpitch;
+        dstaddr += dstpitch;
+        srcaddr += srcpitch;
    }
 }

 static void
 glue(glue(cirrus_bitblt_rop_bkwd_transp_, ROP_NAME),_16)(CirrusVGAState *s,
-							 uint8_t *dst,const uint8_t *src,
-							 int dstpitch,int srcpitch,
-							 int bltwidth,int bltheight)
+                                                         uint32_t dstaddr,
+                                                         uint32_t srcaddr,
+                                                         int dstpitch,
+                                                         int srcpitch,
+                                                         int bltwidth,
+                                                         int bltheight)
 {
    int x,y;
-    uint8_t p1, p2;
+    uint16_t transp = s->vga.gr[0x34] | (uint16_t)s->vga.gr[0x35] << 8;
    dstpitch += bltwidth;
    srcpitch += bltwidth;
    for (y = 0; y < bltheight; y++) {
        for (x = 0; x < bltwidth; x+=2) {
-	    p1 = *(dst-1);
-	    p2 = *dst;
-            ROP_OP(&p1, *(src - 1));
-            ROP_OP(&p2, *src);
-	    if ((p1 != s->vga.gr[0x34]) || (p2 != s->vga.gr[0x35])) {
-		*(dst-1) = p1;
-		*dst = p2;
-	    }
-            dst-=2;
-            src-=2;
+            ROP_OP_TR_16(s, dstaddr - 1, cirrus_src16(s, srcaddr - 1), transp);
+            dstaddr -= 2;
+            srcaddr -= 2;
        }
-        dst += dstpitch;
-        src += srcpitch;
+        dstaddr += dstpitch;
+        srcaddr += srcpitch;
    }
 }

--- a/hw/display/cirrus_vga_rop2.h
+++ b/hw/display/cirrus_vga_rop2.h
@@ -23,30 +23,32 @@
 */

 #if DEPTH == 8
-#define PUTPIXEL()    ROP_OP(&d[0], col)
+#define PUTPIXEL(s, a, c)    ROP_OP(s, a, c)
 #elif DEPTH == 16
-#define PUTPIXEL()    ROP_OP_16((uint16_t *)&d[0], col)
+#define PUTPIXEL(s, a, c)    ROP_OP_16(s, a, c)
 #elif DEPTH == 24
-#define PUTPIXEL()    ROP_OP(&d[0], col);        \
-                      ROP_OP(&d[1], (col >> 8)); \
-                      ROP_OP(&d[2], (col >> 16))
+#define PUTPIXEL(s, a, c)    do { /* 3 byte-wide ROPs, low byte first */ \
+        ROP_OP(s, (a),     (c));           \
+        ROP_OP(s, (a) + 1, ((c) >> 8));    \
+        ROP_OP(s, (a) + 2, ((c) >> 16));   \
+    } while (0)
 #elif DEPTH == 32
-#define PUTPIXEL()    ROP_OP_32(((uint32_t *)&d[0]), col)
+#define PUTPIXEL(s, a, c)    ROP_OP_32(s, a, c)
 #else
 #error unsupported DEPTH
 #endif

 static void
 glue(glue(glue(cirrus_patternfill_, ROP_NAME), _),DEPTH)
-     (CirrusVGAState * s, uint8_t * dst,
-      const uint8_t * src,
+     (CirrusVGAState *s, uint32_t dstaddr,
+      uint32_t srcaddr,
      int dstpitch, int srcpitch,
      int bltwidth, int bltheight)
 {
-    uint8_t *d;
+    uint32_t addr;
    int x, y, pattern_y, pattern_pitch, pattern_x;
    unsigned int col;
-    const uint8_t *src1;
+    uint32_t src1addr;
 #if DEPTH == 24
    int skipleft = s->vga.gr[0x2f] & 0x1f;
 #else
@@ -63,42 +65,44 @@ glue(glue(glue(cirrus_patternfill_, ROP_NAME), _),DEPTH)
    pattern_y = s->cirrus_blt_srcaddr & 7;
    for(y = 0; y < bltheight; y++) {
        pattern_x = skipleft;
-        d = dst + skipleft;
-        src1 = src + pattern_y * pattern_pitch;
+        addr = dstaddr + skipleft;
+        src1addr = srcaddr + pattern_y * pattern_pitch;
        for (x = skipleft; x < bltwidth; x += (DEPTH / 8)) {
 #if DEPTH == 8
-            col = src1[pattern_x];
+            col = cirrus_src(s, src1addr + pattern_x);
            pattern_x = (pattern_x + 1) & 7;
 #elif DEPTH == 16
-            col = ((uint16_t *)(src1 + pattern_x))[0];
+            col = cirrus_src16(s, src1addr + pattern_x);
            pattern_x = (pattern_x + 2) & 15;
 #elif DEPTH == 24
            {
-                const uint8_t *src2 = src1 + pattern_x * 3;
-                col = src2[0] | (src2[1] << 8) | (src2[2] << 16);
+                uint32_t src2addr = src1addr + pattern_x * 3;
+                col = cirrus_src(s, src2addr) |
+                    (cirrus_src(s, src2addr + 1) << 8) |
+                    (cirrus_src(s, src2addr + 2) << 16);
                pattern_x = (pattern_x + 1) & 7;
            }
 #else
-            col = ((uint32_t *)(src1 + pattern_x))[0];
+            col = cirrus_src32(s, src1addr + pattern_x);
            pattern_x = (pattern_x + 4) & 31;
 #endif
-            PUTPIXEL();
-            d += (DEPTH / 8);
+            PUTPIXEL(s, addr, col);
+            addr += (DEPTH / 8);
        }
        pattern_y = (pattern_y + 1) & 7;
-        dst += dstpitch;
+        dstaddr += dstpitch;
    }
 }

 /* NOTE: srcpitch is ignored */
 static void
 glue(glue(glue(cirrus_colorexpand_transp_, ROP_NAME), _),DEPTH)
-     (CirrusVGAState * s, uint8_t * dst,
-      const uint8_t * src,
+     (CirrusVGAState *s, uint32_t dstaddr,
+      uint32_t srcaddr,
      int dstpitch, int srcpitch,
      int bltwidth, int bltheight)
 {
-    uint8_t *d;
+    uint32_t addr;
    int x, y;
    unsigned bits, bits_xor;
    unsigned int col;
@@ -122,33 +126,33 @@ glue(glue(glue(cirrus_colorexpand_transp_, ROP_NAME), _),DEPTH)

    for(y = 0; y < bltheight; y++) {
        bitmask = 0x80 >> srcskipleft;
-        bits = *src++ ^ bits_xor;
-        d = dst + dstskipleft;
+        bits = cirrus_src(s, srcaddr++) ^ bits_xor;
+        addr = dstaddr + dstskipleft;
        for (x = dstskipleft; x < bltwidth; x += (DEPTH / 8)) {
            if ((bitmask & 0xff) == 0) {
                bitmask = 0x80;
-                bits = *src++ ^ bits_xor;
+                bits = cirrus_src(s, srcaddr++) ^ bits_xor;
            }
            index = (bits & bitmask);
            if (index) {
-                PUTPIXEL();
+                PUTPIXEL(s, addr, col);
            }
-            d += (DEPTH / 8);
+            addr += (DEPTH / 8);
            bitmask >>= 1;
        }
-        dst += dstpitch;
+        dstaddr += dstpitch;
    }
 }

 static void
 glue(glue(glue(cirrus_colorexpand_, ROP_NAME), _),DEPTH)
-     (CirrusVGAState * s, uint8_t * dst,
-      const uint8_t * src,
+     (CirrusVGAState *s, uint32_t dstaddr,
+      uint32_t srcaddr,
      int dstpitch, int srcpitch,
      int bltwidth, int bltheight)
 {
    uint32_t colors[2];
-    uint8_t *d;
+    uint32_t addr;
    int x, y;
    unsigned bits;
    unsigned int col;
@@ -160,30 +164,30 @@ glue(glue(glue(cirrus_colorexpand_, ROP_NAME), _),DEPTH)
    colors[1] = s->cirrus_blt_fgcol;
    for(y = 0; y < bltheight; y++) {
        bitmask = 0x80 >> srcskipleft;
-        bits = *src++;
-        d = dst + dstskipleft;
+        bits = cirrus_src(s, srcaddr++);
+        addr = dstaddr + dstskipleft;
        for (x = dstskipleft; x < bltwidth; x += (DEPTH / 8)) {
            if ((bitmask & 0xff) == 0) {
                bitmask = 0x80;
-                bits = *src++;
+                bits = cirrus_src(s, srcaddr++);
            }
            col = colors[!!(bits & bitmask)];
-            PUTPIXEL();
-            d += (DEPTH / 8);
+            PUTPIXEL(s, addr, col);
+            addr += (DEPTH / 8);
            bitmask >>= 1;
        }
-        dst += dstpitch;
+        dstaddr += dstpitch;
    }
 }

 static void
 glue(glue(glue(cirrus_colorexpand_pattern_transp_, ROP_NAME), _),DEPTH)
-     (CirrusVGAState * s, uint8_t * dst,
-      const uint8_t * src,
+     (CirrusVGAState *s, uint32_t dstaddr,
+      uint32_t srcaddr,
      int dstpitch, int srcpitch,
      int bltwidth, int bltheight)
 {
-    uint8_t *d;
+    uint32_t addr;
    int x, y, bitpos, pattern_y;
    unsigned int bits, bits_xor;
    unsigned int col;
@@ -205,30 +209,30 @@ glue(glue(glue(cirrus_colorexpand_pattern_transp_, ROP_NAME), _),DEPTH)
    pattern_y = s->cirrus_blt_srcaddr & 7;

    for(y = 0; y < bltheight; y++) {
-        bits = src[pattern_y] ^ bits_xor;
+        bits = cirrus_src(s, srcaddr + pattern_y) ^ bits_xor;
        bitpos = 7 - srcskipleft;
-        d = dst + dstskipleft;
+        addr = dstaddr + dstskipleft;
        for (x = dstskipleft; x < bltwidth; x += (DEPTH / 8)) {
            if ((bits >> bitpos) & 1) {
-                PUTPIXEL();
+                PUTPIXEL(s, addr, col);
            }
-            d += (DEPTH / 8);
+            addr += (DEPTH / 8);
            bitpos = (bitpos - 1) & 7;
        }
        pattern_y = (pattern_y + 1) & 7;
-        dst += dstpitch;
+        dstaddr += dstpitch;
    }
 }

 static void
 glue(glue(glue(cirrus_colorexpand_pattern_, ROP_NAME), _),DEPTH)
-     (CirrusVGAState * s, uint8_t * dst,
-      const uint8_t * src,
+     (CirrusVGAState *s, uint32_t dstaddr,
+      uint32_t srcaddr,
      int dstpitch, int srcpitch,
      int bltwidth, int bltheight)
 {
    uint32_t colors[2];
-    uint8_t *d;
+    uint32_t addr;
    int x, y, bitpos, pattern_y;
    unsigned int bits;
    unsigned int col;
@@ -240,40 +244,39 @@ glue(glue(glue(cirrus_colorexpand_pattern_, ROP_NAME), _),DEPTH)
    pattern_y = s->cirrus_blt_srcaddr & 7;

    for(y = 0; y < bltheight; y++) {
-        bits = src[pattern_y];
+        bits = cirrus_src(s, srcaddr + pattern_y);
        bitpos = 7 - srcskipleft;
-        d = dst + dstskipleft;
+        addr = dstaddr + dstskipleft;
        for (x = dstskipleft; x < bltwidth; x += (DEPTH / 8)) {
            col = colors[(bits >> bitpos) & 1];
-            PUTPIXEL();
-            d += (DEPTH / 8);
+            PUTPIXEL(s, addr, col);
+            addr += (DEPTH / 8);
            bitpos = (bitpos - 1) & 7;
        }
        pattern_y = (pattern_y + 1) & 7;
-        dst += dstpitch;
+        dstaddr += dstpitch;
    }
 }

 static void
 glue(glue(glue(cirrus_fill_, ROP_NAME), _),DEPTH)
     (CirrusVGAState *s,
-      uint8_t *dst, int dst_pitch,
+      uint32_t dstaddr, int dst_pitch,
      int width, int height)
 {
-    uint8_t *d, *d1;
+    uint32_t addr;
    uint32_t col;
    int x, y;

    col = s->cirrus_blt_fgcol;

-    d1 = dst;
    for(y = 0; y < height; y++) {
-        d = d1;
+        addr = dstaddr;
        for(x = 0; x < width; x += (DEPTH / 8)) {
-            PUTPIXEL();
-            d += (DEPTH / 8);
+            PUTPIXEL(s, addr, col);
+            addr += (DEPTH / 8);
        }
-        d1 += dst_pitch;
+        dstaddr += dst_pitch;
    }
 }

--- a/hw/display/qxl-render.c
+++ b/hw/display/qxl-render.c
@@ -211,11 +211,18 @@ static QEMUCursor *qxl_cursor(PCIQXLDevice *qxl, QXLCursor *cursor)
    size_t size;

    c = cursor_alloc(cursor->header.width, cursor->header.height);
+
+    if (!c) {
+        qxl_set_guest_bug(qxl, "%s: cursor %ux%u alloc error", __func__,
+                cursor->header.width, cursor->header.height);
+        goto fail;
+    }
+
    c->hot_x = cursor->header.hot_spot_x;
    c->hot_y = cursor->header.hot_spot_y;
    switch (cursor->header.type) {
    case SPICE_CURSOR_TYPE_ALPHA:
-        size = sizeof(uint32_t) * cursor->header.width * cursor->header.height;
+        size = sizeof(uint32_t) * c->width * c->height;
        memcpy(c->data, cursor->chunk.data, size);
        if (qxl->debug > 2) {
            cursor_print_ascii_art(c, "qxl/alpha");
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -710,6 +710,9 @@ static void interface_release_resource(QXLInstance *sin,
    QXLReleaseRing *ring;
    uint64_t *item, id;

+    if (!ext.info) {
+        return;
+    }
    if (ext.group_id == MEMSLOT_GROUP_HOST) {
        /* host group -> vga mode update request */
        QXLCommandExt *cmdext = (void *)(intptr_t)(ext.info->id);
--- a/hw/display/sm501.c
+++ b/hw/display/sm501.c
@@ -2,6 +2,7 @@
 * QEMU SM501 Device
 *
 * Copyright (c) 2008 Shin-ichiro KAWASAKI
+ * Copyright (c) 2016 BALATON Zoltan
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -23,6 +24,7 @@
 */

 #include "qemu/osdep.h"
+#include "qemu/log.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
 #include "cpu.h"
@@ -34,14 +36,18 @@
 #include "qemu/range.h"
 #include "ui/pixel_ops.h"
 #include "exec/address-spaces.h"
+#include "qemu/bswap.h"

 /*
 * Status: 2010/05/07
 *   - Minimum implementation for Linux console : mmio regs and CRT layer.
 *   - 2D grapihcs acceleration partially supported : only fill rectangle.
 *
- * TODO:
+ * Status: 2016/12/04
+ *   - Misc fixes: endianness, hardware cursor
 *   - Panel support
+ *
+ * TODO:
 *   - Touch panel support
 *   - USB support
 *   - UART support
@@ -548,6 +554,29 @@ static uint32_t get_local_mem_size_index(uint32_t size)
    return index;
 }

+static inline int get_width(SM501State *s, int crt)
+{
+    int width = crt ? s->dc_crt_h_total : s->dc_panel_h_total;
+    return (width & 0x00000FFF) + 1;
+}
+
+static inline int get_height(SM501State *s, int crt)
+{
+    int height = crt ? s->dc_crt_v_total : s->dc_panel_v_total;
+    return (height & 0x00000FFF) + 1;
+}
+
+static inline int get_bpp(SM501State *s, int crt)
+{
+    int bpp = crt ? s->dc_crt_control : s->dc_panel_control;
+    return 1 << (bpp & 3);
+}
+
+static ram_addr_t get_fb_addr(SM501State *s, int crt)
+{
+    return (crt ? s->dc_crt_fb_addr : s->dc_panel_fb_addr) & 0x3FFFFF0;
+}
+
 /**
 * Check the availability of hardware cursor.
 * @param crt  0 for PANEL, 1 for CRT.
@@ -562,10 +591,10 @@ static inline int is_hwc_enabled(SM501State *state, int crt)
 * Get the address which holds cursor pattern data.
 * @param crt  0 for PANEL, 1 for CRT.
 */
-static inline uint32_t get_hwc_address(SM501State *state, int crt)
+static inline uint8_t *get_hwc_address(SM501State *state, int crt)
 {
    uint32_t addr = crt ? state->dc_crt_hwc_addr : state->dc_panel_hwc_addr;
-    return (addr & 0x03FFFFF0)/* >> 4*/;
+    return state->local_mem + (addr & 0x03FFFFF0);
 }

 /**
@@ -591,142 +620,218 @@ static inline uint32_t get_hwc_x(SM501State *state, int crt)
 }

 /**
- * Get the cursor position in x coordinate.
+ * Get the hardware cursor palette.
 * @param crt  0 for PANEL, 1 for CRT.
- * @param index  0, 1, 2 or 3 which specifies color of corsor dot.
+ * @param palette  pointer to a [3 * 3] array to store color values in
 */
-static inline uint16_t get_hwc_color(SM501State *state, int crt, int index)
+static inline void get_hwc_palette(SM501State *state, int crt, uint8_t *palette)
 {
-    uint32_t color_reg = 0;
-    uint16_t color_565 = 0;
+    int i;
+    uint32_t color_reg;
+    uint16_t rgb565;

-    if (index == 0) {
-        return 0;
-    }
+    for (i = 0; i < 3; i++) {
+        if (i + 1 == 3) {
+            color_reg = crt ? state->dc_crt_hwc_color_3
+                            : state->dc_panel_hwc_color_3;
+        } else {
+            color_reg = crt ? state->dc_crt_hwc_color_1_2
+                            : state->dc_panel_hwc_color_1_2;
+        }

-    switch (index) {
-    case 1:
-    case 2:
-        color_reg = crt ? state->dc_crt_hwc_color_1_2
-                        : state->dc_panel_hwc_color_1_2;
-        break;
-    case 3:
-        color_reg = crt ? state->dc_crt_hwc_color_3
-                        : state->dc_panel_hwc_color_3;
-        break;
-    default:
-        printf("invalid hw cursor color.\n");
-        abort();
+        if (i + 1 == 2) {
+            rgb565 = (color_reg >> 16) & 0xFFFF;
+        } else {
+            rgb565 = color_reg & 0xFFFF;
+        }
+        palette[i * 3 + 0] = (rgb565 << 3) & 0xf8; /* red */
+        palette[i * 3 + 1] = (rgb565 >> 3) & 0xfc; /* green */
+        palette[i * 3 + 2] = (rgb565 >> 8) & 0xf8; /* blue */
    }
-
-    switch (index) {
-    case 1:
-    case 3:
-        color_565 = (uint16_t)(color_reg & 0xFFFF);
-        break;
-    case 2:
-        color_565 = (uint16_t)((color_reg >> 16) & 0xFFFF);
-        break;
-    }
-    return color_565;
 }

-static int within_hwc_y_range(SM501State *state, int y, int crt)
+static inline void hwc_invalidate(SM501State *s, int crt)
 {
-    int hwc_y = get_hwc_y(state, crt);
-    return (hwc_y <= y && y < hwc_y + SM501_HWC_HEIGHT);
+    int w = get_width(s, crt);
+    int h = get_height(s, crt);
+    int bpp = get_bpp(s, crt);
+    int start = get_hwc_y(s, crt);
+    int end = MIN(h, start + SM501_HWC_HEIGHT) + 1;
+
+    start *= w * bpp;
+    end *= w * bpp;
+
+    memory_region_set_dirty(&s->local_mem_region, start, end - start);
 }

 static void sm501_2d_operation(SM501State * s)
 {
-    /* obtain operation parameters */
-    int operation = (s->twoD_control >> 16) & 0x1f;
-    int rtl = s->twoD_control & 0x8000000;
-    int src_x = (s->twoD_source >> 16) & 0x01FFF;
-    int src_y = s->twoD_source & 0xFFFF;
-    int dst_x = (s->twoD_destination >> 16) & 0x01FFF;
-    int dst_y = s->twoD_destination & 0xFFFF;
-    int operation_width = (s->twoD_dimension >> 16) & 0x1FFF;
-    int operation_height = s->twoD_dimension & 0xFFFF;
-    uint32_t color = s->twoD_foreground;
-    int format_flags = (s->twoD_stretch >> 20) & 0x3;
-    int addressing = (s->twoD_stretch >> 16) & 0xF;
+    int cmd = (s->twoD_control >> 16) & 0x1F;
+    int rtl = s->twoD_control & BIT(27);
+    int format = (s->twoD_stretch >> 20) & 0x3;
+    int rop_mode = (s->twoD_control >> 15) & 0x1; /* 1 for rop2, else rop3 */
+    /* 1 if rop2 source is the pattern, otherwise the source is the bitmap */
+    int rop2_source_is_pattern = (s->twoD_control >> 14) & 0x1;
+    int rop = s->twoD_control & 0xFF;
+    unsigned int dst_x = (s->twoD_destination >> 16) & 0x01FFF;
+    unsigned int dst_y = s->twoD_destination & 0xFFFF;
+    unsigned int width = (s->twoD_dimension >> 16) & 0x1FFF;
+    unsigned int height = s->twoD_dimension & 0xFFFF;
+    uint32_t dst_base = s->twoD_destination_base & 0x03FFFFFF;
+    unsigned int dst_pitch = (s->twoD_pitch >> 16) & 0x1FFF;
+    int crt = (s->dc_crt_control & SM501_DC_CRT_CONTROL_SEL) ? 1 : 0;
+    int fb_len = get_width(s, crt) * get_height(s, crt) * get_bpp(s, crt);

-    /* get frame buffer info */
-    uint8_t * src = s->local_mem + (s->twoD_source_base & 0x03FFFFFF);
-    uint8_t * dst = s->local_mem + (s->twoD_destination_base & 0x03FFFFFF);
-    int src_width = (s->dc_crt_h_total & 0x00000FFF) + 1;
-    int dst_width = (s->dc_crt_h_total & 0x00000FFF) + 1;
-
-    if (addressing != 0x0) {
-        printf("%s: only XY addressing is supported.\n", __func__);
-        abort();
+    if ((s->twoD_stretch >> 16) & 0xF) {
+        qemu_log_mask(LOG_UNIMP, "sm501: only XY addressing is supported.\n");
+        return;
    }

-    if ((s->twoD_source_base & 0x08000000) ||
-        (s->twoD_destination_base & 0x08000000)) {
-        printf("%s: only local memory is supported.\n", __func__);
-        abort();
+    if (s->twoD_source_base & BIT(27) || s->twoD_destination_base & BIT(27)) {
+        qemu_log_mask(LOG_UNIMP, "sm501: only local memory is supported.\n");
+        return;
    }

-    switch (operation) {
-    case 0x00: /* copy area */
-#define COPY_AREA(_bpp, _pixel_type, rtl) {                                 \
-        int y, x, index_d, index_s;                                         \
-        for (y = 0; y < operation_height; y++) {                            \
-            for (x = 0; x < operation_width; x++) {                         \
-                if (rtl) {                                                  \
-                    index_s = ((src_y - y) * src_width + src_x - x) * _bpp; \
-                    index_d = ((dst_y - y) * dst_width + dst_x - x) * _bpp; \
-                } else {                                                    \
-                    index_s = ((src_y + y) * src_width + src_x + x) * _bpp; \
-                    index_d = ((dst_y + y) * dst_width + dst_x + x) * _bpp; \
-                }                                                           \
-                *(_pixel_type*)&dst[index_d] = *(_pixel_type*)&src[index_s];\
-            }                                                               \
-        }                                                                   \
+    if (!dst_pitch) {
+        qemu_log_mask(LOG_GUEST_ERROR, "sm501: Zero dest pitch.\n");
+        return;
    }
-        switch (format_flags) {
-        case 0:
-            COPY_AREA(1, uint8_t, rtl);
-            break;
-        case 1:
-            COPY_AREA(2, uint16_t, rtl);
-            break;
-        case 2:
-            COPY_AREA(4, uint32_t, rtl);
-            break;
+
+    if (!width || !height) {
+        qemu_log_mask(LOG_GUEST_ERROR, "sm501: Zero size 2D op.\n");
+        return;
+    }
+
+    if (rtl) {
+        dst_x -= width - 1;
+        dst_y -= height - 1;
+    }
+
+    if (dst_base >= get_local_mem_size(s) || dst_base +
+        (dst_x + width + (dst_y + height) * (dst_pitch + width)) *
+        (1 << format) >= get_local_mem_size(s)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "sm501: 2D op dest is outside vram.\n");
+        return;
+    }
+
+    switch (cmd) {
+    case 0: /* BitBlt */
+    {
+        unsigned int src_x = (s->twoD_source >> 16) & 0x01FFF;
+        unsigned int src_y = s->twoD_source & 0xFFFF;
+        uint32_t src_base = s->twoD_source_base & 0x03FFFFFF;
+        unsigned int src_pitch = s->twoD_pitch & 0x1FFF;
+
+        if (!src_pitch) {
+            qemu_log_mask(LOG_GUEST_ERROR, "sm501: Zero src pitch.\n");
+            return;
+        }
+
+        if (rtl) {
+            src_x -= width - 1;
+            src_y -= height - 1;
+        }
+
+        if (src_base >= get_local_mem_size(s) || src_base +
+            (src_x + width + (src_y + height) * (src_pitch + width)) *
+            (1 << format) >= get_local_mem_size(s)) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "sm501: 2D op src is outside vram.\n");
+            return;
+        }
+
+        if ((rop_mode && rop == 0x5) || (!rop_mode && rop == 0x55)) {
+            /* Invert dest, is there a way to do this with pixman? */
+            unsigned int x, y, i;
+            uint8_t *d = s->local_mem + dst_base;
+
+            for (y = 0; y < height; y++) {
+                i = (dst_x + (dst_y + y) * dst_pitch) * (1 << format);
+                for (x = 0; x < width; x++, i += (1 << format)) {
+                    switch (format) {
+                    case 0:
+                        d[i] = ~d[i];
+                        break;
+                    case 1:
+                        *(uint16_t *)&d[i] = ~*(uint16_t *)&d[i];
+                        break;
+                    case 2:
+                        *(uint32_t *)&d[i] = ~*(uint32_t *)&d[i];
+                        break;
+                    }
+                }
+            }
+        } else {
+            /* Do copy src for unimplemented ops, better than unpainted area */
+            if ((rop_mode && (rop != 0xc || rop2_source_is_pattern)) ||
+                (!rop_mode && rop != 0xcc)) {
+                qemu_log_mask(LOG_UNIMP,
+                              "sm501: rop%d op %x%s not implemented\n",
+                              (rop_mode ? 2 : 3), rop,
+                              (rop2_source_is_pattern ?
+                                  " with pattern source" : ""));
+            }
+            /* Check for overlaps, this could be made more exact */
+            uint32_t sb, se, db, de;
+            sb = src_base + src_x + src_y * (width + src_pitch);
+            se = sb + width + height * (width + src_pitch);
+            db = dst_base + dst_x + dst_y * (width + dst_pitch);
+            de = db + width + height * (width + dst_pitch);
+            if (rtl && ((db >= sb && db <= se) || (de >= sb && de <= se))) {
+                /* regions may overlap: copy via temporary */
+                int llb = width * (1 << format);
+                int tmp_stride = DIV_ROUND_UP(llb, sizeof(uint32_t));
+                uint32_t *tmp = g_malloc(tmp_stride * sizeof(uint32_t) *
+                                         height);
+                pixman_blt((uint32_t *)&s->local_mem[src_base], tmp,
+                           src_pitch * (1 << format) / sizeof(uint32_t),
+                           tmp_stride, 8 * (1 << format), 8 * (1 << format),
+                           src_x, src_y, 0, 0, width, height);
+                pixman_blt(tmp, (uint32_t *)&s->local_mem[dst_base],
+                           tmp_stride,
+                           dst_pitch * (1 << format) / sizeof(uint32_t),
+                           8 * (1 << format), 8 * (1 << format),
+                           0, 0, dst_x, dst_y, width, height);
+                g_free(tmp);
+            } else {
+                pixman_blt((uint32_t *)&s->local_mem[src_base],
+                           (uint32_t *)&s->local_mem[dst_base],
+                           src_pitch * (1 << format) / sizeof(uint32_t),
+                           dst_pitch * (1 << format) / sizeof(uint32_t),
+                           8 * (1 << format), 8 * (1 << format),
+                           src_x, src_y, dst_x, dst_y, width, height);
+            }
        }
        break;
-
-    case 0x01: /* fill rectangle */
-#define FILL_RECT(_bpp, _pixel_type) {                                      \
-        int y, x;                                                           \
-        for (y = 0; y < operation_height; y++) {                            \
-            for (x = 0; x < operation_width; x++) {                         \
-                int index = ((dst_y + y) * dst_width + dst_x + x) * _bpp;   \
-                *(_pixel_type*)&dst[index] = (_pixel_type)color;            \
-            }                                                               \
-        }                                                                   \
    }
+    case 1: /* Rectangle Fill */
+    {
+        uint32_t color = s->twoD_foreground;

-        switch (format_flags) {
-        case 0:
-            FILL_RECT(1, uint8_t);
-            break;
-        case 1:
-            FILL_RECT(2, uint16_t);
-            break;
-        case 2:
-            FILL_RECT(4, uint32_t);
-            break;
+        if (format == 2) {
+            color = cpu_to_le32(color);
+        } else if (format == 1) {
+            color = cpu_to_le16(color);
        }
-        break;

+        pixman_fill((uint32_t *)&s->local_mem[dst_base],
+                    dst_pitch * (1 << format) / sizeof(uint32_t),
+                    8 * (1 << format), dst_x, dst_y, width, height, color);
+        break;
+    }
    default:
-        printf("non-implemented SM501 2D operation. %d\n", operation);
-        abort();
-        break;
+        qemu_log_mask(LOG_UNIMP, "sm501: not implemented 2D operation: %d\n",
+                      cmd);
+        return;
+    }
+
+    if (dst_base >= get_fb_addr(s, crt) &&
+        dst_base <= get_fb_addr(s, crt) + fb_len) {
+        int dst_len = MIN(fb_len, ((dst_y + height - 1) * dst_pitch +
+                          dst_x + width) * (1 << format));
+        if (dst_len) {
+            memory_region_set_dirty(&s->local_mem_region, dst_base, dst_len);
+        }
    }
 }

@@ -756,6 +861,9 @@ static uint64_t sm501_system_config_read(void *opaque, hwaddr addr,
    case SM501_DRAM_CONTROL:
 	ret = (s->dram_control & 0x07F107C0) | s->local_mem_size_index << 13;
 	break;
+    case SM501_COMMAND_LIST_STATUS:
+        ret = 0x00180002; /* FIFOs are empty, everything idle */
+        break;
    case SM501_IRQ_MASK:
 	ret = s->irq_mask;
 	break;
@@ -773,11 +880,13 @@ static uint64_t sm501_system_config_read(void *opaque, hwaddr addr,
    case SM501_POWER_MODE_CONTROL:
 	ret = s->power_mode_control;
 	break;
+    case SM501_ENDIAN_CONTROL:
+        ret = 0; /* Only default little endian mode is supported */
+        break;

    default:
-	printf("sm501 system config : not implemented register read."
-	       " addr=%x\n", (int)addr);
-        abort();
+        qemu_log_mask(LOG_UNIMP, "sm501: not implemented system config"
+                      " register read. addr=%" HWADDR_PRIx "\n", addr);
    }

    return ret;
@@ -823,11 +932,17 @@ static void sm501_system_config_write(void *opaque, hwaddr addr,
    case SM501_POWER_MODE_CONTROL:
 	s->power_mode_control = value & 0x00000003;
 	break;
+    case SM501_ENDIAN_CONTROL:
+        if (value & 0x00000001) {
+            qemu_log_mask(LOG_UNIMP, "sm501: system config big endian mode "
+                          "not implemented.\n");
+        }
+        break;

    default:
-	printf("sm501 system config : not implemented register write."
-	       " addr=%x, val=%x\n", (int)addr, (uint32_t)value);
-        abort();
+        qemu_log_mask(LOG_UNIMP, "sm501: not implemented system config"
+                      " register write. addr=%" HWADDR_PRIx
+                      ", val=%" PRIx64 "\n", addr, value);
    }
 }

@@ -882,6 +997,9 @@ static uint64_t sm501_disp_ctrl_read(void *opaque, hwaddr addr,
    case SM501_DC_PANEL_PANNING_CONTROL:
 	ret = s->dc_panel_panning_control;
 	break;
+    case SM501_DC_PANEL_COLOR_KEY:
+        /* Not implemented yet */
+        break;
    case SM501_DC_PANEL_FB_ADDR:
 	ret = s->dc_panel_fb_addr;
 	break;
@@ -954,9 +1072,8 @@ static uint64_t sm501_disp_ctrl_read(void *opaque, hwaddr addr,
        break;

    default:
-	printf("sm501 disp ctrl : not implemented register read."
-	       " addr=%x\n", (int)addr);
-        abort();
+        qemu_log_mask(LOG_UNIMP, "sm501: not implemented disp ctrl register "
+                      "read. addr=%" HWADDR_PRIx "\n", addr);
    }

    return ret;
@@ -976,8 +1093,14 @@ static void sm501_disp_ctrl_write(void *opaque, hwaddr addr,
    case SM501_DC_PANEL_PANNING_CONTROL:
 	s->dc_panel_panning_control = value & 0xFF3FFF3F;
 	break;
+    case SM501_DC_PANEL_COLOR_KEY:
+        /* Not implemented yet */
+        break;
    case SM501_DC_PANEL_FB_ADDR:
 	s->dc_panel_fb_addr = value & 0x8FFFFFF0;
+        if (value & 0x8000000) {
+            qemu_log_mask(LOG_UNIMP, "Panel external memory not supported\n");
+        }
 	break;
    case SM501_DC_PANEL_FB_OFFSET:
 	s->dc_panel_fb_offset = value & 0x3FF03FF0;
@@ -1009,11 +1132,19 @@ static void sm501_disp_ctrl_write(void *opaque, hwaddr addr,
 	break;

    case SM501_DC_PANEL_HWC_ADDR:
-	s->dc_panel_hwc_addr = value & 0x8FFFFFF0;
-	break;
+        value &= 0x8FFFFFF0;
+        if (value != s->dc_panel_hwc_addr) {
+            hwc_invalidate(s, 0);
+            s->dc_panel_hwc_addr = value;
+        }
+        break;
    case SM501_DC_PANEL_HWC_LOC:
-	s->dc_panel_hwc_location = value & 0x0FFF0FFF;
-	break;
+        value &= 0x0FFF0FFF;
+        if (value != s->dc_panel_hwc_location) {
+            hwc_invalidate(s, 0);
+            s->dc_panel_hwc_location = value;
+        }
+        break;
    case SM501_DC_PANEL_HWC_COLOR_1_2:
 	s->dc_panel_hwc_color_1_2 = value;
 	break;
@@ -1026,6 +1157,9 @@ static void sm501_disp_ctrl_write(void *opaque, hwaddr addr,
 	break;
    case SM501_DC_CRT_FB_ADDR:
 	s->dc_crt_fb_addr = value & 0x8FFFFFF0;
+        if (value & 0x8000000) {
+            qemu_log_mask(LOG_UNIMP, "CRT external memory not supported\n");
+        }
 	break;
    case SM501_DC_CRT_FB_OFFSET:
 	s->dc_crt_fb_offset = value & 0x3FF03FF0;
@@ -1044,11 +1178,19 @@ static void sm501_disp_ctrl_write(void *opaque, hwaddr addr,
 	break;

    case SM501_DC_CRT_HWC_ADDR:
-	s->dc_crt_hwc_addr = value & 0x8FFFFFF0;
-	break;
+        value &= 0x8FFFFFF0;
+        if (value != s->dc_crt_hwc_addr) {
+            hwc_invalidate(s, 1);
+            s->dc_crt_hwc_addr = value;
+        }
+        break;
    case SM501_DC_CRT_HWC_LOC:
-	s->dc_crt_hwc_location = value & 0x0FFF0FFF;
-	break;
+        value &= 0x0FFF0FFF;
+        if (value != s->dc_crt_hwc_location) {
+            hwc_invalidate(s, 1);
+            s->dc_crt_hwc_location = value;
+        }
+        break;
    case SM501_DC_CRT_HWC_COLOR_1_2:
 	s->dc_crt_hwc_color_1_2 = value;
 	break;
@@ -1061,9 +1203,9 @@ static void sm501_disp_ctrl_write(void *opaque, hwaddr addr,
        break;

    default:
-	printf("sm501 disp ctrl : not implemented register write."
-	       " addr=%x, val=%x\n", (int)addr, (unsigned)value);
-        abort();
+        qemu_log_mask(LOG_UNIMP, "sm501: not implemented disp ctrl register "
+                      "write. addr=%" HWADDR_PRIx
+                      ", val=%" PRIx64 "\n", addr, value);
    }
 }

@@ -1089,9 +1231,8 @@ static uint64_t sm501_2d_engine_read(void *opaque, hwaddr addr,
        ret = s->twoD_source_base;
        break;
    default:
-        printf("sm501 disp ctrl : not implemented register read."
-               " addr=%x\n", (int)addr);
-        abort();
+        qemu_log_mask(LOG_UNIMP, "sm501: not implemented 2d engine register "
+                      "read. addr=%" HWADDR_PRIx "\n", addr);
    }

    return ret;
@@ -1149,9 +1290,9 @@ static void sm501_2d_engine_write(void *opaque, hwaddr addr,
        s->twoD_destination_base = value;
        break;
    default:
-        printf("sm501 2d engine : not implemented register write."
-               " addr=%x, val=%x\n", (int)addr, (unsigned)value);
-        abort();
+        qemu_log_mask(LOG_UNIMP, "sm501: not implemented 2d engine register "
+                      "write. addr=%" HWADDR_PRIx
+                      ", val=%" PRIx64 "\n", addr, value);
    }
 }

@@ -1170,8 +1311,9 @@ static const MemoryRegionOps sm501_2d_engine_ops = {
 typedef void draw_line_func(uint8_t *d, const uint8_t *s,
 			    int width, const uint32_t *pal);

-typedef void draw_hwc_line_func(SM501State * s, int crt, uint8_t * palette,
-                                int c_y, uint8_t *d, int width);
+typedef void draw_hwc_line_func(uint8_t *d, const uint8_t *s,
+                                int width, const uint8_t *palette,
+                                int c_x, int c_y);

 #define DEPTH 8
 #include "sm501_template.h"
@@ -1256,19 +1398,16 @@ static inline int get_depth_index(DisplaySurface *surface)
    }
 }

-static void sm501_draw_crt(SM501State * s)
+static void sm501_update_display(void *opaque)
 {
+    SM501State *s = (SM501State *)opaque;
    DisplaySurface *surface = qemu_console_surface(s->con);
-    int y;
-    int width = (s->dc_crt_h_total & 0x00000FFF) + 1;
-    int height = (s->dc_crt_v_total & 0x00000FFF) + 1;
-
-    uint8_t  * src = s->local_mem;
-    int src_bpp = 0;
+    int y, c_x = 0, c_y = 0;
+    int crt = (s->dc_crt_control & SM501_DC_CRT_CONTROL_SEL) ? 1 : 0;
+    int width = get_width(s, crt);
+    int height = get_height(s, crt);
+    int src_bpp = get_bpp(s, crt);
    int dst_bpp = surface_bytes_per_pixel(surface);
-    uint32_t * palette = (uint32_t *)&s->dc_palette[SM501_DC_CRT_PALETTE
-						    - SM501_DC_PANEL_PALETTE];
-    uint8_t hwc_palette[3 * 3];
    int ds_depth_index = get_depth_index(surface);
    draw_line_func * draw_line = NULL;
    draw_hwc_line_func * draw_hwc_line = NULL;
@@ -1276,43 +1415,46 @@ static void sm501_draw_crt(SM501State * s)
    int y_start = -1;
    ram_addr_t page_min = ~0l;
    ram_addr_t page_max = 0l;
-    ram_addr_t offset = 0;
+    ram_addr_t offset;
+    uint32_t *palette;
+    uint8_t hwc_palette[3 * 3];
+    uint8_t *hwc_src = NULL;
+
+    if (!((crt ? s->dc_crt_control : s->dc_panel_control)
+          & SM501_DC_CRT_CONTROL_ENABLE)) {
+        return;
+    }
+
+    palette = (uint32_t *)(crt ? &s->dc_palette[SM501_DC_CRT_PALETTE -
+                                                SM501_DC_PANEL_PALETTE]
+                               : &s->dc_palette[0]);

    /* choose draw_line function */
-    switch (s->dc_crt_control & 3) {
-    case SM501_DC_CRT_CONTROL_8BPP:
-	src_bpp = 1;
-	draw_line = draw_line8_funcs[ds_depth_index];
-	break;
-    case SM501_DC_CRT_CONTROL_16BPP:
-	src_bpp = 2;
-	draw_line = draw_line16_funcs[ds_depth_index];
-	break;
-    case SM501_DC_CRT_CONTROL_32BPP:
-	src_bpp = 4;
-	draw_line = draw_line32_funcs[ds_depth_index];
-	break;
+    switch (src_bpp) {
+    case 1:
+        draw_line = draw_line8_funcs[ds_depth_index];
+        break;
+    case 2:
+        draw_line = draw_line16_funcs[ds_depth_index];
+        break;
+    case 4:
+        draw_line = draw_line32_funcs[ds_depth_index];
+        break;
    default:
-	printf("sm501 draw crt : invalid DC_CRT_CONTROL=%x.\n",
-	       s->dc_crt_control);
-        abort();
-	break;
+        qemu_log_mask(LOG_GUEST_ERROR, "sm501 draw crt: "
+                      "invalid DC_CRT_CONTROL=%x.\n",
+                      s->dc_crt_control);
+        return; /* draw_line is still NULL here; calling it would crash */
    }

    /* set up to draw hardware cursor */
-    if (is_hwc_enabled(s, 1)) {
-        int i;
-
-        /* get cursor palette */
-        for (i = 0; i < 3; i++) {
-            uint16_t rgb565 = get_hwc_color(s, 1, i + 1);
-            hwc_palette[i * 3 + 0] = (rgb565 & 0xf800) >> 8; /* red */
-            hwc_palette[i * 3 + 1] = (rgb565 & 0x07e0) >> 3; /* green */
-            hwc_palette[i * 3 + 2] = (rgb565 & 0x001f) << 3; /* blue */
-        }
-
+    if (is_hwc_enabled(s, crt)) {
        /* choose cursor draw line function */
        draw_hwc_line = draw_hwc_line_funcs[ds_depth_index];
+        hwc_src = get_hwc_address(s, crt);
+        c_x = get_hwc_x(s, crt);
+        c_y = get_hwc_y(s, crt);
+        get_hwc_palette(s, crt, hwc_palette);
    }

    /* adjust console size */
@@ -1326,15 +1468,18 @@ static void sm501_draw_crt(SM501State * s)

    /* draw each line according to conditions */
    memory_region_sync_dirty_bitmap(&s->local_mem_region);
-    for (y = 0; y < height; y++) {
-	int update_hwc = draw_hwc_line ? within_hwc_y_range(s, y, 1) : 0;
-	int update = full_update || update_hwc;
+    offset = get_fb_addr(s, crt);
+    for (y = 0; y < height; y++, offset += width * src_bpp) {
+        int update, update_hwc;
        ram_addr_t page0 = offset;
        ram_addr_t page1 = offset + width * src_bpp - 1;

-	/* check dirty flags for each line */
-        update = memory_region_get_dirty(&s->local_mem_region, page0,
-                                         page1 - page0, DIRTY_MEMORY_VGA);
+        /* check if hardware cursor is enabled and we're within its range */
+        update_hwc = draw_hwc_line && c_y <= y && y < c_y + SM501_HWC_HEIGHT;
+        update = full_update || update_hwc;
+        /* check dirty flags for each line */
+        update |= memory_region_get_dirty(&s->local_mem_region, page0,
+                                          page1 - page0, DIRTY_MEMORY_VGA);

 	/* draw line and change status */
 	if (update) {
@@ -1342,11 +1487,11 @@ static void sm501_draw_crt(SM501State * s)
            d +=  y * width * dst_bpp;

            /* draw graphics layer */
-            draw_line(d, src, width, palette);
+            draw_line(d, s->local_mem + offset, width, palette);

            /* draw haredware cursor */
            if (update_hwc) {
-                draw_hwc_line(s, 1, hwc_palette, y - get_hwc_y(s, 1), d, width);
+                draw_hwc_line(d, hwc_src, width, hwc_palette, c_x, y - c_y);
            }

 	    if (y_start < 0)
@@ -1362,9 +1507,6 @@ static void sm501_draw_crt(SM501State * s)
 		y_start = -1;
 	    }
 	}
-
-	src += width * src_bpp;
-	offset += width * src_bpp;
    }

    /* complete flush to display */
@@ -1379,14 +1521,6 @@ static void sm501_draw_crt(SM501State * s)
    }
 }

-static void sm501_update_display(void *opaque)
-{
-    SM501State * s = (SM501State *)opaque;
-
-    if (s->dc_crt_control & SM501_DC_CRT_CONTROL_ENABLE)
-	sm501_draw_crt(s);
-}
-
 static const GraphicHwOps sm501_ops = {
    .gfx_update  = sm501_update_display,
 };
--- a/hw/display/sm501_template.h
+++ b/hw/display/sm501_template.h
@@ -99,29 +99,24 @@ static void glue(draw_line32_, PIXEL_NAME)(
 /**
 * Draw hardware cursor image on the given line.
 */
-static void glue(draw_hwc_line_, PIXEL_NAME)(SM501State * s, int crt,
-                         uint8_t * palette, int c_y, uint8_t *d, int width)
+static void glue(draw_hwc_line_, PIXEL_NAME)(uint8_t *d, const uint8_t *s,
+                 int width, const uint8_t *palette, int c_x, int c_y)
 {
-    int x, i;
+    int i;
    uint8_t bitset = 0;

-    /* get hardware cursor pattern */
-    uint32_t cursor_addr = get_hwc_address(s, crt);
-    assert(0 <= c_y && c_y < SM501_HWC_HEIGHT);
-    cursor_addr += 64 * c_y / 4;  /* 4 pixels per byte */
-    cursor_addr += s->base;
-
    /* get cursor position */
-    x = get_hwc_x(s, crt);
-    d += x * BPP;
+    assert(0 <= c_y && c_y < SM501_HWC_HEIGHT);
+    s += SM501_HWC_WIDTH * c_y / 4;  /* 4 pixels per byte */
+    d += c_x * BPP;

-    for (i = 0; i < SM501_HWC_WIDTH && x + i < width; i++) {
+    for (i = 0; i < SM501_HWC_WIDTH && c_x + i < width; i++) {
        uint8_t v;

        /* get pixel value */
        if (i % 4 == 0) {
-            bitset = ldub_phys(&address_space_memory, cursor_addr);
-            cursor_addr++;
+            bitset = ldub_p(s);
+            s++;
        }
        v = bitset & 3;
        bitset >>= 2;
--- a/hw/display/vga-helpers.h
+++ b/hw/display/vga-helpers.h
@@ -95,20 +95,46 @@ static void vga_draw_glyph9(uint8_t *d, int linesize,
    } while (--h);
 }

+static inline uint8_t vga_read_byte(VGACommonState *vga, uint32_t addr)
+{
+    return vga->vram_ptr[addr & vga->vbe_size_mask];
+}
+
+static inline uint16_t vga_read_word_le(VGACommonState *vga, uint32_t addr)
+{
+    uint32_t offset = addr & vga->vbe_size_mask & ~1;
+    uint16_t *ptr = (uint16_t *)(vga->vram_ptr + offset);
+    return lduw_le_p(ptr);
+}
+
+static inline uint16_t vga_read_word_be(VGACommonState *vga, uint32_t addr)
+{
+    uint32_t offset = addr & vga->vbe_size_mask & ~1;
+    uint16_t *ptr = (uint16_t *)(vga->vram_ptr + offset);
+    return lduw_be_p(ptr);
+}
+
+static inline uint32_t vga_read_dword_le(VGACommonState *vga, uint32_t addr)
+{
+    uint32_t offset = addr & vga->vbe_size_mask & ~3;
+    uint32_t *ptr = (uint32_t *)(vga->vram_ptr + offset);
+    return ldl_le_p(ptr);
+}
+
 /*
 * 4 color mode
 */
-static void vga_draw_line2(VGACommonState *s1, uint8_t *d,
-                           const uint8_t *s, int width)
+static void vga_draw_line2(VGACommonState *vga, uint8_t *d,
+                           uint32_t addr, int width)
 {
    uint32_t plane_mask, *palette, data, v;
    int x;

-    palette = s1->last_palette;
-    plane_mask = mask16[s1->ar[VGA_ATC_PLANE_ENABLE] & 0xf];
+    palette = vga->last_palette;
+    plane_mask = mask16[vga->ar[VGA_ATC_PLANE_ENABLE] & 0xf];
    width >>= 3;
    for(x = 0; x < width; x++) {
-        data = ((uint32_t *)s)[0];
+        data = vga_read_dword_le(vga, addr);
        data &= plane_mask;
        v = expand2[GET_PLANE(data, 0)];
        v |= expand2[GET_PLANE(data, 2)] << 2;
@@ -124,7 +150,7 @@ static void vga_draw_line2(VGACommonState *s1, uint8_t *d,
        ((uint32_t *)d)[6] = palette[(v >> 4) & 0xf];
        ((uint32_t *)d)[7] = palette[(v >> 0) & 0xf];
        d += 32;
-        s += 4;
+        addr += 4;
    }
 }

@@ -134,17 +160,17 @@ static void vga_draw_line2(VGACommonState *s1, uint8_t *d,
 /*
 * 4 color mode, dup2 horizontal
 */
-static void vga_draw_line2d2(VGACommonState *s1, uint8_t *d,
-                             const uint8_t *s, int width)
+static void vga_draw_line2d2(VGACommonState *vga, uint8_t *d,
+                             uint32_t addr, int width)
 {
    uint32_t plane_mask, *palette, data, v;
    int x;

-    palette = s1->last_palette;
-    plane_mask = mask16[s1->ar[VGA_ATC_PLANE_ENABLE] & 0xf];
+    palette = vga->last_palette;
+    plane_mask = mask16[vga->ar[VGA_ATC_PLANE_ENABLE] & 0xf];
    width >>= 3;
    for(x = 0; x < width; x++) {
-        data = ((uint32_t *)s)[0];
+        data = vga_read_dword_le(vga, addr);
        data &= plane_mask;
        v = expand2[GET_PLANE(data, 0)];
        v |= expand2[GET_PLANE(data, 2)] << 2;
@@ -160,24 +186,24 @@ static void vga_draw_line2d2(VGACommonState *s1, uint8_t *d,
        PUT_PIXEL2(d, 6, palette[(v >> 4) & 0xf]);
        PUT_PIXEL2(d, 7, palette[(v >> 0) & 0xf]);
        d += 64;
-        s += 4;
+        addr += 4;
    }
 }

 /*
 * 16 color mode
 */
-static void vga_draw_line4(VGACommonState *s1, uint8_t *d,
-                           const uint8_t *s, int width)
+static void vga_draw_line4(VGACommonState *vga, uint8_t *d,
+                           uint32_t addr, int width)
 {
    uint32_t plane_mask, data, v, *palette;
    int x;

-    palette = s1->last_palette;
-    plane_mask = mask16[s1->ar[VGA_ATC_PLANE_ENABLE] & 0xf];
+    palette = vga->last_palette;
+    plane_mask = mask16[vga->ar[VGA_ATC_PLANE_ENABLE] & 0xf];
    width >>= 3;
    for(x = 0; x < width; x++) {
-        data = ((uint32_t *)s)[0];
+        data = vga_read_dword_le(vga, addr);
        data &= plane_mask;
        v = expand4[GET_PLANE(data, 0)];
        v |= expand4[GET_PLANE(data, 1)] << 1;
@@ -192,24 +218,24 @@ static void vga_draw_line4(VGACommonState *s1, uint8_t *d,
        ((uint32_t *)d)[6] = palette[(v >> 4) & 0xf];
        ((uint32_t *)d)[7] = palette[(v >> 0) & 0xf];
        d += 32;
-        s += 4;
+        addr += 4;
    }
 }

 /*
 * 16 color mode, dup2 horizontal
 */
-static void vga_draw_line4d2(VGACommonState *s1, uint8_t *d,
-                             const uint8_t *s, int width)
+static void vga_draw_line4d2(VGACommonState *vga, uint8_t *d,
+                             uint32_t addr, int width)
 {
    uint32_t plane_mask, data, v, *palette;
    int x;

-    palette = s1->last_palette;
-    plane_mask = mask16[s1->ar[VGA_ATC_PLANE_ENABLE] & 0xf];
+    palette = vga->last_palette;
+    plane_mask = mask16[vga->ar[VGA_ATC_PLANE_ENABLE] & 0xf];
    width >>= 3;
    for(x = 0; x < width; x++) {
-        data = ((uint32_t *)s)[0];
+        data = vga_read_dword_le(vga, addr);
        data &= plane_mask;
        v = expand4[GET_PLANE(data, 0)];
        v |= expand4[GET_PLANE(data, 1)] << 1;
@@ -224,7 +250,7 @@ static void vga_draw_line4d2(VGACommonState *s1, uint8_t *d,
        PUT_PIXEL2(d, 6, palette[(v >> 4) & 0xf]);
        PUT_PIXEL2(d, 7, palette[(v >> 0) & 0xf]);
        d += 64;
-        s += 4;
+        addr += 4;
    }
 }

@@ -233,21 +259,21 @@ static void vga_draw_line4d2(VGACommonState *s1, uint8_t *d,
 *
 * XXX: add plane_mask support (never used in standard VGA modes)
 */
-static void vga_draw_line8d2(VGACommonState *s1, uint8_t *d,
-                             const uint8_t *s, int width)
+static void vga_draw_line8d2(VGACommonState *vga, uint8_t *d,
+                             uint32_t addr, int width)
 {
    uint32_t *palette;
    int x;

-    palette = s1->last_palette;
+    palette = vga->last_palette;
    width >>= 3;
    for(x = 0; x < width; x++) {
-        PUT_PIXEL2(d, 0, palette[s[0]]);
-        PUT_PIXEL2(d, 1, palette[s[1]]);
-        PUT_PIXEL2(d, 2, palette[s[2]]);
-        PUT_PIXEL2(d, 3, palette[s[3]]);
+        PUT_PIXEL2(d, 0, palette[vga_read_byte(vga, addr + 0)]);
+        PUT_PIXEL2(d, 1, palette[vga_read_byte(vga, addr + 1)]);
+        PUT_PIXEL2(d, 2, palette[vga_read_byte(vga, addr + 2)]);
+        PUT_PIXEL2(d, 3, palette[vga_read_byte(vga, addr + 3)]);
        d += 32;
-        s += 4;
+        addr += 4;
    }
 }

@@ -256,63 +282,63 @@ static void vga_draw_line8d2(VGACommonState *s1, uint8_t *d,
 *
 * XXX: add plane_mask support (never used in standard VGA modes)
 */
-static void vga_draw_line8(VGACommonState *s1, uint8_t *d,
-                           const uint8_t *s, int width)
+static void vga_draw_line8(VGACommonState *vga, uint8_t *d,
+                           uint32_t addr, int width)
 {
    uint32_t *palette;
    int x;

-    palette = s1->last_palette;
+    palette = vga->last_palette;
    width >>= 3;
    for(x = 0; x < width; x++) {
-        ((uint32_t *)d)[0] = palette[s[0]];
-        ((uint32_t *)d)[1] = palette[s[1]];
-        ((uint32_t *)d)[2] = palette[s[2]];
-        ((uint32_t *)d)[3] = palette[s[3]];
-        ((uint32_t *)d)[4] = palette[s[4]];
-        ((uint32_t *)d)[5] = palette[s[5]];
-        ((uint32_t *)d)[6] = palette[s[6]];
-        ((uint32_t *)d)[7] = palette[s[7]];
+        ((uint32_t *)d)[0] = palette[vga_read_byte(vga, addr + 0)];
+        ((uint32_t *)d)[1] = palette[vga_read_byte(vga, addr + 1)];
+        ((uint32_t *)d)[2] = palette[vga_read_byte(vga, addr + 2)];
+        ((uint32_t *)d)[3] = palette[vga_read_byte(vga, addr + 3)];
+        ((uint32_t *)d)[4] = palette[vga_read_byte(vga, addr + 4)];
+        ((uint32_t *)d)[5] = palette[vga_read_byte(vga, addr + 5)];
+        ((uint32_t *)d)[6] = palette[vga_read_byte(vga, addr + 6)];
+        ((uint32_t *)d)[7] = palette[vga_read_byte(vga, addr + 7)];
        d += 32;
-        s += 8;
+        addr += 8;
    }
 }

 /*
 * 15 bit color
 */
-static void vga_draw_line15_le(VGACommonState *s1, uint8_t *d,
-                               const uint8_t *s, int width)
+static void vga_draw_line15_le(VGACommonState *vga, uint8_t *d,
+                               uint32_t addr, int width)
 {
    int w;
    uint32_t v, r, g, b;

    w = width;
    do {
-        v = lduw_le_p((void *)s);
+        v = vga_read_word_le(vga, addr);
        r = (v >> 7) & 0xf8;
        g = (v >> 2) & 0xf8;
        b = (v << 3) & 0xf8;
        ((uint32_t *)d)[0] = rgb_to_pixel32(r, g, b);
-        s += 2;
+        addr += 2;
        d += 4;
    } while (--w != 0);
 }

-static void vga_draw_line15_be(VGACommonState *s1, uint8_t *d,
-                               const uint8_t *s, int width)
+static void vga_draw_line15_be(VGACommonState *vga, uint8_t *d,
+                               uint32_t addr, int width)
 {
    int w;
    uint32_t v, r, g, b;

    w = width;
    do {
-        v = lduw_be_p((void *)s);
+        v = vga_read_word_be(vga, addr);
        r = (v >> 7) & 0xf8;
        g = (v >> 2) & 0xf8;
        b = (v << 3) & 0xf8;
        ((uint32_t *)d)[0] = rgb_to_pixel32(r, g, b);
-        s += 2;
+        addr += 2;
        d += 4;
    } while (--w != 0);
 }
@@ -320,38 +346,38 @@ static void vga_draw_line15_be(VGACommonState *s1, uint8_t *d,
 /*
 * 16 bit color
 */
-static void vga_draw_line16_le(VGACommonState *s1, uint8_t *d,
-                               const uint8_t *s, int width)
+static void vga_draw_line16_le(VGACommonState *vga, uint8_t *d,
+                               uint32_t addr, int width)
 {
    int w;
    uint32_t v, r, g, b;

    w = width;
    do {
-        v = lduw_le_p((void *)s);
+        v = vga_read_word_le(vga, addr);
        r = (v >> 8) & 0xf8;
        g = (v >> 3) & 0xfc;
        b = (v << 3) & 0xf8;
        ((uint32_t *)d)[0] = rgb_to_pixel32(r, g, b);
-        s += 2;
+        addr += 2;
        d += 4;
    } while (--w != 0);
 }

-static void vga_draw_line16_be(VGACommonState *s1, uint8_t *d,
-                               const uint8_t *s, int width)
+static void vga_draw_line16_be(VGACommonState *vga, uint8_t *d,
+                               uint32_t addr, int width)
 {
    int w;
    uint32_t v, r, g, b;

    w = width;
    do {
-        v = lduw_be_p((void *)s);
+        v = vga_read_word_be(vga, addr);
        r = (v >> 8) & 0xf8;
        g = (v >> 3) & 0xfc;
        b = (v << 3) & 0xf8;
        ((uint32_t *)d)[0] = rgb_to_pixel32(r, g, b);
-        s += 2;
+        addr += 2;
        d += 4;
    } while (--w != 0);
 }
@@ -359,36 +385,36 @@ static void vga_draw_line16_be(VGACommonState *s1, uint8_t *d,
 /*
 * 24 bit color
 */
-static void vga_draw_line24_le(VGACommonState *s1, uint8_t *d,
-                               const uint8_t *s, int width)
+static void vga_draw_line24_le(VGACommonState *vga, uint8_t *d,
+                               uint32_t addr, int width)
 {
    int w;
    uint32_t r, g, b;

    w = width;
    do {
-        b = s[0];
-        g = s[1];
-        r = s[2];
+        b = vga_read_byte(vga, addr + 0);
+        g = vga_read_byte(vga, addr + 1);
+        r = vga_read_byte(vga, addr + 2);
        ((uint32_t *)d)[0] = rgb_to_pixel32(r, g, b);
-        s += 3;
+        addr += 3;
        d += 4;
    } while (--w != 0);
 }

-static void vga_draw_line24_be(VGACommonState *s1, uint8_t *d,
-                               const uint8_t *s, int width)
+static void vga_draw_line24_be(VGACommonState *vga, uint8_t *d,
+                               uint32_t addr, int width)
 {
    int w;
    uint32_t r, g, b;

    w = width;
    do {
-        r = s[0];
-        g = s[1];
-        b = s[2];
+        r = vga_read_byte(vga, addr + 0);
+        g = vga_read_byte(vga, addr + 1);
+        b = vga_read_byte(vga, addr + 2);
        ((uint32_t *)d)[0] = rgb_to_pixel32(r, g, b);
-        s += 3;
+        addr += 3;
        d += 4;
    } while (--w != 0);
 }
@@ -396,44 +422,36 @@ static void vga_draw_line24_be(VGACommonState *s1, uint8_t *d,
 /*
 * 32 bit color
 */
-static void vga_draw_line32_le(VGACommonState *s1, uint8_t *d,
-                               const uint8_t *s, int width)
+static void vga_draw_line32_le(VGACommonState *vga, uint8_t *d,
+                               uint32_t addr, int width)
 {
-#ifndef HOST_WORDS_BIGENDIAN
-    memcpy(d, s, width * 4);
-#else
    int w;
    uint32_t r, g, b;

    w = width;
    do {
-        b = s[0];
-        g = s[1];
-        r = s[2];
+        b = vga_read_byte(vga, addr + 0);
+        g = vga_read_byte(vga, addr + 1);
+        r = vga_read_byte(vga, addr + 2);
        ((uint32_t *)d)[0] = rgb_to_pixel32(r, g, b);
-        s += 4;
+        addr += 4;
        d += 4;
    } while (--w != 0);
-#endif
 }

-static void vga_draw_line32_be(VGACommonState *s1, uint8_t *d,
-                               const uint8_t *s, int width)
+static void vga_draw_line32_be(VGACommonState *vga, uint8_t *d,
+                               uint32_t addr, int width)
 {
-#ifdef HOST_WORDS_BIGENDIAN
-    memcpy(d, s, width * 4);
-#else
    int w;
    uint32_t r, g, b;

    w = width;
    do {
-        r = s[1];
-        g = s[2];
-        b = s[3];
+        r = vga_read_byte(vga, addr + 1);
+        g = vga_read_byte(vga, addr + 2);
+        b = vga_read_byte(vga, addr + 3);
        ((uint32_t *)d)[0] = rgb_to_pixel32(r, g, b);
-        s += 4;
+        addr += 4;
        d += 4;
    } while (--w != 0);
-#endif
 }
--- a/hw/display/vga.c
+++ b/hw/display/vga.c
@@ -149,6 +149,11 @@ static inline bool vbe_enabled(VGACommonState *s)
    return s->vbe_regs[VBE_DISPI_INDEX_ENABLE] & VBE_DISPI_ENABLED;
 }

+static inline uint8_t sr(VGACommonState *s, int idx)
+{
+    return vbe_enabled(s) ? s->sr_vbe[idx] : s->sr[idx];
+}
+
 static void vga_update_memory_access(VGACommonState *s)
 {
    hwaddr base, offset, size;
@@ -163,8 +168,8 @@ static void vga_update_memory_access(VGACommonState *s)
        s->has_chain4_alias = false;
        s->plane_updated = 0xf;
    }
-    if ((s->sr[VGA_SEQ_PLANE_WRITE] & VGA_SR02_ALL_PLANES) ==
-        VGA_SR02_ALL_PLANES && s->sr[VGA_SEQ_MEMORY_MODE] & VGA_SR04_CHN_4M) {
+    if ((sr(s, VGA_SEQ_PLANE_WRITE) & VGA_SR02_ALL_PLANES) ==
+        VGA_SR02_ALL_PLANES && sr(s, VGA_SEQ_MEMORY_MODE) & VGA_SR04_CHN_4M) {
        offset = 0;
        switch ((s->gr[VGA_GFX_MISC] >> 2) & 3) {
        case 0:
@@ -234,7 +239,7 @@ static void vga_precise_update_retrace_info(VGACommonState *s)
          ((s->cr[VGA_CRTC_OVERFLOW] >> 6) & 2)) << 8);
    vretr_end_line = s->cr[VGA_CRTC_V_SYNC_END] & 0xf;

-    clocking_mode = (s->sr[VGA_SEQ_CLOCK_MODE] >> 3) & 1;
+    clocking_mode = (sr(s, VGA_SEQ_CLOCK_MODE) >> 3) & 1;
    clock_sel = (s->msr >> 2) & 3;
    dots = (s->msr & 1) ? 8 : 9;

@@ -486,7 +491,6 @@ void vga_ioport_write(void *opaque, uint32_t addr, uint32_t val)
        printf("vga: write SR%x = 0x%02x\n", s->sr_index, val);
 #endif
        s->sr[s->sr_index] = val & sr_mask[s->sr_index];
-        vbe_update_vgaregs(s);
        if (s->sr_index == VGA_SEQ_CLOCK_MODE) {
            s->update_retrace_info(s);
        }
@@ -680,13 +684,13 @@ static void vbe_update_vgaregs(VGACommonState *s)

    if (s->vbe_regs[VBE_DISPI_INDEX_BPP] == 4) {
        shift_control = 0;
-        s->sr[VGA_SEQ_CLOCK_MODE] &= ~8; /* no double line */
+        s->sr_vbe[VGA_SEQ_CLOCK_MODE] &= ~8; /* no double line */
    } else {
        shift_control = 2;
        /* set chain 4 mode */
-        s->sr[VGA_SEQ_MEMORY_MODE] |= VGA_SR04_CHN_4M;
+        s->sr_vbe[VGA_SEQ_MEMORY_MODE] |= VGA_SR04_CHN_4M;
        /* activate all planes */
-        s->sr[VGA_SEQ_PLANE_WRITE] |= VGA_SR02_ALL_PLANES;
+        s->sr_vbe[VGA_SEQ_PLANE_WRITE] |= VGA_SR02_ALL_PLANES;
    }
    s->gr[VGA_GFX_MODE] = (s->gr[VGA_GFX_MODE] & ~0x60) |
        (shift_control << 5);
@@ -836,7 +840,7 @@ uint32_t vga_mem_readb(VGACommonState *s, hwaddr addr)
        break;
    }

-    if (s->sr[VGA_SEQ_MEMORY_MODE] & VGA_SR04_CHN_4M) {
+    if (sr(s, VGA_SEQ_MEMORY_MODE) & VGA_SR04_CHN_4M) {
        /* chain 4 mode : simplest access */
        assert(addr < s->vram_size);
        ret = s->vram_ptr[addr];
@@ -904,11 +908,11 @@ void vga_mem_writeb(VGACommonState *s, hwaddr addr, uint32_t val)
        break;
    }

-    if (s->sr[VGA_SEQ_MEMORY_MODE] & VGA_SR04_CHN_4M) {
+    if (sr(s, VGA_SEQ_MEMORY_MODE) & VGA_SR04_CHN_4M) {
        /* chain 4 mode : simplest access */
        plane = addr & 3;
        mask = (1 << plane);
-        if (s->sr[VGA_SEQ_PLANE_WRITE] & mask) {
+        if (sr(s, VGA_SEQ_PLANE_WRITE) & mask) {
            assert(addr < s->vram_size);
            s->vram_ptr[addr] = val;
 #ifdef DEBUG_VGA_MEM
@@ -921,7 +925,7 @@ void vga_mem_writeb(VGACommonState *s, hwaddr addr, uint32_t val)
        /* odd/even mode (aka text mode mapping) */
        plane = (s->gr[VGA_GFX_PLANE_READ] & 2) | (addr & 1);
        mask = (1 << plane);
-        if (s->sr[VGA_SEQ_PLANE_WRITE] & mask) {
+        if (sr(s, VGA_SEQ_PLANE_WRITE) & mask) {
            addr = ((addr & ~1) << 1) | plane;
            if (addr >= s->vram_size) {
                return;
@@ -996,7 +1000,7 @@ void vga_mem_writeb(VGACommonState *s, hwaddr addr, uint32_t val)

    do_write:
        /* mask data according to sr[2] */
-        mask = s->sr[VGA_SEQ_PLANE_WRITE];
+        mask = sr(s, VGA_SEQ_PLANE_WRITE);
        s->plane_updated |= mask; /* only used to detect font change */
        write_mask = mask16[mask];
        if (addr * sizeof(uint32_t) >= s->vram_size) {
@@ -1014,7 +1018,7 @@ void vga_mem_writeb(VGACommonState *s, hwaddr addr, uint32_t val)
 }

 typedef void vga_draw_line_func(VGACommonState *s1, uint8_t *d,
-                                const uint8_t *s, int width);
+                                uint32_t srcaddr, int width);

 #include "vga-helpers.h"

@@ -1152,10 +1156,10 @@ static void vga_get_text_resolution(VGACommonState *s, int *pwidth, int *pheight
    /* total width & height */
    cheight = (s->cr[VGA_CRTC_MAX_SCAN] & 0x1f) + 1;
    cwidth = 8;
-    if (!(s->sr[VGA_SEQ_CLOCK_MODE] & VGA_SR01_CHAR_CLK_8DOTS)) {
+    if (!(sr(s, VGA_SEQ_CLOCK_MODE) & VGA_SR01_CHAR_CLK_8DOTS)) {
        cwidth = 9;
    }
-    if (s->sr[VGA_SEQ_CLOCK_MODE] & 0x08) {
+    if (sr(s, VGA_SEQ_CLOCK_MODE) & 0x08) {
        cwidth = 16; /* NOTE: no 18 pixel wide */
    }
    width = (s->cr[VGA_CRTC_H_DISP] + 1);
@@ -1197,7 +1201,7 @@ static void vga_draw_text(VGACommonState *s, int full_update)
    int64_t now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);

    /* compute font data address (in plane 2) */
-    v = s->sr[VGA_SEQ_CHARACTER_MAP];
+    v = sr(s, VGA_SEQ_CHARACTER_MAP);
    offset = (((v >> 4) & 1) | ((v << 1) & 6)) * 8192 * 4 + 2;
    if (offset != s->font_offsets[0]) {
        s->font_offsets[0] = offset;
@@ -1289,6 +1293,9 @@ static void vga_draw_text(VGACommonState *s, int full_update)
        cx_min = width;
        cx_max = -1;
        for(cx = 0; cx < width; cx++) {
+            if (src + sizeof(uint16_t) > s->vram_ptr + s->vram_size) {
+                break;
+            }
            ch_attr = *(uint16_t *)src;
            if (full_update || ch_attr != *ch_attr_ptr || src == cursor_ptr) {
                if (cx < cx_min)
@@ -1443,6 +1450,14 @@ void vga_invalidate_scanlines(VGACommonState *s, int y1, int y2)
    }
 }

+static bool vga_scanline_invalidated(VGACommonState *s, int y)
+{
+    if (y >= VGA_MAX_HEIGHT) {
+        return false;
+    }
+    return s->invalidated_y_table[y >> 5] & (1 << (y & 0x1f));
+}
+
 void vga_sync_dirty_bitmap(VGACommonState *s)
 {
    memory_region_sync_dirty_bitmap(&s->vram);
@@ -1465,13 +1480,14 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
 {
    DisplaySurface *surface = qemu_console_surface(s->con);
    int y1, y, update, linesize, y_start, double_scan, mask, depth;
-    int width, height, shift_control, line_offset, bwidth, bits;
-    ram_addr_t page0, page1, page_min, page_max;
+    int width, height, shift_control, bwidth, bits;
+    ram_addr_t page0, page1, region_start, region_end;
+    DirtyBitmapSnapshot *snap = NULL;
    int disp_width, multi_scan, multi_run;
    uint8_t *d;
    uint32_t v, addr1, addr;
    vga_draw_line_func *vga_draw_line = NULL;
-    bool share_surface;
+    bool share_surface, force_shadow = false;
    pixman_format_code_t format;
 #ifdef HOST_WORDS_BIGENDIAN
    bool byteswap = !s->big_endian_fb;
@@ -1481,11 +1497,34 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)

    full_update |= update_basic_params(s);

-    if (!full_update)
-        vga_sync_dirty_bitmap(s);
-
    s->get_resolution(s, &width, &height);
    disp_width = width;
+    depth = s->get_bpp(s);
+
+    region_start = (s->start_addr * 4);
+    region_end = region_start + (ram_addr_t)s->line_offset * height;
+    region_end += width * depth / 8; /* scanline length */
+    region_end -= s->line_offset;
+    if (region_end > s->vbe_size || depth == 0 || depth == 15) {
+        /*
+         * We land here on:
+         *  - wraps around (can happen with cirrus vbe modes)
+         *  - depth == 0 (256 color palette video mode)
+         *  - depth == 15
+         *
+         * Take the safe and slow route:
+         *   - create a dirty bitmap snapshot for all vga memory.
+         *   - force shadowing (so all vga memory access goes
+         *     through vga_read_*() helpers).
+         *
+         * Given this affects only vga features which are pretty much
+         * unused by modern guests there should be no performance
+         * impact.
+         */
+        region_start = 0;
+        region_end = s->vbe_size;
+        force_shadow = true;
+    }

    shift_control = (s->gr[VGA_GFX_MODE] >> 5) & 3;
    double_scan = (s->cr[VGA_CRTC_MAX_SCAN] >> 7);
@@ -1506,17 +1545,15 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
    }

    if (shift_control == 0) {
-        if (s->sr[VGA_SEQ_CLOCK_MODE] & 8) {
+        if (sr(s, VGA_SEQ_CLOCK_MODE) & 8) {
            disp_width <<= 1;
        }
    } else if (shift_control == 1) {
-        if (s->sr[VGA_SEQ_CLOCK_MODE] & 8) {
+        if (sr(s, VGA_SEQ_CLOCK_MODE) & 8) {
            disp_width <<= 1;
        }
    }

-    depth = s->get_bpp(s);
-
    /*
     * Check whether we can share the surface with the backend
     * or whether we need a shadow surface. We share native
@@ -1526,7 +1563,7 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
    format = qemu_default_pixman_format(depth, !byteswap);
    if (format) {
        share_surface = dpy_gfx_check_format(s->con, format)
-            && !s->force_shadow;
+            && !s->force_shadow && !force_shadow;
    } else {
        share_surface = false;
    }
@@ -1574,7 +1611,7 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)

    if (shift_control == 0) {
        full_update |= update_palette16(s);
-        if (s->sr[VGA_SEQ_CLOCK_MODE] & 8) {
+        if (sr(s, VGA_SEQ_CLOCK_MODE) & 8) {
            v = VGA_DRAW_LINE4D2;
        } else {
            v = VGA_DRAW_LINE4;
@@ -1582,7 +1619,7 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
        bits = 4;
    } else if (shift_control == 1) {
        full_update |= update_palette16(s);
-        if (s->sr[VGA_SEQ_CLOCK_MODE] & 8) {
+        if (sr(s, VGA_SEQ_CLOCK_MODE) & 8) {
            v = VGA_DRAW_LINE2D2;
        } else {
            v = VGA_DRAW_LINE2;
@@ -1625,20 +1662,29 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
        s->cursor_invalidate(s);
    }

-    line_offset = s->line_offset;
 #if 0
    printf("w=%d h=%d v=%d line_offset=%d cr[0x09]=0x%02x cr[0x17]=0x%02x linecmp=%d sr[0x01]=0x%02x\n",
           width, height, v, line_offset, s->cr[9], s->cr[VGA_CRTC_MODE],
-           s->line_compare, s->sr[VGA_SEQ_CLOCK_MODE]);
+           s->line_compare, sr(s, VGA_SEQ_CLOCK_MODE));
 #endif
    addr1 = (s->start_addr * 4);
-    bwidth = (width * bits + 7) / 8;
+    bwidth = DIV_ROUND_UP(width * bits, 8);
    y_start = -1;
-    page_min = -1;
-    page_max = 0;
    d = surface_data(surface);
    linesize = surface_stride(surface);
    y1 = 0;
+
+    if (!full_update) {
+        vga_sync_dirty_bitmap(s);
+        if (s->line_compare < height) {
+            /* split screen mode */
+            region_start = 0;
+        }
+        snap = memory_region_snapshot_and_clear_dirty(&s->vram, region_start,
+                                                      region_end - region_start,
+                                                      DIRTY_MEMORY_VGA);
+    }
+
    for(y = 0; y < height; y++) {
        addr = addr1;
        if (!(s->cr[VGA_CRTC_MODE] & 1)) {
@@ -1651,21 +1697,28 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
            addr = (addr & ~0x8000) | ((y1 & 2) << 14);
        }
        update = full_update;
-        page0 = addr;
-        page1 = addr + bwidth - 1;
-        update |= memory_region_get_dirty(&s->vram, page0, page1 - page0,
-                                          DIRTY_MEMORY_VGA);
-        /* explicit invalidation for the hardware cursor */
-        update |= (s->invalidated_y_table[y >> 5] >> (y & 0x1f)) & 1;
+        page0 = addr & s->vbe_size_mask;
+        page1 = (addr + bwidth - 1) & s->vbe_size_mask;
+        if (full_update) {
+            update = 1;
+        } else if (page1 < page0) {
+            /* scanline wraps from end of video memory to the start */
+            assert(force_shadow);
+            update = memory_region_snapshot_get_dirty(&s->vram, snap,
+                                                      page0, s->vbe_size - page0);
+            update |= memory_region_snapshot_get_dirty(&s->vram, snap,
+                                                       0, page1);
+        } else {
+            update = memory_region_snapshot_get_dirty(&s->vram, snap,
+                                                      page0, page1 - page0);
+        }
+        /* explicit invalidation for the hardware cursor (cirrus only) */
+        update |= vga_scanline_invalidated(s, y);
        if (update) {
            if (y_start < 0)
                y_start = y;
-            if (page0 < page_min)
-                page_min = page0;
-            if (page1 > page_max)
-                page_max = page1;
            if (!(is_buffer_shared(surface))) {
-                vga_draw_line(s, d, s->vram_ptr + addr, width);
+                vga_draw_line(s, d, addr, width);
                if (s->cursor_draw_line)
                    s->cursor_draw_line(s, d, y);
            }
@@ -1680,7 +1733,7 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
        if (!multi_run) {
            mask = (s->cr[VGA_CRTC_MODE] & 3) ^ 3;
            if ((y1 & mask) == mask)
-                addr1 += line_offset;
+                addr1 += s->line_offset;
            y1++;
            multi_run = multi_scan;
        } else {
@@ -1696,14 +1749,8 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
        dpy_gfx_update(s->con, 0, y_start,
                       disp_width, y - y_start);
    }
-    /* reset modified pages */
-    if (page_max >= page_min) {
-        memory_region_reset_dirty(&s->vram,
-                                  page_min,
-                                  page_max - page_min,
-                                  DIRTY_MEMORY_VGA);
-    }
-    memset(s->invalidated_y_table, 0, ((height + 31) >> 5) * 4);
+    g_free(snap);
+    memset(s->invalidated_y_table, 0, sizeof(s->invalidated_y_table));
 }

 static void vga_draw_blank(VGACommonState *s, int full_update)
@@ -1781,6 +1828,7 @@ void vga_common_reset(VGACommonState *s)
 {
    s->sr_index = 0;
    memset(s->sr, '\0', sizeof(s->sr));
+    memset(s->sr_vbe, '\0', sizeof(s->sr_vbe));
    s->gr_index = 0;
    memset(s->gr, '\0', sizeof(s->gr));
    s->ar_index = 0;
@@ -1883,10 +1931,10 @@ static void vga_update_text(void *opaque, console_ch_t *chardata)
        /* total width & height */
        cheight = (s->cr[VGA_CRTC_MAX_SCAN] & 0x1f) + 1;
        cw = 8;
-        if (!(s->sr[VGA_SEQ_CLOCK_MODE] & VGA_SR01_CHAR_CLK_8DOTS)) {
+        if (!(sr(s, VGA_SEQ_CLOCK_MODE) & VGA_SR01_CHAR_CLK_8DOTS)) {
            cw = 9;
        }
-        if (s->sr[VGA_SEQ_CLOCK_MODE] & 0x08) {
+        if (sr(s, VGA_SEQ_CLOCK_MODE) & 0x08) {
            cw = 16; /* NOTE: no 18 pixel wide */
        }
        width = (s->cr[VGA_CRTC_H_DISP] + 1);
@@ -2053,6 +2101,7 @@ static int vga_common_post_load(void *opaque, int version_id)

    /* force refresh */
    s->graphic_mode = -1;
+    vbe_update_vgaregs(s);
    return 0;
 }

@@ -2173,6 +2222,7 @@ void vga_common_init(VGACommonState *s, Object *obj, bool global_vmstate)
    if (!s->vbe_size) {
        s->vbe_size = s->vram_size;
    }
+    s->vbe_size_mask = s->vbe_size - 1;

    s->is_vbe_vmstate = 1;
    memory_region_init_ram(&s->vram, obj, "vga.vram", s->vram_size,
--- a/hw/display/vga_int.h
+++ b/hw/display/vga_int.h
@@ -93,11 +93,13 @@ typedef struct VGACommonState {
    uint32_t vram_size;
    uint32_t vram_size_mb; /* property */
    uint32_t vbe_size;
+    uint32_t vbe_size_mask;
    uint32_t latch;
    bool has_chain4_alias;
    MemoryRegion chain4_alias;
    uint8_t sr_index;
    uint8_t sr[256];
+    uint8_t sr_vbe[256];
    uint8_t gr_index;
    uint8_t gr[256];
    uint8_t ar_index;
--- a/hw/display/virtio-gpu-3d.c
+++ b/hw/display/virtio-gpu-3d.c
@@ -76,10 +76,18 @@ static void virgl_cmd_resource_unref(VirtIOGPU *g,
                                     struct virtio_gpu_ctrl_command *cmd)
 {
    struct virtio_gpu_resource_unref unref;
+    struct iovec *res_iovs = NULL;
+    int num_iovs = 0;

    VIRTIO_GPU_FILL_CMD(unref);
    trace_virtio_gpu_cmd_res_unref(unref.resource_id);

+    virgl_renderer_resource_detach_iov(unref.resource_id,
+                                       &res_iovs,
+                                       &num_iovs);
+    if (res_iovs != NULL && num_iovs != 0) {
+        virtio_gpu_cleanup_mapping_iov(res_iovs, num_iovs);
+    }
    virgl_renderer_resource_unref(unref.resource_id);
 }

@@ -127,7 +135,7 @@ static void virgl_cmd_resource_flush(VirtIOGPU *g,
    trace_virtio_gpu_cmd_res_flush(rf.resource_id,
                                   rf.r.width, rf.r.height, rf.r.x, rf.r.y);

-    for (i = 0; i < VIRTIO_GPU_MAX_SCANOUT; i++) {
+    for (i = 0; i < g->conf.max_outputs; i++) {
        if (g->scanout[i].resource_id != rf.resource_id) {
            continue;
        }
@@ -146,7 +154,7 @@ static void virgl_cmd_set_scanout(VirtIOGPU *g,
    trace_virtio_gpu_cmd_set_scanout(ss.scanout_id, ss.resource_id,
                                     ss.r.width, ss.r.height, ss.r.x, ss.r.y);

-    if (ss.scanout_id >= VIRTIO_GPU_MAX_SCANOUT) {
+    if (ss.scanout_id >= g->conf.max_outputs) {
        qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal scanout id specified %d",
                      __func__, ss.scanout_id);
        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID;
@@ -289,8 +297,11 @@ static void virgl_resource_attach_backing(VirtIOGPU *g,
        return;
    }

-    virgl_renderer_resource_attach_iov(att_rb.resource_id,
-                                       res_iovs, att_rb.nr_entries);
+    ret = virgl_renderer_resource_attach_iov(att_rb.resource_id,
+                                             res_iovs, att_rb.nr_entries);
+
+    if (ret != 0)
+        virtio_gpu_cleanup_mapping_iov(res_iovs, att_rb.nr_entries);
 }

 static void virgl_resource_detach_backing(VirtIOGPU *g,
@@ -345,6 +356,7 @@ static void virgl_cmd_get_capset_info(VirtIOGPU *g,

    VIRTIO_GPU_FILL_CMD(info);

+    memset(&resp, 0, sizeof(resp));
    if (info.capset_index == 0) {
        resp.capset_id = VIRTIO_GPU_CAPSET_VIRGL;
        virgl_renderer_get_cap_set(resp.capset_id,
@@ -368,8 +380,12 @@ static void virgl_cmd_get_capset(VirtIOGPU *g,

    virgl_renderer_get_cap_set(gc.capset_id, &max_ver,
                               &max_size);
-    resp = g_malloc(sizeof(*resp) + max_size);
+    if (!max_size) {
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER;
+        return;
+    }

+    resp = g_malloc0(sizeof(*resp) + max_size);
    resp->hdr.type = VIRTIO_GPU_RESP_OK_CAPSET;
    virgl_renderer_fill_caps(gc.capset_id,
                             gc.capset_version,
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -19,10 +19,13 @@
 #include "hw/virtio/virtio.h"
 #include "hw/virtio/virtio-gpu.h"
 #include "hw/virtio/virtio-bus.h"
+#include "qapi/error.h"

 static struct virtio_gpu_simple_resource*
 virtio_gpu_find_resource(VirtIOGPU *g, uint32_t resource_id);

+static void virtio_gpu_cleanup_mapping(struct virtio_gpu_simple_resource *res);
+
 #ifdef CONFIG_VIRGL
 #include "virglrenderer.h"
 #define VIRGL(_g, _virgl, _simple, ...)                     \
@@ -79,6 +82,7 @@ static void update_cursor_data_virgl(VirtIOGPU *g,

    if (width != s->current_cursor->width ||
        height != s->current_cursor->height) {
+        free(data);
        return;
    }

@@ -323,6 +327,7 @@ static void virtio_gpu_resource_create_2d(VirtIOGPU *g,
        qemu_log_mask(LOG_GUEST_ERROR,
                      "%s: host couldn't handle guest format %d\n",
                      __func__, c2d.format);
+        g_free(res);
        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER;
        return;
    }
@@ -347,6 +352,7 @@ static void virtio_gpu_resource_destroy(VirtIOGPU *g,
                                        struct virtio_gpu_simple_resource *res)
 {
    pixman_image_unref(res->image);
+    virtio_gpu_cleanup_mapping(res);
    QTAILQ_REMOVE(&g->reslist, res, next);
    g_free(res);
 }
@@ -464,7 +470,7 @@ static void virtio_gpu_resource_flush(VirtIOGPU *g,

    pixman_region_init_rect(&flush_region,
                            rf.r.x, rf.r.y, rf.r.width, rf.r.height);
-    for (i = 0; i < VIRTIO_GPU_MAX_SCANOUT; i++) {
+    for (i = 0; i < g->conf.max_outputs; i++) {
        struct virtio_gpu_scanout *scanout;
        pixman_region16_t region, finalregion;
        pixman_box16_t *extents;
@@ -507,6 +513,13 @@ static void virtio_gpu_set_scanout(VirtIOGPU *g,
    trace_virtio_gpu_cmd_set_scanout(ss.scanout_id, ss.resource_id,
                                     ss.r.width, ss.r.height, ss.r.x, ss.r.y);

+    if (ss.scanout_id >= g->conf.max_outputs) {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal scanout id specified %d",
+                      __func__, ss.scanout_id);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID;
+        return;
+    }
+
    g->enable = 1;
    if (ss.resource_id == 0) {
        scanout = &g->scanout[ss.scanout_id];
@@ -516,8 +529,7 @@ static void virtio_gpu_set_scanout(VirtIOGPU *g,
                res->scanout_bitmask &= ~(1 << ss.scanout_id);
            }
        }
-        if (ss.scanout_id == 0 ||
-            ss.scanout_id >= g->conf.max_outputs) {
+        if (ss.scanout_id == 0) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "%s: illegal scanout id specified %d",
                          __func__, ss.scanout_id);
@@ -532,14 +544,6 @@ static void virtio_gpu_set_scanout(VirtIOGPU *g,
    }

    /* create a surface for this scanout */
-    if (ss.scanout_id >= VIRTIO_GPU_MAX_SCANOUT ||
-        ss.scanout_id >= g->conf.max_outputs) {
-        qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal scanout id specified %d",
-                      __func__, ss.scanout_id);
-        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID;
-        return;
-    }
-
    res = virtio_gpu_find_resource(g, ss.resource_id);
    if (!res) {
        qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal resource specified %d\n",
@@ -671,6 +675,11 @@ virtio_gpu_resource_attach_backing(VirtIOGPU *g,
        return;
    }

+    if (res->iov) {
+        cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
+        return;
+    }
+
    ret = virtio_gpu_create_mapping_iov(&ab, cmd, &res->iov);
    if (ret != 0) {
        cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
@@ -929,6 +938,11 @@ static void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)
    bool have_virgl;
    int i;

+    if (g->conf.max_outputs > VIRTIO_GPU_MAX_SCANOUTS) {
+        error_setg(errp, "invalid max_outputs > %d", VIRTIO_GPU_MAX_SCANOUTS);
+        return;
+    }
+
    g->config_size = sizeof(struct virtio_gpu_config);
    g->virtio_config.num_scanouts = g->conf.max_outputs;
    virtio_init(VIRTIO_DEVICE(g), "virtio-gpu", VIRTIO_ID_GPU,
--- a/hw/display/vmware_vga.c
+++ b/hw/display/vmware_vga.c
@@ -66,17 +66,11 @@ struct vmsvga_state_s {
    uint8_t *fifo_ptr;
    unsigned int fifo_size;

-    union {
-        uint32_t *fifo;
-        struct QEMU_PACKED {
-            uint32_t min;
-            uint32_t max;
-            uint32_t next_cmd;
-            uint32_t stop;
-            /* Add registers here when adding capabilities.  */
-            uint32_t fifo[0];
-        } *cmd;
-    };
+    uint32_t *fifo;
+    uint32_t fifo_min;
+    uint32_t fifo_max;
+    uint32_t fifo_next;
+    uint32_t fifo_stop;

 #define REDRAW_FIFO_LEN  512
    struct vmsvga_rect_s {
@@ -198,7 +192,7 @@ enum {
     */
    SVGA_FIFO_MIN = 0,
    SVGA_FIFO_MAX,      /* The distance from MIN to MAX must be at least 10K */
-    SVGA_FIFO_NEXT_CMD,
+    SVGA_FIFO_NEXT,
    SVGA_FIFO_STOP,

    /*
@@ -511,6 +505,8 @@ static inline void vmsvga_cursor_define(struct vmsvga_state_s *s,
    int i, pixels;

    qc = cursor_alloc(c->width, c->height);
+    assert(qc != NULL);
+
    qc->hot_x = c->hot_x;
    qc->hot_y = c->hot_y;
    switch (c->bpp) {
@@ -546,8 +542,6 @@ static inline void vmsvga_cursor_define(struct vmsvga_state_s *s,
 }
 #endif

-#define CMD(f)  le32_to_cpu(s->cmd->f)
-
 static inline int vmsvga_fifo_length(struct vmsvga_state_s *s)
 {
    int num;
@@ -555,21 +549,45 @@ static inline int vmsvga_fifo_length(struct vmsvga_state_s *s)
    if (!s->config || !s->enable) {
        return 0;
    }
-    num = CMD(next_cmd) - CMD(stop);
+
+    s->fifo_min  = le32_to_cpu(s->fifo[SVGA_FIFO_MIN]);
+    s->fifo_max  = le32_to_cpu(s->fifo[SVGA_FIFO_MAX]);
+    s->fifo_next = le32_to_cpu(s->fifo[SVGA_FIFO_NEXT]);
+    s->fifo_stop = le32_to_cpu(s->fifo[SVGA_FIFO_STOP]);
+
+    /* Check range and alignment.  */
+    if ((s->fifo_min | s->fifo_max | s->fifo_next | s->fifo_stop) & 3) {
+        return 0;
+    }
+    if (s->fifo_min < sizeof(uint32_t) * 4) {
+        return 0;
+    }
+    if (s->fifo_max > SVGA_FIFO_SIZE ||
+        s->fifo_min >= SVGA_FIFO_SIZE ||
+        s->fifo_stop >= SVGA_FIFO_SIZE ||
+        s->fifo_next >= SVGA_FIFO_SIZE) {
+        return 0;
+    }
+    if (s->fifo_max < s->fifo_min + 10 * 1024) {
+        return 0;
+    }
+
+    num = s->fifo_next - s->fifo_stop;
    if (num < 0) {
-        num += CMD(max) - CMD(min);
+        num += s->fifo_max - s->fifo_min;
    }
    return num >> 2;
 }

 static inline uint32_t vmsvga_fifo_read_raw(struct vmsvga_state_s *s)
 {
-    uint32_t cmd = s->fifo[CMD(stop) >> 2];
+    uint32_t cmd = s->fifo[s->fifo_stop >> 2];

-    s->cmd->stop = cpu_to_le32(CMD(stop) + 4);
-    if (CMD(stop) >= CMD(max)) {
-        s->cmd->stop = s->cmd->min;
+    s->fifo_stop += 4;
+    if (s->fifo_stop >= s->fifo_max) {
+        s->fifo_stop = s->fifo_min;
    }
+    s->fifo[SVGA_FIFO_STOP] = cpu_to_le32(s->fifo_stop);
    return cmd;
 }

@@ -581,15 +599,15 @@ static inline uint32_t vmsvga_fifo_read(struct vmsvga_state_s *s)
 static void vmsvga_fifo_run(struct vmsvga_state_s *s)
 {
    uint32_t cmd, colour;
-    int args, len;
+    int args, len, maxloop = 1024;
    int x, y, dx, dy, width, height;
    struct vmsvga_cursor_definition_s cursor;
    uint32_t cmd_start;

    len = vmsvga_fifo_length(s);
-    while (len > 0) {
+    while (len > 0 && --maxloop > 0) {
        /* May need to go back to the start of the command if incomplete */
-        cmd_start = s->cmd->stop;
+        cmd_start = s->fifo_stop;

        switch (cmd = vmsvga_fifo_read(s)) {
        case SVGA_CMD_UPDATE:
@@ -660,11 +678,13 @@ static void vmsvga_fifo_run(struct vmsvga_state_s *s)
            cursor.bpp = vmsvga_fifo_read(s);

            args = SVGA_BITMAP_SIZE(x, y) + SVGA_PIXMAP_SIZE(x, y, cursor.bpp);
-            if (cursor.width > 256 ||
-                cursor.height > 256 ||
-                cursor.bpp > 32 ||
-                SVGA_BITMAP_SIZE(x, y) > sizeof cursor.mask ||
-                SVGA_PIXMAP_SIZE(x, y, cursor.bpp) > sizeof cursor.image) {
+            if (cursor.width > 256
+                || cursor.height > 256
+                || cursor.bpp > 32
+                || SVGA_BITMAP_SIZE(x, y)
+                    > sizeof(cursor.mask) / sizeof(cursor.mask[0])
+                || SVGA_PIXMAP_SIZE(x, y, cursor.bpp)
+                    > sizeof(cursor.image) / sizeof(cursor.image[0])) {
                    goto badcmd;
            }

@@ -748,7 +768,8 @@ static void vmsvga_fifo_run(struct vmsvga_state_s *s)
            break;

        rewind:
-            s->cmd->stop = cmd_start;
+            s->fifo_stop = cmd_start;
+            s->fifo[SVGA_FIFO_STOP] = cpu_to_le32(s->fifo_stop);
            break;
        }
    }
@@ -1005,19 +1026,6 @@ static void vmsvga_value_write(void *opaque, uint32_t address, uint32_t value)
    case SVGA_REG_CONFIG_DONE:
        if (value) {
            s->fifo = (uint32_t *) s->fifo_ptr;
-            /* Check range and alignment.  */
-            if ((CMD(min) | CMD(max) | CMD(next_cmd) | CMD(stop)) & 3) {
-                break;
-            }
-            if (CMD(min) < (uint8_t *) s->cmd->fifo - (uint8_t *) s->fifo) {
-                break;
-            }
-            if (CMD(max) > SVGA_FIFO_SIZE) {
-                break;
-            }
-            if (CMD(max) < CMD(min) + 10 * 1024) {
-                break;
-            }
            vga_dirty_log_stop(&s->vga);
        }
        s->config = !!value;
--- a/hw/dma/rc4030.c
+++ b/hw/dma/rc4030.c
@@ -459,7 +459,7 @@ static void rc4030_write(void *opaque, hwaddr addr, uint64_t data,
        break;
    /* Interval timer reload */
    case 0x0228:
-        s->itr = val;
+        s->itr = val & 0x01FF;
        qemu_irq_lower(s->timer_irq);
        set_next_tick(s);
        break;
--- a/hw/i2c/imx_i2c.c
+++ b/hw/i2c/imx_i2c.c
@@ -247,7 +247,7 @@ static void imx_i2c_write(void *opaque, hwaddr offset,
            if (s->address == ADDR_RESET) {
                if (i2c_start_transfer(s->bus, extract32(s->i2dr_write, 1, 7),
                                       extract32(s->i2dr_write, 0, 1))) {
-                    /* if non zero is returned, the adress is not valid */
+                    /* if non zero is returned, the address is not valid */
                    s->i2sr |= I2SR_RXAK;
                } else {
                    s->address = s->i2dr_write;
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -957,6 +957,7 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
        mask = 7;   /* Mask bit 2:0 in the SID field */
        break;
    }
+    mask = ~mask;
    VTD_DPRINTF(INV, "device-selective invalidation source 0x%"PRIx16
                    " mask %"PRIu16, source_id, mask);
    vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id));
--- a/hw/i386/kvm/clock.c
+++ b/hw/i386/kvm/clock.c
@@ -52,7 +52,7 @@ static uint64_t kvmclock_current_nsec(KVMClockState *s)
 {
    CPUState *cpu = first_cpu;
    CPUX86State *env = cpu->env_ptr;
-    hwaddr kvmclock_struct_pa = env->system_time_msr & ~1ULL;
+    hwaddr kvmclock_struct_pa;
    uint64_t migration_tsc = env->tsc;
    struct pvclock_vcpu_time_info time;
    uint64_t delta;
@@ -60,11 +60,14 @@ static uint64_t kvmclock_current_nsec(KVMClockState *s)
    uint64_t nsec_hi;
    uint64_t nsec;

+    cpu_synchronize_state(cpu);
+
    if (!(env->system_time_msr & 1ULL)) {
        /* KVM clock not active */
        return 0;
    }

+    kvmclock_struct_pa = env->system_time_msr & ~1ULL;
    cpu_physical_memory_read(kvmclock_struct_pa, &time, sizeof(time));

    assert(time.tsc_timestamp <= migration_tsc);
--- a/hw/i386/kvm/pci-assign.c
+++ b/hw/i386/kvm/pci-assign.c
@@ -1891,8 +1891,4 @@ static void assigned_dev_load_option_rom(AssignedDevice *dev)
    pci_assign_dev_load_option_rom(&dev->dev, OBJECT(dev), &size,
                                   dev->host.domain, dev->host.bus,
                                   dev->host.slot, dev->host.function);
-
-    if (!size) {
-        error_report("pci-assign: Invalid ROM.");
-    }
 }
--- a/hw/i386/kvmvapic.c
+++ b/hw/i386/kvmvapic.c
@@ -394,7 +394,7 @@ static void patch_instruction(VAPICROMState *s, X86CPU *cpu, target_ulong ip)
    CPUX86State *env = &cpu->env;
    VAPICHandlers *handlers;
    uint8_t opcode[2];
-    uint32_t imm32;
+    uint32_t imm32 = 0;
    target_ulong current_pc = 0;
    target_ulong current_cs_base = 0;
    int current_flags = 0;
--- a/hw/i386/multiboot.c
+++ b/hw/i386/multiboot.c
@@ -221,15 +221,38 @@ int load_multiboot(FWCfgState *fw_cfg,
        uint32_t mh_header_addr = ldl_p(header+i+12);
        uint32_t mh_load_end_addr = ldl_p(header+i+20);
        uint32_t mh_bss_end_addr = ldl_p(header+i+24);
+
        mh_load_addr = ldl_p(header+i+16);
+        if (mh_header_addr < mh_load_addr) {
+            fprintf(stderr, "invalid mh_load_addr address\n");
+            exit(1);
+        }
+        if (mh_load_end_addr > mh_bss_end_addr) {
+            fprintf(stderr, "invalid mh_load_end_addr address\n");
+            exit(1);
+        }
+
        uint32_t mb_kernel_text_offset = i - (mh_header_addr - mh_load_addr);
        uint32_t mb_load_size = 0;
        mh_entry_addr = ldl_p(header+i+28);

        if (mh_load_end_addr) {
+            if (mh_bss_end_addr < mh_load_addr) {
+                fprintf(stderr, "invalid mh_bss_end_addr address\n");
+                exit(1);
+            }
            mb_kernel_size = mh_bss_end_addr - mh_load_addr;
+
+            if (mh_load_end_addr < mh_load_addr) {
+                fprintf(stderr, "invalid mh_load_end_addr address\n");
+                exit(1);
+            }
            mb_load_size = mh_load_end_addr - mh_load_addr;
        } else {
+            if (kernel_file_size < mb_kernel_text_offset) {
+                fprintf(stderr, "invalid kernel_file_size\n");
+                exit(1);
+            }
            mb_kernel_size = kernel_file_size - mb_kernel_text_offset;
            mb_load_size = mb_kernel_size;
        }
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -727,7 +727,32 @@ DEFINE_I440FX_MACHINE(v1_0, "pc-1.0", pc_compat_1_2,


 #define PC_COMPAT_0_15 \
-        PC_COMPAT_1_0
+        PC_COMPAT_1_0 \
+        {\
+            .driver   = "VGA",\
+            .property = "vgamem_mb",\
+            .value    = stringify(16),\
+        },{\
+            .driver   = "vmware-svga",\
+            .property = "vgamem_mb",\
+            .value    = stringify(16),\
+        },{\
+            .driver   = "qxl-vga",\
+            .property = "vgamem_mb",\
+            .value    = stringify(16),\
+        },{\
+            .driver   = "qxl",\
+            .property = "vgamem_mb",\
+            .value    = stringify(16),\
+        },{\
+            .driver   = "isa-cirrus-vga",\
+            .property = "vgamem_mb",\
+            .value    = stringify(16),\
+        },{\
+            .driver   = "cirrus-vga",\
+            .property = "vgamem_mb",\
+            .value    = stringify(16),\
+        },

 static void pc_i440fx_0_15_machine_options(MachineClass *m)
 {
--- a/hw/i386/pci-assign-load-rom.c
+++ b/hw/i386/pci-assign-load-rom.c
@@ -40,6 +40,9 @@ void *pci_assign_dev_load_option_rom(PCIDevice *dev, struct Object *owner,
             domain, bus, slot, function);

    if (stat(rom_file, &st)) {
+        if (errno != ENOENT) {
+            error_report("pci-assign: Invalid ROM.");
+        }
        return NULL;
    }

--- a/hw/i386/xen/xen_platform.c
+++ b/hw/i386/xen/xen_platform.c
@@ -115,6 +115,11 @@ static void unplug_disks(PCIBus *b, PCIDevice *d, void *o)
            && strcmp(d->name, "xen-pci-passthrough") != 0) {
        pci_piix3_xen_ide_unplug(DEVICE(d));
    }
+    else if (pci_get_word(d->config + PCI_CLASS_DEVICE) ==
+            PCI_CLASS_STORAGE_SCSI
+            && strcmp(d->name, "xen-pci-passthrough") != 0) {
+        object_unparent(OBJECT(d));
+    }
 }

 static void pci_unplug_disks(PCIBus *bus)
@@ -134,8 +139,6 @@ static void platform_fixed_ioport_writew(void *opaque, uint32_t addr, uint32_t v
           devices, and bit 2 the non-primary-master IDE devices. */
        if (val & UNPLUG_ALL_IDE_DISKS) {
            DPRINTF("unplug disks\n");
-            blk_drain_all();
-            blk_flush_all();
            pci_unplug_disks(pci_dev->bus);
        }
        if (val & UNPLUG_ALL_NICS) {
@@ -314,6 +317,26 @@ static void xen_platform_ioport_writeb(void *opaque, hwaddr addr,
    case 0: /* Platform flags */
        platform_fixed_ioport_writeb(opaque, 0, (uint32_t)val);
        break;
+    case 4:
+        if (val == 1 && size == 1) {
+            /*
+             * SUSE unplug for Xenlinux
+             * xen-kmp used this since xen-3.0.4, instead of the official protocol from xen-3.3+
+             * It did an unconditional "outl(1, (ioaddr + 4));"
+             * This approach was used until openSUSE 12.3, up to SLE11SP3 and in SLE10.
+             * Starting with openSUSE 13.1, SLE11SP4 and SLE12 the official protocol is used.
+             * pre VMDP 1.7 made use of 4 and 8 depending on how vmdp was configured.
+             * If VMDP was to control both disk and LAN it would use 4.
+             * If it controlled just disk or just LAN, it would use 8 below.
+             */
+            PCIDevice *pci_dev = PCI_DEVICE(s);
+            DPRINTF("unplug disks\n");
+            pci_unplug_disks(pci_dev->bus);
+            DPRINTF("unplug nics\n");
+            pci_unplug_nics(pci_dev->bus);
+            DPRINTF("done\n");
+        }
+        break;
    case 8:
        log_writeb(s, (uint32_t)val);
        break;
--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .6.0
 .6.2